/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2023 Oxide Computer Company
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cpuset.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>

#include <machine/vmm.h>
#include <sys/vmm_kernel.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"


/*
 * The 4 high bits of a given interrupt vector represent its priority.  The
 * same is true for the contents of the TPR when it is used to calculate the
 * ultimate PPR of an APIC - the 4 high bits hold the priority.
 */
#define	PRIO(x)			((x) & 0xf0)

#define	VLAPIC_VERSION		(0x14)

/*
 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_cur_freq, timer_period, timer_fire_when
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)

#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

#define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
		APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
		APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
		APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

static __inline bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

static __inline uint32_t
vlapic_get_id(const struct vlapic *vlapic)
{

	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

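/*
 * Derive the x2APIC logical destination register (LDR) value from the APIC
 * ID: the low 16 bits hold a bit position within the cluster
 * (1 << (id & 0xf)) and the high 16 bits hold the cluster ID (id >> 4).
 */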
static uint32_t
x2apic_ldr(const struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to DFR in x2APIC mode */
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to LDR in x2APIC mode */
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */

		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);
		}
	}

	/*
	 * Clamp CCR value to that programmed in ICR - its theoretical maximum.
	 * Normal operation should never result in this being necessary.  Only
	 * strange circumstances due to state importation as part of instance
	 * save/restore or live-migration require such wariness.
	 */
	if (ccr > lapic->icr_timer) {
		ccr = lapic->icr_timer;
		vlapic->stats.vs_clamp_ccr++;
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

static void
vlapic_update_divider(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	vlapic->timer_cur_freq =
	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
	vlapic->timer_period =
	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	VLAPIC_TIMER_LOCK(vlapic);
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

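/*
 * Mark 'vector' as pending in the IRR (noting its trigger mode in the TMR)
 * so that it may be injected into the vCPU.  Returns the type of notification
 * required to alert that vCPU, if any.
 */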
vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it
		 * will have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued interrupt
	 * can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

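/*
 * Re-run the LVT write handler for every LVT register, re-applying the
 * masking and field-validity rules to the current register contents.
 */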
static void
vlapic_refresh_lvts(struct vlapic *vlapic)
{
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	lapic->lvt_timer |= APIC_LVT_M;
	lapic->lvt_thermal |= APIC_LVT_M;
	lapic->lvt_pcint |= APIC_LVT_M;
	lapic->lvt_lint0 |= APIC_LVT_M;
	lapic->lvt_lint1 |= APIC_LVT_M;
	lapic->lvt_error |= APIC_LVT_M;
	vlapic_refresh_lvts(vlapic);
}

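/*
 * Fire the interrupt described by the LVT entry at index 'lvt'.  Returns 1 if
 * an event (fixed interrupt, NMI, or ExtINT) was generated and 0 if the entry
 * was masked or its delivery mode is unsupported.
 */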
static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

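/*
 * Return the vector of the highest-priority interrupt currently in-service
 * (the highest bit set in the ISR), or 0 if none are in-service.
 */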
static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to value of
				 * 0, indicating that no active ISR was found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

	vlapic->apic_page->ppr = ppr;
}

/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to
 * the priority of that vector, as the vCPU would have been at a lower
 * priority in order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int ppr;

	ppr = PRIO(vec);

	lapic->ppr = ppr;
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

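/*
 * Complete the highest-priority in-service interrupt: clear its bit in the
 * ISR, recompute the PPR, and, for level-triggered vectors, forward the EOI
 * to the vIOAPIC.
 */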
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i;
	uint_t idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		if (isrptr[idx] != 0) {
			bitpos = bsrl(isrptr[idx]);
			vector = i * 32 + bitpos;

			isrptr[idx] &= ~(1 << bitpos);
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	if (!vlapic_enabled(vlapic)) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERRED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic)
{
	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

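/*
 * Callout handler for the vLAPIC timer: fire the timer LVT and, for a
 * periodic timer, compute the next deadline and re-arm the callout.
 */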
static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.  We can depend on the fact that
		 * cyclics (which underlie these callouts) will never be
		 * called early.
		 */
		const hrtime_t now = gethrtime();
		const hrtime_t delta = now - vlapic->timer_fire_when;
		if (delta >= vlapic->timer_period) {
			/*
			 * If we are so behind that we have missed an entire
			 * timer period, reset the time base rather than
			 * attempting to catch up.
			 */
			vlapic->timer_fire_when = now + vlapic->timer_period;
		} else {
			vlapic->timer_fire_when += vlapic->timer_period;
		}
		vlapic_callout_reset(vlapic);
	} else {
		/*
		 * Clear the target time so that logic can distinguish a timer
		 * which has fired (where the value is zero) from one which is
		 * held pending due to the instance being paused (where the
		 * value is non-zero, but the callout is not pending).
		 */
		vlapic->timer_fire_when = 0;
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);
	if (vlapic->timer_period != 0) {
		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
		vlapic_callout_reset(vlapic);
	} else {
		vlapic->timer_fire_when = 0;
		callout_stop(&vlapic->callout);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask.  This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (vlapic_x2mode(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr >> 4);
}

static bool
vlapic_is_icr_valid(uint64_t icrval)
{
	uint32_t mode = icrval & APIC_DELMODE_MASK;
	uint32_t level = icrval & APIC_LEVEL_MASK;
	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
	uint32_t shorthand = icrval & APIC_DEST_MASK;

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (trigger == APIC_TRIGMOD_EDGE)
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
			return (true);
		break;
	case APIC_DELMODE_LOWPRIO:
	case APIC_DELMODE_SMI:
	case APIC_DELMODE_NMI:
	case APIC_DELMODE_INIT:
		if (trigger == APIC_TRIGMOD_EDGE &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)) {
			return (true);
		}
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_ASSERT &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)) {
			return (true);
		}
		/*
		 * A level-triggered deassert INIT is defined in the Intel
		 * Multiprocessor Specification and the Intel Software
		 * Developer Manual.  Per the MPS, a CPU must be sent a level
		 * assert INIT followed by a level deassert INIT; some
		 * operating systems, e.g. FreeBSD or Linux, use that
		 * algorithm.  According to the SDM, a level deassert INIT is
		 * only supported by Pentium and P6 processors.  It is always
		 * sent to all CPUs regardless of the destination or shorthand
		 * field, and it resets the arbitration ID register.  That
		 * register is not software accessible and is only required
		 * for APIC bus arbitration, so the level deassert INIT needs
		 * no emulation and we should ignore it.  The SDM also states
		 * that newer processors do not support the level deassert
		 * INIT, so it is no longer valid there.  As it is defined for
		 * older systems, it can't be invalid per se; otherwise,
		 * backward compatibility would be broken.  However, returning
		 * false here means it will be ignored, which is the desired
		 * behaviour.
		 */
		if (mode == APIC_DELMODE_INIT &&
		    trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_DEASSERT) {
			return (false);
		}
		break;
	case APIC_DELMODE_STARTUP:
		if (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF) {
			return (true);
		}
		break;
	case APIC_DELMODE_RR:
		/* Only available on AMD! */
		if (trigger == APIC_TRIGMOD_EDGE &&
		    shorthand == APIC_DEST_DESTFLD) {
			return (true);
		}
		break;
	case APIC_DELMODE_RESV:
		return (false);
	default:
		panic("vlapic_is_icr_valid: invalid mode 0x%08x", mode);
	}

	return (false);
}

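/*
 * Handle a write to the low half of the ICR: validate the request, compute
 * the set of destination vCPUs, and dispatch the IPI to each of them.
 */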
void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
	int i;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode, dsh;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	/*
	 * Ignore invalid combinations of the icr.
	 */
	if (!vlapic_is_icr_valid(icrval))
		return;

	if (vlapic_x2mode(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	dsh = icrval & APIC_DEST_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		return;
	}

	if (mode == APIC_DELMODE_INIT &&
	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
		/* No work required to deassert INIT */
		return;
	}

	switch (dsh) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest,
		    (icrval & APIC_DESTMODE_LOG) == 0, false,
		    vlapic_x2mode(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		/*
		 * All possible delivery shorthands are covered above.
		 * We should never end up here.
		 */
		panic("unknown delivery shorthand: %x", dsh);
	}

	while ((i = CPU_FFS(&dmask)) != 0) {
		i--;
		CPU_CLR(i, &dmask);
		switch (mode) {
		case APIC_DELMODE_FIXED:
			(void) lapic_intr_edge(vlapic->vm, i, vec);
			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
			    VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vlapic->vm, i,
			    VLAPIC_IPI_RECV, 1);
			break;
		case APIC_DELMODE_NMI:
			(void) vm_inject_nmi(vlapic->vm, i);
			break;
		case APIC_DELMODE_INIT:
			(void) vm_inject_init(vlapic->vm, i);
			break;
		case APIC_DELMODE_STARTUP:
			(void) vm_inject_sipi(vlapic->vm, i, vec);
			break;
		case APIC_DELMODE_LOWPRIO:
		case APIC_DELMODE_SMI:
		default:
			/* Unhandled IPI modes (for now) */
			break;
		}
	}
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
	const int vec = val & 0xff;

	/* self-IPI is only exposed via x2APIC */
	ASSERT(vlapic_x2mode(vlapic));

	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
}

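/*
 * Check for an interrupt pending in the IRR with a priority above the current
 * PPR.  Returns 1 (storing the vector through 'vecptr', if non-NULL) when
 * such an interrupt is ready for injection, and 0 otherwise.
 */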
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx;

	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);

	/*
	 * The only way a fresh vector could be accepted into ISR is if it was
	 * of a higher priority than the current PPR.  With that vector now
	 * in-service, the PPR must be raised.
	 */
	vlapic_raise_ppr(vlapic, vector);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

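/*
 * Read a 32-bit value from the virtual APIC page at the given (4-byte
 * aligned) register offset.  Returns false for write-only or invalid
 * registers, in which case a value of 0 is emitted.
 */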
static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	ASSERT3U(offset & 0x3, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);
	ASSERT3P(outp, !=, NULL);

	uint32_t data = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		data = lapic->ppr;
		break;
	case APIC_OFFSET_LDR:
		data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		data = lapic->icr_lo;
		break;
	case APIC_OFFSET_ICR_HI:
		data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		ASSERT3U(data, ==, *reg);
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
		data = 0;
		break;

	case APIC_OFFSET_SELF_IPI:
	case APIC_OFFSET_EOI:
		/* Write-only register */
		*outp = 0;
		return (false);

	default:
		/* Invalid register */
		*outp = 0;
		return (false);
	}

	*outp = data;
	return (true);
}

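/*
 * Write a 32-bit value to the virtual APIC page at the given (16-byte
 * aligned) register offset, invoking the register-specific handler.  Returns
 * false for read-only or invalid registers.
 */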
static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;

	ASSERT3U(offset & 0xf, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);

	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		vlapic_icrlo_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (vlapic_x2mode(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
		/* Read-only register */
		return (false);

	default:
		/* Invalid register */
		return (false);
	}

	return (true);
}

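/*
 * Return the vLAPIC to its power-on state: default APIC base MSR (with the
 * BSP flag for vcpu 0), cleared ISR/TMR/IRR contents, and all LVT entries
 * masked.
 */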
void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr, *irrptr;

	/* Reset any timer-related state first */
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	lapic->icr_timer = 0;
	lapic->ccr_timer = 0;
	lapic->dcr_timer = 0;
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);

	/*
	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
	 * it is not leftover after the reset.  This is performed after the
	 * APIC timer has been stopped, in case it happened to fire just prior
	 * to being deactivated.
	 */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

	lapic->tpr = 0;
	lapic->apr = 0;
	lapic->ppr = 0;

	lapic->eoi = 0;
	lapic->ldr = 0;
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic->svr_last = lapic->svr;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		atomic_store_rel_int(&isrptr[i * 4], 0);
		atomic_store_rel_int(&tmrptr[i * 4], 0);
		atomic_store_rel_int(&irrptr[i * 4], 0);
	}

	lapic->esr = 0;
	vlapic->esr_pending = 0;
	lapic->icr_lo = 0;
	lapic->icr_hi = 0;

	lapic->lvt_cmci = 0;
	lapic->lvt_timer = 0;
	lapic->lvt_thermal = 0;
	lapic->lvt_pcint = 0;
	lapic->lvt_lint0 = 0;
	lapic->lvt_lint1 = 0;
	lapic->lvt_error = 0;
	vlapic_mask_lvts(vlapic);
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
	callout_init(&vlapic->callout, 1);

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{
	callout_drain(&vlapic->callout);
	mutex_destroy(&vlapic->timer_lock);
}

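/*
 * Service a read from the memory-mapped xAPIC page.  The containing register
 * is read as a whole 32-bit word, with small and/or unaligned accesses
 * shifted into position; accesses beyond the first word of a register
 * yield 0.
 */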
int
vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		*valp = UINT64_MAX;
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	uint32_t raw = 0;
	(void) vlapic_read(vlapic, off & ~0xf, &raw);

	/* Shift and mask reads which are small and/or unaligned */
	const uint8_t align = off & 0xf;
	if (align < 4) {
		*valp = (uint64_t)raw << (align * 8);
	} else {
		*valp = 0;
	}

	return (0);
}

int
vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	/* Ignore writes which are not 32-bits wide and 16-byte aligned */
	if ((off & 0xf) != 0 || size != 4) {
		return (0);
	}

	(void) vlapic_write(vlapic, off, (uint32_t)val);
	return (0);
}

/* Should attempts to change the APIC base address be rejected with a #GP? */
int vlapic_gp_on_addr_change = 1;

static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	const uint64_t diff = vlapic->msr_apicbase ^ val;

	/*
	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
	 * modes is more polished, it will remain off-limits from being altered
	 * by the guest.
	 */
	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	if ((diff & reserved_bits) != 0) {
		return (VMR_GP);
	}

	/* We do not presently allow the LAPIC access address to be modified */
	if ((diff & APICBASE_ADDR_MASK) != 0) {
		/*
		 * Explicitly rebuffing such requests with a #GP is the most
		 * straightforward way to handle the situation, but certain
		 * consumers (such as the KVM unit tests) may balk at the
		 * otherwise unexpected exception.
		 */
		if (vlapic_gp_on_addr_change) {
			return (VMR_GP);
		}

		/* If silence is required, just ignore the address change */
		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
	}

	vlapic->msr_apicbase = val;
	return (VMR_OK);
}

static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
	ASSERT3U(msr, >=, MSR_APIC_000);
	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

	return ((msr - MSR_APIC_000) << 4);
}

bool
vlapic_owned_msr(uint32_t msr)
{
	if (msr == MSR_APICBASE) {
		return (true);
	}
	if (msr >= MSR_APIC_000 &&
	    msr < (MSR_APIC_000 + 0x100)) {
		return (true);
	}
	return (false);
}

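/*
 * Service an RDMSR of the APIC base MSR or, when in x2APIC mode, of one of
 * the MSR-mapped APIC registers.  The ICR is read as a single 64-bit value
 * through its low-half MSR.
 */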
vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
	ASSERT(vlapic_owned_msr(msr));
	ASSERT3P(valp, !=, NULL);

	if (msr == MSR_APICBASE) {
		*valp = vlapic->msr_apicbase;
		return (VMR_OK);
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	uint64_t out = 0;
	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Read from ICR register gets entire (64-bit) value */
		uint32_t low = 0, high = 0;
		bool valid;

		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
		VERIFY(valid);
		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
		VERIFY(valid);

		*valp = ((uint64_t)high << 32) | low;
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	default:
		break;
	}
	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
		return (VMR_GP);
	}
	*valp = out;
	return (VMR_OK);
}

vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
	ASSERT(vlapic_owned_msr(msr));

	if (msr == MSR_APICBASE) {
		return (vlapic_set_apicbase(vlapic, val));
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Write to ICR register sets entire (64-bit) value */
		bool valid;

		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
		VERIFY(valid);
		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
		VERIFY(valid);
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	case APIC_OFFSET_ESR:
		/* Only 0 may be written from x2APIC mode */
		if (val != 0) {
			return (VMR_GP);
		}
		break;
	default:
		break;
	}
	if (!vlapic_write(vlapic, reg, val)) {
		return (VMR_GP);
	}
	return (VMR_OK);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (vlapic_x2mode(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

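/*
 * Deliver an interrupt sourced from the vIOAPIC or MSI to the set of vCPUs
 * matching the given destination, honoring fixed, lowest-priority, and
 * ExtINT delivery modes.
 */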
void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		/* Invalid delivery mode */
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			(void) vm_inject_extint(vm, vcpuid);
		} else {
			(void) lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * and the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		poke_cpu(hostcpu);
}

void
vlapic_localize_resources(struct vlapic *vlapic)
{
	vmm_glue_callout_localize(&vlapic->callout);
}

void
vlapic_pause(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_resume(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	if (vlapic->timer_fire_when != 0) {
		vlapic_callout_reset(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

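/*
 * vmm-data read handler: export the vLAPIC state (APIC base MSR, pending ESR
 * bits, timer deadline, and APIC page registers) in the v1 format used for
 * instance save/restore and live migration.
 */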
static int
vlapic_data_read(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	struct vdi_lapic_v1 *out = req->vdr_data;

	VLAPIC_TIMER_LOCK(vlapic);

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	out->vl_msr_apicbase = vlapic->msr_apicbase;
	out->vl_esr_pending = vlapic->esr_pending;
	if (vlapic->timer_fire_when != 0) {
		out->vl_timer_target =
		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
	} else {
		out->vl_timer_target = 0;
	}

	const struct LAPIC *lapic = vlapic->apic_page;
	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;

	/*
	 * While this might appear, at first glance, to be missing some fields,
	 * they are intentionally omitted:
	 * - PPR: its contents are always generated at runtime
	 * - EOI: write-only, and contents are ignored after handling
	 * - RRD: (aka RRR) read-only and always 0
	 * - CCR: calculated from underlying timer data
	 */
	out_page->vlp_id = lapic->id;
	out_page->vlp_version = lapic->version;
	out_page->vlp_tpr = lapic->tpr;
	out_page->vlp_apr = lapic->apr;
	out_page->vlp_ldr = lapic->ldr;
	out_page->vlp_dfr = lapic->dfr;
	out_page->vlp_svr = lapic->svr;
	out_page->vlp_esr = lapic->esr;
	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
	out_page->vlp_icr_timer = lapic->icr_timer;
	out_page->vlp_dcr_timer = lapic->dcr_timer;

	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
	out_page->vlp_lvt_timer = lapic->lvt_timer;
	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
	out_page->vlp_lvt_error = lapic->lvt_error;

	const uint32_t *isrptr = &lapic->isr0;
	const uint32_t *tmrptr = &lapic->tmr0;
	const uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		out_page->vlp_isr[i] = isrptr[i * 4];
		out_page->vlp_tmr[i] = tmrptr[i * 4];
		out_page->vlp_irr[i] = irrptr[i * 4];
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static uint8_t
popc8(uint8_t val)
{
	uint8_t cnt;

	for (cnt = 0; val != 0; val &= (val - 1)) {
		cnt++;
	}
	return (cnt);
}

/*
 * Descriptions for the various failures which can occur when validating
 * to-be-written vlapic state.
 */
enum vlapic_validation_error {
	VVE_OK,
	VVE_BAD_ID,
	VVE_BAD_VERSION,
	VVE_BAD_MSR_BASE,
	VVE_BAD_ESR,
	VVE_BAD_TPR,
	VVE_LOW_VECTOR,
	VVE_ISR_PRIORITY,
};

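/*
 * Check that to-be-loaded vLAPIC state is self-consistent and compatible
 * with the current configuration before it is applied.
 */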
static enum vlapic_validation_error
vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
{
	ASSERT(req->vdr_version == 1 &&
	    req->vdr_len >= sizeof (struct vdi_lapic_v1));
	const struct vdi_lapic_v1 *src = req->vdr_data;

	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
		return (VVE_BAD_ESR);
	}

	/* Use the same restrictions as the wrmsr accessor for now */
	const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
	if ((diff & apicbase_reserved) != 0) {
		return (VVE_BAD_MSR_BASE);
	}

	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	/*
	 * Demand that ID match for now.  This can be further updated when some
	 * of the x2apic handling is improved.
	 */
	if (page->vlp_id != vlapic_get_id(vlapic)) {
		return (VVE_BAD_ID);
	}

	if (page->vlp_version != vlapic->apic_page->version) {
		return (VVE_BAD_VERSION);
	}

	if (page->vlp_tpr > 0xff) {
		return (VVE_BAD_TPR);
	}

	/* Vectors 0-15 are not expected to be handled by the lapic */
	if ((page->vlp_isr[0] & 0xffff) != 0 ||
	    (page->vlp_irr[0] & 0xffff) != 0 ||
	    (page->vlp_tmr[0] & 0xffff) != 0) {
		return (VVE_LOW_VECTOR);
	}

	/* Only one interrupt should be in-service for each priority level */
	for (uint_t i = 0; i < 8; i++) {
		if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
			return (VVE_ISR_PRIORITY);
		}
	}

	return (VVE_OK);
}

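/*
 * vmm-data write handler: validate and load previously exported v1 vLAPIC
 * state, re-deriving internal timer state and re-arming the callout if the
 * instance is not paused.
 */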
static int
vlapic_data_write(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
		return (EINVAL);
	}
	const struct vdi_lapic_v1 *src = req->vdr_data;
	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);

	/* Already ensured by vlapic_data_validate() */
	VERIFY3U(page->vlp_version, ==, lapic->version);

	vlapic->msr_apicbase = src->vl_msr_apicbase;
	vlapic->esr_pending = src->vl_esr_pending;

	lapic->tpr = page->vlp_tpr;
	lapic->apr = page->vlp_apr;
	lapic->ldr = page->vlp_ldr;
	lapic->dfr = page->vlp_dfr;
	lapic->svr = page->vlp_svr;
	lapic->esr = page->vlp_esr;
	lapic->icr_lo = (uint32_t)page->vlp_icr;
	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);

	lapic->icr_timer = page->vlp_icr_timer;
	lapic->dcr_timer = page->vlp_dcr_timer;
	vlapic_update_divider(vlapic);

	/* cleanse LDR/DFR */
	vlapic_ldr_write_handler(vlapic);
	vlapic_dfr_write_handler(vlapic);

	lapic->lvt_cmci = page->vlp_lvt_cmci;
	lapic->lvt_timer = page->vlp_lvt_timer;
	lapic->lvt_thermal = page->vlp_lvt_thermal;
	lapic->lvt_pcint = page->vlp_lvt_pcint;
	lapic->lvt_lint0 = page->vlp_lvt_lint0;
	lapic->lvt_lint1 = page->vlp_lvt_lint1;
	lapic->lvt_error = page->vlp_lvt_error;
	/* cleanse LVTs */
	vlapic_refresh_lvts(vlapic);

	uint32_t *isrptr = &lapic->isr0;
	uint32_t *tmrptr = &lapic->tmr0;
	uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		isrptr[i * 4] = page->vlp_isr[i];
		tmrptr[i * 4] = page->vlp_tmr[i];
		irrptr[i * 4] = page->vlp_irr[i];
	}

	if (src->vl_timer_target != 0) {
		vlapic->timer_fire_when =
		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);

		/*
		 * Check to see if timer expiration would result in computed
		 * CCR values in excess of what is configured in ICR/DCR.
		 */
		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			const uint32_t ccr = hrt_freq_count(
			    vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);

			/*
			 * Until we have a richer event/logging system
			 * available, just note such an overage as a stat.
			 */
			if (ccr > lapic->icr_timer) {
				vlapic->stats.vs_import_timer_overage++;
			}
		}

		if (!vm_is_paused(vlapic->vm)) {
			vlapic_callout_reset(vlapic);
		}
	} else {
		vlapic->timer_fire_when = 0;
	}

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static const vmm_data_version_entry_t lapic_v1 = {
	.vdve_class = VDC_LAPIC,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
	.vdve_readf = vlapic_data_read,
	.vdve_writef = vlapic_data_write,
};
VMM_DATA_VERSION(lapic_v1);