/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cpuset.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>

#include <machine/vmm.h>
#include <sys/vmm_kernel.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

/*
 * The 4 high bits of a given interrupt vector represent its priority.  The
 * same is true for the contents of the TPR when it is used to calculate the
 * ultimate PPR of an APIC - the 4 high bits hold the priority.
 */
#define	PRIO(x)			((x) & 0xf0)

#define	VLAPIC_VERSION		(16)

/*
 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)
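
/*
 * Illustrative arithmetic (not normative): with the divider configured for
 * divide-by-1, the timer counts down at VLAPIC_BUS_FREQ (134217728 Hz), so an
 * initial count (icr_timer) of 134217728 expires after exactly one second.
 * With divide-by-128 selected via the DCR, the same initial count would take
 * 128 seconds.  The power-of-two frequency keeps these conversions exact.
 */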

#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

#define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
	APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
	APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
	APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

static __inline bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

static __inline uint32_t
vlapic_get_id(const struct vlapic *vlapic)
{
	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(const struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}
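
/*
 * Worked example (illustrative): for a vCPU with x2APIC ID 0x26, the logical
 * ID is 1 << (0x26 & 0xf) = 0x40 and the cluster field is (0x20 << 12) =
 * 0x20000, yielding an LDR of 0x00020040 - cluster 2, logical ID bit 6 - per
 * the fixed cluster/ID split that x2APIC mode mandates.
 */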

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to DFR in x2APIC mode */
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2APIC mode, so ignore the write */
	if (vlapic_x2mode(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified, so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, "
	    "icr_timer is %x", ccr, lapic->icr_timer));
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}
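
/*
 * Illustrative example: with the timer running at 128MiHz (divide-by-1) and
 * the callout due to fire 250ms from now, the computed CCR is roughly
 * 0.25 * 134217728 = 33554432 remaining ticks.  A timer which has already
 * fired (or was never armed) reads back as 0.
 */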

static void
vlapic_update_divider(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	vlapic->timer_cur_freq =
	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
	vlapic->timer_period =
	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	VLAPIC_TIMER_LOCK(vlapic);
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it
		 * will have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued
	 * interrupt can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}
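
/*
 * Example of the sparse register layout (illustrative): IRR, ISR, and TMR are
 * each eight 32-bit registers spaced 16 bytes apart in the APIC page, hence
 * the '* 4' stride above.  For vector 0x45 (69): idx = (69 / 32) * 4 = 8,
 * i.e. the third IRR word, and mask = 1 << (69 % 32) = 0x20.
 */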

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_refresh_lvts(struct vlapic *vlapic)
{
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	lapic->lvt_timer |= APIC_LVT_M;
	lapic->lvt_thermal |= APIC_LVT_M;
	lapic->lvt_pcint |= APIC_LVT_M;
	lapic->lvt_lint0 |= APIC_LVT_M;
	lapic->lvt_lint1 |= APIC_LVT_M;
	lapic->lvt_error |= APIC_LVT_M;
	vlapic_refresh_lvts(vlapic);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other delivery modes are ignored */
		return (0);
	}
	return (1);
}

static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to a value
				 * of 0, indicating that no active ISR was
				 * found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

	vlapic->apic_page->ppr = ppr;
}
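
/*
 * Worked example (illustrative): with TPR = 0x40 and vector 0x35 in-service,
 * PRIO(0x40) = 0x40 >= PRIO(0x35) = 0x30, so PPR becomes 0x40 and only
 * vectors with priority above 4 (0x50 and up) remain deliverable.  If the
 * in-service vector were 0x65 instead, PPR would be PRIO(0x65) = 0x60.
 */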

/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to
 * the priority of that vector, as the vCPU would have been at a lower
 * priority in order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int ppr;

	ppr = PRIO(vec);

	lapic->ppr = ppr;
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i;
	uint_t idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		if (isrptr[idx] != 0) {
			bitpos = bsrl(isrptr[idx]);
			vector = i * 32 + bitpos;

			isrptr[idx] &= ~(1 << bitpos);
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}
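
/*
 * Illustrative walk-through: if vectors 0x31 and 0x82 are both in-service, an
 * EOI clears only the highest one (0x82) and recomputes PPR; because the
 * matching TMR bit indicates level-triggered delivery, the EOI is also
 * forwarded to the vioapic, which handles any re-assertion of the level.
 */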

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{
	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{
	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	if (!vlapic_enabled(vlapic)) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic)
{
	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.  We can depend on the fact that
		 * cyclics (which underlie these callouts) will never be
		 * called early.
		 */
		const hrtime_t now = gethrtime();
		const hrtime_t delta = now - vlapic->timer_fire_when;
		if (delta >= vlapic->timer_period) {
			/*
			 * If we are so behind that we have missed an entire
			 * timer period, reset the time base rather than
			 * attempting to catch up.
			 */
			vlapic->timer_fire_when = now + vlapic->timer_period;
		} else {
			vlapic->timer_fire_when += vlapic->timer_period;
		}
		vlapic_callout_reset(vlapic);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}
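
/*
 * Illustrative timeline: with a 10ms period, a timer due at t=100ms which
 * actually fires at t=103ms is simply rescheduled for t=110ms, preserving
 * the period.  Had it fired at t=112ms (a whole period late), the base is
 * reset and the next expiry becomes t=122ms rather than burst-firing to
 * catch up.
 */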

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);
	if (vlapic->timer_period != 0) {
		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
		vlapic_callout_reset(vlapic);
	} else {
		vlapic->timer_fire_when = 0;
		callout_stop(&vlapic->callout);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask.  This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (vlapic_x2mode(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}
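
/*
 * Worked example (illustrative): an xAPIC cluster-model MDA of 0x2c selects
 * cluster 2 with member bits 0xc.  A vCPU whose LDR is 0x24000000 decodes to
 * cluster (ldr >> 28) = 2 and logical ID ((ldr >> 24) & 0xf) = 0x4; the
 * cluster matches and (0xc & 0x4) != 0, so that vCPU is added to 'dmask'.
 */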

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr >> 4);
}
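
/*
 * Illustrative mapping: CR8 holds only the priority class, so writing CR8 = 9
 * sets TPR to 0x90, and a TPR of 0x9f reads back through CR8 as 9 (the low
 * TPR nibble is not representable in CR8).  Values above 0xf take a #GP.
 */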

void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
	int i;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode, dsh;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (vlapic_x2mode(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	dsh = icrval & APIC_DEST_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		return;
	}
	if (mode == APIC_DELMODE_INIT &&
	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
		/* No work required to deassert INIT */
		return;
	}
	if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) &&
	    !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) {
		/*
		 * While Intel makes no mention of restrictions for destination
		 * shorthand when sending INIT or SIPI, AMD requires either a
		 * specific destination or all-excluding self.  Common use
		 * seems to be restricted to those two cases.  Until handling
		 * is in place to halt a guest which makes such a frivolous
		 * request, we will ignore them.
		 */
		return;
	}

	switch (dsh) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest,
		    (icrval & APIC_DESTMODE_LOG) == 0, false,
		    vlapic_x2mode(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		/*
		 * All possible delivery shorthands are covered above.
		 * We should never end up here.
		 */
		panic("unknown delivery shorthand: %x", dsh);
	}

	while ((i = CPU_FFS(&dmask)) != 0) {
		i--;
		CPU_CLR(i, &dmask);
		switch (mode) {
		case APIC_DELMODE_FIXED:
			(void) lapic_intr_edge(vlapic->vm, i, vec);
			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
			    VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vlapic->vm, i,
			    VLAPIC_IPI_RECV, 1);
			break;
		case APIC_DELMODE_NMI:
			(void) vm_inject_nmi(vlapic->vm, i);
			break;
		case APIC_DELMODE_INIT:
			(void) vm_inject_init(vlapic->vm, i);
			break;
		case APIC_DELMODE_STARTUP:
			(void) vm_inject_sipi(vlapic->vm, i, vec);
			break;
		case APIC_DELMODE_LOWPRIO:
		case APIC_DELMODE_SMI:
		default:
			/* Unhandled IPI modes (for now) */
			break;
		}
	}
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
	const int vec = val & 0xff;

	/* self-IPI is only exposed via x2APIC */
	ASSERT(vlapic_x2mode(vlapic));

	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
}
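
/*
 * Illustrative encoding: in xAPIC mode, writing icr_hi = 0x02000000 followed
 * by icr_lo = 0x000000fd requests a fixed-mode, physical-destination IPI with
 * vector 0xfd addressed (no shorthand) to APIC ID 2; the write to ICR_LOW is
 * what triggers delivery.
 */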

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx;

	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * Clear the ready bit for the vector being accepted in IRR and set
	 * the vector as in-service in ISR.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);

	/*
	 * The only way a fresh vector could be accepted into ISR is if it was
	 * of a higher priority than the current PPR.  With that vector now
	 * in-service, the PPR must be raised.
	 */
	vlapic_raise_ppr(vlapic, vector);
}
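
/*
 * Illustrative check: with vector 0x45 pending in IRR and PPR = 0x50,
 * PRIO(0x45) = 0x40 is not above 0x50, so no interrupt is reported as
 * deliverable; once PPR drops below 0x40 (e.g. after an EOI), the same scan
 * returns vector 0x45.
 */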

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	ASSERT3U(offset & 0x3, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);
	ASSERT3P(outp, !=, NULL);

	uint32_t data = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		data = lapic->ppr;
		break;
	case APIC_OFFSET_LDR:
		data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		data = lapic->icr_lo;
		break;
	case APIC_OFFSET_ICR_HI:
		data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		ASSERT3U(data, ==, *reg);
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
		data = 0;
		break;

	case APIC_OFFSET_SELF_IPI:
	case APIC_OFFSET_EOI:
		/* Write-only register */
		*outp = 0;
		return (false);

	default:
		/* Invalid register */
		*outp = 0;
		return (false);
	}

	*outp = data;
	return (true);
}

static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;

	ASSERT3U(offset & 0xf, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);

	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		vlapic_icrlo_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (vlapic_x2mode(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
		/* Read-only register */
		return (false);

	default:
		/* Invalid register */
		return (false);
	}

	return (true);
}

void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr, *irrptr;

	/* Reset any timer-related state first */
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	lapic->icr_timer = 0;
	lapic->ccr_timer = 0;
	lapic->dcr_timer = 0;
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);

	/*
	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
	 * it is not leftover after the reset.  This is performed after the
	 * APIC timer has been stopped, in case it happened to fire just prior
	 * to being deactivated.
	 */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

	lapic->tpr = 0;
	lapic->apr = 0;
	lapic->ppr = 0;

	lapic->eoi = 0;
	lapic->ldr = 0;
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic->svr_last = lapic->svr;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		atomic_store_rel_int(&isrptr[i * 4], 0);
		atomic_store_rel_int(&tmrptr[i * 4], 0);
		atomic_store_rel_int(&irrptr[i * 4], 0);
	}

	lapic->esr = 0;
	vlapic->esr_pending = 0;
	lapic->icr_lo = 0;
	lapic->icr_hi = 0;

	lapic->lvt_cmci = 0;
	lapic->lvt_timer = 0;
	lapic->lvt_thermal = 0;
	lapic->lvt_pcint = 0;
	lapic->lvt_lint0 = 0;
	lapic->lvt_lint1 = 0;
	lapic->lvt_error = 0;
	vlapic_mask_lvts(vlapic);
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed via the MSR emulation code, so the timer lock must be
	 * safe to acquire from that context.
	 */
	mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
	callout_init(&vlapic->callout, 1);

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{
	callout_drain(&vlapic->callout);
	mutex_destroy(&vlapic->timer_lock);
}

int
vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		*valp = UINT64_MAX;
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	uint32_t raw = 0;
	(void) vlapic_read(vlapic, off & ~0xf, &raw);

	/* Shift and mask reads which are small and/or unaligned */
	const uint8_t align = off & 0xf;
	if (align < 4) {
		*valp = (uint64_t)raw << (align * 8);
	} else {
		*valp = 0;
	}

	return (0);
}
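
/*
 * Note on the shifting above (illustrative): only the 16-byte-aligned
 * register is ever read; an access at offset 0x32 re-reads the register at
 * 0x30 with the raw value shifted by two bytes, while accesses falling in
 * the unimplemented upper half of a register slot (align >= 4) read as 0.
 */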

int
vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	/* Ignore writes which are not 32-bits wide and 16-byte aligned */
	if ((off & 0xf) != 0 || size != 4) {
		return (0);
	}

	(void) vlapic_write(vlapic, off, (uint32_t)val);
	return (0);
}

/* Should attempts to change the APIC base address be rejected with a #GP? */
int vlapic_gp_on_addr_change = 1;

static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	const uint64_t diff = vlapic->msr_apicbase ^ val;

	/*
	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
	 * modes is more polished, it will remain off-limits from being
	 * altered by the guest.
	 */
	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	if ((diff & reserved_bits) != 0) {
		return (VMR_GP);
	}

	/* We do not presently allow the LAPIC access address to be modified. */
	if ((diff & APICBASE_ADDR_MASK) != 0) {
		/*
		 * Explicitly rebuffing such requests with a #GP is the most
		 * straightforward way to handle the situation, but certain
		 * consumers (such as the KVM unit tests) may balk at the
		 * otherwise unexpected exception.
		 */
		if (vlapic_gp_on_addr_change) {
			return (VMR_GP);
		}

		/* If silence is required, just ignore the address change. */
		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
	}

	vlapic->msr_apicbase = val;
	return (VMR_OK);
}

static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
	ASSERT3U(msr, >=, MSR_APIC_000);
	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

	return ((msr - MSR_APIC_000) << 4);
}
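
/*
 * Illustrative mapping: x2APIC MSRs are spaced one per 16-byte xAPIC
 * register, so MSR 0x830 (the x2APIC ICR) maps to offset
 * (0x830 - 0x800) << 4 = 0x300, which is APIC_OFFSET_ICR_LOW in the xAPIC
 * register layout.
 */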

bool
vlapic_owned_msr(uint32_t msr)
{
	if (msr == MSR_APICBASE) {
		return (true);
	}
	if (msr >= MSR_APIC_000 &&
	    msr < (MSR_APIC_000 + 0x100)) {
		return (true);
	}
	return (false);
}

vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
	ASSERT(vlapic_owned_msr(msr));
	ASSERT3P(valp, !=, NULL);

	if (msr == MSR_APICBASE) {
		*valp = vlapic->msr_apicbase;
		return (VMR_OK);
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	uint64_t out = 0;
	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Read from ICR register gets entire (64-bit) value */
		uint32_t low = 0, high = 0;
		bool valid;

		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
		VERIFY(valid);
		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
		VERIFY(valid);

		*valp = ((uint64_t)high << 32) | low;
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	default:
		break;
	}
	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
		return (VMR_GP);
	}
	*valp = out;
	return (VMR_OK);
}

vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
	ASSERT(vlapic_owned_msr(msr));

	if (msr == MSR_APICBASE) {
		return (vlapic_set_apicbase(vlapic, val));
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Write to ICR register sets entire (64-bit) value */
		bool valid;

		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
		VERIFY(valid);
		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
		VERIFY(valid);
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	case APIC_OFFSET_ESR:
		/* Only 0 may be written from x2APIC mode */
		if (val != 0) {
			return (VMR_GP);
		}
		break;
	default:
		break;
	}
	if (!vlapic_write(vlapic, reg, val)) {
		return (VMR_GP);
	}
	return (VMR_OK);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (vlapic_x2mode(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		/* Invalid delivery mode */
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			(void) vm_inject_extint(vm, vcpuid);
		} else {
			(void) lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		poke_cpu(hostcpu);
}

void
vlapic_localize_resources(struct vlapic *vlapic)
{
	vmm_glue_callout_localize(&vlapic->callout);
}

static int
vlapic_data_read(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	struct vdi_lapic_v1 *out = req->vdr_data;

	VLAPIC_TIMER_LOCK(vlapic);

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	out->vl_msr_apicbase = vlapic->msr_apicbase;
	out->vl_esr_pending = vlapic->esr_pending;
	if (callout_pending(&vlapic->callout)) {
		out->vl_timer_target =
		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
	} else {
		out->vl_timer_target = 0;
	}

	const struct LAPIC *lapic = vlapic->apic_page;
	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;

	/*
	 * While this might appear, at first glance, to be missing some fields,
	 * they are intentionally omitted:
	 * - PPR: its contents are always generated at runtime
	 * - EOI: write-only, and contents are ignored after handling
	 * - RRD: (aka RRR) read-only and always 0
	 * - CCR: calculated from underlying timer data
	 */
	out_page->vlp_id = lapic->id;
	out_page->vlp_version = lapic->version;
	out_page->vlp_tpr = lapic->tpr;
	out_page->vlp_apr = lapic->apr;
	out_page->vlp_ldr = lapic->ldr;
	out_page->vlp_dfr = lapic->dfr;
	out_page->vlp_svr = lapic->svr;
	out_page->vlp_esr = lapic->esr;
	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
	out_page->vlp_icr_timer = lapic->icr_timer;
	out_page->vlp_dcr_timer = lapic->dcr_timer;

	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
	out_page->vlp_lvt_timer = lapic->lvt_timer;
	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
	out_page->vlp_lvt_error = lapic->lvt_error;

	const uint32_t *isrptr = &lapic->isr0;
	const uint32_t *tmrptr = &lapic->tmr0;
	const uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		out_page->vlp_isr[i] = isrptr[i * 4];
		out_page->vlp_tmr[i] = tmrptr[i * 4];
		out_page->vlp_irr[i] = irrptr[i * 4];
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static uint8_t
popc8(uint8_t val)
{
	uint8_t cnt;

	for (cnt = 0; val != 0; val &= (val - 1)) {
		cnt++;
	}
	return (cnt);
}

/*
 * Descriptions for the various failures which can occur when validating
 * to-be-written vlapic state.
 */
enum vlapic_validation_error {
	VVE_OK,
	VVE_BAD_ID,
	VVE_BAD_VERSION,
	VVE_BAD_MSR_BASE,
	VVE_BAD_ESR,
	VVE_BAD_TPR,
	VVE_LOW_VECTOR,
	VVE_ISR_PRIORITY,
};

static enum vlapic_validation_error
vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
{
	ASSERT(req->vdr_version == 1 &&
	    req->vdr_len == sizeof (struct vdi_lapic_v1));
	const struct vdi_lapic_v1 *src = req->vdr_data;

	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
		return (VVE_BAD_ESR);
	}

	/* Use the same restrictions as the wrmsr accessor for now */
	const uint64_t apicbase_reserved = APICBASE_RESERVED |
	    APICBASE_X2APIC | APICBASE_BSP;
	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
	if ((diff & apicbase_reserved) != 0) {
		return (VVE_BAD_MSR_BASE);
	}

	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	/*
	 * Demand that ID match for now.  This can be further updated when
	 * some of the x2apic handling is improved.
	 */
	if (page->vlp_id != vlapic_get_id(vlapic)) {
		return (VVE_BAD_ID);
	}

	if (page->vlp_version != vlapic->apic_page->version) {
		return (VVE_BAD_VERSION);
	}

	if (page->vlp_tpr > 0xff) {
		return (VVE_BAD_TPR);
	}

	/* Vectors 0-15 are not expected to be handled by the lapic */
	if ((page->vlp_isr[0] & 0xffff) != 0 ||
	    (page->vlp_irr[0] & 0xffff) != 0 ||
	    (page->vlp_tmr[0] & 0xffff) != 0) {
		return (VVE_LOW_VECTOR);
	}

	/* Only one interrupt should be in-service for each priority level */
	for (uint_t i = 0; i < 8; i++) {
		if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
			return (VVE_ISR_PRIORITY);
		}
	}

	return (VVE_OK);
}
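
/*
 * Illustrative rejection: a payload with vlp_isr[2] = 0x00030000 claims
 * vectors 0x50 and 0x51 are simultaneously in-service.  Both fall in
 * priority class 5, so popc8() of that byte is 2 and the write fails with
 * VVE_ISR_PRIORITY.
 */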

static int
vlapic_data_write(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
		return (EINVAL);
	}
	const struct vdi_lapic_v1 *src = req->vdr_data;
	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);

	/* Already ensured by vlapic_data_validate() */
	VERIFY3U(page->vlp_id, ==, lapic->id);
	VERIFY3U(page->vlp_version, ==, lapic->version);

	vlapic->msr_apicbase = src->vl_msr_apicbase;
	vlapic->esr_pending = src->vl_esr_pending;

	lapic->tpr = page->vlp_tpr;
	lapic->apr = page->vlp_apr;
	lapic->ldr = page->vlp_ldr;
	lapic->dfr = page->vlp_dfr;
	lapic->svr = page->vlp_svr;
	lapic->esr = page->vlp_esr;
	lapic->icr_lo = (uint32_t)page->vlp_icr;
	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);

	lapic->icr_timer = page->vlp_icr_timer;
	lapic->dcr_timer = page->vlp_dcr_timer;
	vlapic_update_divider(vlapic);

	/* cleanse LDR/DFR */
	vlapic_ldr_write_handler(vlapic);
	vlapic_dfr_write_handler(vlapic);

	lapic->lvt_cmci = page->vlp_lvt_cmci;
	lapic->lvt_timer = page->vlp_lvt_timer;
	lapic->lvt_thermal = page->vlp_lvt_thermal;
	lapic->lvt_pcint = page->vlp_lvt_pcint;
	lapic->lvt_lint0 = page->vlp_lvt_lint0;
	lapic->lvt_lint1 = page->vlp_lvt_lint1;
	lapic->lvt_error = page->vlp_lvt_error;
	/* cleanse LVTs */
	vlapic_refresh_lvts(vlapic);

	uint32_t *isrptr = &lapic->isr0;
	uint32_t *tmrptr = &lapic->tmr0;
	uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		isrptr[i * 4] = page->vlp_isr[i];
		tmrptr[i * 4] = page->vlp_tmr[i];
		irrptr[i * 4] = page->vlp_irr[i];
	}

	if (src->vl_timer_target != 0) {
		vlapic->timer_fire_when =
		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);
		vlapic_callout_reset(vlapic);
	}

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static const vmm_data_version_entry_t lapic_v1 = {
	.vdve_class = VDC_LAPIC,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
	.vdve_readf = vlapic_data_read,
	.vdve_writef = vlapic_data_write,
};
VMM_DATA_VERSION(lapic_v1);
1830 */ 1831 if (page->vlp_id != vlapic_get_id(vlapic)) { 1832 return (VVE_BAD_ID); 1833 } 1834 1835 if (page->vlp_version != vlapic->apic_page->version) { 1836 return (VVE_BAD_VERSION); 1837 } 1838 1839 if (page->vlp_tpr > 0xff) { 1840 return (VVE_BAD_TPR); 1841 } 1842 1843 /* Vectors 0-15 are not expected to be handled by the lapic */ 1844 if ((page->vlp_isr[0] & 0xffff) != 0 || 1845 (page->vlp_irr[0] & 0xffff) != 0 || 1846 (page->vlp_tmr[0] & 0xffff) != 0) { 1847 return (VVE_LOW_VECTOR); 1848 } 1849 1850 /* Only one interrupt should be in-service for each priority level */ 1851 for (uint_t i = 0; i < 8; i++) { 1852 if (popc8((uint8_t)page->vlp_isr[i]) > 1 || 1853 popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 || 1854 popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 || 1855 popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) { 1856 return (VVE_ISR_PRIORITY); 1857 } 1858 } 1859 1860 return (VVE_OK); 1861 } 1862 1863 static int 1864 vlapic_data_write(void *datap, const vmm_data_req_t *req) 1865 { 1866 VERIFY3U(req->vdr_class, ==, VDC_LAPIC); 1867 VERIFY3U(req->vdr_version, ==, 1); 1868 VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_lapic_v1)); 1869 1870 struct vlapic *vlapic = datap; 1871 if (vlapic_data_validate(vlapic, req) != VVE_OK) { 1872 return (EINVAL); 1873 } 1874 const struct vdi_lapic_v1 *src = req->vdr_data; 1875 const struct vdi_lapic_page_v1 *page = &src->vl_lapic; 1876 struct LAPIC *lapic = vlapic->apic_page; 1877 1878 VLAPIC_TIMER_LOCK(vlapic); 1879 1880 /* Already ensured by vlapic_data_validate() */ 1881 VERIFY3U(page->vlp_id, ==, lapic->id); 1882 VERIFY3U(page->vlp_version, ==, lapic->version); 1883 1884 vlapic->msr_apicbase = src->vl_msr_apicbase; 1885 vlapic->esr_pending = src->vl_esr_pending; 1886 1887 lapic->tpr = page->vlp_tpr; 1888 lapic->apr = page->vlp_apr; 1889 lapic->ldr = page->vlp_ldr; 1890 lapic->dfr = page->vlp_dfr; 1891 lapic->svr = page->vlp_svr; 1892 lapic->esr = page->vlp_esr; 1893 lapic->icr_lo = (uint32_t)page->vlp_icr; 1894 lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32); 1895 1896 lapic->icr_timer = page->vlp_icr_timer; 1897 lapic->dcr_timer = page->vlp_dcr_timer; 1898 vlapic_update_divider(vlapic); 1899 1900 /* cleanse LDR/DFR */ 1901 vlapic_ldr_write_handler(vlapic); 1902 vlapic_dfr_write_handler(vlapic); 1903 1904 lapic->lvt_cmci = page->vlp_lvt_cmci; 1905 lapic->lvt_timer = page->vlp_lvt_timer; 1906 lapic->lvt_thermal = page->vlp_lvt_thermal; 1907 lapic->lvt_pcint = page->vlp_lvt_pcint; 1908 lapic->lvt_lint0 = page->vlp_lvt_lint0; 1909 lapic->lvt_lint1 = page->vlp_lvt_lint1; 1910 lapic->lvt_error = page->vlp_lvt_error; 1911 /* cleanse LVTs */ 1912 vlapic_refresh_lvts(vlapic); 1913 1914 uint32_t *isrptr = &lapic->isr0; 1915 uint32_t *tmrptr = &lapic->tmr0; 1916 uint32_t *irrptr = &lapic->irr0; 1917 for (uint_t i = 0; i < 8; i++) { 1918 isrptr[i * 4] = page->vlp_isr[i]; 1919 tmrptr[i * 4] = page->vlp_tmr[i]; 1920 irrptr[i * 4] = page->vlp_irr[i]; 1921 } 1922 1923 if (src->vl_timer_target != 0) { 1924 vlapic->timer_fire_when = 1925 vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target); 1926 vlapic_callout_reset(vlapic); 1927 } 1928 1929 if (vlapic->ops.sync_state) { 1930 (*vlapic->ops.sync_state)(vlapic); 1931 } 1932 VLAPIC_TIMER_UNLOCK(vlapic); 1933 1934 return (0); 1935 } 1936 1937 static const vmm_data_version_entry_t lapic_v1 = { 1938 .vdve_class = VDC_LAPIC, 1939 .vdve_version = 1, 1940 .vdve_len_expect = sizeof (struct vdi_lapic_v1), 1941 .vdve_readf = vlapic_data_read, 1942 .vdve_writef = vlapic_data_write, 1943 }; 1944 VMM_DATA_VERSION(lapic_v1); 1945