/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cpuset.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>

#include <machine/vmm.h>
#include <sys/vmm_kernel.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

/*
 * The 4 high bits of a given interrupt vector represent its priority.  The
 * same is true for the contents of the TPR when it is used to calculate the
 * ultimate PPR of an APIC - the 4 high bits hold the priority.
 */
#define	PRIO(x)			((x) & 0xf0)

#define	VLAPIC_VERSION		(16)

/*
 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_cur_freq, timer_period, timer_fire_when
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)
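
/*
 * Worked example (illustrative values): with the divide configuration set to
 * 2, the effective timer frequency is VLAPIC_BUS_FREQ / 2 = 67108864 Hz.  An
 * icr_timer value of 1000000 counts then corresponds to a period of roughly
 * 14.9 ms, which is the interval programmed into the timer callout.
 */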

#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

#define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
		APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
		APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
		APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

static __inline bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

static __inline uint32_t
vlapic_get_id(const struct vlapic *vlapic)
{

	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(const struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}
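
/*
 * Worked example (illustrative APIC ID): for APIC ID 0x25, the x2APIC LDR is
 * composed of logical ID 1 << (0x25 & 0xf) = 0x20 and cluster bits
 * (0x25 & 0xffff0) << 12 = 0x20000, yielding 0x20020: a 16-bit cluster ID in
 * the high half and a 16-bit logical bitmask in the low half.
 */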
196 */ 197 lapic = vlapic->apic_page; 198 lapic->id = vlapic_get_id(vlapic); 199 } 200 201 static int 202 vlapic_timer_divisor(uint32_t dcr) 203 { 204 switch (dcr & 0xB) { 205 case APIC_TDCR_1: 206 return (1); 207 case APIC_TDCR_2: 208 return (2); 209 case APIC_TDCR_4: 210 return (4); 211 case APIC_TDCR_8: 212 return (8); 213 case APIC_TDCR_16: 214 return (16); 215 case APIC_TDCR_32: 216 return (32); 217 case APIC_TDCR_64: 218 return (64); 219 case APIC_TDCR_128: 220 return (128); 221 default: 222 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); 223 } 224 } 225 226 #if 0 227 static inline void 228 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) 229 { 230 printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset, 231 *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS, 232 *lvt & APIC_LVTT_M); 233 } 234 #endif 235 236 static uint32_t 237 vlapic_get_ccr(struct vlapic *vlapic) 238 { 239 struct LAPIC *lapic; 240 uint32_t ccr; 241 242 ccr = 0; 243 lapic = vlapic->apic_page; 244 245 VLAPIC_TIMER_LOCK(vlapic); 246 if (callout_active(&vlapic->callout)) { 247 /* 248 * If the timer is scheduled to expire in the future then 249 * compute the value of 'ccr' based on the remaining time. 250 */ 251 252 const hrtime_t now = gethrtime(); 253 if (vlapic->timer_fire_when > now) { 254 ccr += hrt_freq_count(vlapic->timer_fire_when - now, 255 vlapic->timer_cur_freq); 256 } 257 } 258 KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, " 259 "icr_timer is %x", ccr, lapic->icr_timer)); 260 VLAPIC_TIMER_UNLOCK(vlapic); 261 return (ccr); 262 } 263 264 static void 265 vlapic_update_divider(struct vlapic *vlapic) 266 { 267 struct LAPIC *lapic = vlapic->apic_page; 268 269 ASSERT(VLAPIC_TIMER_LOCKED(vlapic)); 270 271 vlapic->timer_cur_freq = 272 VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer); 273 vlapic->timer_period = 274 hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer); 275 } 276 277 void 278 vlapic_dcr_write_handler(struct vlapic *vlapic) 279 { 280 /* 281 * Update the timer frequency and the timer period. 282 * 283 * XXX changes to the frequency divider will not take effect until 284 * the timer is reloaded. 285 */ 286 VLAPIC_TIMER_LOCK(vlapic); 287 vlapic_update_divider(vlapic); 288 VLAPIC_TIMER_UNLOCK(vlapic); 289 } 290 291 void 292 vlapic_esr_write_handler(struct vlapic *vlapic) 293 { 294 struct LAPIC *lapic; 295 296 lapic = vlapic->apic_page; 297 lapic->esr = vlapic->esr_pending; 298 vlapic->esr_pending = 0; 299 } 300 301 vcpu_notify_t 302 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) 303 { 304 struct LAPIC *lapic; 305 uint32_t *irrptr, *tmrptr, mask, tmr; 306 int idx; 307 308 KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); 309 310 lapic = vlapic->apic_page; 311 if (!(lapic->svr & APIC_SVR_ENABLE)) { 312 /* ignore interrupt on software-disabled APIC */ 313 return (VCPU_NOTIFY_NONE); 314 } 315 316 if (vector < 16) { 317 vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR, 318 false); 319 320 /* 321 * If the error LVT is configured to interrupt the vCPU, it will 322 * have delivered a notification through that mechanism. 323 */ 324 return (VCPU_NOTIFY_NONE); 325 } 326 327 if (vlapic->ops.set_intr_ready) { 328 return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); 329 } 330 331 idx = (vector / 32) * 4; 332 mask = 1 << (vector % 32); 333 tmrptr = &lapic->tmr0; 334 irrptr = &lapic->irr0; 335 336 /* 337 * Update TMR for requested vector, if necessary. 

vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it
		 * will have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued
	 * interrupt can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}
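
/*
 * Note: 'lvt_last' caches the post-sanitization value of each LVT.  The
 * write handler above publishes into it with a releasing store, and readers
 * such as vlapic_fire_lvt() use an acquiring load, allowing LVT state to be
 * consulted without holding a vlapic lock.
 */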

static void
vlapic_refresh_lvts(struct vlapic *vlapic)
{
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	lapic->lvt_timer |= APIC_LVT_M;
	lapic->lvt_thermal |= APIC_LVT_M;
	lapic->lvt_pcint |= APIC_LVT_M;
	lapic->lvt_lint0 |= APIC_LVT_M;
	lapic->lvt_lint1 |= APIC_LVT_M;
	lapic->lvt_error |= APIC_LVT_M;
	vlapic_refresh_lvts(vlapic);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to value of
				 * 0, indicating that no active ISR was found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

	vlapic->apic_page->ppr = ppr;
}
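
/*
 * Worked example (illustrative values): with TPR = 0x30 and vector 0x45 as
 * the highest in-service interrupt, PRIO(0x45) = 0x40 exceeds PRIO(0x30), so
 * PPR becomes 0x40.  Raising TPR to 0x50 would instead yield PPR = 0x50,
 * blocking delivery of all vectors in priority classes 5 and below.
 */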
576 */ 577 static void 578 vlapic_raise_ppr(struct vlapic *vlapic, int vec) 579 { 580 struct LAPIC *lapic = vlapic->apic_page; 581 int ppr; 582 583 ppr = PRIO(vec); 584 585 lapic->ppr = ppr; 586 } 587 588 void 589 vlapic_sync_tpr(struct vlapic *vlapic) 590 { 591 vlapic_update_ppr(vlapic); 592 } 593 594 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); 595 596 static void 597 vlapic_process_eoi(struct vlapic *vlapic) 598 { 599 struct LAPIC *lapic = vlapic->apic_page; 600 uint32_t *isrptr, *tmrptr; 601 int i; 602 uint_t idx, bitpos, vector; 603 604 isrptr = &lapic->isr0; 605 tmrptr = &lapic->tmr0; 606 607 for (i = 7; i >= 0; i--) { 608 idx = i * 4; 609 if (isrptr[idx] != 0) { 610 bitpos = bsrl(isrptr[idx]); 611 vector = i * 32 + bitpos; 612 613 isrptr[idx] &= ~(1 << bitpos); 614 vlapic_update_ppr(vlapic); 615 if ((tmrptr[idx] & (1 << bitpos)) != 0) { 616 vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, 617 vector); 618 } 619 return; 620 } 621 } 622 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1); 623 } 624 625 static __inline int 626 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) 627 { 628 629 return (lvt & mask); 630 } 631 632 static __inline int 633 vlapic_periodic_timer(struct vlapic *vlapic) 634 { 635 uint32_t lvt; 636 637 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 638 639 return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); 640 } 641 642 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); 643 644 static void 645 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) 646 { 647 648 vlapic->esr_pending |= mask; 649 650 /* 651 * Avoid infinite recursion if the error LVT itself is configured with 652 * an illegal vector. 653 */ 654 if (lvt_error) 655 return; 656 657 if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { 658 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1); 659 } 660 } 661 662 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); 663 664 static void 665 vlapic_fire_timer(struct vlapic *vlapic) 666 { 667 ASSERT(VLAPIC_TIMER_LOCKED(vlapic)); 668 669 if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { 670 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); 671 } 672 } 673 674 static VMM_STAT(VLAPIC_INTR_CMC, 675 "corrected machine check interrupts generated by vlapic"); 676 677 void 678 vlapic_fire_cmci(struct vlapic *vlapic) 679 { 680 681 if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { 682 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1); 683 } 684 } 685 686 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, 687 "lvts triggered"); 688 689 int 690 vlapic_trigger_lvt(struct vlapic *vlapic, int vector) 691 { 692 if (!vlapic_enabled(vlapic)) { 693 /* 694 * When the local APIC is global/hardware disabled, 695 * LINT[1:0] pins are configured as INTR and NMI pins, 696 * respectively. 
697 */ 698 switch (vector) { 699 case APIC_LVT_LINT0: 700 (void) vm_inject_extint(vlapic->vm, 701 vlapic->vcpuid); 702 break; 703 case APIC_LVT_LINT1: 704 (void) vm_inject_nmi(vlapic->vm, 705 vlapic->vcpuid); 706 break; 707 default: 708 break; 709 } 710 return (0); 711 } 712 713 switch (vector) { 714 case APIC_LVT_LINT0: 715 case APIC_LVT_LINT1: 716 case APIC_LVT_TIMER: 717 case APIC_LVT_ERROR: 718 case APIC_LVT_PMC: 719 case APIC_LVT_THERMAL: 720 case APIC_LVT_CMCI: 721 if (vlapic_fire_lvt(vlapic, vector)) { 722 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 723 LVTS_TRIGGERRED, vector, 1); 724 } 725 break; 726 default: 727 return (EINVAL); 728 } 729 return (0); 730 } 731 732 static void 733 vlapic_callout_reset(struct vlapic *vlapic) 734 { 735 callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when, 736 vlapic_callout_handler, vlapic, C_ABSOLUTE); 737 } 738 739 static void 740 vlapic_callout_handler(void *arg) 741 { 742 struct vlapic *vlapic = arg; 743 744 VLAPIC_TIMER_LOCK(vlapic); 745 if (callout_pending(&vlapic->callout)) /* callout was reset */ 746 goto done; 747 748 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 749 goto done; 750 751 callout_deactivate(&vlapic->callout); 752 753 vlapic_fire_timer(vlapic); 754 755 if (vlapic_periodic_timer(vlapic)) { 756 /* 757 * Compute the delta between when the timer was supposed to 758 * fire and the present time. We can depend on the fact that 759 * cyclics (which underly these callouts) will never be called 760 * early. 761 */ 762 const hrtime_t now = gethrtime(); 763 const hrtime_t delta = now - vlapic->timer_fire_when; 764 if (delta >= vlapic->timer_period) { 765 /* 766 * If we are so behind that we have missed an entire 767 * timer period, reset the time base rather than 768 * attempting to catch up. 769 */ 770 vlapic->timer_fire_when = now + vlapic->timer_period; 771 } else { 772 vlapic->timer_fire_when += vlapic->timer_period; 773 } 774 vlapic_callout_reset(vlapic); 775 } else { 776 /* 777 * Clear the target time so that logic can distinguish from a 778 * timer which has fired (where the value is zero) from one 779 * which is held pending due to the instance being paused (where 780 * the value is non-zero, but the callout is not pending). 781 */ 782 vlapic->timer_fire_when = 0; 783 } 784 done: 785 VLAPIC_TIMER_UNLOCK(vlapic); 786 } 787 788 void 789 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 790 { 791 struct LAPIC *lapic = vlapic->apic_page; 792 793 VLAPIC_TIMER_LOCK(vlapic); 794 vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq, 795 lapic->icr_timer); 796 if (vlapic->timer_period != 0) { 797 vlapic->timer_fire_when = gethrtime() + vlapic->timer_period; 798 vlapic_callout_reset(vlapic); 799 } else { 800 vlapic->timer_fire_when = 0; 801 callout_stop(&vlapic->callout); 802 } 803 VLAPIC_TIMER_UNLOCK(vlapic); 804 } 805 806 /* 807 * This function populates 'dmask' with the set of vcpus that match the 808 * addressing specified by the (dest, phys, lowprio) tuple. 809 * 810 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 811 * or xAPIC (8-bit) destination field. 
812 */ 813 void 814 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 815 bool lowprio, bool x2apic_dest) 816 { 817 struct vlapic *vlapic; 818 uint32_t dfr, ldr, ldest, cluster; 819 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 820 cpuset_t amask; 821 int vcpuid; 822 823 if ((x2apic_dest && dest == 0xffffffff) || 824 (!x2apic_dest && dest == 0xff)) { 825 /* 826 * Broadcast in both logical and physical modes. 827 */ 828 *dmask = vm_active_cpus(vm); 829 return; 830 } 831 832 if (phys) { 833 /* 834 * Physical mode: destination is APIC ID. 835 */ 836 CPU_ZERO(dmask); 837 vcpuid = vm_apicid2vcpuid(vm, dest); 838 amask = vm_active_cpus(vm); 839 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 840 CPU_SET(vcpuid, dmask); 841 } else { 842 /* 843 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 844 * bitmask. This model is only available in the xAPIC mode. 845 */ 846 mda_flat_ldest = dest & 0xff; 847 848 /* 849 * In the "Cluster Model" the MDA is used to identify a 850 * specific cluster and a set of APICs in that cluster. 851 */ 852 if (x2apic_dest) { 853 mda_cluster_id = dest >> 16; 854 mda_cluster_ldest = dest & 0xffff; 855 } else { 856 mda_cluster_id = (dest >> 4) & 0xf; 857 mda_cluster_ldest = dest & 0xf; 858 } 859 860 /* 861 * Logical mode: match each APIC that has a bit set 862 * in its LDR that matches a bit in the ldest. 863 */ 864 CPU_ZERO(dmask); 865 amask = vm_active_cpus(vm); 866 while ((vcpuid = CPU_FFS(&amask)) != 0) { 867 vcpuid--; 868 CPU_CLR(vcpuid, &amask); 869 870 vlapic = vm_lapic(vm, vcpuid); 871 dfr = vlapic->apic_page->dfr; 872 ldr = vlapic->apic_page->ldr; 873 874 if ((dfr & APIC_DFR_MODEL_MASK) == 875 APIC_DFR_MODEL_FLAT) { 876 ldest = ldr >> 24; 877 mda_ldest = mda_flat_ldest; 878 } else if ((dfr & APIC_DFR_MODEL_MASK) == 879 APIC_DFR_MODEL_CLUSTER) { 880 if (vlapic_x2mode(vlapic)) { 881 cluster = ldr >> 16; 882 ldest = ldr & 0xffff; 883 } else { 884 cluster = ldr >> 28; 885 ldest = (ldr >> 24) & 0xf; 886 } 887 if (cluster != mda_cluster_id) 888 continue; 889 mda_ldest = mda_cluster_ldest; 890 } else { 891 /* 892 * Guest has configured a bad logical 893 * model for this vcpu - skip it. 
894 */ 895 continue; 896 } 897 898 if ((mda_ldest & ldest) != 0) { 899 CPU_SET(vcpuid, dmask); 900 if (lowprio) 901 break; 902 } 903 } 904 } 905 } 906 907 static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu"); 908 static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu"); 909 910 static void 911 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 912 { 913 struct LAPIC *lapic = vlapic->apic_page; 914 915 if (lapic->tpr != val) { 916 lapic->tpr = val; 917 vlapic_update_ppr(vlapic); 918 } 919 } 920 921 void 922 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 923 { 924 uint8_t tpr; 925 926 if (val & ~0xf) { 927 vm_inject_gp(vlapic->vm, vlapic->vcpuid); 928 return; 929 } 930 931 tpr = val << 4; 932 vlapic_set_tpr(vlapic, tpr); 933 } 934 935 uint64_t 936 vlapic_get_cr8(const struct vlapic *vlapic) 937 { 938 const struct LAPIC *lapic = vlapic->apic_page; 939 940 return (lapic->tpr >> 4); 941 } 942 943 void 944 vlapic_icrlo_write_handler(struct vlapic *vlapic) 945 { 946 int i; 947 cpuset_t dmask; 948 uint64_t icrval; 949 uint32_t dest, vec, mode, dsh; 950 struct LAPIC *lapic; 951 952 lapic = vlapic->apic_page; 953 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 954 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 955 956 if (vlapic_x2mode(vlapic)) 957 dest = icrval >> 32; 958 else 959 dest = icrval >> (32 + 24); 960 vec = icrval & APIC_VECTOR_MASK; 961 mode = icrval & APIC_DELMODE_MASK; 962 dsh = icrval & APIC_DEST_MASK; 963 964 if (mode == APIC_DELMODE_FIXED && vec < 16) { 965 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false); 966 return; 967 } 968 if (mode == APIC_DELMODE_INIT && 969 (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) { 970 /* No work required to deassert INIT */ 971 return; 972 } 973 if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) && 974 !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) { 975 /* 976 * While Intel makes no mention of restrictions for destination 977 * shorthand when sending INIT or SIPI, AMD requires either a 978 * specific destination or all-excluding self. Common use seems 979 * to be restricted to those two cases. Until handling is in 980 * place to halt a guest which makes such a frivolous request, 981 * we will ignore them. 982 */ 983 return; 984 } 985 986 switch (dsh) { 987 case APIC_DEST_DESTFLD: 988 vlapic_calcdest(vlapic->vm, &dmask, dest, 989 (icrval & APIC_DESTMODE_LOG) == 0, false, 990 vlapic_x2mode(vlapic)); 991 break; 992 case APIC_DEST_SELF: 993 CPU_SETOF(vlapic->vcpuid, &dmask); 994 break; 995 case APIC_DEST_ALLISELF: 996 dmask = vm_active_cpus(vlapic->vm); 997 break; 998 case APIC_DEST_ALLESELF: 999 dmask = vm_active_cpus(vlapic->vm); 1000 CPU_CLR(vlapic->vcpuid, &dmask); 1001 break; 1002 default: 1003 /* 1004 * All possible delivery notations are covered above. 1005 * We should never end up here. 
1006 */ 1007 panic("unknown delivery shorthand: %x", dsh); 1008 } 1009 1010 while ((i = CPU_FFS(&dmask)) != 0) { 1011 i--; 1012 CPU_CLR(i, &dmask); 1013 switch (mode) { 1014 case APIC_DELMODE_FIXED: 1015 (void) lapic_intr_edge(vlapic->vm, i, vec); 1016 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, 1017 VLAPIC_IPI_SEND, 1); 1018 vmm_stat_incr(vlapic->vm, i, 1019 VLAPIC_IPI_RECV, 1); 1020 break; 1021 case APIC_DELMODE_NMI: 1022 (void) vm_inject_nmi(vlapic->vm, i); 1023 break; 1024 case APIC_DELMODE_INIT: 1025 (void) vm_inject_init(vlapic->vm, i); 1026 break; 1027 case APIC_DELMODE_STARTUP: 1028 (void) vm_inject_sipi(vlapic->vm, i, vec); 1029 break; 1030 case APIC_DELMODE_LOWPRIO: 1031 case APIC_DELMODE_SMI: 1032 default: 1033 /* Unhandled IPI modes (for now) */ 1034 break; 1035 } 1036 } 1037 } 1038 1039 void 1040 vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val) 1041 { 1042 const int vec = val & 0xff; 1043 1044 /* self-IPI is only exposed via x2APIC */ 1045 ASSERT(vlapic_x2mode(vlapic)); 1046 1047 (void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec); 1048 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1); 1049 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1); 1050 } 1051 1052 int 1053 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1054 { 1055 struct LAPIC *lapic = vlapic->apic_page; 1056 int idx, i, bitpos, vector; 1057 uint32_t *irrptr, val; 1058 1059 if (vlapic->ops.sync_state) { 1060 (*vlapic->ops.sync_state)(vlapic); 1061 } 1062 1063 irrptr = &lapic->irr0; 1064 1065 for (i = 7; i >= 0; i--) { 1066 idx = i * 4; 1067 val = atomic_load_acq_int(&irrptr[idx]); 1068 bitpos = fls(val); 1069 if (bitpos != 0) { 1070 vector = i * 32 + (bitpos - 1); 1071 if (PRIO(vector) > PRIO(lapic->ppr)) { 1072 if (vecptr != NULL) 1073 *vecptr = vector; 1074 return (1); 1075 } else 1076 break; 1077 } 1078 } 1079 return (0); 1080 } 1081 1082 void 1083 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1084 { 1085 struct LAPIC *lapic = vlapic->apic_page; 1086 uint32_t *irrptr, *isrptr; 1087 int idx; 1088 1089 KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector)); 1090 1091 if (vlapic->ops.intr_accepted) 1092 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1093 1094 /* 1095 * clear the ready bit for vector being accepted in irr 1096 * and set the vector as in service in isr. 1097 */ 1098 idx = (vector / 32) * 4; 1099 1100 irrptr = &lapic->irr0; 1101 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1102 1103 isrptr = &lapic->isr0; 1104 isrptr[idx] |= 1 << (vector % 32); 1105 1106 /* 1107 * The only way a fresh vector could be accepted into ISR is if it was 1108 * of a higher priority than the current PPR. With that vector now 1109 * in-service, the PPR must be raised. 1110 */ 1111 vlapic_raise_ppr(vlapic, vector); 1112 } 1113 1114 void 1115 vlapic_svr_write_handler(struct vlapic *vlapic) 1116 { 1117 struct LAPIC *lapic; 1118 uint32_t old, new, changed; 1119 1120 lapic = vlapic->apic_page; 1121 1122 new = lapic->svr; 1123 old = vlapic->svr_last; 1124 vlapic->svr_last = new; 1125 1126 changed = old ^ new; 1127 if ((changed & APIC_SVR_ENABLE) != 0) { 1128 if ((new & APIC_SVR_ENABLE) == 0) { 1129 /* 1130 * The apic is now disabled so stop the apic timer 1131 * and mask all the LVT entries. 
1132 */ 1133 VLAPIC_TIMER_LOCK(vlapic); 1134 callout_stop(&vlapic->callout); 1135 VLAPIC_TIMER_UNLOCK(vlapic); 1136 vlapic_mask_lvts(vlapic); 1137 } else { 1138 /* 1139 * The apic is now enabled so restart the apic timer 1140 * if it is configured in periodic mode. 1141 */ 1142 if (vlapic_periodic_timer(vlapic)) 1143 vlapic_icrtmr_write_handler(vlapic); 1144 } 1145 } 1146 } 1147 1148 static bool 1149 vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp) 1150 { 1151 struct LAPIC *lapic = vlapic->apic_page; 1152 uint32_t *reg; 1153 int i; 1154 1155 ASSERT3U(offset & 0x3, ==, 0); 1156 ASSERT3U(offset, <, PAGESIZE); 1157 ASSERT3P(outp, !=, NULL); 1158 1159 uint32_t data = 0; 1160 switch (offset) { 1161 case APIC_OFFSET_ID: 1162 data = lapic->id; 1163 break; 1164 case APIC_OFFSET_VER: 1165 data = lapic->version; 1166 break; 1167 case APIC_OFFSET_TPR: 1168 data = lapic->tpr; 1169 break; 1170 case APIC_OFFSET_APR: 1171 data = lapic->apr; 1172 break; 1173 case APIC_OFFSET_PPR: 1174 data = lapic->ppr; 1175 break; 1176 case APIC_OFFSET_LDR: 1177 data = lapic->ldr; 1178 break; 1179 case APIC_OFFSET_DFR: 1180 data = lapic->dfr; 1181 break; 1182 case APIC_OFFSET_SVR: 1183 data = lapic->svr; 1184 break; 1185 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1186 i = (offset - APIC_OFFSET_ISR0) >> 2; 1187 reg = &lapic->isr0; 1188 data = *(reg + i); 1189 break; 1190 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1191 i = (offset - APIC_OFFSET_TMR0) >> 2; 1192 reg = &lapic->tmr0; 1193 data = *(reg + i); 1194 break; 1195 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1196 i = (offset - APIC_OFFSET_IRR0) >> 2; 1197 reg = &lapic->irr0; 1198 data = atomic_load_acq_int(reg + i); 1199 break; 1200 case APIC_OFFSET_ESR: 1201 data = lapic->esr; 1202 break; 1203 case APIC_OFFSET_ICR_LOW: 1204 data = lapic->icr_lo; 1205 break; 1206 case APIC_OFFSET_ICR_HI: 1207 data = lapic->icr_hi; 1208 break; 1209 case APIC_OFFSET_CMCI_LVT: 1210 case APIC_OFFSET_TIMER_LVT ... 

static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	ASSERT3U(offset & 0x3, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);
	ASSERT3P(outp, !=, NULL);

	uint32_t data = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		data = lapic->ppr;
		break;
	case APIC_OFFSET_LDR:
		data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		data = lapic->icr_lo;
		break;
	case APIC_OFFSET_ICR_HI:
		data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		ASSERT3U(data, ==, *reg);
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
		data = 0;
		break;

	case APIC_OFFSET_SELF_IPI:
	case APIC_OFFSET_EOI:
		/* Write-only register */
		*outp = 0;
		return (false);

	default:
		/* Invalid register */
		*outp = 0;
		return (false);
	}

	*outp = data;
	return (true);
}

static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;

	ASSERT3U(offset & 0xf, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);

	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		vlapic_icrlo_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (vlapic_x2mode(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
		/* Read-only register */
		return (false);

	default:
		/* Invalid register */
		return (false);
	}

	return (true);
}

void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr, *irrptr;

	/* Reset any timer-related state first */
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	lapic->icr_timer = 0;
	lapic->ccr_timer = 0;
	lapic->dcr_timer = 0;
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);

	/*
	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
	 * it is not leftover after the reset.  This is performed after the
	 * APIC timer has been stopped, in case it happened to fire just prior
	 * to being deactivated.
	 */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

	lapic->tpr = 0;
	lapic->apr = 0;
	lapic->ppr = 0;

	lapic->eoi = 0;
	lapic->ldr = 0;
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic->svr_last = lapic->svr;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		atomic_store_rel_int(&isrptr[i * 4], 0);
		atomic_store_rel_int(&tmrptr[i * 4], 0);
		atomic_store_rel_int(&irrptr[i * 4], 0);
	}

	lapic->esr = 0;
	vlapic->esr_pending = 0;
	lapic->icr_lo = 0;
	lapic->icr_hi = 0;

	lapic->lvt_cmci = 0;
	lapic->lvt_timer = 0;
	lapic->lvt_thermal = 0;
	lapic->lvt_pcint = 0;
	lapic->lvt_lint0 = 0;
	lapic->lvt_lint1 = 0;
	lapic->lvt_error = 0;
	vlapic_mask_lvts(vlapic);
}
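
/*
 * Note: the reset state established above mirrors the architected power-on
 * defaults of a local APIC: a DFR of all-ones, an SVR holding spurious
 * vector 0xff with the software-enable bit clear, and every LVT entry masked.
 */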
1412 */ 1413 mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL); 1414 callout_init(&vlapic->callout, 1); 1415 1416 vlapic_reset(vlapic); 1417 } 1418 1419 void 1420 vlapic_cleanup(struct vlapic *vlapic) 1421 { 1422 callout_drain(&vlapic->callout); 1423 mutex_destroy(&vlapic->timer_lock); 1424 } 1425 1426 int 1427 vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp, 1428 uint_t size) 1429 { 1430 ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); 1431 ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); 1432 1433 /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ 1434 if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { 1435 *valp = UINT64_MAX; 1436 return (0); 1437 } 1438 1439 const uint16_t off = gpa - DEFAULT_APIC_BASE; 1440 uint32_t raw = 0; 1441 (void) vlapic_read(vlapic, off & ~0xf, &raw); 1442 1443 /* Shift and mask reads which are small and/or unaligned */ 1444 const uint8_t align = off & 0xf; 1445 if (align < 4) { 1446 *valp = (uint64_t)raw << (align * 8); 1447 } else { 1448 *valp = 0; 1449 } 1450 1451 return (0); 1452 } 1453 1454 int 1455 vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val, 1456 uint_t size) 1457 { 1458 ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); 1459 ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); 1460 1461 /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ 1462 if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { 1463 return (0); 1464 } 1465 1466 const uint16_t off = gpa - DEFAULT_APIC_BASE; 1467 /* Ignore writes which are not 32-bits wide and 16-byte aligned */ 1468 if ((off & 0xf) != 0 || size != 4) { 1469 return (0); 1470 } 1471 1472 (void) vlapic_write(vlapic, off, (uint32_t)val); 1473 return (0); 1474 } 1475 1476 /* Should attempts to change the APIC base address be rejected with a #GP? */ 1477 int vlapic_gp_on_addr_change = 1; 1478 1479 static vm_msr_result_t 1480 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) 1481 { 1482 const uint64_t diff = vlapic->msr_apicbase ^ val; 1483 1484 /* 1485 * Until the LAPIC emulation for switching between xAPIC and x2APIC 1486 * modes is more polished, it will remain off-limits from being altered 1487 * by the guest. 1488 */ 1489 const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC | 1490 APICBASE_BSP; 1491 if ((diff & reserved_bits) != 0) { 1492 return (VMR_GP); 1493 } 1494 1495 /* We do not presently allow the LAPIC access address to be modified. */ 1496 if ((diff & APICBASE_ADDR_MASK) != 0) { 1497 /* 1498 * Explicitly rebuffing such requests with a #GP is the most 1499 * straightforward way to handle the situation, but certain 1500 * consumers (such as the KVM unit tests) may balk at the 1501 * otherwise unexpected exception. 1502 */ 1503 if (vlapic_gp_on_addr_change) { 1504 return (VMR_GP); 1505 } 1506 1507 /* If silence is required, just ignore the address change. 

/* Should attempts to change the APIC base address be rejected with a #GP? */
int vlapic_gp_on_addr_change = 1;

static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	const uint64_t diff = vlapic->msr_apicbase ^ val;

	/*
	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
	 * modes is more polished, it will remain off-limits from being altered
	 * by the guest.
	 */
	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	if ((diff & reserved_bits) != 0) {
		return (VMR_GP);
	}

	/* We do not presently allow the LAPIC access address to be modified */
	if ((diff & APICBASE_ADDR_MASK) != 0) {
		/*
		 * Explicitly rebuffing such requests with a #GP is the most
		 * straightforward way to handle the situation, but certain
		 * consumers (such as the KVM unit tests) may balk at the
		 * otherwise unexpected exception.
		 */
		if (vlapic_gp_on_addr_change) {
			return (VMR_GP);
		}

		/* If silence is required, just ignore the address change */
		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
	}

	vlapic->msr_apicbase = val;
	return (VMR_OK);
}

static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
	ASSERT3U(msr, >=, MSR_APIC_000);
	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

	return ((msr - MSR_APIC_000) << 4);
}

bool
vlapic_owned_msr(uint32_t msr)
{
	if (msr == MSR_APICBASE) {
		return (true);
	}
	if (msr >= MSR_APIC_000 &&
	    msr < (MSR_APIC_000 + 0x100)) {
		return (true);
	}
	return (false);
}

vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
	ASSERT(vlapic_owned_msr(msr));
	ASSERT3P(valp, !=, NULL);

	if (msr == MSR_APICBASE) {
		*valp = vlapic->msr_apicbase;
		return (VMR_OK);
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	uint64_t out = 0;
	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Read from ICR register gets entire (64-bit) value */
		uint32_t low = 0, high = 0;
		bool valid;

		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
		VERIFY(valid);
		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
		VERIFY(valid);

		*valp = ((uint64_t)high << 32) | low;
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	default:
		break;
	}
	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
		return (VMR_GP);
	}
	*valp = out;
	return (VMR_OK);
}

vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
	ASSERT(vlapic_owned_msr(msr));

	if (msr == MSR_APICBASE) {
		return (vlapic_set_apicbase(vlapic, val));
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Write to ICR register sets entire (64-bit) value */
		bool valid;

		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
		VERIFY(valid);
		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
		VERIFY(valid);
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	case APIC_OFFSET_ESR:
		/* Only 0 may be written from x2APIC mode */
		if (val != 0) {
			return (VMR_GP);
		}
		break;
	default:
		break;
	}
	if (!vlapic_write(vlapic, reg, val)) {
		return (VMR_GP);
	}
	return (VMR_OK);
}
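
/*
 * Worked example for vlapic_msr_to_regoff(): the x2APIC TPR MSR is
 * MSR_APIC_000 + 0x8, which maps to register offset 0x8 << 4 = 0x80, the
 * same offset used by the xAPIC MMIO path for the TPR.
 */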
1644 */ 1645 lapic = vlapic->apic_page; 1646 lapic->id = vlapic_get_id(vlapic); 1647 if (vlapic_x2mode(vlapic)) { 1648 lapic->ldr = x2apic_ldr(vlapic); 1649 lapic->dfr = 0; 1650 } else { 1651 lapic->ldr = 0; 1652 lapic->dfr = 0xffffffff; 1653 } 1654 1655 if (state == X2APIC_ENABLED) { 1656 if (vlapic->ops.enable_x2apic_mode) 1657 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1658 } 1659 } 1660 1661 void 1662 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1663 int delmode, int vec) 1664 { 1665 bool lowprio; 1666 int vcpuid; 1667 cpuset_t dmask; 1668 1669 if (delmode != IOART_DELFIXED && 1670 delmode != IOART_DELLOPRI && 1671 delmode != IOART_DELEXINT) { 1672 /* Invalid delivery mode */ 1673 return; 1674 } 1675 lowprio = (delmode == IOART_DELLOPRI); 1676 1677 /* 1678 * We don't provide any virtual interrupt redirection hardware so 1679 * all interrupts originating from the ioapic or MSI specify the 1680 * 'dest' in the legacy xAPIC format. 1681 */ 1682 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1683 1684 while ((vcpuid = CPU_FFS(&dmask)) != 0) { 1685 vcpuid--; 1686 CPU_CLR(vcpuid, &dmask); 1687 if (delmode == IOART_DELEXINT) { 1688 (void) vm_inject_extint(vm, vcpuid); 1689 } else { 1690 (void) lapic_set_intr(vm, vcpuid, vec, level); 1691 } 1692 } 1693 } 1694 1695 void 1696 vlapic_post_intr(struct vlapic *vlapic, int hostcpu) 1697 { 1698 /* 1699 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1700 * 1701 * This is done by leveraging features like Posted Interrupts (Intel) 1702 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1703 * 1704 * If neither of these features are available then fallback to 1705 * sending an IPI to 'hostcpu'. 1706 */ 1707 if (vlapic->ops.post_intr) 1708 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1709 else 1710 poke_cpu(hostcpu); 1711 } 1712 1713 void 1714 vlapic_localize_resources(struct vlapic *vlapic) 1715 { 1716 vmm_glue_callout_localize(&vlapic->callout); 1717 } 1718 1719 void 1720 vlapic_pause(struct vlapic *vlapic) 1721 { 1722 VLAPIC_TIMER_LOCK(vlapic); 1723 callout_stop(&vlapic->callout); 1724 VLAPIC_TIMER_UNLOCK(vlapic); 1725 1726 } 1727 1728 void 1729 vlapic_resume(struct vlapic *vlapic) 1730 { 1731 VLAPIC_TIMER_LOCK(vlapic); 1732 if (vlapic->timer_fire_when != 0) { 1733 vlapic_callout_reset(vlapic); 1734 } 1735 VLAPIC_TIMER_UNLOCK(vlapic); 1736 } 1737 1738 static int 1739 vlapic_data_read(void *datap, const vmm_data_req_t *req) 1740 { 1741 VERIFY3U(req->vdr_class, ==, VDC_LAPIC); 1742 VERIFY3U(req->vdr_version, ==, 1); 1743 VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1)); 1744 1745 struct vlapic *vlapic = datap; 1746 struct vdi_lapic_v1 *out = req->vdr_data; 1747 1748 VLAPIC_TIMER_LOCK(vlapic); 1749 1750 if (vlapic->ops.sync_state) { 1751 (*vlapic->ops.sync_state)(vlapic); 1752 } 1753 1754 out->vl_msr_apicbase = vlapic->msr_apicbase; 1755 out->vl_esr_pending = vlapic->esr_pending; 1756 if (vlapic->timer_fire_when != 0) { 1757 out->vl_timer_target = 1758 vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when); 1759 } else { 1760 out->vl_timer_target = 0; 1761 } 1762 1763 const struct LAPIC *lapic = vlapic->apic_page; 1764 struct vdi_lapic_page_v1 *out_page = &out->vl_lapic; 1765 1766 /* 1767 * While this might appear, at first glance, to be missing some fields, 1768 * they are intentionally omitted: 1769 * - PPR: its contents are always generated at runtime 1770 * - EOI: write-only, and contents are ignored after handling 1771 * - RRD: (aka RRR) read-only and always 0 1772 * - CCR: calculated from 

static int
vlapic_data_read(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	struct vdi_lapic_v1 *out = req->vdr_data;

	VLAPIC_TIMER_LOCK(vlapic);

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	out->vl_msr_apicbase = vlapic->msr_apicbase;
	out->vl_esr_pending = vlapic->esr_pending;
	if (vlapic->timer_fire_when != 0) {
		out->vl_timer_target =
		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
	} else {
		out->vl_timer_target = 0;
	}

	const struct LAPIC *lapic = vlapic->apic_page;
	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;

	/*
	 * While this might appear, at first glance, to be missing some fields,
	 * they are intentionally omitted:
	 * - PPR: its contents are always generated at runtime
	 * - EOI: write-only, and contents are ignored after handling
	 * - RRD: (aka RRR) read-only and always 0
	 * - CCR: calculated from underlying timer data
	 */
	out_page->vlp_id = lapic->id;
	out_page->vlp_version = lapic->version;
	out_page->vlp_tpr = lapic->tpr;
	out_page->vlp_apr = lapic->apr;
	out_page->vlp_ldr = lapic->ldr;
	out_page->vlp_dfr = lapic->dfr;
	out_page->vlp_svr = lapic->svr;
	out_page->vlp_esr = lapic->esr;
	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
	out_page->vlp_icr_timer = lapic->icr_timer;
	out_page->vlp_dcr_timer = lapic->dcr_timer;

	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
	out_page->vlp_lvt_timer = lapic->lvt_timer;
	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
	out_page->vlp_lvt_error = lapic->lvt_error;

	const uint32_t *isrptr = &lapic->isr0;
	const uint32_t *tmrptr = &lapic->tmr0;
	const uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		out_page->vlp_isr[i] = isrptr[i * 4];
		out_page->vlp_tmr[i] = tmrptr[i * 4];
		out_page->vlp_irr[i] = irrptr[i * 4];
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static uint8_t
popc8(uint8_t val)
{
	uint8_t cnt;

	for (cnt = 0; val != 0; val &= (val - 1)) {
		cnt++;
	}
	return (cnt);
}
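
/*
 * Worked example for popc8(): each iteration of the val &= (val - 1) loop
 * clears the lowest set bit, so 0b1010 -> 0b1000 -> 0 counts two bits.  It
 * backs the in-service register validation below.
 */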
1857 */ 1858 if (page->vlp_id != vlapic_get_id(vlapic)) { 1859 return (VVE_BAD_ID); 1860 } 1861 1862 if (page->vlp_version != vlapic->apic_page->version) { 1863 return (VVE_BAD_VERSION); 1864 } 1865 1866 if (page->vlp_tpr > 0xff) { 1867 return (VVE_BAD_TPR); 1868 } 1869 1870 /* Vectors 0-15 are not expected to be handled by the lapic */ 1871 if ((page->vlp_isr[0] & 0xffff) != 0 || 1872 (page->vlp_irr[0] & 0xffff) != 0 || 1873 (page->vlp_tmr[0] & 0xffff) != 0) { 1874 return (VVE_LOW_VECTOR); 1875 } 1876 1877 /* Only one interrupt should be in-service for each priority level */ 1878 for (uint_t i = 0; i < 8; i++) { 1879 if (popc8((uint8_t)page->vlp_isr[i]) > 1 || 1880 popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 || 1881 popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 || 1882 popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) { 1883 return (VVE_ISR_PRIORITY); 1884 } 1885 } 1886 1887 return (VVE_OK); 1888 } 1889 1890 static int 1891 vlapic_data_write(void *datap, const vmm_data_req_t *req) 1892 { 1893 VERIFY3U(req->vdr_class, ==, VDC_LAPIC); 1894 VERIFY3U(req->vdr_version, ==, 1); 1895 VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1)); 1896 1897 struct vlapic *vlapic = datap; 1898 if (vlapic_data_validate(vlapic, req) != VVE_OK) { 1899 return (EINVAL); 1900 } 1901 const struct vdi_lapic_v1 *src = req->vdr_data; 1902 const struct vdi_lapic_page_v1 *page = &src->vl_lapic; 1903 struct LAPIC *lapic = vlapic->apic_page; 1904 1905 VLAPIC_TIMER_LOCK(vlapic); 1906 1907 /* Already ensured by vlapic_data_validate() */ 1908 VERIFY3U(page->vlp_id, ==, lapic->id); 1909 VERIFY3U(page->vlp_version, ==, lapic->version); 1910 1911 vlapic->msr_apicbase = src->vl_msr_apicbase; 1912 vlapic->esr_pending = src->vl_esr_pending; 1913 1914 lapic->tpr = page->vlp_tpr; 1915 lapic->apr = page->vlp_apr; 1916 lapic->ldr = page->vlp_ldr; 1917 lapic->dfr = page->vlp_dfr; 1918 lapic->svr = page->vlp_svr; 1919 lapic->esr = page->vlp_esr; 1920 lapic->icr_lo = (uint32_t)page->vlp_icr; 1921 lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32); 1922 1923 lapic->icr_timer = page->vlp_icr_timer; 1924 lapic->dcr_timer = page->vlp_dcr_timer; 1925 vlapic_update_divider(vlapic); 1926 1927 /* cleanse LDR/DFR */ 1928 vlapic_ldr_write_handler(vlapic); 1929 vlapic_dfr_write_handler(vlapic); 1930 1931 lapic->lvt_cmci = page->vlp_lvt_cmci; 1932 lapic->lvt_timer = page->vlp_lvt_timer; 1933 lapic->lvt_thermal = page->vlp_lvt_thermal; 1934 lapic->lvt_pcint = page->vlp_lvt_pcint; 1935 lapic->lvt_lint0 = page->vlp_lvt_lint0; 1936 lapic->lvt_lint1 = page->vlp_lvt_lint1; 1937 lapic->lvt_error = page->vlp_lvt_error; 1938 /* cleanse LVTs */ 1939 vlapic_refresh_lvts(vlapic); 1940 1941 uint32_t *isrptr = &lapic->isr0; 1942 uint32_t *tmrptr = &lapic->tmr0; 1943 uint32_t *irrptr = &lapic->irr0; 1944 for (uint_t i = 0; i < 8; i++) { 1945 isrptr[i * 4] = page->vlp_isr[i]; 1946 tmrptr[i * 4] = page->vlp_tmr[i]; 1947 irrptr[i * 4] = page->vlp_irr[i]; 1948 } 1949 1950 if (src->vl_timer_target != 0) { 1951 vlapic->timer_fire_when = 1952 vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target); 1953 1954 if (!vm_is_paused(vlapic->vm)) { 1955 vlapic_callout_reset(vlapic); 1956 } 1957 } else { 1958 vlapic->timer_fire_when = 0; 1959 } 1960 1961 if (vlapic->ops.sync_state) { 1962 (*vlapic->ops.sync_state)(vlapic); 1963 } 1964 VLAPIC_TIMER_UNLOCK(vlapic); 1965 1966 return (0); 1967 } 1968 1969 static const vmm_data_version_entry_t lapic_v1 = { 1970 .vdve_class = VDC_LAPIC, 1971 .vdve_version = 1, 1972 .vdve_len_expect = sizeof (struct vdi_lapic_v1), 1973 .vdve_readf = 

static const vmm_data_version_entry_t lapic_v1 = {
	.vdve_class = VDC_LAPIC,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
	.vdve_readf = vlapic_data_read,
	.vdve_writef = vlapic_data_write,
};
VMM_DATA_VERSION(lapic_v1);