1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * evtchn.c 29 * 30 * Communication via hypervisor event channels. 31 * 32 * Copyright (c) 2002-2005, K A Fraser 33 * 34 * This file may be distributed separately from the Linux kernel, or 35 * incorporated into other software packages, subject to the following license: 36 * 37 * Permission is hereby granted, free of charge, to any person obtaining a copy 38 * of this source file (the "Software"), to deal in the Software without 39 * restriction, including without limitation the rights to use, copy, modify, 40 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 41 * and to permit persons to whom the Software is furnished to do so, subject to 42 * the following conditions: 43 * 44 * The above copyright notice and this permission notice shall be included in 45 * all copies or substantial portions of the Software. 
46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 48 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 49 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 50 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 51 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 52 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 53 * IN THE SOFTWARE. 54 */ 55 56 /* some parts derived from netbsd's hypervisor_machdep.c 1.2.2.2 */ 57 58 /* 59 * 60 * Copyright (c) 2004 Christian Limpach. 61 * All rights reserved. 62 * 63 * Redistribution and use in source and binary forms, with or without 64 * modification, are permitted provided that the following conditions 65 * are met: 66 * 1. Redistributions of source code must retain the above copyright 67 * notice, this list of conditions and the following disclaimer. 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in the 70 * documentation and/or other materials provided with the distribution. 71 * 3. This section intentionally left blank. 72 * 4. The name of the author may not be used to endorse or promote products 73 * derived from this software without specific prior written permission. 74 * 75 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 76 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 77 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
78 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 79 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 80 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 81 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 82 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 83 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 84 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 85 */ 86 /* 87 * Section 3 of the above license was updated in response to bug 6379571. 88 */ 89 90 #include <sys/types.h> 91 #include <sys/hypervisor.h> 92 #include <sys/machsystm.h> 93 #include <sys/mutex.h> 94 #include <sys/evtchn_impl.h> 95 #include <sys/ddi_impldefs.h> 96 #include <sys/avintr.h> 97 #include <sys/cpuvar.h> 98 #include <sys/smp_impldefs.h> 99 #include <sys/archsystm.h> 100 #include <sys/sysmacros.h> 101 #include <sys/cmn_err.h> 102 #include <sys/promif.h> 103 #include <sys/debug.h> 104 #include <sys/psm.h> 105 #include <sys/privregs.h> 106 #include <sys/trap.h> 107 #include <sys/atomic.h> 108 #include <sys/cpu.h> 109 #include <sys/psw.h> 110 #include <sys/traptrace.h> 111 #include <sys/stack.h> 112 #include <sys/x_call.h> 113 #include <xen/public/physdev.h> 114 115 /* 116 * This file manages our association between hypervisor event channels and 117 * Solaris's IRQs. This is a one-to-one mapping, with the exception of 118 * IPI IRQs, for which there is one event channel per CPU participating 119 * in the IPI, and the clock VIRQ which also has an event channel per cpu 120 * and the IRQ for /dev/xen/evtchn. The IRQ types are: 121 * 122 * IRQT_VIRQ: 123 * The hypervisor's standard virtual IRQ, used for the clock timer, for 124 * example. This code allows any cpu to bind to one of these, although 125 * some are treated specially (i.e. VIRQ_DEBUG). 126 * Event channel binding is done via EVTCHNOP_bind_virq. 
 *
 * IRQT_PIRQ:
 *	These associate a physical IRQ with an event channel via
 *	EVTCHNOP_bind_pirq.
 *
 * IRQT_IPI:
 *	A cross-call IRQ. Maps to "ncpus" event channels, each of which is
 *	bound to exactly one of the vcpus.  We do not currently support
 *	unbinding of IPIs (since Solaris doesn't need it). Uses
 *	EVTCHNOP_bind_ipi.
 *
 * IRQT_EVTCHN:
 *	A "normal" binding to an event channel, typically used by the frontend
 *	drivers to bind to their backend event channel.
 *
 * IRQT_DEV_EVTCHN:
 *	This is a one-time IRQ used by /dev/xen/evtchn. Unlike other IRQs, we
 *	have a one-IRQ to many-evtchn mapping. We only track evtchn->irq for
 *	these event channels, which are managed via ec_irq_add/rm_evtchn().
 *	We enforce that IRQT_DEV_EVTCHN's representative evtchn (->ii_evtchn)
 *	is zero, and make any calls to irq_evtchn() an error, to prevent
 *	accidentally attempting to use the illegal evtchn 0.
 *
 * Suspend/resume
 *
 *	During a suspend/resume cycle, we need to tear down the event channels.
 *	All other mapping data is kept. The drivers will remove their own event
 *	channels via xendev on receiving a DDI_SUSPEND.  This leaves us with
 *	the IPIs and VIRQs, which we handle in ec_suspend() and ec_resume()
 *	below.
 *
 * CPU binding
 *
 *	When an event channel is bound to a CPU, we set a bit in a mask present
 *	in the machcpu (evt_affinity) to indicate that this CPU can accept this
 *	event channel.  For both IPIs and VIRQs, this binding is fixed at
 *	allocation time and we never modify it.  All other event channels are
 *	bound via the PSM either as part of add_avintr(), or interrupt
 *	redistribution (xen_psm_dis/enable_intr()) as a result of CPU
 *	offline/online.
 *
 * Locking
 *
 *	Updates are done holding the ec_lock.  The xen_callback_handler()
 *	routine reads the mapping data in a lockless fashion.
Additionally
 *	suspend takes ec_lock to prevent update races during a suspend/resume
 *	cycle.  The IPI info is also examined without the lock; this is OK
 *	since we only ever change IPI info during initial setup and resume.
 */

/* True when 'irq' is the IRQ recorded in the XC_CPUPOKE_PIL IPI slot. */
#define	IRQ_IS_CPUPOKE(irq) (ipi_info[XC_CPUPOKE_PIL].mi_irq == (irq))

/*
 * Non-zero when event channel 'ev' has its bit set in the shared-info
 * evtchn_mask[] words.
 */
#define	EVTCHN_MASKED(ev) \
	(HYPERVISOR_shared_info->evtchn_mask[(ev) >> EVTCHN_SHIFT] & \
	(1ul << ((ev) & ((1ul << EVTCHN_SHIFT) - 1))))

/*
 * Event channel -> IRQ map.  Entries are set to INVALID_IRQ by ec_init()
 * and again by unbind_evtchn() when a channel is closed.
 */
static short evtchn_to_irq[NR_EVENT_CHANNELS];
/*
 * Per-event-channel CPU set; update_evtchn_affinity() mirrors it into each
 * CPU's evt_affinity mask.
 */
static cpuset_t evtchn_cpus[NR_EVENT_CHANNELS];
static int	evtchn_owner[NR_EVENT_CHANNELS];
#ifdef DEBUG
static kthread_t *evtchn_owner_thread[NR_EVENT_CHANNELS];
#endif

/* Indexed by IRQ, by IPL and by VIRQ number respectively. */
static irq_info_t irq_info[NR_IRQS];
static mec_info_t ipi_info[MAXIPL];
static mec_info_t virq_info[NR_VIRQS];

/*
 * See the locking description above.
 */
kmutex_t ec_lock;

/*
 * Bitmap indicating which PIRQs require the hypervisor to be notified
 * on unmask.
202 */ 203 static unsigned long pirq_needs_eoi[NR_PIRQS / (sizeof (unsigned long) * NBBY)]; 204 205 static int ec_debug_irq = INVALID_IRQ; 206 int ec_dev_irq = INVALID_IRQ; 207 208 int 209 xen_bind_virq(unsigned int virq, processorid_t cpu, int *port) 210 { 211 evtchn_bind_virq_t bind; 212 int err; 213 214 bind.virq = virq; 215 bind.vcpu = cpu; 216 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind)) == 0) 217 *port = bind.port; 218 else 219 err = xen_xlate_errcode(err); 220 return (err); 221 } 222 223 int 224 xen_bind_interdomain(int domid, int remote_port, int *port) 225 { 226 evtchn_bind_interdomain_t bind; 227 int err; 228 229 bind.remote_dom = domid; 230 bind.remote_port = remote_port; 231 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, 232 &bind)) == 0) 233 *port = bind.local_port; 234 else 235 err = xen_xlate_errcode(err); 236 return (err); 237 } 238 239 int 240 xen_alloc_unbound_evtchn(int domid, int *evtchnp) 241 { 242 evtchn_alloc_unbound_t alloc; 243 int err; 244 245 alloc.dom = DOMID_SELF; 246 alloc.remote_dom = domid; 247 248 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, 249 &alloc)) == 0) { 250 *evtchnp = alloc.port; 251 /* ensure evtchn is masked till we're ready to use it */ 252 (void) ec_mask_evtchn(*evtchnp); 253 } else { 254 err = xen_xlate_errcode(err); 255 } 256 257 return (err); 258 } 259 260 static int 261 xen_close_evtchn(int evtchn) 262 { 263 evtchn_close_t close; 264 int err; 265 266 close.port = evtchn; 267 err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); 268 if (err) 269 err = xen_xlate_errcode(err); 270 return (err); 271 } 272 273 static int 274 xen_bind_ipi(processorid_t cpu) 275 { 276 evtchn_bind_ipi_t bind; 277 278 ASSERT(MUTEX_HELD(&ec_lock)); 279 280 bind.vcpu = cpu; 281 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind) != 0) 282 panic("xen_bind_ipi() failed"); 283 return (bind.port); 284 } 285 286 /* Send future instances of this interrupt to other vcpu. 
*/ 287 static void 288 xen_bind_vcpu(int evtchn, int cpu) 289 { 290 evtchn_bind_vcpu_t bind; 291 292 ASSERT(MUTEX_HELD(&ec_lock)); 293 294 bind.port = evtchn; 295 bind.vcpu = cpu; 296 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind) != 0) 297 panic("xen_bind_vcpu() failed"); 298 } 299 300 static int 301 xen_bind_pirq(int pirq) 302 { 303 evtchn_bind_pirq_t bind; 304 int ret; 305 306 bind.pirq = pirq; 307 bind.flags = BIND_PIRQ__WILL_SHARE; 308 if ((ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind)) != 0) 309 panic("xen_bind_pirq() failed (err %d)", ret); 310 return (bind.port); 311 } 312 313 /* unmask an evtchn and send upcall to appropriate vcpu if pending bit is set */ 314 static void 315 xen_evtchn_unmask(int evtchn) 316 { 317 evtchn_unmask_t unmask; 318 319 unmask.port = evtchn; 320 if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0) 321 panic("xen_evtchn_unmask() failed"); 322 } 323 324 static void 325 update_evtchn_affinity(int evtchn) 326 { 327 cpu_t *cp; 328 struct xen_evt_data *cpe; 329 330 ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ); 331 ASSERT(MUTEX_HELD(&ec_lock)); 332 333 /* 334 * Use lockless search of cpu_list, similar to mutex_vector_enter(). 
335 */ 336 kpreempt_disable(); 337 cp = cpu_list; 338 do { 339 cpe = cp->cpu_m.mcpu_evt_pend; 340 if (CPU_IN_SET(evtchn_cpus[evtchn], cp->cpu_id)) 341 SET_EVTCHN_BIT(evtchn, cpe->evt_affinity); 342 else 343 CLEAR_EVTCHN_BIT(evtchn, cpe->evt_affinity); 344 } while ((cp = cp->cpu_next) != cpu_list); 345 kpreempt_enable(); 346 } 347 348 static void 349 bind_evtchn_to_cpuset(int evtchn, cpuset_t cpus) 350 { 351 ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ); 352 353 CPUSET_ZERO(evtchn_cpus[evtchn]); 354 CPUSET_OR(evtchn_cpus[evtchn], cpus); 355 update_evtchn_affinity(evtchn); 356 } 357 358 static void 359 clear_evtchn_affinity(int evtchn) 360 { 361 CPUSET_ZERO(evtchn_cpus[evtchn]); 362 update_evtchn_affinity(evtchn); 363 } 364 365 static void 366 alloc_irq_evtchn(int irq, int index, int evtchn, int cpu) 367 { 368 irq_info_t *irqp = &irq_info[irq]; 369 370 switch (irqp->ii_type) { 371 case IRQT_IPI: 372 ipi_info[index].mi_evtchns[cpu] = evtchn; 373 irqp->ii_u.index = index; 374 break; 375 case IRQT_VIRQ: 376 virq_info[index].mi_evtchns[cpu] = evtchn; 377 irqp->ii_u.index = index; 378 break; 379 default: 380 irqp->ii_u.evtchn = evtchn; 381 break; 382 } 383 384 evtchn_to_irq[evtchn] = irq; 385 386 /* 387 * If a CPU is not specified, we expect to bind it to a CPU later via 388 * the PSM. 
389 */ 390 if (cpu != -1) { 391 cpuset_t tcpus; 392 CPUSET_ONLY(tcpus, cpu); 393 bind_evtchn_to_cpuset(evtchn, tcpus); 394 } 395 } 396 397 static int 398 alloc_irq(int type, int index, int evtchn, int cpu) 399 { 400 int irq; 401 irq_info_t *irqp; 402 403 ASSERT(MUTEX_HELD(&ec_lock)); 404 ASSERT(type != IRQT_IPI || cpu != -1); 405 406 for (irq = 0; irq < NR_IRQS; irq++) { 407 if (irq_info[irq].ii_type == IRQT_UNBOUND) 408 break; 409 } 410 411 if (irq == NR_IRQS) 412 panic("No available IRQ to bind to: increase NR_IRQS!\n"); 413 414 irqp = &irq_info[irq]; 415 416 irqp->ii_type = type; 417 /* 418 * Set irq/has_handler field to zero which means handler not installed 419 */ 420 irqp->ii_u2.has_handler = 0; 421 422 alloc_irq_evtchn(irq, index, evtchn, cpu); 423 return (irq); 424 } 425 426 static int 427 irq_evtchn(irq_info_t *irqp) 428 { 429 int evtchn; 430 431 ASSERT(irqp->ii_type != IRQT_DEV_EVTCHN); 432 433 switch (irqp->ii_type) { 434 case IRQT_IPI: 435 ASSERT(irqp->ii_u.index != 0); 436 evtchn = ipi_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id]; 437 break; 438 case IRQT_VIRQ: 439 evtchn = virq_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id]; 440 break; 441 default: 442 evtchn = irqp->ii_u.evtchn; 443 break; 444 } 445 446 return (evtchn); 447 } 448 449 int 450 ec_is_edge_pirq(int irq) 451 { 452 return (irq_info[irq].ii_type == IRQT_PIRQ && 453 !TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0])); 454 } 455 456 static void 457 unbind_evtchn(ushort_t *evtchnp) 458 { 459 int err; 460 461 ASSERT(MUTEX_HELD(&ec_lock)); 462 463 ASSERT(*evtchnp != 0); 464 465 err = xen_close_evtchn(*evtchnp); 466 ASSERT(err == 0); 467 clear_evtchn_affinity(*evtchnp); 468 evtchn_to_irq[*evtchnp] = INVALID_IRQ; 469 *evtchnp = 0; 470 } 471 472 static void 473 pirq_unmask_notify(int pirq) 474 { 475 struct physdev_eoi eoi; 476 477 if (TEST_EVTCHN_BIT(pirq, &pirq_needs_eoi[0])) { 478 eoi.irq = pirq; 479 (void) HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); 480 } 481 } 482 483 static void 484 
pirq_query_unmask(int pirq) 485 { 486 struct physdev_irq_status_query irq_status; 487 488 irq_status.irq = pirq; 489 (void) HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status); 490 CLEAR_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]); 491 if (irq_status.flags & XENIRQSTAT_needs_eoi) 492 SET_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]); 493 } 494 495 static void 496 end_pirq(int irq) 497 { 498 int evtchn = irq_evtchn(&irq_info[irq]); 499 500 /* 501 * If it is an edge-triggered interrupt we have already unmasked 502 */ 503 if (TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0])) { 504 ec_unmask_evtchn(evtchn); 505 pirq_unmask_notify(IRQ_TO_PIRQ(irq)); 506 } 507 } 508 509 /* 510 * Bind an event channel to a vcpu 511 */ 512 void 513 ec_bind_vcpu(int evtchn, int cpu) 514 { 515 mutex_enter(&ec_lock); 516 xen_bind_vcpu(evtchn, cpu); 517 mutex_exit(&ec_lock); 518 } 519 520 /* 521 * Set up a physical device irq to be associated with an event channel. 522 */ 523 void 524 ec_setup_pirq(int irq, int ipl, cpuset_t *cpusp) 525 { 526 int evtchn; 527 irq_info_t *irqp = &irq_info[irq]; 528 529 /* 530 * Test if this PIRQ is already bound to an evtchn, 531 * which means it is a shared IRQ and we don't want to 532 * bind and do some initial setup that has already been 533 * done for this irq on a previous trip through this code. 
534 */ 535 if (irqp->ii_u.evtchn == INVALID_EVTCHN) { 536 evtchn = xen_bind_pirq(irq); 537 538 pirq_query_unmask(IRQ_TO_PIRQ(irq)); 539 540 irqp->ii_type = IRQT_PIRQ; 541 irqp->ii_u.evtchn = evtchn; 542 543 evtchn_to_irq[evtchn] = irq; 544 irqp->ii_u2.ipl = ipl; 545 ec_set_irq_affinity(irq, *cpusp); 546 ec_enable_irq(irq); 547 pirq_unmask_notify(IRQ_TO_PIRQ(irq)); 548 } else { 549 ASSERT(irqp->ii_u2.ipl != 0); 550 cmn_err(CE_NOTE, "!IRQ%d is shared", irq); 551 if (ipl > irqp->ii_u2.ipl) 552 irqp->ii_u2.ipl = ipl; 553 *cpusp = evtchn_cpus[irqp->ii_u.evtchn]; 554 } 555 } 556 557 void 558 ec_unbind_irq(int irq) 559 { 560 irq_info_t *irqp = &irq_info[irq]; 561 mec_info_t *virqp; 562 int drop_lock = 0; 563 int type, i; 564 565 /* 566 * Nasty, but we need this during suspend. 567 */ 568 if (mutex_owner(&ec_lock) != curthread) { 569 mutex_enter(&ec_lock); 570 drop_lock = 1; 571 } 572 573 type = irqp->ii_type; 574 575 ASSERT((type == IRQT_EVTCHN) || (type == IRQT_PIRQ) || 576 (type == IRQT_VIRQ)); 577 578 if ((type == IRQT_EVTCHN) || (type == IRQT_PIRQ)) { 579 /* There's only one event channel associated with this irq */ 580 unbind_evtchn(&irqp->ii_u.evtchn); 581 } else if (type == IRQT_VIRQ) { 582 /* 583 * Each cpu on the system can have it's own event channel 584 * associated with a virq. Unbind them all. 585 */ 586 virqp = &virq_info[irqp->ii_u.index]; 587 for (i = 0; i < NCPU; i++) { 588 if (virqp->mi_evtchns[i] != 0) 589 unbind_evtchn(&virqp->mi_evtchns[i]); 590 } 591 /* Mark the virq structure as invalid. */ 592 virqp->mi_irq = INVALID_IRQ; 593 } 594 595 bzero(irqp, sizeof (*irqp)); 596 /* Re-reserve PIRQ. */ 597 if (type == IRQT_PIRQ) 598 irqp->ii_type = IRQT_PIRQ; 599 600 if (drop_lock) 601 mutex_exit(&ec_lock); 602 } 603 604 /* 605 * Rebind an event channel for delivery to a CPU. 
606 */ 607 void 608 ec_set_irq_affinity(int irq, cpuset_t dest) 609 { 610 int evtchn, tcpu; 611 irq_info_t *irqp = &irq_info[irq]; 612 613 mutex_enter(&ec_lock); 614 615 ASSERT(irq < NR_IRQS); 616 ASSERT(irqp->ii_type != IRQT_UNBOUND); 617 618 /* 619 * Binding is done at allocation time for these types, so we should 620 * never modify them. 621 */ 622 if (irqp->ii_type == IRQT_IPI || irqp->ii_type == IRQT_VIRQ || 623 irqp->ii_type == IRQT_DEV_EVTCHN) { 624 mutex_exit(&ec_lock); 625 return; 626 } 627 628 CPUSET_FIND(dest, tcpu); 629 ASSERT(tcpu != CPUSET_NOTINSET); 630 631 evtchn = irq_evtchn(irqp); 632 633 xen_bind_vcpu(evtchn, tcpu); 634 635 bind_evtchn_to_cpuset(evtchn, dest); 636 637 mutex_exit(&ec_lock); 638 639 /* 640 * Now send the new target processor a NOP IPI. 641 * It will check for any pending interrupts, and so service any that 642 * got delivered to the wrong processor by mistake. 643 */ 644 if (ncpus > 1) 645 poke_cpu(tcpu); 646 } 647 648 int 649 ec_set_irq_priority(int irq, int pri) 650 { 651 irq_info_t *irqp; 652 653 if (irq >= NR_IRQS) 654 return (-1); 655 656 irqp = &irq_info[irq]; 657 658 if (irqp->ii_type == IRQT_UNBOUND) 659 return (-1); 660 661 irqp->ii_u2.ipl = pri; 662 663 return (0); 664 } 665 666 void 667 ec_clear_irq_priority(int irq) 668 { 669 irq_info_t *irqp = &irq_info[irq]; 670 671 ASSERT(irq < NR_IRQS); 672 ASSERT(irqp->ii_type != IRQT_UNBOUND); 673 674 irqp->ii_u2.ipl = 0; 675 } 676 677 int 678 ec_bind_evtchn_to_irq(int evtchn) 679 { 680 mutex_enter(&ec_lock); 681 682 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ); 683 684 (void) alloc_irq(IRQT_EVTCHN, 0, evtchn, -1); 685 686 mutex_exit(&ec_lock); 687 return (evtchn_to_irq[evtchn]); 688 } 689 690 int 691 ec_bind_virq_to_irq(int virq, int cpu) 692 { 693 int err; 694 int evtchn; 695 mec_info_t *virqp; 696 697 virqp = &virq_info[virq]; 698 mutex_enter(&ec_lock); 699 700 err = xen_bind_virq(virq, cpu, &evtchn); 701 ASSERT(err == 0); 702 703 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ); 
704 705 if (virqp->mi_irq == INVALID_IRQ) { 706 virqp->mi_irq = alloc_irq(IRQT_VIRQ, virq, evtchn, cpu); 707 } else { 708 alloc_irq_evtchn(virqp->mi_irq, virq, evtchn, cpu); 709 } 710 711 mutex_exit(&ec_lock); 712 713 return (virqp->mi_irq); 714 } 715 716 int 717 ec_bind_ipi_to_irq(int ipl, int cpu) 718 { 719 int evtchn; 720 ulong_t flags; 721 mec_info_t *ipip; 722 723 mutex_enter(&ec_lock); 724 725 ipip = &ipi_info[ipl]; 726 727 evtchn = xen_bind_ipi(cpu); 728 729 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ); 730 731 if (ipip->mi_irq == INVALID_IRQ) { 732 ipip->mi_irq = alloc_irq(IRQT_IPI, ipl, evtchn, cpu); 733 } else { 734 alloc_irq_evtchn(ipip->mi_irq, ipl, evtchn, cpu); 735 } 736 737 /* 738 * Unmask the new evtchn so that it can be seen by the target cpu 739 */ 740 flags = intr_clear(); 741 ec_unmask_evtchn(evtchn); 742 intr_restore(flags); 743 744 mutex_exit(&ec_lock); 745 return (ipip->mi_irq); 746 } 747 748 /* 749 * When bringing up a CPU, bind to all the IPIs that CPU0 bound. 750 */ 751 void 752 ec_bind_cpu_ipis(int cpu) 753 { 754 int i; 755 756 for (i = 0; i < MAXIPL; i++) { 757 mec_info_t *ipip = &ipi_info[i]; 758 if (ipip->mi_irq == INVALID_IRQ) 759 continue; 760 761 (void) ec_bind_ipi_to_irq(i, cpu); 762 } 763 } 764 765 /* 766 * Can this IRQ be rebound to another CPU? 767 */ 768 int 769 ec_irq_rebindable(int irq) 770 { 771 irq_info_t *irqp = &irq_info[irq]; 772 773 if (irqp->ii_u.evtchn == 0) 774 return (0); 775 776 return (irqp->ii_type == IRQT_EVTCHN || irqp->ii_type == IRQT_PIRQ); 777 } 778 779 /* 780 * Should this IRQ be unbound from this CPU (which is being offlined) to 781 * another? 
782 */ 783 int 784 ec_irq_needs_rebind(int irq, int cpu) 785 { 786 irq_info_t *irqp = &irq_info[irq]; 787 788 return (ec_irq_rebindable(irq) && 789 CPU_IN_SET(evtchn_cpus[irqp->ii_u.evtchn], cpu)); 790 } 791 792 void 793 ec_send_ipi(int ipl, int cpu) 794 { 795 mec_info_t *ipip = &ipi_info[ipl]; 796 797 ASSERT(ipip->mi_irq != INVALID_IRQ); 798 799 ec_notify_via_evtchn(ipip->mi_evtchns[cpu]); 800 } 801 802 void 803 ec_try_ipi(int ipl, int cpu) 804 { 805 mec_info_t *ipip = &ipi_info[ipl]; 806 807 if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0) 808 return; 809 810 ec_notify_via_evtchn(ipip->mi_evtchns[cpu]); 811 } 812 813 void 814 ec_irq_add_evtchn(int irq, int evtchn) 815 { 816 mutex_enter(&ec_lock); 817 818 /* 819 * See description of IRQT_DEV_EVTCHN above. 820 */ 821 ASSERT(irq == ec_dev_irq); 822 823 alloc_irq_evtchn(irq, 0, evtchn, 0); 824 /* 825 * We enforce that the representative event channel for IRQT_DEV_EVTCHN 826 * is zero, so PSM operations on it have no effect. 827 */ 828 irq_info[irq].ii_u.evtchn = 0; 829 mutex_exit(&ec_lock); 830 } 831 832 void 833 ec_irq_rm_evtchn(int irq, int evtchn) 834 { 835 ushort_t ec = evtchn; 836 837 mutex_enter(&ec_lock); 838 ASSERT(irq == ec_dev_irq); 839 unbind_evtchn(&ec); 840 mutex_exit(&ec_lock); 841 } 842 843 /* 844 * Allocate an /dev/xen/evtchn IRQ. See the big comment at the top 845 * for an explanation. 
846 */ 847 int 848 ec_dev_alloc_irq(void) 849 { 850 int i; 851 irq_info_t *irqp; 852 853 for (i = 0; i < NR_IRQS; i++) { 854 if (irq_info[i].ii_type == IRQT_UNBOUND) 855 break; 856 } 857 858 ASSERT(i != NR_IRQS); 859 860 irqp = &irq_info[i]; 861 irqp->ii_type = IRQT_DEV_EVTCHN; 862 irqp->ii_u2.ipl = IPL_EVTCHN; 863 /* 864 * Force the evtchn to zero for the special evtchn device irq 865 */ 866 irqp->ii_u.evtchn = 0; 867 return (i); 868 } 869 870 void 871 ec_enable_irq(unsigned int irq) 872 { 873 ulong_t flag; 874 irq_info_t *irqp = &irq_info[irq]; 875 876 if (irqp->ii_type == IRQT_DEV_EVTCHN) 877 return; 878 879 flag = intr_clear(); 880 ec_unmask_evtchn(irq_evtchn(irqp)); 881 intr_restore(flag); 882 } 883 884 void 885 ec_disable_irq(unsigned int irq) 886 { 887 irq_info_t *irqp = &irq_info[irq]; 888 889 if (irqp->ii_type == IRQT_DEV_EVTCHN) 890 return; 891 892 /* 893 * Spin till we are the one to mask the evtchn 894 * Ensures no one else can be servicing this evtchn. 895 */ 896 while (!ec_mask_evtchn(irq_evtchn(irqp))) 897 SMT_PAUSE(); 898 } 899 900 static int 901 ec_evtchn_pending(uint_t ev) 902 { 903 uint_t evi; 904 shared_info_t *si = HYPERVISOR_shared_info; 905 906 evi = ev >> EVTCHN_SHIFT; 907 ev &= (1ul << EVTCHN_SHIFT) - 1; 908 return ((si->evtchn_pending[evi] & (1ul << ev)) != 0); 909 } 910 911 int 912 ec_pending_irq(unsigned int irq) 913 { 914 int evtchn = irq_evtchn(&irq_info[irq]); 915 916 return (ec_evtchn_pending(evtchn)); 917 } 918 919 void 920 ec_clear_irq(int irq) 921 { 922 irq_info_t *irqp = &irq_info[irq]; 923 int evtchn; 924 925 if (irqp->ii_type == IRQT_DEV_EVTCHN) 926 return; 927 928 ASSERT(irqp->ii_type != IRQT_UNBOUND); 929 930 evtchn = irq_evtchn(irqp); 931 932 ASSERT(EVTCHN_MASKED(evtchn)); 933 ec_clear_evtchn(evtchn); 934 } 935 936 void 937 ec_unmask_irq(int irq) 938 { 939 ulong_t flags; 940 irq_info_t *irqp = &irq_info[irq]; 941 942 flags = intr_clear(); 943 switch (irqp->ii_type) { 944 case IRQT_PIRQ: 945 end_pirq(irq); 946 break; 947 case 
IRQT_DEV_EVTCHN: 948 break; 949 default: 950 ec_unmask_evtchn(irq_evtchn(irqp)); 951 break; 952 } 953 intr_restore(flags); 954 } 955 956 void 957 ec_try_unmask_irq(int irq) 958 { 959 ulong_t flags; 960 irq_info_t *irqp = &irq_info[irq]; 961 int evtchn; 962 963 flags = intr_clear(); 964 switch (irqp->ii_type) { 965 case IRQT_PIRQ: 966 end_pirq(irq); 967 break; 968 case IRQT_DEV_EVTCHN: 969 break; 970 default: 971 if ((evtchn = irq_evtchn(irqp)) != 0) 972 ec_unmask_evtchn(evtchn); 973 break; 974 } 975 intr_restore(flags); 976 } 977 978 /* 979 * Poll until an event channel is ready or 'check_func' returns true. This can 980 * only be used in a situation where interrupts are masked, otherwise we have a 981 * classic time-of-check vs. time-of-use race. 982 */ 983 void 984 ec_wait_on_evtchn(int evtchn, int (*check_func)(void *), void *arg) 985 { 986 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 987 while (!check_func(arg)) 988 (void) HYPERVISOR_yield(); 989 return; 990 } 991 992 ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 993 994 for (;;) { 995 evtchn_port_t ports[1]; 996 997 ports[0] = evtchn; 998 999 ec_clear_evtchn(evtchn); 1000 1001 if (check_func(arg)) 1002 return; 1003 1004 (void) HYPERVISOR_poll(ports, 1, 0); 1005 } 1006 } 1007 1008 void 1009 ec_wait_on_ipi(int ipl, int (*check_func)(void *), void *arg) 1010 { 1011 mec_info_t *ipip = &ipi_info[ipl]; 1012 1013 if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0) 1014 return; 1015 1016 ec_wait_on_evtchn(ipip->mi_evtchns[CPU->cpu_id], check_func, arg); 1017 } 1018 1019 void 1020 ec_suspend(void) 1021 { 1022 irq_info_t *irqp; 1023 ushort_t *evtchnp; 1024 int i; 1025 int c; 1026 1027 ASSERT(MUTEX_HELD(&ec_lock)); 1028 1029 for (i = 0; i < MAXIPL; i++) { 1030 if (ipi_info[i].mi_irq == INVALID_IRQ) 1031 continue; 1032 1033 for (c = 0; c < NCPU; c++) { 1034 if (cpu[c] == NULL) 1035 continue; 1036 1037 if (CPU_IN_SET(cpu_suspend_lost_set, c)) 1038 continue; 1039 1040 evtchnp = &ipi_info[i].mi_evtchns[c]; 1041 
ASSERT(*evtchnp != 0); 1042 unbind_evtchn(evtchnp); 1043 } 1044 } 1045 1046 for (i = 0; i < NR_VIRQS; i++) { 1047 if (virq_info[i].mi_irq == INVALID_IRQ) 1048 continue; 1049 1050 /* 1051 * If we're sharing a single event channel across all CPUs, we 1052 * should only unbind once. 1053 */ 1054 if (virq_info[i].mi_shared) { 1055 evtchnp = &virq_info[i].mi_evtchns[0]; 1056 unbind_evtchn(evtchnp); 1057 for (c = 1; c < NCPU; c++) 1058 virq_info[i].mi_evtchns[c] = 0; 1059 } else { 1060 for (c = 0; c < NCPU; c++) { 1061 if (cpu[c] == NULL) 1062 continue; 1063 1064 evtchnp = &virq_info[i].mi_evtchns[c]; 1065 if (*evtchnp != 0) 1066 unbind_evtchn(evtchnp); 1067 } 1068 } 1069 } 1070 1071 for (i = 0; i < NR_IRQS; i++) { 1072 irqp = &irq_info[i]; 1073 1074 switch (irqp->ii_type) { 1075 case IRQT_EVTCHN: 1076 case IRQT_DEV_EVTCHN: 1077 (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 1078 break; 1079 case IRQT_PIRQ: 1080 if (irqp->ii_u.evtchn != 0) 1081 (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 1082 break; 1083 default: 1084 break; 1085 } 1086 } 1087 } 1088 1089 /* 1090 * The debug irq is special, we only have one evtchn and irq but we allow all 1091 * cpus to service it. It's marked as shared and we propogate the event 1092 * channel into all CPUs by hand. 
1093 */ 1094 static void 1095 share_virq(mec_info_t *virqp) 1096 { 1097 int evtchn = virqp->mi_evtchns[0]; 1098 cpuset_t tset; 1099 int i; 1100 1101 ASSERT(evtchn != 0); 1102 1103 virqp->mi_shared = 1; 1104 1105 for (i = 1; i < NCPU; i++) 1106 virqp->mi_evtchns[i] = evtchn; 1107 CPUSET_ALL(tset); 1108 bind_evtchn_to_cpuset(evtchn, tset); 1109 } 1110 1111 static void 1112 virq_resume(int virq) 1113 { 1114 mec_info_t *virqp = &virq_info[virq]; 1115 int evtchn; 1116 int i, err; 1117 1118 for (i = 0; i < NCPU; i++) { 1119 cpuset_t tcpus; 1120 1121 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i)) 1122 continue; 1123 1124 err = xen_bind_virq(virq, i, &evtchn); 1125 ASSERT(err == 0); 1126 1127 virqp->mi_evtchns[i] = evtchn; 1128 evtchn_to_irq[evtchn] = virqp->mi_irq; 1129 CPUSET_ONLY(tcpus, i); 1130 bind_evtchn_to_cpuset(evtchn, tcpus); 1131 ec_unmask_evtchn(evtchn); 1132 /* 1133 * only timer VIRQ is bound to all cpus 1134 */ 1135 if (virq != VIRQ_TIMER) 1136 break; 1137 } 1138 1139 if (virqp->mi_shared) 1140 share_virq(virqp); 1141 } 1142 1143 static void 1144 ipi_resume(int ipl) 1145 { 1146 mec_info_t *ipip = &ipi_info[ipl]; 1147 int i; 1148 1149 for (i = 0; i < NCPU; i++) { 1150 cpuset_t tcpus; 1151 int evtchn; 1152 1153 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i)) 1154 continue; 1155 1156 evtchn = xen_bind_ipi(i); 1157 ipip->mi_evtchns[i] = evtchn; 1158 evtchn_to_irq[evtchn] = ipip->mi_irq; 1159 CPUSET_ONLY(tcpus, i); 1160 bind_evtchn_to_cpuset(evtchn, tcpus); 1161 ec_unmask_evtchn(evtchn); 1162 } 1163 } 1164 1165 void 1166 ec_resume(void) 1167 { 1168 int i; 1169 1170 /* New event-channel space is not 'live' yet. 
*/ 1171 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1172 (void) ec_mask_evtchn(i); 1173 1174 for (i = 0; i < MAXIPL; i++) { 1175 if (ipi_info[i].mi_irq == INVALID_IRQ) 1176 continue; 1177 ipi_resume(i); 1178 } 1179 1180 for (i = 0; i < NR_VIRQS; i++) { 1181 if (virq_info[i].mi_irq == INVALID_IRQ) 1182 continue; 1183 virq_resume(i); 1184 } 1185 } 1186 1187 int 1188 ec_init(void) 1189 { 1190 int i; 1191 mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7)); 1192 1193 for (i = 0; i < NR_EVENT_CHANNELS; i++) { 1194 CPUSET_ZERO(evtchn_cpus[i]); 1195 evtchn_to_irq[i] = INVALID_IRQ; 1196 (void) ec_mask_evtchn(i); 1197 } 1198 1199 for (i = 0; i < MAXIPL; i++) 1200 ipi_info[i].mi_irq = INVALID_IRQ; 1201 1202 for (i = 0; i < NR_VIRQS; i++) 1203 virq_info[i].mi_irq = INVALID_IRQ; 1204 1205 /* 1206 * Phys IRQ space is statically bound (1:1 mapping), grab the IRQs 1207 * now. 1208 */ 1209 for (i = PIRQ_BASE; i < NR_PIRQS; i++) { 1210 irq_info[PIRQ_TO_IRQ(i)].ii_type = IRQT_PIRQ; 1211 } 1212 1213 return (0); 1214 } 1215 1216 void 1217 ec_init_debug_irq() 1218 { 1219 int irq; 1220 1221 irq = ec_bind_virq_to_irq(VIRQ_DEBUG, 0); 1222 (void) add_avintr(NULL, IPL_DEBUG, xen_debug_handler, 1223 "debug", irq, NULL, NULL, NULL, NULL); 1224 1225 mutex_enter(&ec_lock); 1226 share_virq(&virq_info[irq_info[irq].ii_u.index]); 1227 mutex_exit(&ec_lock); 1228 ec_debug_irq = irq; 1229 } 1230 1231 #define UNBLOCKED_EVENTS(si, ix, cpe, cpu_id) \ 1232 ((si)->evtchn_pending[ix] & ~(si)->evtchn_mask[ix] & \ 1233 (cpe)->evt_affinity[ix]) 1234 1235 1236 /* 1237 * This is the entry point for processing events from xen 1238 * 1239 * (See the commentary associated with the shared_info_st structure 1240 * in hypervisor-if.h) 1241 * 1242 * Since the event channel mechanism doesn't really implement the 1243 * concept of priority like hardware interrupt controllers, we simulate 1244 * that in software here using the cpu priority field and the pending 1245 * interrupts field. 
Events/interrupts that are not able to be serviced 1246 * now because they are at a lower priority than the current cpu priority 1247 * cause a level bit to be recorded in the pending interrupts word. When 1248 * the priority is lowered (either by spl or interrupt exit code) the pending 1249 * levels are checked and an upcall is scheduled if there are events/interrupts 1250 * that have become deliverable. 1251 */ 1252 void 1253 xen_callback_handler(struct regs *rp, trap_trace_rec_t *ttp) 1254 { 1255 ulong_t pending_sels, pe, selbit; 1256 int i, j, port, pri, curpri, irq, sipri; 1257 uint16_t pending_ints, sip; 1258 struct cpu *cpu = CPU; 1259 volatile shared_info_t *si = HYPERVISOR_shared_info; 1260 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 1261 volatile struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 1262 volatile uint16_t *cpu_ipp = &cpu->cpu_m.mcpu_intr_pending; 1263 extern void dosoftint(struct regs *); 1264 1265 ASSERT(rp->r_trapno == T_AST && rp->r_err == 0); 1266 ASSERT(&si->vcpu_info[cpu->cpu_id] == vci); 1267 ASSERT_STACK_ALIGNED(); 1268 1269 vci->evtchn_upcall_pending = 0; 1270 1271 /* 1272 * To expedite scanning of pending notifications, any 0->1 1273 * pending transition on an unmasked channel causes a 1274 * corresponding bit in evtchn_pending_sel to be set. 1275 * Each bit in the selector covers a 32-bit word in 1276 * the evtchn_pending[] array. 
1277 */ 1278 membar_enter(); 1279 do { 1280 pending_sels = vci->evtchn_pending_sel; 1281 } while (atomic_cas_ulong((volatile ulong_t *)&vci->evtchn_pending_sel, 1282 pending_sels, 0) != pending_sels); 1283 1284 pending_ints = *cpu_ipp; 1285 while ((i = ffs(pending_sels)) != 0) { 1286 i--; 1287 selbit = 1ul << i; 1288 pending_sels &= ~selbit; 1289 1290 membar_enter(); 1291 while ((pe = UNBLOCKED_EVENTS(si, i, cpe, cpu->cpu_id)) != 0) { 1292 j = ffs(pe) - 1; 1293 pe &= ~(1ul << j); 1294 1295 port = (i << EVTCHN_SHIFT) + j; 1296 1297 irq = evtchn_to_irq[port]; 1298 1299 /* 1300 * If no irq set, just ignore the event. 1301 * On e.g. netbsd they call evtchn_device_upcall(port) 1302 * We require the evtchn driver to install a handler 1303 * so there will be an irq associated with user mode 1304 * evtchns. 1305 */ 1306 if (irq == INVALID_IRQ) { 1307 ec_clear_evtchn(port); 1308 continue; 1309 } 1310 1311 /* 1312 * If there's no handler, it could be a poke, so just 1313 * accept the event and continue. 1314 */ 1315 if (!irq_info[irq].ii_u2.has_handler) { 1316 #ifdef TRAPTRACE 1317 ttp->ttr_ipl = 0xff; 1318 if (IRQ_IS_CPUPOKE(irq)) { 1319 ttp->ttr_ipl = XC_CPUPOKE_PIL; 1320 ttp->ttr_marker = TT_INTERRUPT; 1321 } 1322 ttp->ttr_pri = cpu->cpu_pri; 1323 ttp->ttr_spl = cpu->cpu_base_spl; 1324 ttp->ttr_vector = 0xff; 1325 #endif /* TRAPTRACE */ 1326 if (ec_mask_evtchn(port)) { 1327 ec_clear_evtchn(port); 1328 ec_unmask_evtchn(port); 1329 continue; 1330 } 1331 } 1332 1333 pri = irq_info[irq].ii_u2.ipl; 1334 1335 /* 1336 * If we are the cpu that successfully masks 1337 * the event, then record it as a pending event 1338 * for this cpu to service 1339 */ 1340 if (ec_mask_evtchn(port)) { 1341 if (ec_evtchn_pending(port)) { 1342 cpe->pending_sel[pri] |= selbit; 1343 cpe->pending_evts[pri][i] |= (1ul << j); 1344 pending_ints |= 1 << pri; 1345 /* 1346 * We have recorded a pending interrupt 1347 * for this cpu. 
If it is an edge 1348 * triggered interrupt then we go ahead 1349 * and clear the pending and mask bits 1350 * from the shared info to avoid having 1351 * the hypervisor see the pending event 1352 * again and possibly disabling the 1353 * interrupt. This should also help 1354 * keep us from missing an interrupt. 1355 */ 1356 if (ec_is_edge_pirq(irq)) { 1357 ec_clear_evtchn(port); 1358 ec_unmask_evtchn(port); 1359 } 1360 } else { 1361 /* 1362 * another cpu serviced this event 1363 * before us, clear the mask. 1364 */ 1365 ec_unmask_evtchn(port); 1366 } 1367 } 1368 } 1369 } 1370 *cpu_ipp = pending_ints; 1371 if (pending_ints == 0) 1372 return; 1373 /* 1374 * We have gathered all the pending events/interrupts, 1375 * go service all the ones we can from highest priority to lowest. 1376 * Note: This loop may not actually complete and service all 1377 * pending interrupts since one of the interrupt threads may 1378 * block and the pinned thread runs. In that case, when we 1379 * exit the interrupt thread that blocked we will check for 1380 * any unserviced interrupts and re-post an upcall to process 1381 * any unserviced pending events. 1382 */ 1383 restart: 1384 curpri = cpu->cpu_pri; 1385 pri = bsrw_insn(*cpu_ipp); 1386 while (pri > curpri) { 1387 while ((pending_sels = cpe->pending_sel[pri]) != 0) { 1388 i = ffs(pending_sels) - 1; 1389 while ((pe = cpe->pending_evts[pri][i]) != 0) { 1390 j = ffs(pe) - 1; 1391 port = (i << EVTCHN_SHIFT) + j; 1392 pe &= ~(1ul << j); 1393 cpe->pending_evts[pri][i] = pe; 1394 if (pe == 0) { 1395 /* 1396 * Must reload pending selector bits 1397 * here as they could have changed on 1398 * a previous trip around the inner loop 1399 * while we were interrupt enabled 1400 * in a interrupt service routine. 
1401 */ 1402 pending_sels = cpe->pending_sel[pri]; 1403 pending_sels &= ~(1ul << i); 1404 cpe->pending_sel[pri] = pending_sels; 1405 if (pending_sels == 0) 1406 *cpu_ipp &= ~(1 << pri); 1407 } 1408 irq = evtchn_to_irq[port]; 1409 if (irq == INVALID_IRQ) { 1410 /* 1411 * No longer a handler for this event 1412 * channel. Clear the event and 1413 * ignore it, unmask the event. 1414 */ 1415 ec_clear_evtchn(port); 1416 ec_unmask_evtchn(port); 1417 continue; 1418 } 1419 if (irq == ec_dev_irq) { 1420 ASSERT(cpu->cpu_m.mcpu_ec_mbox == 0); 1421 cpu->cpu_m.mcpu_ec_mbox = port; 1422 } 1423 /* 1424 * Set up the regs struct to 1425 * look like a normal hardware int 1426 * and do normal interrupt handling. 1427 */ 1428 rp->r_trapno = irq; 1429 do_interrupt(rp, ttp); 1430 /* 1431 * Check for cpu priority change 1432 * Can happen if int thread blocks 1433 */ 1434 if (cpu->cpu_pri != curpri) 1435 goto restart; 1436 } 1437 } 1438 /* 1439 * Dispatch any soft interrupts that are 1440 * higher priority than any hard ones remaining. 1441 */ 1442 pri = bsrw_insn(*cpu_ipp); 1443 sip = (uint16_t)cpu->cpu_softinfo.st_pending; 1444 if (sip != 0) { 1445 sipri = bsrw_insn(sip); 1446 if (sipri > pri && sipri > cpu->cpu_pri) { 1447 dosoftint(rp); 1448 /* 1449 * Check for cpu priority change 1450 * Can happen if softint thread blocks 1451 */ 1452 if (cpu->cpu_pri != curpri) 1453 goto restart; 1454 } 1455 } 1456 } 1457 /* 1458 * Deliver any pending soft interrupts. 
1459 */ 1460 if (cpu->cpu_softinfo.st_pending) 1461 dosoftint(rp); 1462 } 1463 1464 1465 void 1466 ec_unmask_evtchn(unsigned int ev) 1467 { 1468 uint_t evi, evb; 1469 volatile shared_info_t *si = HYPERVISOR_shared_info; 1470 volatile vcpu_info_t *vci = CPU->cpu_m.mcpu_vcpu_info; 1471 volatile ulong_t *ulp; 1472 1473 ASSERT(!interrupts_enabled()); 1474 /* 1475 * Check if we need to take slow path 1476 */ 1477 if (!CPU_IN_SET(evtchn_cpus[ev], CPU->cpu_id)) { 1478 xen_evtchn_unmask(ev); 1479 return; 1480 } 1481 evi = ev >> EVTCHN_SHIFT; 1482 evb = ev & ((1ul << EVTCHN_SHIFT) - 1); 1483 ulp = (volatile ulong_t *)&si->evtchn_mask[evi]; 1484 atomic_and_ulong(ulp, ~(1ul << evb)); 1485 /* 1486 * The following is basically the equivalent of 1487 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the 1488 * interrupt edge' if the channel is masked. 1489 * XXPV - slight race if upcall was about to be set, we may get 1490 * an extra upcall. 1491 */ 1492 membar_enter(); 1493 if (si->evtchn_pending[evi] & (1ul << evb)) { 1494 membar_consumer(); 1495 ulp = (volatile ulong_t *)&vci->evtchn_pending_sel; 1496 if (!(*ulp & (1ul << evi))) { 1497 atomic_or_ulong(ulp, (1ul << evi)); 1498 } 1499 vci->evtchn_upcall_pending = 1; 1500 } 1501 } 1502 1503 /* 1504 * Set a bit in an evtchan mask word, return true if we are the cpu that 1505 * set the bit. 
1506 */ 1507 int 1508 ec_mask_evtchn(unsigned int ev) 1509 { 1510 uint_t evi, evb; 1511 ulong_t new, old, bit; 1512 volatile shared_info_t *si = HYPERVISOR_shared_info; 1513 volatile ulong_t *maskp; 1514 int masked; 1515 1516 kpreempt_disable(); 1517 evi = ev >> EVTCHN_SHIFT; 1518 evb = ev & ((1ul << EVTCHN_SHIFT) - 1); 1519 bit = 1ul << evb; 1520 maskp = (volatile ulong_t *)&si->evtchn_mask[evi]; 1521 do { 1522 old = si->evtchn_mask[evi]; 1523 new = old | bit; 1524 } while (atomic_cas_ulong(maskp, old, new) != old); 1525 masked = (old & bit) == 0; 1526 if (masked) { 1527 evtchn_owner[ev] = CPU->cpu_id; 1528 #ifdef DEBUG 1529 evtchn_owner_thread[ev] = curthread; 1530 #endif 1531 } 1532 kpreempt_enable(); 1533 return (masked); 1534 } 1535 1536 void 1537 ec_clear_evtchn(unsigned int ev) 1538 { 1539 uint_t evi; 1540 shared_info_t *si = HYPERVISOR_shared_info; 1541 volatile ulong_t *pendp; 1542 1543 evi = ev >> EVTCHN_SHIFT; 1544 ev &= (1ul << EVTCHN_SHIFT) - 1; 1545 pendp = (volatile ulong_t *)&si->evtchn_pending[evi]; 1546 atomic_and_ulong(pendp, ~(1ul << ev)); 1547 } 1548 1549 void 1550 ec_notify_via_evtchn(unsigned int port) 1551 { 1552 evtchn_send_t send; 1553 1554 ASSERT(port != INVALID_EVTCHN); 1555 1556 send.port = port; 1557 (void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); 1558 } 1559 1560 int 1561 ec_block_irq(int irq) 1562 { 1563 irq_info_t *irqp = &irq_info[irq]; 1564 int evtchn; 1565 1566 1567 evtchn = irq_evtchn(irqp); 1568 (void) ec_mask_evtchn(evtchn); 1569 return (evtchn_owner[evtchn]); 1570 } 1571 1572 /* 1573 * Make a event that is pending for delivery on the current cpu "go away" 1574 * without servicing the interrupt. 
1575 */ 1576 void 1577 ec_unpend_irq(int irq) 1578 { 1579 irq_info_t *irqp = &irq_info[irq]; 1580 int pri = irqp->ii_u2.ipl; 1581 ulong_t flags; 1582 uint_t evtchn, evi, bit; 1583 unsigned long pe, pending_sels; 1584 struct xen_evt_data *cpe; 1585 1586 /* 1587 * The evtchn must be masked 1588 */ 1589 evtchn = irq_evtchn(irqp); 1590 ASSERT(EVTCHN_MASKED(evtchn)); 1591 evi = evtchn >> EVTCHN_SHIFT; 1592 bit = evtchn & (1ul << EVTCHN_SHIFT) - 1; 1593 flags = intr_clear(); 1594 cpe = CPU->cpu_m.mcpu_evt_pend; 1595 pe = cpe->pending_evts[pri][evi] & ~(1ul << bit); 1596 cpe->pending_evts[pri][evi] = pe; 1597 if (pe == 0) { 1598 pending_sels = cpe->pending_sel[pri]; 1599 pending_sels &= ~(1ul << evi); 1600 cpe->pending_sel[pri] = pending_sels; 1601 if (pending_sels == 0) 1602 CPU->cpu_m.mcpu_intr_pending &= ~(1 << pri); 1603 } 1604 intr_restore(flags); 1605 } 1606