1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * apic_introp.c:
27  *	Has code for Advanced DDI interrupt framework support.
28  */
29 
30 #include <sys/cpuvar.h>
31 #include <sys/psm.h>
32 #include <sys/archsystm.h>
33 #include <sys/apic.h>
34 #include <sys/sunddi.h>
35 #include <sys/ddi_impldefs.h>
36 #include <sys/mach_intr.h>
37 #include <sys/sysmacros.h>
38 #include <sys/trap.h>
39 #include <sys/pci.h>
40 #include <sys/pci_intr_lib.h>
41 
/* Autovector table; walked by apic_get_vector_intr_info() to list dips. */
extern struct av_head autovect[];

/*
 *	Function Prototypes
 *
 * apic_find_irq() is defined later in this file.  It is not declared
 * static, so it has external linkage.
 */
apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);

/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *  0 = default value - don't know and need to call apic_check_msi_support()
 *      to find out then set it accordingly
 *  1 = supported
 * -1 = not supported
 *
 * The 0 -> 1/-1 transition is made by the PSM_INTR_OP_CHECK_MSI case of
 * apic_intr_ops() the first time that operation is requested.
 */
int	apic_support_msi = 0;

/*
 * Multiple vector support for MSI.
 * Not referenced anywhere in this file.
 */
int	apic_multi_msi_enable = 1;

/*
 * Multiple vector support for MSI-X.
 * When zero, the PSM_INTR_OP_CHECK_MSI case of apic_intr_ops() removes
 * DDI_INTR_TYPE_MSIX from the interrupt types it reports.
 */
int	apic_msix_enable = 1;
66 
67 /*
68  * apic_pci_msi_enable_vector:
69  *	Set the address/data fields in the MSI/X capability structure
70  *	XXX: MSI-X support
71  */
72 /* ARGSUSED */
73 void
74 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
75     int count, int target_apic_id)
76 {
77 	uint64_t		msi_addr, msi_data;
78 	ushort_t		msi_ctrl;
79 	dev_info_t		*dip = irq_ptr->airq_dip;
80 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
81 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
82 #if !defined(__xpv)
83 	msi_regs_t		msi_regs;
84 #endif	/* ! __xpv */
85 
86 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
87 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
88 	    ddi_driver_name(dip), inum, vector, target_apic_id));
89 
90 	ASSERT((handle != NULL) && (cap_ptr != 0));
91 
92 #if !defined(__xpv)
93 	msi_regs.mr_data = vector;
94 	msi_regs.mr_addr = target_apic_id;
95 
96 	apic_vt_ops->apic_intrmap_alloc_entry(irq_ptr);
97 	apic_vt_ops->apic_intrmap_map_entry(irq_ptr, (void *)&msi_regs);
98 	apic_vt_ops->apic_intrmap_record_msi(irq_ptr, &msi_regs);
99 
100 	/* MSI Address */
101 	msi_addr = msi_regs.mr_addr;
102 
103 	/* MSI Data: MSI is edge triggered according to spec */
104 	msi_data = msi_regs.mr_data;
105 #else
106 	/* MSI Address */
107 	msi_addr = (MSI_ADDR_HDR |
108 	    (target_apic_id << MSI_ADDR_DEST_SHIFT));
109 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
110 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
111 
112 	/* MSI Data: MSI is edge triggered according to spec */
113 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
114 #endif	/* ! __xpv */
115 
116 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
117 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
118 
119 	if (type == DDI_INTR_TYPE_MSI) {
120 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
121 
122 		/* Set the bits to inform how many MSIs are enabled */
123 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
124 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
125 
126 #if !defined(__xpv)
127 		/*
128 		 * Only set vector if not on hypervisor
129 		 */
130 		pci_config_put32(handle,
131 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
132 
133 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
134 			pci_config_put32(handle,
135 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
136 			pci_config_put16(handle,
137 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
138 		} else {
139 			pci_config_put16(handle,
140 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
141 		}
142 
143 	} else if (type == DDI_INTR_TYPE_MSIX) {
144 		uintptr_t	off;
145 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
146 
147 		ASSERT(msix_p != NULL);
148 
149 		/* Offset into the "inum"th entry in the MSI-X table */
150 		off = (uintptr_t)msix_p->msix_tbl_addr +
151 		    (inum  * PCI_MSIX_VECTOR_SIZE);
152 
153 		ddi_put32(msix_p->msix_tbl_hdl,
154 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
155 		ddi_put64(msix_p->msix_tbl_hdl,
156 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
157 #endif	/* ! __xpv */
158 	}
159 }
160 
161 
162 #if !defined(__xpv)
163 
164 /*
165  * This function returns the no. of vectors available for the pri.
166  * dip is not used at this moment.  If we really don't need that,
167  * it will be removed.
168  */
169 /*ARGSUSED*/
170 int
171 apic_navail_vector(dev_info_t *dip, int pri)
172 {
173 	int	lowest, highest, i, navail, count;
174 
175 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
176 	    (void *)dip, pri));
177 
178 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
179 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
180 	navail = count = 0;
181 
182 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
183 		lowest -= APIC_VECTOR_PER_IPL;
184 
185 	/* It has to be contiguous */
186 	for (i = lowest; i <= highest; i++) {
187 		count = 0;
188 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
189 		    (i <= highest)) {
190 			if (APIC_CHECK_RESERVE_VECTORS(i))
191 				break;
192 			count++;
193 			i++;
194 		}
195 		if (count > navail)
196 			navail = count;
197 	}
198 	return (navail);
199 }
200 
201 #endif	/* ! __xpv */
202 
203 /*
204  * Finds "count" contiguous MSI vectors starting at the proper alignment
205  * at "pri".
206  * Caller needs to make sure that count has to be power of 2 and should not
207  * be < 1.
208  */
209 uchar_t
210 apic_find_multi_vectors(int pri, int count)
211 {
212 	int	lowest, highest, i, navail, start, msibits;
213 
214 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
215 	    pri, count));
216 
217 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
218 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
219 	navail = 0;
220 
221 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
222 		lowest -= APIC_VECTOR_PER_IPL;
223 
224 	/*
225 	 * msibits is the no. of lower order message data bits for the
226 	 * allocated MSI vectors and is used to calculate the aligned
227 	 * starting vector
228 	 */
229 	msibits = count - 1;
230 
231 	/* It has to be contiguous */
232 	for (i = lowest; i <= highest; i++) {
233 		navail = 0;
234 
235 		/*
236 		 * starting vector has to be aligned accordingly for
237 		 * multiple MSIs
238 		 */
239 		if (msibits)
240 			i = (i + msibits) & ~msibits;
241 		start = i;
242 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
243 		    (i <= highest)) {
244 			if (APIC_CHECK_RESERVE_VECTORS(i))
245 				break;
246 			navail++;
247 			if (navail >= count)
248 				return (start);
249 			i++;
250 		}
251 	}
252 	return (0);
253 }
254 
255 
256 /*
257  * It finds the apic_irq_t associates with the dip, ispec and type.
258  */
259 apic_irq_t *
260 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
261 {
262 	apic_irq_t	*irqp;
263 	int i;
264 
265 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
266 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
267 	    ispec->intrspec_pri, type));
268 
269 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
270 		for (irqp = apic_irq_table[i]; irqp; irqp = irqp->airq_next) {
271 			if ((irqp->airq_dip == dip) &&
272 			    (irqp->airq_origirq == ispec->intrspec_vec) &&
273 			    (irqp->airq_ipl == ispec->intrspec_pri)) {
274 				if (type == DDI_INTR_TYPE_MSI) {
275 					if (irqp->airq_mps_intr_index ==
276 					    MSI_INDEX)
277 						return (irqp);
278 				} else if (type == DDI_INTR_TYPE_MSIX) {
279 					if (irqp->airq_mps_intr_index ==
280 					    MSIX_INDEX)
281 						return (irqp);
282 				} else
283 					return (irqp);
284 			}
285 		}
286 	}
287 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
288 	return (NULL);
289 }
290 
291 
292 #if !defined(__xpv)
293 
294 /*
295  * This function will return the pending bit of the irqp.
296  * It either comes from the IRR register of the APIC or the RDT
297  * entry of the I/O APIC.
298  * For the IRR to work, it needs to be to its binding CPU
299  */
300 static int
301 apic_get_pending(apic_irq_t *irqp, int type)
302 {
303 	int			bit, index, irr, pending;
304 	int			intin_no;
305 	int			apic_ix;
306 
307 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
308 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
309 	    type));
310 
311 	/* need to get on the bound cpu */
312 	mutex_enter(&cpu_lock);
313 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
314 
315 	index = irqp->airq_vector / 32;
316 	bit = irqp->airq_vector % 32;
317 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
318 
319 	affinity_clear();
320 	mutex_exit(&cpu_lock);
321 
322 	pending = (irr & (1 << bit)) ? 1 : 0;
323 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
324 		/* check I/O APIC for fixed interrupt */
325 		intin_no = irqp->airq_intin_no;
326 		apic_ix = irqp->airq_ioapicindex;
327 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
328 		    AV_PENDING) ? 1 : 0;
329 	}
330 	return (pending);
331 }
332 
333 
334 /*
335  * This function will clear the mask for the interrupt on the I/O APIC
336  */
337 static void
338 apic_clear_mask(apic_irq_t *irqp)
339 {
340 	int			intin_no;
341 	ulong_t			iflag;
342 	int32_t			rdt_entry;
343 	int 			apic_ix;
344 
345 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
346 	    (void *)irqp));
347 
348 	intin_no = irqp->airq_intin_no;
349 	apic_ix = irqp->airq_ioapicindex;
350 
351 	iflag = intr_clear();
352 	lock_set(&apic_ioapic_lock);
353 
354 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
355 
356 	/* clear mask */
357 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
358 	    ((~AV_MASK) & rdt_entry));
359 
360 	lock_clear(&apic_ioapic_lock);
361 	intr_restore(iflag);
362 }
363 
364 
365 /*
366  * This function will mask the interrupt on the I/O APIC
367  */
368 static void
369 apic_set_mask(apic_irq_t *irqp)
370 {
371 	int			intin_no;
372 	int 			apic_ix;
373 	ulong_t			iflag;
374 	int32_t			rdt_entry;
375 
376 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
377 
378 	intin_no = irqp->airq_intin_no;
379 	apic_ix = irqp->airq_ioapicindex;
380 
381 	iflag = intr_clear();
382 
383 	lock_set(&apic_ioapic_lock);
384 
385 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
386 
387 	/* mask it */
388 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
389 	    (AV_MASK | rdt_entry));
390 
391 	lock_clear(&apic_ioapic_lock);
392 	intr_restore(iflag);
393 }
394 
395 
396 void
397 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
398 {
399 	int i;
400 	apic_irq_t *irqptr;
401 	struct intrspec ispec;
402 
403 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
404 	    "count: %x pri: %x type: %x\n",
405 	    (void *)dip, inum, count, pri, type));
406 
407 	/* for MSI/X only */
408 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
409 		return;
410 
411 	for (i = 0; i < count; i++) {
412 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
413 		    "pri=0x%x count=0x%x\n", inum, pri, count));
414 		ispec.intrspec_vec = inum + i;
415 		ispec.intrspec_pri = pri;
416 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
417 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
418 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
419 			    "failed\n", (void *)dip, inum, pri));
420 			continue;
421 		}
422 		irqptr->airq_mps_intr_index = FREE_INDEX;
423 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
424 	}
425 }
426 
427 #endif	/* ! __xpv */
428 
429 /*
430  * check whether the system supports MSI
431  *
432  * If PCI-E capability is found, then this must be a PCI-E system.
433  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
434  * to indicate this system supports MSI.
435  */
436 int
437 apic_check_msi_support()
438 {
439 	dev_info_t *cdip;
440 	char dev_type[16];
441 	int dev_len;
442 
443 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
444 
445 	/*
446 	 * check whether the first level children of root_node have
447 	 * PCI-E capability
448 	 */
449 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
450 	    cdip = ddi_get_next_sibling(cdip)) {
451 
452 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
453 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
454 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
455 		    ddi_node_name(cdip)));
456 		dev_len = sizeof (dev_type);
457 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
458 		    "device_type", (caddr_t)dev_type, &dev_len)
459 		    != DDI_PROP_SUCCESS)
460 			continue;
461 		if (strcmp(dev_type, "pciex") == 0)
462 			return (PSM_SUCCESS);
463 	}
464 
465 	/* MSI is not supported on this system */
466 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
467 	    "device_type found\n"));
468 	return (PSM_FAILURE);
469 }
470 
471 #if !defined(__xpv)
472 
473 /*
474  * apic_pci_msi_unconfigure:
475  *
476  * This and next two interfaces are copied from pci_intr_lib.c
477  * Do ensure that these two files stay in sync.
478  * These needed to be copied over here to avoid a deadlock situation on
479  * certain mp systems that use MSI interrupts.
480  *
481  * IMPORTANT regards next three interfaces:
482  * i) are called only for MSI/X interrupts.
483  * ii) called with interrupts disabled, and must not block
484  */
485 void
486 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
487 {
488 	ushort_t		msi_ctrl;
489 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
490 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
491 
492 	ASSERT((handle != NULL) && (cap_ptr != 0));
493 
494 	if (type == DDI_INTR_TYPE_MSI) {
495 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
496 		msi_ctrl &= (~PCI_MSI_MME_MASK);
497 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
498 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
499 
500 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
501 			pci_config_put16(handle,
502 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
503 			pci_config_put32(handle,
504 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
505 		} else {
506 			pci_config_put16(handle,
507 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
508 		}
509 
510 	} else if (type == DDI_INTR_TYPE_MSIX) {
511 		uintptr_t	off;
512 		uint32_t	mask;
513 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
514 
515 		ASSERT(msix_p != NULL);
516 
517 		/* Offset into "inum"th entry in the MSI-X table & mask it */
518 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
519 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
520 
521 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
522 
523 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
524 
525 		/* Offset into the "inum"th entry in the MSI-X table */
526 		off = (uintptr_t)msix_p->msix_tbl_addr +
527 		    (inum * PCI_MSIX_VECTOR_SIZE);
528 
529 		/* Reset the "data" and "addr" bits */
530 		ddi_put32(msix_p->msix_tbl_hdl,
531 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
532 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
533 	}
534 }
535 
#endif	/* ! __xpv */
537 
538 /*
539  * apic_pci_msi_enable_mode:
540  */
541 void
542 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
543 {
544 	ushort_t		msi_ctrl;
545 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
546 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
547 
548 	ASSERT((handle != NULL) && (cap_ptr != 0));
549 
550 	if (type == DDI_INTR_TYPE_MSI) {
551 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
552 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
553 			return;
554 
555 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
556 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
557 
558 	} else if (type == DDI_INTR_TYPE_MSIX) {
559 		uintptr_t	off;
560 		uint32_t	mask;
561 		ddi_intr_msix_t	*msix_p;
562 
563 		msix_p = i_ddi_get_msix(rdip);
564 
565 		ASSERT(msix_p != NULL);
566 
567 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
568 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
569 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
570 
571 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
572 
573 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
574 
575 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
576 
577 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
578 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
579 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
580 			    msi_ctrl);
581 		}
582 	}
583 }
584 
585 /*
586  * apic_pci_msi_disable_mode:
587  */
588 void
589 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
590 {
591 	ushort_t		msi_ctrl;
592 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
593 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
594 
595 	ASSERT((handle != NULL) && (cap_ptr != 0));
596 
597 	if (type == DDI_INTR_TYPE_MSI) {
598 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
599 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
600 			return;
601 
602 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
603 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
604 
605 	} else if (type == DDI_INTR_TYPE_MSIX) {
606 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
607 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
608 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
609 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
610 			    msi_ctrl);
611 		}
612 	}
613 }
614 
615 #if !defined(__xpv)
616 
617 static int
618 apic_set_cpu(int irqno, int cpu, int *result)
619 {
620 	apic_irq_t *irqp;
621 	ulong_t iflag;
622 	int ret;
623 
624 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
625 
626 	mutex_enter(&airq_mutex);
627 	irqp = apic_irq_table[irqno];
628 	mutex_exit(&airq_mutex);
629 
630 	if (irqp == NULL) {
631 		*result = ENXIO;
632 		return (PSM_FAILURE);
633 	}
634 
635 	/* Fail if this is an MSI intr and is part of a group. */
636 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
637 	    (irqp->airq_intin_no > 1)) {
638 		*result = ENXIO;
639 		return (PSM_FAILURE);
640 	}
641 
642 	iflag = intr_clear();
643 	lock_set(&apic_ioapic_lock);
644 
645 	ret = apic_rebind_all(irqp, cpu);
646 
647 	lock_clear(&apic_ioapic_lock);
648 	intr_restore(iflag);
649 
650 	if (ret) {
651 		*result = EIO;
652 		return (PSM_FAILURE);
653 	}
654 	/*
655 	 * keep tracking the default interrupt cpu binding
656 	 */
657 	irqp->airq_cpu = cpu;
658 
659 	*result = 0;
660 	return (PSM_SUCCESS);
661 }
662 
663 static int
664 apic_grp_set_cpu(int irqno, int new_cpu, int *result)
665 {
666 	dev_info_t *orig_dip;
667 	uint32_t orig_cpu;
668 	ulong_t iflag;
669 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
670 	int i;
671 	int cap_ptr;
672 	int msi_mask_off;
673 	ushort_t msi_ctrl;
674 	uint32_t msi_pvm;
675 	ddi_acc_handle_t handle;
676 	int num_vectors = 0;
677 	uint32_t vector;
678 
679 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
680 
681 	/*
682 	 * Take mutex to insure that table doesn't change out from underneath
683 	 * us while we're playing with it.
684 	 */
685 	mutex_enter(&airq_mutex);
686 	irqps[0] = apic_irq_table[irqno];
687 	orig_cpu = irqps[0]->airq_temp_cpu;
688 	orig_dip = irqps[0]->airq_dip;
689 	num_vectors = irqps[0]->airq_intin_no;
690 	vector = irqps[0]->airq_vector;
691 
692 	/* A "group" of 1 */
693 	if (num_vectors == 1) {
694 		mutex_exit(&airq_mutex);
695 		return (apic_set_cpu(irqno, new_cpu, result));
696 	}
697 
698 	*result = ENXIO;
699 
700 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
701 		mutex_exit(&airq_mutex);
702 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
703 		goto set_grp_intr_done;
704 	}
705 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
706 		mutex_exit(&airq_mutex);
707 		DDI_INTR_IMPLDBG((CE_CONT,
708 		    "set_grp: base vec not part of a grp or not aligned: "
709 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
710 		goto set_grp_intr_done;
711 	}
712 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
713 	    num_vectors));
714 
715 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
716 
717 	*result = EIO;
718 
719 	/*
720 	 * All IRQ entries in the table for the given device will be not
721 	 * shared.  Since they are not shared, the dip in the table will
722 	 * be true to the device of interest.
723 	 */
724 	for (i = 1; i < num_vectors; i++) {
725 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
726 		if (irqps[i] == NULL) {
727 			mutex_exit(&airq_mutex);
728 			goto set_grp_intr_done;
729 		}
730 #ifdef DEBUG
731 		/* Sanity check: CPU and dip is the same for all entries. */
732 		if ((irqps[i]->airq_dip != orig_dip) ||
733 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
734 			mutex_exit(&airq_mutex);
735 			DDI_INTR_IMPLDBG((CE_CONT,
736 			    "set_grp: cpu or dip for vec 0x%x difft than for "
737 			    "vec 0x%x\n", vector, vector + i));
738 			DDI_INTR_IMPLDBG((CE_CONT,
739 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
740 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
741 			    (void *)irqps[i]->airq_dip));
742 			goto set_grp_intr_done;
743 		}
744 #endif /* DEBUG */
745 	}
746 	mutex_exit(&airq_mutex);
747 
748 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
749 	handle = i_ddi_get_pci_config_handle(orig_dip);
750 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
751 
752 	/* MSI Per vector masking is supported. */
753 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
754 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
755 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
756 		else
757 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
758 		msi_pvm = pci_config_get32(handle, msi_mask_off);
759 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
760 		DDI_INTR_IMPLDBG((CE_CONT,
761 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
762 		    pci_config_get32(handle, msi_mask_off)));
763 	}
764 
765 	iflag = intr_clear();
766 	lock_set(&apic_ioapic_lock);
767 
768 	/*
769 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
770 	 * an error if the CPU is not accepting interrupts.  If the first one
771 	 * succeeds they all will.
772 	 */
773 	if (apic_rebind_all(irqps[0], new_cpu))
774 		(void) apic_rebind_all(irqps[0], orig_cpu);
775 	else {
776 		irqps[0]->airq_cpu = new_cpu;
777 
778 		for (i = 1; i < num_vectors; i++) {
779 			(void) apic_rebind_all(irqps[i], new_cpu);
780 			irqps[i]->airq_cpu = new_cpu;
781 		}
782 		*result = 0;	/* SUCCESS */
783 	}
784 
785 	lock_clear(&apic_ioapic_lock);
786 	intr_restore(iflag);
787 
788 	/* Reenable vectors if per vector masking is supported. */
789 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
790 		pci_config_put32(handle, msi_mask_off, msi_pvm);
791 		DDI_INTR_IMPLDBG((CE_CONT,
792 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
793 		    pci_config_get32(handle, msi_mask_off)));
794 	}
795 
796 set_grp_intr_done:
797 	if (*result != 0)
798 		return (PSM_FAILURE);
799 
800 	return (PSM_SUCCESS);
801 }
802 
803 #else	/* __xpv */
804 
/*
 * We let the hypervisor deal with msi configuration
 * so just stub this out.
 */

/* ARGSUSED */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
}
815 
816 #endif	/* __xpv */
817 
818 int
819 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
820 {
821 	struct autovec *av_dev;
822 	uchar_t irqno;
823 	int i;
824 	apic_irq_t *irq_p;
825 
826 	/* Sanity check the vector/irq argument. */
827 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
828 
829 	mutex_enter(&airq_mutex);
830 
831 	/*
832 	 * Convert the vecirq arg to an irq using vector_to_irq table
833 	 * if the arg is a vector.  Pass thru if already an irq.
834 	 */
835 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
836 	    PSMGI_INTRBY_VEC)
837 		irqno = apic_vector_to_irq[vecirq];
838 	else
839 		irqno = vecirq;
840 
841 	irq_p = apic_irq_table[irqno];
842 
843 	if ((irq_p == NULL) ||
844 	    ((irq_p->airq_mps_intr_index != RESERVE_INDEX) &&
845 	    ((irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
846 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)))) {
847 		mutex_exit(&airq_mutex);
848 		return (PSM_FAILURE);
849 	}
850 
851 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
852 
853 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
854 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
855 
856 		/* Return user bound info for intrd. */
857 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
858 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
859 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
860 		}
861 	}
862 
863 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
864 		intr_params_p->avgi_vector = irq_p->airq_vector;
865 
866 	if (intr_params_p->avgi_req_flags &
867 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
868 		/* Get number of devices from apic_irq table shared field. */
869 		intr_params_p->avgi_num_devs = irq_p->airq_share;
870 
871 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
872 
873 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
874 
875 		/* Some devices have NULL dip.  Don't count these. */
876 		if (intr_params_p->avgi_num_devs > 0) {
877 			for (i = 0, av_dev = autovect[irqno].avh_link;
878 			    av_dev; av_dev = av_dev->av_link)
879 				if (av_dev->av_vector && av_dev->av_dip)
880 					i++;
881 			intr_params_p->avgi_num_devs =
882 			    MIN(intr_params_p->avgi_num_devs, i);
883 		}
884 
885 		/* There are no viable dips to return. */
886 		if (intr_params_p->avgi_num_devs == 0)
887 			intr_params_p->avgi_dip_list = NULL;
888 
889 		else {	/* Return list of dips */
890 
891 			/* Allocate space in array for that number of devs. */
892 			intr_params_p->avgi_dip_list = kmem_zalloc(
893 			    intr_params_p->avgi_num_devs *
894 			    sizeof (dev_info_t *),
895 			    KM_SLEEP);
896 
897 			/*
898 			 * Loop through the device list of the autovec table
899 			 * filling in the dip array.
900 			 *
901 			 * Note that the autovect table may have some special
902 			 * entries which contain NULL dips.  These will be
903 			 * ignored.
904 			 */
905 			for (i = 0, av_dev = autovect[irqno].avh_link;
906 			    av_dev; av_dev = av_dev->av_link)
907 				if (av_dev->av_vector && av_dev->av_dip)
908 					intr_params_p->avgi_dip_list[i++] =
909 					    av_dev->av_dip;
910 		}
911 	}
912 
913 	mutex_exit(&airq_mutex);
914 
915 	return (PSM_SUCCESS);
916 }
917 
918 
919 #if !defined(__xpv)
920 
921 /*
922  * This function provides external interface to the nexus for all
923  * functionalities related to the new DDI interrupt framework.
924  *
925  * Input:
926  * dip     - pointer to the dev_info structure of the requested device
927  * hdlp    - pointer to the internal interrupt handle structure for the
928  *	     requested interrupt
929  * intr_op - opcode for this call
930  * result  - pointer to the integer that will hold the result to be
931  *	     passed back if return value is PSM_SUCCESS
932  *
933  * Output:
934  * return value is either PSM_SUCCESS or PSM_FAILURE
935  */
936 int
937 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
938     psm_intr_op_t intr_op, int *result)
939 {
940 	int		cap;
941 	int		count_vec;
942 	int		old_priority;
943 	int		new_priority;
944 	int		new_cpu;
945 	apic_irq_t	*irqp;
946 	struct intrspec *ispec, intr_spec;
947 
948 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
949 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
950 
951 	ispec = &intr_spec;
952 	ispec->intrspec_pri = hdlp->ih_pri;
953 	ispec->intrspec_vec = hdlp->ih_inum;
954 	ispec->intrspec_func = hdlp->ih_cb_func;
955 
956 	switch (intr_op) {
957 	case PSM_INTR_OP_CHECK_MSI:
958 		/*
959 		 * Check MSI/X is supported or not at APIC level and
960 		 * masked off the MSI/X bits in hdlp->ih_type if not
961 		 * supported before return.  If MSI/X is supported,
962 		 * leave the ih_type unchanged and return.
963 		 *
964 		 * hdlp->ih_type passed in from the nexus has all the
965 		 * interrupt types supported by the device.
966 		 */
967 		if (apic_support_msi == 0) {
968 			/*
969 			 * if apic_support_msi is not set, call
970 			 * apic_check_msi_support() to check whether msi
971 			 * is supported first
972 			 */
973 			if (apic_check_msi_support() == PSM_SUCCESS)
974 				apic_support_msi = 1;
975 			else
976 				apic_support_msi = -1;
977 		}
978 		if (apic_support_msi == 1) {
979 			if (apic_msix_enable)
980 				*result = hdlp->ih_type;
981 			else
982 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
983 		} else
984 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
985 			    DDI_INTR_TYPE_MSIX);
986 		break;
987 	case PSM_INTR_OP_ALLOC_VECTORS:
988 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
989 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
990 			    hdlp->ih_scratch1, hdlp->ih_pri,
991 			    (int)(uintptr_t)hdlp->ih_scratch2);
992 		else
993 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
994 			    hdlp->ih_scratch1, hdlp->ih_pri,
995 			    (int)(uintptr_t)hdlp->ih_scratch2);
996 		break;
997 	case PSM_INTR_OP_FREE_VECTORS:
998 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
999 		    hdlp->ih_pri, hdlp->ih_type);
1000 		break;
1001 	case PSM_INTR_OP_NAVAIL_VECTORS:
1002 		*result = apic_navail_vector(dip, hdlp->ih_pri);
1003 		break;
1004 	case PSM_INTR_OP_XLATE_VECTOR:
1005 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1006 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
1007 		if (*result == -1)
1008 			return (PSM_FAILURE);
1009 		break;
1010 	case PSM_INTR_OP_GET_PENDING:
1011 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
1012 			return (PSM_FAILURE);
1013 		*result = apic_get_pending(irqp, hdlp->ih_type);
1014 		break;
1015 	case PSM_INTR_OP_CLEAR_MASK:
1016 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1017 			return (PSM_FAILURE);
1018 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
1019 		if (irqp == NULL)
1020 			return (PSM_FAILURE);
1021 		apic_clear_mask(irqp);
1022 		break;
1023 	case PSM_INTR_OP_SET_MASK:
1024 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1025 			return (PSM_FAILURE);
1026 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
1027 			return (PSM_FAILURE);
1028 		apic_set_mask(irqp);
1029 		break;
1030 	case PSM_INTR_OP_GET_CAP:
1031 		cap = DDI_INTR_FLAG_PENDING;
1032 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1033 			cap |= DDI_INTR_FLAG_MASKABLE;
1034 		else if (hdlp->ih_type == DDI_INTR_TYPE_MSIX)
1035 			cap |= DDI_INTR_FLAG_RETARGETABLE;
1036 		*result = cap;
1037 		break;
1038 	case PSM_INTR_OP_GET_SHARED:
1039 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1040 			return (PSM_FAILURE);
1041 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1042 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
1043 			return (PSM_FAILURE);
1044 		*result = (irqp->airq_share > 1) ? 1: 0;
1045 		break;
1046 	case PSM_INTR_OP_SET_PRI:
1047 		old_priority = hdlp->ih_pri;	/* save old value */
1048 		new_priority = *(int *)result;	/* try the new value */
1049 
1050 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1051 			return (PSM_SUCCESS);
1052 		}
1053 
1054 		/* Now allocate the vectors */
1055 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI) {
1056 			/* SET_PRI does not support the case of multiple MSI */
1057 			if (i_ddi_intr_get_current_nintrs(hdlp->ih_dip) > 1)
1058 				return (PSM_FAILURE);
1059 
1060 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
1061 			    1, new_priority,
1062 			    DDI_INTR_ALLOC_STRICT);
1063 		} else {
1064 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
1065 			    1, new_priority,
1066 			    DDI_INTR_ALLOC_STRICT);
1067 		}
1068 
1069 		/* Did we get new vectors? */
1070 		if (!count_vec)
1071 			return (PSM_FAILURE);
1072 
1073 		/* Finally, free the previously allocated vectors */
1074 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
1075 		    old_priority, hdlp->ih_type);
1076 		break;
1077 	case PSM_INTR_OP_SET_CPU:
1078 	case PSM_INTR_OP_GRP_SET_CPU:
1079 		/*
1080 		 * The interrupt handle given here has been allocated
1081 		 * specifically for this command, and ih_private carries
1082 		 * a CPU value.
1083 		 */
1084 		new_cpu = (int)(intptr_t)hdlp->ih_private;
1085 		if (!apic_cpu_in_range(new_cpu)) {
1086 			DDI_INTR_IMPLDBG((CE_CONT,
1087 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
1088 			*result = EINVAL;
1089 			return (PSM_FAILURE);
1090 		}
1091 		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
1092 			DDI_INTR_IMPLDBG((CE_CONT,
1093 			    "[grp_]set_cpu: vector out of range: %d\n",
1094 			    hdlp->ih_vector));
1095 			*result = EINVAL;
1096 			return (PSM_FAILURE);
1097 		}
1098 		if (!(hdlp->ih_flags & PSMGI_INTRBY_IRQ))
1099 			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
1100 		if (intr_op == PSM_INTR_OP_SET_CPU) {
1101 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
1102 			    PSM_SUCCESS)
1103 				return (PSM_FAILURE);
1104 		} else {
1105 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
1106 			    result) != PSM_SUCCESS)
1107 				return (PSM_FAILURE);
1108 		}
1109 		break;
1110 	case PSM_INTR_OP_GET_INTR:
1111 		/*
1112 		 * The interrupt handle given here has been allocated
1113 		 * specifically for this command, and ih_private carries
1114 		 * a pointer to a apic_get_intr_t.
1115 		 */
1116 		if (apic_get_vector_intr_info(
1117 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
1118 			return (PSM_FAILURE);
1119 		break;
1120 	case PSM_INTR_OP_APIC_TYPE:
1121 		hdlp->ih_private = apic_get_apic_type();
1122 		hdlp->ih_ver = apic_get_apic_version();
1123 		break;
1124 	case PSM_INTR_OP_SET_CAP:
1125 	default:
1126 		return (PSM_FAILURE);
1127 	}
1128 	return (PSM_SUCCESS);
1129 }
1130 #endif	/* !__xpv */
1131