1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * apic_introp.c:
28  *	Has code for Advanced DDI interrupt framework support.
29  */
30 
31 #include <sys/cpuvar.h>
32 #include <sys/psm.h>
33 #include <sys/archsystm.h>
34 #include <sys/apic.h>
35 #include <sys/sunddi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/mach_intr.h>
38 #include <sys/sysmacros.h>
39 #include <sys/trap.h>
40 #include <sys/pci.h>
41 #include <sys/pci_intr_lib.h>
42 
/*
 * Autovector table defined elsewhere; apic_get_vector_intr_info() in this
 * file walks autovect[irq].avh_link to collect the dips attached to an irq.
 */
extern struct av_head autovect[];

/*
 *	Local Function Prototypes
 */
apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);

/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *  0 = default value - don't know and need to call apic_check_msi_support()
 *      to find out then set it accordingly
 *  1 = supported
 * -1 = not supported
 *
 * The flag is resolved lazily by the PSM_INTR_OP_CHECK_MSI case of
 * apic_intr_ops(), which then caches the answer here.
 */
int	apic_support_msi = 0;

/* Multiple vector support for MSI */
int	apic_multi_msi_enable = 1;

/*
 * MSI-X support.  When this is zero, PSM_INTR_OP_CHECK_MSI removes
 * DDI_INTR_TYPE_MSIX from the interrupt types it reports as supported.
 */
int	apic_msix_enable = 1;
67 
68 /*
69  * apic_pci_msi_enable_vector:
70  *	Set the address/data fields in the MSI/X capability structure
71  *	XXX: MSI-X support
72  */
73 /* ARGSUSED */
74 void
75 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
76     int count, int target_apic_id)
77 {
78 	uint64_t		msi_addr, msi_data;
79 	ushort_t		msi_ctrl;
80 	dev_info_t		*dip = irq_ptr->airq_dip;
81 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
82 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
83 #if !defined(__xpv)
84 	msi_regs_t		msi_regs;
85 #endif	/* ! __xpv */
86 
87 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
88 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
89 	    ddi_driver_name(dip), inum, vector, target_apic_id));
90 
91 	ASSERT((handle != NULL) && (cap_ptr != 0));
92 
93 #if !defined(__xpv)
94 	msi_regs.mr_data = vector;
95 	msi_regs.mr_addr = target_apic_id;
96 
97 	apic_vt_ops->apic_intrr_alloc_entry(irq_ptr);
98 	apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&msi_regs);
99 	apic_vt_ops->apic_intrr_record_msi(irq_ptr, &msi_regs);
100 
101 	/* MSI Address */
102 	msi_addr = msi_regs.mr_addr;
103 
104 	/* MSI Data: MSI is edge triggered according to spec */
105 	msi_data = msi_regs.mr_data;
106 #else
107 	/* MSI Address */
108 	msi_addr = (MSI_ADDR_HDR |
109 	    (target_apic_id << MSI_ADDR_DEST_SHIFT));
110 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
111 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
112 
113 	/* MSI Data: MSI is edge triggered according to spec */
114 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
115 #endif	/* ! __xpv */
116 
117 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
118 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
119 
120 	if (type == DDI_INTR_TYPE_MSI) {
121 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
122 
123 		/* Set the bits to inform how many MSIs are enabled */
124 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
125 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
126 
127 #if !defined(__xpv)
128 		/*
129 		 * Only set vector if not on hypervisor
130 		 */
131 		pci_config_put32(handle,
132 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
133 
134 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
135 			pci_config_put32(handle,
136 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
137 			pci_config_put16(handle,
138 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
139 		} else {
140 			pci_config_put16(handle,
141 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
142 		}
143 
144 	} else if (type == DDI_INTR_TYPE_MSIX) {
145 		uintptr_t	off;
146 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
147 
148 		ASSERT(msix_p != NULL);
149 
150 		/* Offset into the "inum"th entry in the MSI-X table */
151 		off = (uintptr_t)msix_p->msix_tbl_addr +
152 		    (inum  * PCI_MSIX_VECTOR_SIZE);
153 
154 		ddi_put32(msix_p->msix_tbl_hdl,
155 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
156 		ddi_put64(msix_p->msix_tbl_hdl,
157 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
158 #endif	/* ! __xpv */
159 	}
160 }
161 
162 
163 #if !defined(__xpv)
164 
165 /*
166  * This function returns the no. of vectors available for the pri.
167  * dip is not used at this moment.  If we really don't need that,
168  * it will be removed.
169  */
170 /*ARGSUSED*/
171 int
172 apic_navail_vector(dev_info_t *dip, int pri)
173 {
174 	int	lowest, highest, i, navail, count;
175 
176 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
177 	    (void *)dip, pri));
178 
179 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
180 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
181 	navail = count = 0;
182 
183 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
184 		lowest -= APIC_VECTOR_PER_IPL;
185 
186 	/* It has to be contiguous */
187 	for (i = lowest; i < highest; i++) {
188 		count = 0;
189 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
190 		    (i < highest)) {
191 			if (APIC_CHECK_RESERVE_VECTORS(i))
192 				break;
193 			count++;
194 			i++;
195 		}
196 		if (count > navail)
197 			navail = count;
198 	}
199 	return (navail);
200 }
201 
202 #endif	/* ! __xpv */
203 
204 /*
205  * Finds "count" contiguous MSI vectors starting at the proper alignment
206  * at "pri".
207  * Caller needs to make sure that count has to be power of 2 and should not
208  * be < 1.
209  */
210 uchar_t
211 apic_find_multi_vectors(int pri, int count)
212 {
213 	int	lowest, highest, i, navail, start, msibits;
214 
215 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
216 	    pri, count));
217 
218 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
219 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
220 	navail = 0;
221 
222 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
223 		lowest -= APIC_VECTOR_PER_IPL;
224 
225 	/*
226 	 * msibits is the no. of lower order message data bits for the
227 	 * allocated MSI vectors and is used to calculate the aligned
228 	 * starting vector
229 	 */
230 	msibits = count - 1;
231 
232 	/* It has to be contiguous */
233 	for (i = lowest; i < highest; i++) {
234 		navail = 0;
235 
236 		/*
237 		 * starting vector has to be aligned accordingly for
238 		 * multiple MSIs
239 		 */
240 		if (msibits)
241 			i = (i + msibits) & ~msibits;
242 		start = i;
243 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
244 		    (i < highest)) {
245 			if (APIC_CHECK_RESERVE_VECTORS(i))
246 				break;
247 			navail++;
248 			if (navail >= count)
249 				return (start);
250 			i++;
251 		}
252 	}
253 	return (0);
254 }
255 
256 
257 /*
258  * It finds the apic_irq_t associates with the dip, ispec and type.
259  */
260 apic_irq_t *
261 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
262 {
263 	apic_irq_t	*irqp;
264 	int i;
265 
266 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
267 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
268 	    ispec->intrspec_pri, type));
269 
270 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
271 		if ((irqp = apic_irq_table[i]) == NULL)
272 			continue;
273 		if ((irqp->airq_dip == dip) &&
274 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
275 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
276 			if (type == DDI_INTR_TYPE_MSI) {
277 				if (irqp->airq_mps_intr_index == MSI_INDEX)
278 					return (irqp);
279 			} else if (type == DDI_INTR_TYPE_MSIX) {
280 				if (irqp->airq_mps_intr_index == MSIX_INDEX)
281 					return (irqp);
282 			} else
283 				return (irqp);
284 		}
285 	}
286 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
287 	return (NULL);
288 }
289 
290 
291 #if !defined(__xpv)
292 
293 /*
294  * This function will return the pending bit of the irqp.
295  * It either comes from the IRR register of the APIC or the RDT
296  * entry of the I/O APIC.
297  * For the IRR to work, it needs to be to its binding CPU
298  */
299 static int
300 apic_get_pending(apic_irq_t *irqp, int type)
301 {
302 	int			bit, index, irr, pending;
303 	int			intin_no;
304 	int			apic_ix;
305 
306 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
307 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
308 	    type));
309 
310 	/* need to get on the bound cpu */
311 	mutex_enter(&cpu_lock);
312 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
313 
314 	index = irqp->airq_vector / 32;
315 	bit = irqp->airq_vector % 32;
316 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
317 
318 	affinity_clear();
319 	mutex_exit(&cpu_lock);
320 
321 	pending = (irr & (1 << bit)) ? 1 : 0;
322 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
323 		/* check I/O APIC for fixed interrupt */
324 		intin_no = irqp->airq_intin_no;
325 		apic_ix = irqp->airq_ioapicindex;
326 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
327 		    AV_PENDING) ? 1 : 0;
328 	}
329 	return (pending);
330 }
331 
332 
333 /*
334  * This function will clear the mask for the interrupt on the I/O APIC
335  */
336 static void
337 apic_clear_mask(apic_irq_t *irqp)
338 {
339 	int			intin_no;
340 	ulong_t			iflag;
341 	int32_t			rdt_entry;
342 	int 			apic_ix;
343 
344 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
345 	    (void *)irqp));
346 
347 	intin_no = irqp->airq_intin_no;
348 	apic_ix = irqp->airq_ioapicindex;
349 
350 	iflag = intr_clear();
351 	lock_set(&apic_ioapic_lock);
352 
353 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
354 
355 	/* clear mask */
356 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
357 	    ((~AV_MASK) & rdt_entry));
358 
359 	lock_clear(&apic_ioapic_lock);
360 	intr_restore(iflag);
361 }
362 
363 
364 /*
365  * This function will mask the interrupt on the I/O APIC
366  */
367 static void
368 apic_set_mask(apic_irq_t *irqp)
369 {
370 	int			intin_no;
371 	int 			apic_ix;
372 	ulong_t			iflag;
373 	int32_t			rdt_entry;
374 
375 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
376 
377 	intin_no = irqp->airq_intin_no;
378 	apic_ix = irqp->airq_ioapicindex;
379 
380 	iflag = intr_clear();
381 
382 	lock_set(&apic_ioapic_lock);
383 
384 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
385 
386 	/* mask it */
387 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
388 	    (AV_MASK | rdt_entry));
389 
390 	lock_clear(&apic_ioapic_lock);
391 	intr_restore(iflag);
392 }
393 
394 
395 void
396 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
397 {
398 	int i;
399 	apic_irq_t *irqptr;
400 	struct intrspec ispec;
401 
402 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
403 	    "count: %x pri: %x type: %x\n",
404 	    (void *)dip, inum, count, pri, type));
405 
406 	/* for MSI/X only */
407 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
408 		return;
409 
410 	for (i = 0; i < count; i++) {
411 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
412 		    "pri=0x%x count=0x%x\n", inum, pri, count));
413 		ispec.intrspec_vec = inum + i;
414 		ispec.intrspec_pri = pri;
415 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
416 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
417 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
418 			    "failed\n", (void *)dip, inum, pri));
419 			continue;
420 		}
421 		irqptr->airq_mps_intr_index = FREE_INDEX;
422 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
423 	}
424 }
425 
426 #endif	/* ! __xpv */
427 
428 /*
429  * check whether the system supports MSI
430  *
431  * If PCI-E capability is found, then this must be a PCI-E system.
432  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
433  * to indicate this system supports MSI.
434  */
435 int
436 apic_check_msi_support()
437 {
438 	dev_info_t *cdip;
439 	char dev_type[16];
440 	int dev_len;
441 
442 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
443 
444 	/*
445 	 * check whether the first level children of root_node have
446 	 * PCI-E capability
447 	 */
448 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
449 	    cdip = ddi_get_next_sibling(cdip)) {
450 
451 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
452 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
453 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
454 		    ddi_node_name(cdip)));
455 		dev_len = sizeof (dev_type);
456 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
457 		    "device_type", (caddr_t)dev_type, &dev_len)
458 		    != DDI_PROP_SUCCESS)
459 			continue;
460 		if (strcmp(dev_type, "pciex") == 0)
461 			return (PSM_SUCCESS);
462 	}
463 
464 	/* MSI is not supported on this system */
465 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
466 	    "device_type found\n"));
467 	return (PSM_FAILURE);
468 }
469 
470 #if !defined(__xpv)
471 
472 /*
473  * apic_pci_msi_unconfigure:
474  *
475  * This and next two interfaces are copied from pci_intr_lib.c
476  * Do ensure that these two files stay in sync.
477  * These needed to be copied over here to avoid a deadlock situation on
478  * certain mp systems that use MSI interrupts.
479  *
480  * IMPORTANT regards next three interfaces:
481  * i) are called only for MSI/X interrupts.
482  * ii) called with interrupts disabled, and must not block
483  */
484 void
485 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
486 {
487 	ushort_t		msi_ctrl;
488 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
489 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
490 
491 	ASSERT((handle != NULL) && (cap_ptr != 0));
492 
493 	if (type == DDI_INTR_TYPE_MSI) {
494 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
495 		msi_ctrl &= (~PCI_MSI_MME_MASK);
496 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
497 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
498 
499 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
500 			pci_config_put16(handle,
501 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
502 			pci_config_put32(handle,
503 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
504 		} else {
505 			pci_config_put16(handle,
506 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
507 		}
508 
509 	} else if (type == DDI_INTR_TYPE_MSIX) {
510 		uintptr_t	off;
511 		uint32_t	mask;
512 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
513 
514 		ASSERT(msix_p != NULL);
515 
516 		/* Offset into "inum"th entry in the MSI-X table & mask it */
517 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
518 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
519 
520 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
521 
522 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
523 
524 		/* Offset into the "inum"th entry in the MSI-X table */
525 		off = (uintptr_t)msix_p->msix_tbl_addr +
526 		    (inum * PCI_MSIX_VECTOR_SIZE);
527 
528 		/* Reset the "data" and "addr" bits */
529 		ddi_put32(msix_p->msix_tbl_hdl,
530 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
531 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
532 	}
533 }
534 
#endif	/* ! __xpv */
536 
537 /*
538  * apic_pci_msi_enable_mode:
539  */
540 void
541 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
542 {
543 	ushort_t		msi_ctrl;
544 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
545 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
546 
547 	ASSERT((handle != NULL) && (cap_ptr != 0));
548 
549 	if (type == DDI_INTR_TYPE_MSI) {
550 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
551 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
552 			return;
553 
554 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
555 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
556 
557 	} else if (type == DDI_INTR_TYPE_MSIX) {
558 		uintptr_t	off;
559 		uint32_t	mask;
560 		ddi_intr_msix_t	*msix_p;
561 
562 		msix_p = i_ddi_get_msix(rdip);
563 
564 		ASSERT(msix_p != NULL);
565 
566 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
567 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
568 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
569 
570 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
571 
572 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
573 
574 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
575 
576 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
577 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
578 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
579 			    msi_ctrl);
580 		}
581 	}
582 }
583 
584 /*
585  * apic_pci_msi_disable_mode:
586  */
587 void
588 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
589 {
590 	ushort_t		msi_ctrl;
591 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
592 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
593 
594 	ASSERT((handle != NULL) && (cap_ptr != 0));
595 
596 	if (type == DDI_INTR_TYPE_MSI) {
597 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
598 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
599 			return;
600 
601 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
602 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
603 
604 	} else if (type == DDI_INTR_TYPE_MSIX) {
605 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
606 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
607 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
608 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
609 			    msi_ctrl);
610 		}
611 	}
612 }
613 
614 #if !defined(__xpv)
615 
616 static int
617 apic_set_cpu(int irqno, int cpu, int *result)
618 {
619 	apic_irq_t *irqp;
620 	ulong_t iflag;
621 	int ret;
622 
623 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
624 
625 	mutex_enter(&airq_mutex);
626 	irqp = apic_irq_table[irqno];
627 	mutex_exit(&airq_mutex);
628 
629 	if (irqp == NULL) {
630 		*result = ENXIO;
631 		return (PSM_FAILURE);
632 	}
633 
634 	/* Fail if this is an MSI intr and is part of a group. */
635 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
636 	    (irqp->airq_intin_no > 1)) {
637 		*result = ENXIO;
638 		return (PSM_FAILURE);
639 	}
640 
641 	iflag = intr_clear();
642 	lock_set(&apic_ioapic_lock);
643 
644 	ret = apic_rebind_all(irqp, cpu);
645 
646 	lock_clear(&apic_ioapic_lock);
647 	intr_restore(iflag);
648 
649 	if (ret) {
650 		*result = EIO;
651 		return (PSM_FAILURE);
652 	}
653 	/*
654 	 * keep tracking the default interrupt cpu binding
655 	 */
656 	irqp->airq_cpu = cpu;
657 
658 	*result = 0;
659 	return (PSM_SUCCESS);
660 }
661 
662 static int
663 apic_grp_set_cpu(int irqno, int new_cpu, int *result)
664 {
665 	dev_info_t *orig_dip;
666 	uint32_t orig_cpu;
667 	ulong_t iflag;
668 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
669 	int i;
670 	int cap_ptr;
671 	int msi_mask_off;
672 	ushort_t msi_ctrl;
673 	uint32_t msi_pvm;
674 	ddi_acc_handle_t handle;
675 	int num_vectors = 0;
676 	uint32_t vector;
677 
678 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
679 
680 	/*
681 	 * Take mutex to insure that table doesn't change out from underneath
682 	 * us while we're playing with it.
683 	 */
684 	mutex_enter(&airq_mutex);
685 	irqps[0] = apic_irq_table[irqno];
686 	orig_cpu = irqps[0]->airq_temp_cpu;
687 	orig_dip = irqps[0]->airq_dip;
688 	num_vectors = irqps[0]->airq_intin_no;
689 	vector = irqps[0]->airq_vector;
690 
691 	/* A "group" of 1 */
692 	if (num_vectors == 1) {
693 		mutex_exit(&airq_mutex);
694 		return (apic_set_cpu(irqno, new_cpu, result));
695 	}
696 
697 	*result = ENXIO;
698 
699 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
700 		mutex_exit(&airq_mutex);
701 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
702 		goto set_grp_intr_done;
703 	}
704 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
705 		mutex_exit(&airq_mutex);
706 		DDI_INTR_IMPLDBG((CE_CONT,
707 		    "set_grp: base vec not part of a grp or not aligned: "
708 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
709 		goto set_grp_intr_done;
710 	}
711 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
712 	    num_vectors));
713 
714 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
715 
716 	*result = EIO;
717 
718 	/*
719 	 * All IRQ entries in the table for the given device will be not
720 	 * shared.  Since they are not shared, the dip in the table will
721 	 * be true to the device of interest.
722 	 */
723 	for (i = 1; i < num_vectors; i++) {
724 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
725 		if (irqps[i] == NULL) {
726 			mutex_exit(&airq_mutex);
727 			goto set_grp_intr_done;
728 		}
729 #ifdef DEBUG
730 		/* Sanity check: CPU and dip is the same for all entries. */
731 		if ((irqps[i]->airq_dip != orig_dip) ||
732 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
733 			mutex_exit(&airq_mutex);
734 			DDI_INTR_IMPLDBG((CE_CONT,
735 			    "set_grp: cpu or dip for vec 0x%x difft than for "
736 			    "vec 0x%x\n", vector, vector + i));
737 			DDI_INTR_IMPLDBG((CE_CONT,
738 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
739 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
740 			    (void *)irqps[i]->airq_dip));
741 			goto set_grp_intr_done;
742 		}
743 #endif /* DEBUG */
744 	}
745 	mutex_exit(&airq_mutex);
746 
747 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
748 	handle = i_ddi_get_pci_config_handle(orig_dip);
749 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
750 
751 	/* MSI Per vector masking is supported. */
752 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
753 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
754 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
755 		else
756 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
757 		msi_pvm = pci_config_get32(handle, msi_mask_off);
758 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
759 		DDI_INTR_IMPLDBG((CE_CONT,
760 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
761 		    pci_config_get32(handle, msi_mask_off)));
762 	}
763 
764 	iflag = intr_clear();
765 	lock_set(&apic_ioapic_lock);
766 
767 	/*
768 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
769 	 * an error if the CPU is not accepting interrupts.  If the first one
770 	 * succeeds they all will.
771 	 */
772 	if (apic_rebind_all(irqps[0], new_cpu))
773 		(void) apic_rebind_all(irqps[0], orig_cpu);
774 	else {
775 		irqps[0]->airq_cpu = new_cpu;
776 
777 		for (i = 1; i < num_vectors; i++) {
778 			(void) apic_rebind_all(irqps[i], new_cpu);
779 			irqps[i]->airq_cpu = new_cpu;
780 		}
781 		*result = 0;	/* SUCCESS */
782 	}
783 
784 	lock_clear(&apic_ioapic_lock);
785 	intr_restore(iflag);
786 
787 	/* Reenable vectors if per vector masking is supported. */
788 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
789 		pci_config_put32(handle, msi_mask_off, msi_pvm);
790 		DDI_INTR_IMPLDBG((CE_CONT,
791 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
792 		    pci_config_get32(handle, msi_mask_off)));
793 	}
794 
795 set_grp_intr_done:
796 	if (*result != 0)
797 		return (PSM_FAILURE);
798 
799 	return (PSM_SUCCESS);
800 }
801 
802 #else	/* __xpv */
803 
/*
 * We let the hypervisor deal with MSI configuration,
 * so just stub this out.
 *
 * This is the definition used when __xpv is defined; it takes the same
 * arguments as the !__xpv apic_pci_msi_unconfigure() and does nothing.
 */

/* ARGSUSED */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
}
814 
815 #endif	/* __xpv */
816 
817 int
818 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
819 {
820 	struct autovec *av_dev;
821 	uchar_t irqno;
822 	int i;
823 	apic_irq_t *irq_p;
824 
825 	/* Sanity check the vector/irq argument. */
826 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
827 
828 	mutex_enter(&airq_mutex);
829 
830 	/*
831 	 * Convert the vecirq arg to an irq using vector_to_irq table
832 	 * if the arg is a vector.  Pass thru if already an irq.
833 	 */
834 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
835 	    PSMGI_INTRBY_VEC)
836 		irqno = apic_vector_to_irq[vecirq];
837 	else
838 		irqno = vecirq;
839 
840 	irq_p = apic_irq_table[irqno];
841 
842 	if ((irq_p == NULL) ||
843 	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
844 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
845 		mutex_exit(&airq_mutex);
846 		return (PSM_FAILURE);
847 	}
848 
849 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
850 
851 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
852 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
853 
854 		/* Return user bound info for intrd. */
855 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
856 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
857 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
858 		}
859 	}
860 
861 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
862 		intr_params_p->avgi_vector = irq_p->airq_vector;
863 
864 	if (intr_params_p->avgi_req_flags &
865 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
866 		/* Get number of devices from apic_irq table shared field. */
867 		intr_params_p->avgi_num_devs = irq_p->airq_share;
868 
869 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
870 
871 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
872 
873 		/* Some devices have NULL dip.  Don't count these. */
874 		if (intr_params_p->avgi_num_devs > 0) {
875 			for (i = 0, av_dev = autovect[irqno].avh_link;
876 			    av_dev; av_dev = av_dev->av_link)
877 				if (av_dev->av_vector && av_dev->av_dip)
878 					i++;
879 			intr_params_p->avgi_num_devs =
880 			    MIN(intr_params_p->avgi_num_devs, i);
881 		}
882 
883 		/* There are no viable dips to return. */
884 		if (intr_params_p->avgi_num_devs == 0)
885 			intr_params_p->avgi_dip_list = NULL;
886 
887 		else {	/* Return list of dips */
888 
889 			/* Allocate space in array for that number of devs. */
890 			intr_params_p->avgi_dip_list = kmem_zalloc(
891 			    intr_params_p->avgi_num_devs *
892 			    sizeof (dev_info_t *),
893 			    KM_SLEEP);
894 
895 			/*
896 			 * Loop through the device list of the autovec table
897 			 * filling in the dip array.
898 			 *
899 			 * Note that the autovect table may have some special
900 			 * entries which contain NULL dips.  These will be
901 			 * ignored.
902 			 */
903 			for (i = 0, av_dev = autovect[irqno].avh_link;
904 			    av_dev; av_dev = av_dev->av_link)
905 				if (av_dev->av_vector && av_dev->av_dip)
906 					intr_params_p->avgi_dip_list[i++] =
907 					    av_dev->av_dip;
908 		}
909 	}
910 
911 	mutex_exit(&airq_mutex);
912 
913 	return (PSM_SUCCESS);
914 }
915 
916 
917 #if !defined(__xpv)
918 
919 /*
920  * This function provides external interface to the nexus for all
921  * functionalities related to the new DDI interrupt framework.
922  *
923  * Input:
924  * dip     - pointer to the dev_info structure of the requested device
925  * hdlp    - pointer to the internal interrupt handle structure for the
926  *	     requested interrupt
927  * intr_op - opcode for this call
928  * result  - pointer to the integer that will hold the result to be
929  *	     passed back if return value is PSM_SUCCESS
930  *
931  * Output:
932  * return value is either PSM_SUCCESS or PSM_FAILURE
933  */
934 int
935 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
936     psm_intr_op_t intr_op, int *result)
937 {
938 	int		cap;
939 	int		count_vec;
940 	int		old_priority;
941 	int		new_priority;
942 	int		new_cpu;
943 	apic_irq_t	*irqp;
944 	struct intrspec *ispec, intr_spec;
945 
946 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
947 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
948 
949 	ispec = &intr_spec;
950 	ispec->intrspec_pri = hdlp->ih_pri;
951 	ispec->intrspec_vec = hdlp->ih_inum;
952 	ispec->intrspec_func = hdlp->ih_cb_func;
953 
954 	switch (intr_op) {
955 	case PSM_INTR_OP_CHECK_MSI:
956 		/*
957 		 * Check MSI/X is supported or not at APIC level and
958 		 * masked off the MSI/X bits in hdlp->ih_type if not
959 		 * supported before return.  If MSI/X is supported,
960 		 * leave the ih_type unchanged and return.
961 		 *
962 		 * hdlp->ih_type passed in from the nexus has all the
963 		 * interrupt types supported by the device.
964 		 */
965 		if (apic_support_msi == 0) {
966 			/*
967 			 * if apic_support_msi is not set, call
968 			 * apic_check_msi_support() to check whether msi
969 			 * is supported first
970 			 */
971 			if (apic_check_msi_support() == PSM_SUCCESS)
972 				apic_support_msi = 1;
973 			else
974 				apic_support_msi = -1;
975 		}
976 		if (apic_support_msi == 1) {
977 			if (apic_msix_enable)
978 				*result = hdlp->ih_type;
979 			else
980 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
981 		} else
982 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
983 			    DDI_INTR_TYPE_MSIX);
984 		break;
985 	case PSM_INTR_OP_ALLOC_VECTORS:
986 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
987 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
988 			    hdlp->ih_scratch1, hdlp->ih_pri,
989 			    (int)(uintptr_t)hdlp->ih_scratch2);
990 		else
991 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
992 			    hdlp->ih_scratch1, hdlp->ih_pri,
993 			    (int)(uintptr_t)hdlp->ih_scratch2);
994 		break;
995 	case PSM_INTR_OP_FREE_VECTORS:
996 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
997 		    hdlp->ih_pri, hdlp->ih_type);
998 		break;
999 	case PSM_INTR_OP_NAVAIL_VECTORS:
1000 		*result = apic_navail_vector(dip, hdlp->ih_pri);
1001 		break;
1002 	case PSM_INTR_OP_XLATE_VECTOR:
1003 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1004 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
1005 		break;
1006 	case PSM_INTR_OP_GET_PENDING:
1007 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
1008 			return (PSM_FAILURE);
1009 		*result = apic_get_pending(irqp, hdlp->ih_type);
1010 		break;
1011 	case PSM_INTR_OP_CLEAR_MASK:
1012 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1013 			return (PSM_FAILURE);
1014 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
1015 		if (irqp == NULL)
1016 			return (PSM_FAILURE);
1017 		apic_clear_mask(irqp);
1018 		break;
1019 	case PSM_INTR_OP_SET_MASK:
1020 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1021 			return (PSM_FAILURE);
1022 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
1023 			return (PSM_FAILURE);
1024 		apic_set_mask(irqp);
1025 		break;
1026 	case PSM_INTR_OP_GET_CAP:
1027 		cap = DDI_INTR_FLAG_PENDING;
1028 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1029 			cap |= DDI_INTR_FLAG_MASKABLE;
1030 		else if (hdlp->ih_type == DDI_INTR_TYPE_MSIX)
1031 			cap |= DDI_INTR_FLAG_RETARGETABLE;
1032 		*result = cap;
1033 		break;
1034 	case PSM_INTR_OP_GET_SHARED:
1035 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1036 			return (PSM_FAILURE);
1037 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1038 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
1039 			return (PSM_FAILURE);
1040 		*result = (irqp->airq_share > 1) ? 1: 0;
1041 		break;
1042 	case PSM_INTR_OP_SET_PRI:
1043 		old_priority = hdlp->ih_pri;	/* save old value */
1044 		new_priority = *(int *)result;	/* try the new value */
1045 
1046 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1047 			return (PSM_SUCCESS);
1048 		}
1049 
1050 		/* Now allocate the vectors */
1051 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI) {
1052 			/* SET_PRI does not support the case of multiple MSI */
1053 			if (i_ddi_intr_get_current_nintrs(hdlp->ih_dip) > 1)
1054 				return (PSM_FAILURE);
1055 
1056 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
1057 			    1, new_priority,
1058 			    DDI_INTR_ALLOC_STRICT);
1059 		} else {
1060 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
1061 			    1, new_priority,
1062 			    DDI_INTR_ALLOC_STRICT);
1063 		}
1064 
1065 		/* Did we get new vectors? */
1066 		if (!count_vec)
1067 			return (PSM_FAILURE);
1068 
1069 		/* Finally, free the previously allocated vectors */
1070 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
1071 		    old_priority, hdlp->ih_type);
1072 		break;
1073 	case PSM_INTR_OP_SET_CPU:
1074 	case PSM_INTR_OP_GRP_SET_CPU:
1075 		/*
1076 		 * The interrupt handle given here has been allocated
1077 		 * specifically for this command, and ih_private carries
1078 		 * a CPU value.
1079 		 */
1080 		new_cpu = (int)(intptr_t)hdlp->ih_private;
1081 		if (!apic_cpu_in_range(new_cpu)) {
1082 			DDI_INTR_IMPLDBG((CE_CONT,
1083 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
1084 			*result = EINVAL;
1085 			return (PSM_FAILURE);
1086 		}
1087 		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
1088 			DDI_INTR_IMPLDBG((CE_CONT,
1089 			    "[grp_]set_cpu: vector out of range: %d\n",
1090 			    hdlp->ih_vector));
1091 			*result = EINVAL;
1092 			return (PSM_FAILURE);
1093 		}
1094 		if (!(hdlp->ih_flags & PSMGI_INTRBY_IRQ))
1095 			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
1096 		if (intr_op == PSM_INTR_OP_SET_CPU) {
1097 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
1098 			    PSM_SUCCESS)
1099 				return (PSM_FAILURE);
1100 		} else {
1101 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
1102 			    result) != PSM_SUCCESS)
1103 				return (PSM_FAILURE);
1104 		}
1105 		break;
1106 	case PSM_INTR_OP_GET_INTR:
1107 		/*
1108 		 * The interrupt handle given here has been allocated
1109 		 * specifically for this command, and ih_private carries
1110 		 * a pointer to a apic_get_intr_t.
1111 		 */
1112 		if (apic_get_vector_intr_info(
1113 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
1114 			return (PSM_FAILURE);
1115 		break;
1116 	case PSM_INTR_OP_APIC_TYPE:
1117 		hdlp->ih_private = apic_get_apic_type();
1118 		hdlp->ih_ver = apic_get_apic_version();
1119 		break;
1120 	case PSM_INTR_OP_SET_CAP:
1121 	default:
1122 		return (PSM_FAILURE);
1123 	}
1124 	return (PSM_SUCCESS);
1125 }
1126 #endif	/* !__xpv */
1127