xref: /illumos-gate/usr/src/uts/i86pc/io/immu_intrmap.c (revision ccd81fdd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Portions Copyright (c) 2010, Oracle and/or its affiliates.
24  * All rights reserved.
25  */
26 
27 /*
28  * Copyright (c) 2009, Intel Corporation.
29  * All rights reserved.
30  */
31 
32 
33 #include <sys/apic.h>
34 #include <vm/hat_i86.h>
35 #include <sys/sysmacros.h>
36 #include <sys/smp_impldefs.h>
37 #include <sys/immu.h>
38 
39 
40 typedef struct intrmap_private {
41 	immu_t		*ir_immu;
42 	uint16_t	ir_idx;
43 	uint32_t	ir_sid_svt_sq;
44 } intrmap_private_t;
45 
/*
 * Accessors for the remapping state attached to an apic_irq_t.
 * AIRQ_PRIVATE() names the raw pointer slot and may be used as an lvalue;
 * INTRMAP_PRIVATE() yields the same pointer typed as intrmap_private_t *.
 * The argument is parenthesized so that any pointer expression (not just
 * a plain identifier) expands correctly.
 */
#define	INTRMAP_PRIVATE(airq)	\
	((intrmap_private_t *)(airq)->airq_intrmap_private)
#define	AIRQ_PRIVATE(airq)	((airq)->airq_intrmap_private)
48 
/*
 * One interrupt remapping table entry (IRTE): two 64-bit words.
 */
struct intrmap_rte {
	uint64_t	lo;	/* low word, built with IRTE_LOW() */
	uint64_t	hi;	/* high word, built with IRTE_HIGH() */
};
typedef struct intrmap_rte intrmap_rte_t;
54 
/*
 * Builders for the two 64-bit halves of an IRTE.
 *
 * IRTE_HIGH() passes the packed sid/svt/sq value through unchanged.
 * IRTE_LOW() places p at bit 0, fpd at bit 1, dm at bit 2, rh at bit 3,
 * tm at bit 4, dlm starting at bit 5, vector starting at bit 16 and dst
 * starting at bit 32.
 */
#define	IRTE_HIGH(sid_svt_sq)	(sid_svt_sq)
#define	IRTE_LOW(dst, vector, dlm, tm, rh, dm, fpd, p)	\
	((p) |						\
	((uint64_t)(fpd) << 1) |			\
	((uint64_t)(dm) << 2) |				\
	((uint64_t)(rh) << 3) |				\
	((uint64_t)(tm) << 4) |				\
	((uint64_t)(dlm) << 5) |			\
	((uint64_t)(vector) << 16) |			\
	((uint64_t)(dst) << 32))
65 
/*
 * Values for the svt component that get_sid() packs into ir_sid_svt_sq.
 */
typedef enum {
	SVT_NO_VERIFY	= 0,	/* no verification */
	SVT_ALL_VERIFY	= 1,	/* using sid and sq to verify */
	SVT_BUS_VERIFY	= 2,	/* verify #startbus and #endbus */
	SVT_RSVD	= 3
} intrmap_svt_t;
72 
/*
 * Values for the sq component that get_sid() packs into ir_sid_svt_sq.
 */
typedef enum {
	SQ_VERIFY_ALL	= 0,	/* verify all 16 bits */
	SQ_VERIFY_IGR_1	= 1,	/* ignore bit 3 */
	SQ_VERIFY_IGR_2	= 2,	/* ignore bit 2-3 */
	SQ_VERIFY_IGR_3	= 3	/* ignore bit 1-3 */
} intrmap_sq_t;
79 
80 /*
81  * S field of the Interrupt Remapping Table Address Register
82  * the size of the interrupt remapping table is 1 << (immu_intrmap_irta_s + 1)
83  */
84 static uint_t intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
85 
86 /*
87  * If true, arrange to suppress broadcast EOI by setting edge-triggered mode
88  * even for level-triggered interrupts in the interrupt-remapping engine.
89  * If false, broadcast EOI can still be suppressed if the CPU supports the
90  * APIC_SVR_SUPPRESS_BROADCAST_EOI bit.  In both cases, the IOAPIC is still
91  * programmed with the correct trigger mode, and pcplusmp must send an EOI
92  * to the IOAPIC by writing to the IOAPIC's EOI register to make up for the
93  * missing broadcast EOI.
94  */
95 static int intrmap_suppress_brdcst_eoi = 0;
96 
97 /*
98  * whether verify the source id of interrupt request
99  */
100 static int intrmap_enable_sid_verify = 0;
101 
/*
 * Fault-reason strings for DVMA remapping, indexed by the fault reason
 * code.  The final entry is the catch-all used for any reason code past
 * the end of the table.
 */
static char *immu_dvma_faults[] = {
	"Reserved",
	"The present field in root-entry is Clear",
	"The present field in context-entry is Clear",
	"Hardware detected invalid programming of a context-entry",
	"The DMA request attempted to access an address beyond max support",
	"The Write field in a page-table entry is Clear when DMA write",
	"The Read field in a page-table entry is Clear when DMA read",
	"Access the next level page table resulted in error",
	"Access the root-entry table resulted in error",
	"Access the context-entry table resulted in error",
	"Reserved field not initialized to zero in a present root-entry",
	"Reserved field not initialized to zero in a present context-entry",
	"Reserved field not initialized to zero in a present page-table entry",
	"DMA blocked due to the Translation Type field in context-entry",
	"Incorrect fault event reason number",
};
/*
 * Index of the last (catch-all) entry.  Fully parenthesized so the macro
 * behaves as a single operand inside any surrounding expression.
 */
#define	DVMA_MAX_FAULTS	\
	((sizeof (immu_dvma_faults) / sizeof (char *)) - 1)
121 
/*
 * Fault-reason strings for interrupt remapping.  immu_intr_handler()
 * indexes this table with (fault reason - 0x20), clamped to the last
 * entry.
 */
static char *immu_intrmap_faults[] = {
	"reserved field set in IRTE",
	"interrupt_index exceed the intr-remap table size",
	"present field in IRTE is clear",
	"hardware access intr-remap table address resulted in error",
	"reserved field set in IRTE, include various conditional",
	"hardware blocked an interrupt request in Compatibility format",
	"remappable interrupt request blocked due to verification failure"
};
/*
 * Index of the last entry.  Fully parenthesized so the macro behaves as
 * a single operand inside any surrounding expression.
 */
#define	INTRMAP_MAX_FAULTS \
	((sizeof (immu_intrmap_faults) / sizeof (char *)) - 1)
134 
135 /* Function prototypes */
136 static int immu_intrmap_init(int apic_mode);
137 static void immu_intrmap_switchon(int suppress_brdcst_eoi);
138 static void immu_intrmap_alloc(apic_irq_t *irq_ptr);
139 static void immu_intrmap_map(apic_irq_t *irq_ptr, void *intrmap_data);
140 static void immu_intrmap_free(apic_irq_t *irq_ptr);
141 static void immu_intrmap_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt);
142 static void immu_intrmap_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs);
143 
144 static struct apic_intrmap_ops intrmap_ops = {
145 	immu_intrmap_init,
146 	immu_intrmap_switchon,
147 	immu_intrmap_alloc,
148 	immu_intrmap_map,
149 	immu_intrmap_free,
150 	immu_intrmap_rdt,
151 	immu_intrmap_msi,
152 };
153 
154 /* apic mode, APIC/X2APIC */
155 static int intrmap_apic_mode = LOCAL_APIC;
156 
157 
158 /*
159  * helper functions
160  */
161 static uint_t
162 bitset_find_free(bitset_t *b, uint_t post)
163 {
164 	uint_t	i;
165 	uint_t	cap = bitset_capacity(b);
166 
167 	if (post == cap)
168 		post = 0;
169 
170 	ASSERT(post < cap);
171 
172 	for (i = post; i < cap; i++) {
173 		if (!bitset_in_set(b, i))
174 			return (i);
175 	}
176 
177 	for (i = 0; i < post; i++) {
178 		if (!bitset_in_set(b, i))
179 			return (i);
180 	}
181 
182 	return (INTRMAP_IDX_FULL);	/* no free index */
183 }
184 
185 /*
186  * helper function to find 'count' contigous free
187  * interrupt remapping table entries
188  */
189 static uint_t
190 bitset_find_multi_free(bitset_t *b, uint_t post, uint_t count)
191 {
192 	uint_t  i, j;
193 	uint_t	cap = bitset_capacity(b);
194 
195 	if (post == INTRMAP_IDX_FULL) {
196 		return (INTRMAP_IDX_FULL);
197 	}
198 
199 	if (count > cap)
200 		return (INTRMAP_IDX_FULL);
201 
202 	ASSERT(post < cap);
203 
204 	for (i = post; (i + count) <= cap; i++) {
205 		for (j = 0; j < count; j++) {
206 			if (bitset_in_set(b, (i + j))) {
207 				i = i + j;
208 				break;
209 			}
210 			if (j == count - 1)
211 				return (i);
212 		}
213 	}
214 
215 	for (i = 0; (i < post) && ((i + count) <= cap); i++) {
216 		for (j = 0; j < count; j++) {
217 			if (bitset_in_set(b, (i + j))) {
218 				i = i + j;
219 				break;
220 			}
221 			if (j == count - 1)
222 				return (i);
223 		}
224 	}
225 
226 	return (INTRMAP_IDX_FULL);  		/* no free index */
227 }
228 
229 /* alloc one interrupt remapping table entry */
230 static int
231 alloc_tbl_entry(intrmap_t *intrmap)
232 {
233 	uint32_t idx;
234 
235 	for (;;) {
236 		mutex_enter(&intrmap->intrmap_lock);
237 		idx = intrmap->intrmap_free;
238 		if (idx != INTRMAP_IDX_FULL) {
239 			bitset_add(&intrmap->intrmap_map, idx);
240 			intrmap->intrmap_free =
241 			    bitset_find_free(&intrmap->intrmap_map, idx + 1);
242 			mutex_exit(&intrmap->intrmap_lock);
243 			break;
244 		}
245 
246 		/* no free intr entry, use compatible format intr */
247 		mutex_exit(&intrmap->intrmap_lock);
248 
249 		if (intrmap_apic_mode != LOCAL_X2APIC) {
250 			break;
251 		}
252 
253 		/*
254 		 * x2apic mode not allowed compatible
255 		 * interrupt
256 		 */
257 		delay(IMMU_ALLOC_RESOURCE_DELAY);
258 	}
259 
260 	return (idx);
261 }
262 
263 /* alloc 'cnt' contigous interrupt remapping table entries */
264 static int
265 alloc_tbl_multi_entries(intrmap_t *intrmap, uint_t cnt)
266 {
267 	uint_t idx, pos, i;
268 
269 	for (; ; ) {
270 		mutex_enter(&intrmap->intrmap_lock);
271 		pos = intrmap->intrmap_free;
272 		idx = bitset_find_multi_free(&intrmap->intrmap_map, pos, cnt);
273 
274 		if (idx != INTRMAP_IDX_FULL) {
275 			if (idx <= pos && pos < (idx + cnt)) {
276 				intrmap->intrmap_free = bitset_find_free(
277 				    &intrmap->intrmap_map, idx + cnt);
278 			}
279 			for (i = 0; i < cnt; i++) {
280 				bitset_add(&intrmap->intrmap_map, idx + i);
281 			}
282 			mutex_exit(&intrmap->intrmap_lock);
283 		}
284 
285 		mutex_exit(&intrmap->intrmap_lock);
286 
287 		if (intrmap_apic_mode != LOCAL_X2APIC) {
288 			break;
289 		}
290 
291 		/* x2apic mode not allowed comapitible interrupt */
292 		delay(IMMU_ALLOC_RESOURCE_DELAY);
293 	}
294 
295 	return (idx);
296 }
297 
298 /* init interrupt remapping table */
299 static int
300 init_unit(immu_t *immu)
301 {
302 	intrmap_t *intrmap;
303 	size_t size;
304 
305 	ddi_dma_attr_t intrmap_dma_attr = {
306 		DMA_ATTR_V0,
307 		0U,
308 		0xffffffffU,
309 		0xffffffffU,
310 		MMU_PAGESIZE,	/* page aligned */
311 		0x1,
312 		0x1,
313 		0xffffffffU,
314 		0xffffffffU,
315 		1,
316 		4,
317 		0
318 	};
319 
320 	ddi_device_acc_attr_t intrmap_acc_attr = {
321 		DDI_DEVICE_ATTR_V0,
322 		DDI_NEVERSWAP_ACC,
323 		DDI_STRICTORDER_ACC
324 	};
325 
326 	/*
327 	 * Using interrupt remapping implies using the queue
328 	 * invalidation interface. According to Intel,
329 	 * hardware that supports interrupt remapping should
330 	 * also support QI.
331 	 */
332 	ASSERT(IMMU_ECAP_GET_QI(immu->immu_regs_excap));
333 
334 	if (intrmap_apic_mode == LOCAL_X2APIC) {
335 		if (!IMMU_ECAP_GET_EIM(immu->immu_regs_excap)) {
336 			return (DDI_FAILURE);
337 		}
338 	}
339 
340 	if (intrmap_irta_s > INTRMAP_MAX_IRTA_SIZE) {
341 		intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
342 	}
343 
344 	intrmap =  kmem_zalloc(sizeof (intrmap_t), KM_SLEEP);
345 
346 	if (ddi_dma_alloc_handle(immu->immu_dip,
347 	    &intrmap_dma_attr,
348 	    DDI_DMA_SLEEP,
349 	    NULL,
350 	    &(intrmap->intrmap_dma_hdl)) != DDI_SUCCESS) {
351 		kmem_free(intrmap, sizeof (intrmap_t));
352 		return (DDI_FAILURE);
353 	}
354 
355 	intrmap->intrmap_size = 1 << (intrmap_irta_s + 1);
356 	size = intrmap->intrmap_size * INTRMAP_RTE_SIZE;
357 	if (ddi_dma_mem_alloc(intrmap->intrmap_dma_hdl,
358 	    size,
359 	    &intrmap_acc_attr,
360 	    DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED,
361 	    DDI_DMA_SLEEP,
362 	    NULL,
363 	    &(intrmap->intrmap_vaddr),
364 	    &size,
365 	    &(intrmap->intrmap_acc_hdl)) != DDI_SUCCESS) {
366 		ddi_dma_free_handle(&(intrmap->intrmap_dma_hdl));
367 		kmem_free(intrmap, sizeof (intrmap_t));
368 		return (DDI_FAILURE);
369 	}
370 
371 	ASSERT(!((uintptr_t)intrmap->intrmap_vaddr & MMU_PAGEOFFSET));
372 	bzero(intrmap->intrmap_vaddr, size);
373 	intrmap->intrmap_paddr = pfn_to_pa(
374 	    hat_getpfnum(kas.a_hat, intrmap->intrmap_vaddr));
375 
376 	mutex_init(&(intrmap->intrmap_lock), NULL, MUTEX_DRIVER, NULL);
377 	bitset_init(&intrmap->intrmap_map);
378 	bitset_resize(&intrmap->intrmap_map, intrmap->intrmap_size);
379 	intrmap->intrmap_free = 0;
380 
381 	immu->immu_intrmap = intrmap;
382 
383 	return (DDI_SUCCESS);
384 }
385 
386 static void
387 get_immu(apic_irq_t *irq_ptr)
388 {
389 	immu_t	*immu = NULL;
390 
391 	ASSERT(INTRMAP_PRIVATE(irq_ptr)->ir_immu == NULL);
392 
393 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
394 		immu = immu_dmar_ioapic_immu(irq_ptr->airq_ioapicindex);
395 	} else {
396 		if (irq_ptr->airq_dip != NULL) {
397 			immu = immu_dmar_get_immu(irq_ptr->airq_dip);
398 		}
399 	}
400 
401 	if (immu && (immu->immu_intrmap_running == B_TRUE)) {
402 		INTRMAP_PRIVATE(irq_ptr)->ir_immu = immu;
403 	}
404 }
405 
406 static int
407 get_top_pcibridge(dev_info_t *dip, void *arg)
408 {
409 	dev_info_t **topdipp = arg;
410 	immu_devi_t *immu_devi;
411 
412 	mutex_enter(&(DEVI(dip)->devi_lock));
413 	immu_devi = DEVI(dip)->devi_iommu;
414 	mutex_exit(&(DEVI(dip)->devi_lock));
415 
416 	if (immu_devi == NULL || immu_devi->imd_pcib_type == IMMU_PCIB_BAD ||
417 	    immu_devi->imd_pcib_type == IMMU_PCIB_ENDPOINT) {
418 		return (DDI_WALK_CONTINUE);
419 	}
420 
421 	*topdipp = dip;
422 
423 	return (DDI_WALK_CONTINUE);
424 }
425 
426 static dev_info_t *
427 intrmap_top_pcibridge(dev_info_t *rdip)
428 {
429 	dev_info_t *top_pcibridge = NULL;
430 
431 	if (immu_walk_ancestor(rdip, NULL, get_top_pcibridge,
432 	    &top_pcibridge, NULL, 0) != DDI_SUCCESS) {
433 		return (NULL);
434 	}
435 
436 	return (top_pcibridge);
437 }
438 
439 /* function to get interrupt request source id */
440 static void
441 get_sid(apic_irq_t *irq_ptr)
442 {
443 	dev_info_t	*dip, *pdip;
444 	immu_devi_t	*immu_devi;
445 	uint16_t	sid;
446 	uchar_t		svt, sq;
447 
448 	if (!intrmap_enable_sid_verify) {
449 		return;
450 	}
451 
452 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
453 		/* for interrupt through I/O APIC */
454 		sid = immu_dmar_ioapic_sid(irq_ptr->airq_ioapicindex);
455 		svt = SVT_ALL_VERIFY;
456 		sq = SQ_VERIFY_ALL;
457 	} else {
458 		/* MSI/MSI-X interrupt */
459 		dip = irq_ptr->airq_dip;
460 		ASSERT(dip);
461 		pdip = intrmap_top_pcibridge(dip);
462 		ASSERT(pdip);
463 		immu_devi = DEVI(pdip)->devi_iommu;
464 		ASSERT(immu_devi);
465 		if (immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCI) {
466 			/* device behind pcie to pci bridge */
467 			sid = (immu_devi->imd_bus << 8) | immu_devi->imd_sec;
468 			svt = SVT_BUS_VERIFY;
469 			sq = SQ_VERIFY_ALL;
470 		} else {
471 			/* pcie device or device behind pci to pci bridge */
472 			sid = (immu_devi->imd_bus << 8) |
473 			    immu_devi->imd_devfunc;
474 			svt = SVT_ALL_VERIFY;
475 			sq = SQ_VERIFY_ALL;
476 		}
477 	}
478 
479 	INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq =
480 	    sid | (svt << 18) | (sq << 16);
481 }
482 
483 static void
484 intrmap_enable(immu_t *immu)
485 {
486 	intrmap_t *intrmap;
487 	uint64_t irta_reg;
488 
489 	intrmap = immu->immu_intrmap;
490 
491 	irta_reg = intrmap->intrmap_paddr | intrmap_irta_s;
492 	if (intrmap_apic_mode == LOCAL_X2APIC) {
493 		irta_reg |= (0x1 << 11);
494 	}
495 
496 	immu_regs_intrmap_enable(immu, irta_reg);
497 }
498 
499 /* ####################################################################### */
500 
501 /*
502  * immu_intr_handler()
503  * 	the fault event handler for a single immu unit
504  */
505 int
506 immu_intr_handler(immu_t *immu)
507 {
508 	uint32_t status;
509 	int index, fault_reg_offset;
510 	int max_fault_index;
511 	boolean_t found_fault;
512 	dev_info_t *idip;
513 
514 	mutex_enter(&(immu->immu_intr_lock));
515 	mutex_enter(&(immu->immu_regs_lock));
516 
517 	/* read the fault status */
518 	status = immu_regs_get32(immu, IMMU_REG_FAULT_STS);
519 
520 	idip = immu->immu_dip;
521 	ASSERT(idip);
522 
523 	/* check if we have a pending fault for this immu unit */
524 	if ((status & IMMU_FAULT_STS_PPF) == 0) {
525 		mutex_exit(&(immu->immu_regs_lock));
526 		mutex_exit(&(immu->immu_intr_lock));
527 		return (DDI_INTR_UNCLAIMED);
528 	}
529 
530 	/*
531 	 * handle all primary pending faults
532 	 */
533 	index = IMMU_FAULT_GET_INDEX(status);
534 	max_fault_index =  IMMU_CAP_GET_NFR(immu->immu_regs_cap) - 1;
535 	fault_reg_offset = IMMU_CAP_GET_FRO(immu->immu_regs_cap);
536 
537 	found_fault = B_FALSE;
538 	_NOTE(CONSTCOND)
539 	while (1) {
540 		uint64_t val;
541 		uint8_t fault_reason;
542 		uint8_t fault_type;
543 		uint16_t sid;
544 		uint64_t pg_addr;
545 		uint64_t idx;
546 
547 		/* read the higher 64bits */
548 		val = immu_regs_get64(immu, fault_reg_offset + index * 16 + 8);
549 
550 		/* check if this fault register has pending fault */
551 		if (!IMMU_FRR_GET_F(val)) {
552 			break;
553 		}
554 
555 		found_fault = B_TRUE;
556 
557 		/* get the fault reason, fault type and sid */
558 		fault_reason = IMMU_FRR_GET_FR(val);
559 		fault_type = IMMU_FRR_GET_FT(val);
560 		sid = IMMU_FRR_GET_SID(val);
561 
562 		/* read the first 64bits */
563 		val = immu_regs_get64(immu, fault_reg_offset + index * 16);
564 		pg_addr = val & IMMU_PAGEMASK;
565 		idx = val >> 48;
566 
567 		/* clear the fault */
568 		immu_regs_put32(immu, fault_reg_offset + index * 16 + 12,
569 		    (((uint32_t)1) << 31));
570 
571 		/* report the fault info */
572 		if (fault_reason < 0x20) {
573 			/* immu-remapping fault */
574 			ddi_err(DER_WARN, idip,
575 			    "generated a fault event when translating DMA %s\n"
576 			    "\t on address 0x%" PRIx64 " for PCI(%d, %d, %d), "
577 			    "the reason is:\n\t %s",
578 			    fault_type ? "read" : "write", pg_addr,
579 			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
580 			    immu_dvma_faults[MIN(fault_reason,
581 			    DVMA_MAX_FAULTS)]);
582 		} else if (fault_reason < 0x27) {
583 			/* intr-remapping fault */
584 			ddi_err(DER_WARN, idip,
585 			    "generated a fault event when translating "
586 			    "interrupt request\n"
587 			    "\t on index 0x%" PRIx64 " for PCI(%d, %d, %d), "
588 			    "the reason is:\n\t %s",
589 			    idx,
590 			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
591 			    immu_intrmap_faults[MIN((fault_reason - 0x20),
592 			    INTRMAP_MAX_FAULTS)]);
593 		} else {
594 			ddi_err(DER_WARN, idip, "Unknown fault reason: 0x%x",
595 			    fault_reason);
596 		}
597 
598 		index++;
599 		if (index > max_fault_index)
600 			index = 0;
601 	}
602 
603 	/* Clear the fault */
604 	if (!found_fault) {
605 		ddi_err(DER_MODE, idip,
606 		    "Fault register set but no fault present");
607 	}
608 	immu_regs_put32(immu, IMMU_REG_FAULT_STS, 1);
609 	mutex_exit(&(immu->immu_regs_lock));
610 	mutex_exit(&(immu->immu_intr_lock));
611 	return (DDI_INTR_CLAIMED);
612 }
613 /* ######################################################################### */
614 
615 /*
616  * Interrupt remap entry points
617  */
618 
619 /* initialize interrupt remapping */
620 static int
621 immu_intrmap_init(int apic_mode)
622 {
623 	immu_t *immu;
624 	int error = DDI_FAILURE;
625 
626 	if (immu_intrmap_enable == B_FALSE) {
627 		return (DDI_SUCCESS);
628 	}
629 
630 	intrmap_apic_mode = apic_mode;
631 
632 	immu = list_head(&immu_list);
633 	for (; immu; immu = list_next(&immu_list, immu)) {
634 		if ((immu->immu_intrmap_running == B_TRUE) &&
635 		    IMMU_ECAP_GET_IR(immu->immu_regs_excap)) {
636 			if (init_unit(immu) == DDI_SUCCESS) {
637 				error = DDI_SUCCESS;
638 			}
639 		}
640 	}
641 
642 	/*
643 	 * if all IOMMU units disable intr remapping,
644 	 * return FAILURE
645 	 */
646 	return (error);
647 }
648 
649 
650 
651 /* enable interrupt remapping */
652 static void
653 immu_intrmap_switchon(int suppress_brdcst_eoi)
654 {
655 	immu_t *immu;
656 
657 
658 	intrmap_suppress_brdcst_eoi = suppress_brdcst_eoi;
659 
660 	immu = list_head(&immu_list);
661 	for (; immu; immu = list_next(&immu_list, immu)) {
662 		if (immu->immu_intrmap_setup == B_TRUE) {
663 			intrmap_enable(immu);
664 		}
665 	}
666 }
667 
668 /* alloc remapping entry for the interrupt */
669 static void
670 immu_intrmap_alloc(apic_irq_t *irq_ptr)
671 {
672 	immu_t	*immu;
673 	intrmap_t *intrmap;
674 	uint32_t		idx, cnt, i;
675 	uint_t			vector, irqno;
676 	uint32_t		sid_svt_sq;
677 
678 	if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE ||
679 	    AIRQ_PRIVATE(irq_ptr) != NULL) {
680 		return;
681 	}
682 
683 	AIRQ_PRIVATE(irq_ptr) =
684 	    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);
685 
686 	get_immu(irq_ptr);
687 
688 	immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu;
689 	if (immu == NULL) {
690 		goto intrmap_disable;
691 	}
692 
693 	intrmap = immu->immu_intrmap;
694 
695 	if (irq_ptr->airq_mps_intr_index == MSI_INDEX) {
696 		cnt = irq_ptr->airq_intin_no;
697 	} else {
698 		cnt = 1;
699 	}
700 
701 	if (cnt == 1) {
702 		idx = alloc_tbl_entry(intrmap);
703 	} else {
704 		idx = alloc_tbl_multi_entries(intrmap, cnt);
705 	}
706 
707 	if (idx == INTRMAP_IDX_FULL) {
708 		goto intrmap_disable;
709 	}
710 
711 	INTRMAP_PRIVATE(irq_ptr)->ir_idx = idx;
712 
713 	get_sid(irq_ptr);
714 
715 	if (cnt == 1) {
716 		if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
717 			immu_qinv_intr_one_cache(immu, idx);
718 		} else {
719 			immu_regs_wbf_flush(immu);
720 		}
721 		return;
722 	}
723 
724 	sid_svt_sq = INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq;
725 
726 	vector = irq_ptr->airq_vector;
727 
728 	for (i = 1; i < cnt; i++) {
729 		irqno = apic_vector_to_irq[vector + i];
730 		irq_ptr = apic_irq_table[irqno];
731 
732 		ASSERT(irq_ptr);
733 
734 		AIRQ_PRIVATE(irq_ptr) =
735 		    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);
736 
737 		INTRMAP_PRIVATE(irq_ptr)->ir_immu = immu;
738 		INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq = sid_svt_sq;
739 		INTRMAP_PRIVATE(irq_ptr)->ir_idx = idx + i;
740 	}
741 
742 	if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
743 		immu_qinv_intr_caches(immu, idx, cnt);
744 	} else {
745 		immu_regs_wbf_flush(immu);
746 	}
747 
748 	return;
749 
750 intrmap_disable:
751 	kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intrmap_private_t));
752 	AIRQ_PRIVATE(irq_ptr) = INTRMAP_DISABLE;
753 }
754 
755 
756 /* remapping the interrupt */
757 static void
758 immu_intrmap_map(apic_irq_t *irq_ptr, void *intrmap_data)
759 {
760 	immu_t	*immu;
761 	intrmap_t	*intrmap;
762 	ioapic_rdt_t	*irdt = (ioapic_rdt_t *)intrmap_data;
763 	msi_regs_t	*mregs = (msi_regs_t *)intrmap_data;
764 	intrmap_rte_t	irte;
765 	uint_t		idx, i, cnt;
766 	uint32_t	dst, sid_svt_sq;
767 	uchar_t		vector, dlm, tm, rh, dm;
768 
769 	if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE) {
770 		return;
771 	}
772 
773 	if (irq_ptr->airq_mps_intr_index == MSI_INDEX) {
774 		cnt = irq_ptr->airq_intin_no;
775 	} else {
776 		cnt = 1;
777 	}
778 
779 	idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;
780 	immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu;
781 	intrmap = immu->immu_intrmap;
782 	sid_svt_sq = INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq;
783 	vector = irq_ptr->airq_vector;
784 
785 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
786 		dm = RDT_DM(irdt->ir_lo);
787 		rh = 0;
788 		tm = RDT_TM(irdt->ir_lo);
789 		dlm = RDT_DLM(irdt->ir_lo);
790 		dst = irdt->ir_hi;
791 
792 		/*
793 		 * Mark the IRTE's TM as Edge to suppress broadcast EOI.
794 		 */
795 		if (intrmap_suppress_brdcst_eoi) {
796 			tm = TRIGGER_MODE_EDGE;
797 		}
798 	} else {
799 		dm = MSI_ADDR_DM_PHYSICAL;
800 		rh = MSI_ADDR_RH_FIXED;
801 		tm = TRIGGER_MODE_EDGE;
802 		dlm = 0;
803 		dst = mregs->mr_addr;
804 	}
805 
806 	if (intrmap_apic_mode == LOCAL_APIC)
807 		dst = (dst & 0xFF) << 8;
808 
809 	if (cnt == 1) {
810 		irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1);
811 		irte.hi = IRTE_HIGH(sid_svt_sq);
812 
813 		/* set interrupt remapping table entry */
814 		bcopy(&irte, intrmap->intrmap_vaddr +
815 		    idx * INTRMAP_RTE_SIZE,
816 		    INTRMAP_RTE_SIZE);
817 
818 		immu_qinv_intr_one_cache(immu, idx);
819 
820 	} else {
821 		vector = irq_ptr->airq_vector;
822 		for (i = 0; i < cnt; i++) {
823 			irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1);
824 			irte.hi = IRTE_HIGH(sid_svt_sq);
825 
826 			/* set interrupt remapping table entry */
827 			bcopy(&irte, intrmap->intrmap_vaddr +
828 			    idx * INTRMAP_RTE_SIZE,
829 			    INTRMAP_RTE_SIZE);
830 			vector++;
831 			idx++;
832 		}
833 
834 		immu_qinv_intr_caches(immu, idx, cnt);
835 	}
836 }
837 
838 /* free the remapping entry */
839 static void
840 immu_intrmap_free(apic_irq_t *irq_ptr)
841 {
842 	immu_t *immu;
843 	intrmap_t *intrmap;
844 	uint32_t idx;
845 
846 	if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE) {
847 		AIRQ_PRIVATE(irq_ptr) = NULL;
848 		return;
849 	}
850 
851 	immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu;
852 	intrmap = immu->immu_intrmap;
853 	idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;
854 
855 	bzero(intrmap->intrmap_vaddr + idx * INTRMAP_RTE_SIZE,
856 	    INTRMAP_RTE_SIZE);
857 
858 	immu_qinv_intr_one_cache(immu, idx);
859 
860 	mutex_enter(&intrmap->intrmap_lock);
861 	bitset_del(&intrmap->intrmap_map, idx);
862 	if (intrmap->intrmap_free == INTRMAP_IDX_FULL) {
863 		intrmap->intrmap_free = idx;
864 	}
865 	mutex_exit(&intrmap->intrmap_lock);
866 
867 	kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intrmap_private_t));
868 	AIRQ_PRIVATE(irq_ptr) = NULL;
869 }
870 
871 /* record the ioapic rdt entry */
872 static void
873 immu_intrmap_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt)
874 {
875 	uint32_t rdt_entry, tm, pol, idx, vector;
876 
877 	rdt_entry = irdt->ir_lo;
878 
879 	if (INTRMAP_PRIVATE(irq_ptr) != NULL &&
880 	    INTRMAP_PRIVATE(irq_ptr) != INTRMAP_DISABLE) {
881 		idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;
882 		tm = RDT_TM(rdt_entry);
883 		pol = RDT_POL(rdt_entry);
884 		vector = irq_ptr->airq_vector;
885 		irdt->ir_lo = (tm << INTRMAP_IOAPIC_TM_SHIFT) |
886 		    (pol << INTRMAP_IOAPIC_POL_SHIFT) |
887 		    ((idx >> 15) << INTRMAP_IOAPIC_IDX15_SHIFT) |
888 		    vector;
889 		irdt->ir_hi = (idx << INTRMAP_IOAPIC_IDX_SHIFT) |
890 		    (1 << INTRMAP_IOAPIC_FORMAT_SHIFT);
891 	} else {
892 		irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
893 	}
894 }
895 
896 /* record the msi interrupt structure */
897 /*ARGSUSED*/
898 static void
899 immu_intrmap_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs)
900 {
901 	uint_t	idx;
902 
903 	if (INTRMAP_PRIVATE(irq_ptr) != NULL &&
904 	    INTRMAP_PRIVATE(irq_ptr) != INTRMAP_DISABLE) {
905 		idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx;
906 
907 		mregs->mr_data = 0;
908 		mregs->mr_addr = MSI_ADDR_HDR |
909 		    ((idx & 0x7fff) << INTRMAP_MSI_IDX_SHIFT) |
910 		    (1 << INTRMAP_MSI_FORMAT_SHIFT) |
911 		    (1 << INTRMAP_MSI_SHV_SHIFT) |
912 		    ((idx >> 15) << INTRMAP_MSI_IDX15_SHIFT);
913 	} else {
914 		mregs->mr_addr = MSI_ADDR_HDR |
915 		    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
916 		    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
917 		    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
918 		mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
919 		    mregs->mr_data;
920 	}
921 }
922 
923 /* ######################################################################### */
/*
 * Functions exported by immu_intrmap.c
 */
927 void
928 immu_intrmap_setup(list_t *listp)
929 {
930 	immu_t *immu;
931 
932 	/*
933 	 * Check if ACPI DMAR tables say that
934 	 * interrupt remapping is supported
935 	 */
936 	if (immu_dmar_intrmap_supported() == B_FALSE) {
937 		return;
938 	}
939 
940 	/*
941 	 * Check if interrupt remapping is disabled.
942 	 */
943 	if (immu_intrmap_enable == B_FALSE) {
944 		return;
945 	}
946 
947 	psm_vt_ops = &intrmap_ops;
948 
949 	immu = list_head(listp);
950 	for (; immu; immu = list_next(listp, immu)) {
951 		mutex_init(&(immu->immu_intrmap_lock), NULL,
952 		    MUTEX_DEFAULT, NULL);
953 		mutex_enter(&(immu->immu_intrmap_lock));
954 		immu->immu_intrmap_setup = B_TRUE;
955 		mutex_exit(&(immu->immu_intrmap_lock));
956 	}
957 }
958 
959 void
960 immu_intrmap_startup(immu_t *immu)
961 {
962 	/* do nothing */
963 	mutex_enter(&(immu->immu_intrmap_lock));
964 	if (immu->immu_intrmap_setup == B_TRUE) {
965 		immu->immu_intrmap_running = B_TRUE;
966 	}
967 	mutex_exit(&(immu->immu_intrmap_lock));
968 }
969 
970 /*
971  * Register a Intel IOMMU unit (i.e. DMAR unit's)
972  * interrupt handler
973  */
974 void
975 immu_intr_register(immu_t *immu)
976 {
977 	int irq, vect;
978 	char intr_handler_name[IMMU_MAXNAMELEN];
979 	uint32_t msi_data;
980 	uint32_t uaddr;
981 	uint32_t msi_addr;
982 
983 	msi_addr = (MSI_ADDR_HDR |
984 	    apic_cpus[0].aci_local_id & 0xFF) << ((MSI_ADDR_DEST_SHIFT) |
985 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
986 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
987 
988 	if (intrmap_apic_mode == LOCAL_X2APIC) {
989 		uaddr = (apic_cpus[0].aci_local_id & 0xFFFFFF00);
990 	} else {
991 		uaddr = 0;
992 	}
993 
994 	/* Dont need to hold immu_intr_lock since we are in boot */
995 	irq = psm_get_ipivect(IMMU_INTR_IPL, -1);
996 	vect = apic_irq_table[irq]->airq_vector;
997 	msi_data = ((MSI_DATA_DELIVERY_FIXED <<
998 	    MSI_DATA_DELIVERY_SHIFT) | vect);
999 
1000 	(void) snprintf(intr_handler_name, sizeof (intr_handler_name),
1001 	    "%s-intr-handler", immu->immu_name);
1002 
1003 	(void) add_avintr((void *)NULL, IMMU_INTR_IPL,
1004 	    (avfunc)(immu_intr_handler), intr_handler_name, irq,
1005 	    (caddr_t)immu, NULL, NULL, NULL);
1006 
1007 	immu_regs_intr_enable(immu, msi_addr, msi_data, uaddr);
1008 
1009 	(void) immu_intr_handler(immu);
1010 }
1011