xref: /freebsd/sys/x86/x86/msi.c (revision d6b92ffa)
1 /*-
2  * Copyright (c) 2006 Yahoo!, Inc.
3  * All rights reserved.
4  * Written by: John Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of the author nor the names of any co-contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * Support for PCI Message Signalled Interrupts (MSI).  MSI interrupts on
33  * x86 are basically APIC messages that the northbridge delivers directly
34  * to the local APICs as if they had come from an I/O APIC.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_acpi.h"
41 
42 #include <sys/param.h>
43 #include <sys/bus.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mutex.h>
48 #include <sys/sx.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <x86/apicreg.h>
52 #include <machine/cputypes.h>
53 #include <machine/md_var.h>
54 #include <machine/frame.h>
55 #include <machine/intr_machdep.h>
56 #include <x86/apicvar.h>
57 #include <x86/iommu/iommu_intrmap.h>
58 #include <machine/specialreg.h>
59 #include <dev/pci/pcivar.h>
60 
61 /* Fields in address for Intel MSI messages. */
62 #define	MSI_INTEL_ADDR_DEST		0x000ff000
63 #define	MSI_INTEL_ADDR_RH		0x00000008
64 # define MSI_INTEL_ADDR_RH_ON		0x00000008
65 # define MSI_INTEL_ADDR_RH_OFF		0x00000000
66 #define	MSI_INTEL_ADDR_DM		0x00000004
67 # define MSI_INTEL_ADDR_DM_PHYSICAL	0x00000000
68 # define MSI_INTEL_ADDR_DM_LOGICAL	0x00000004
69 
70 /* Fields in data for Intel MSI messages. */
71 #define	MSI_INTEL_DATA_TRGRMOD		IOART_TRGRMOD	/* Trigger mode. */
72 # define MSI_INTEL_DATA_TRGREDG		IOART_TRGREDG
73 # define MSI_INTEL_DATA_TRGRLVL		IOART_TRGRLVL
74 #define	MSI_INTEL_DATA_LEVEL		0x00004000	/* Polarity. */
75 # define MSI_INTEL_DATA_DEASSERT	0x00000000
76 # define MSI_INTEL_DATA_ASSERT		0x00004000
77 #define	MSI_INTEL_DATA_DELMOD		IOART_DELMOD	/* Delivery mode. */
78 # define MSI_INTEL_DATA_DELFIXED	IOART_DELFIXED
79 # define MSI_INTEL_DATA_DELLOPRI	IOART_DELLOPRI
80 # define MSI_INTEL_DATA_DELSMI		IOART_DELSMI
81 # define MSI_INTEL_DATA_DELNMI		IOART_DELNMI
82 # define MSI_INTEL_DATA_DELINIT		IOART_DELINIT
83 # define MSI_INTEL_DATA_DELEXINT	IOART_DELEXINT
84 #define	MSI_INTEL_DATA_INTVEC		IOART_INTVEC	/* Interrupt vector. */
85 
/*
 * Build Intel MSI address and data register values from a source.
 * AMD64 systems seem to be compatible, so we use the same macros for
 * both.
 */
90 #define	INTEL_ADDR(msi)							\
91 	(MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 |			\
92 	    MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
93 #define	INTEL_DATA(msi)							\
94 	(MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector)
95 
96 static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
97 
98 /*
99  * MSI sources are bunched into groups.  This is because MSI forces
100  * all of the messages to share the address and data registers and
101  * thus certain properties (such as the local APIC ID target on x86).
102  * Each group has a 'first' source that contains information global to
103  * the group.  These fields are marked with (g) below.
104  *
105  * Note that local APIC ID is kind of special.  Each message will be
106  * assigned an ID by the system; however, a group will use the ID from
107  * the first message.
108  *
109  * For MSI-X, each message is isolated.
110  */
struct msi_intsrc {
	struct intsrc msi_intsrc;	/* Base class; must be first (code casts
					   struct intsrc * to struct msi_intsrc *). */
	device_t msi_dev;		/* Owning device. (g) */
	struct msi_intsrc *msi_first;	/* First source in group. */
	u_int msi_irq;			/* IRQ cookie. */
	u_int msi_msix;			/* MSI-X message. */
	u_int msi_vector:8;		/* IDT vector. */
	u_int msi_cpu;			/* Local APIC ID. (g) */
	u_int msi_count:8;		/* Messages in this group. (g) */
	u_int msi_maxcount:8;		/* Alignment for this group. (g) */
	int *msi_irqs;			/* Group's IRQ list. (g) */
	u_int msi_remap_cookie;		/* IOMMU remapping handle (ACPI_DMAR). */
};
124 
125 static void	msi_create_source(void);
126 static void	msi_enable_source(struct intsrc *isrc);
127 static void	msi_disable_source(struct intsrc *isrc, int eoi);
128 static void	msi_eoi_source(struct intsrc *isrc);
129 static void	msi_enable_intr(struct intsrc *isrc);
130 static void	msi_disable_intr(struct intsrc *isrc);
131 static int	msi_vector(struct intsrc *isrc);
132 static int	msi_source_pending(struct intsrc *isrc);
133 static int	msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
134 		    enum intr_polarity pol);
135 static int	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
136 
/*
 * PIC (interrupt controller) method table for MSI/MSI-X sources.  The
 * suspend, resume, and reprogram-pin hooks are unused for MSI and are
 * left NULL.
 */
struct pic msi_pic = {
	.pic_enable_source = msi_enable_source,
	.pic_disable_source = msi_disable_source,
	.pic_eoi_source = msi_eoi_source,
	.pic_enable_intr = msi_enable_intr,
	.pic_disable_intr = msi_disable_intr,
	.pic_vector = msi_vector,
	.pic_source_pending = msi_source_pending,
	.pic_suspend = NULL,
	.pic_resume = NULL,
	.pic_config_intr = msi_config_intr,
	.pic_assign_cpu = msi_assign_cpu,
	.pic_reprogram_pin = NULL,
};
151 
#ifdef SMP
/**
 * Xen hypervisors prior to 4.6.0 do not properly handle updates to
 * enabled MSI-X table entries.  Allow migration of MSI-X interrupts
 * to be disabled via a tunable. Values have the following meaning:
 *
 * -1: automatic detection by FreeBSD
 *  0: enable migration
 *  1: disable migration
 */
int msix_disable_migration = -1;
/* Read-only sysctl whose value is taken from the boot-time tunable. */
SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN,
    &msix_disable_migration, 0,
    "Disable migration of MSI-X interrupts between CPUs");
#endif
167 
static int msi_enabled;		/* Set once msi_init() registers the PIC. */
static int msi_last_irq;	/* Number of IRQ cookies handed out so far. */
static struct mtx msi_lock;	/* Protects source allocation/ownership. */
171 
/*
 * There is no per-source enable work to do at this layer for an MSI
 * message, so this PIC hook is intentionally a no-op.
 */
static void
msi_enable_source(struct intsrc *isrc)
{
}
176 
/*
 * No masking is performed here; just signal end-of-interrupt to the
 * local APIC when the caller requests it via PIC_EOI.
 */
static void
msi_disable_source(struct intsrc *isrc, int eoi)
{

	if (eoi == PIC_EOI)
		lapic_eoi();
}
184 
/* Signal end-of-interrupt to the local APIC. */
static void
msi_eoi_source(struct intsrc *isrc)
{

	lapic_eoi();
}
191 
/* Allow delivery of this message's IDT vector on its assigned CPU. */
static void
msi_enable_intr(struct intsrc *isrc)
{
	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;

	apic_enable_vector(msi->msi_cpu, msi->msi_vector);
}
199 
/* Block delivery of this message's IDT vector on its assigned CPU. */
static void
msi_disable_intr(struct intsrc *isrc)
{
	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;

	apic_disable_vector(msi->msi_cpu, msi->msi_vector);
}
207 
/*
 * pic_vector method: returns the source's global IRQ cookie (msi_irq),
 * not the IDT vector stored in msi_vector, despite the function name.
 */
static int
msi_vector(struct intsrc *isrc)
{
	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;

	return (msi->msi_irq);
}
215 
/*
 * There is no way to poll for a pending MSI message at this layer, so
 * always report "not pending".
 */
static int
msi_source_pending(struct intsrc *isrc)
{

	return (0);
}
222 
/*
 * MSI messages have fixed trigger mode and polarity (edge; see the
 * INTEL_DATA() encoding above), so reconfiguration is always rejected.
 */
static int
msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
    enum intr_polarity pol)
{

	return (ENODEV);
}
230 
/*
 * Move an MSI group (or a single MSI-X message) to the CPU whose local
 * APIC ID is 'apic_id'.  New IDT vectors are allocated and enabled on
 * the target CPU before the old vectors are torn down, so an in-flight
 * interrupt is never lost.  Returns 0 on success, EINVAL if 'isrc' is
 * not the first message of its group (or MSI-X migration is disabled),
 * and ENOSPC if the target CPU has no suitable free vectors.
 *
 * NOTE(review): msi_lock is not taken here; this appears to rely on
 * serialization by the caller of the pic_assign_cpu hook — confirm.
 */
static int
msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
{
	struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
	int old_vector;
	u_int old_id;
	int i, vector;

	/*
	 * Only allow CPUs to be assigned to the first message for an
	 * MSI group.
	 */
	if (msi->msi_first != msi)
		return (EINVAL);

#ifdef SMP
	if (msix_disable_migration && msi->msi_msix)
		return (EINVAL);
#endif

	/* Store information to free existing irq. */
	old_vector = msi->msi_vector;
	old_id = msi->msi_cpu;
	if (old_id == apic_id)
		return (0);

	/* Allocate IDT vectors on this cpu. */
	if (msi->msi_count > 1) {
		/* A group needs msi_count contiguous, aligned vectors. */
		KASSERT(msi->msi_msix == 0, ("MSI-X message group"));
		vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
		    msi->msi_count, msi->msi_maxcount);
	} else
		vector = apic_alloc_vector(apic_id, msi->msi_irq);
	if (vector == 0)
		return (ENOSPC);

	/* Retarget the first message to the new CPU/vector pair. */
	msi->msi_cpu = apic_id;
	msi->msi_vector = vector;
	if (msi->msi_intsrc.is_handlers > 0)
		apic_enable_vector(msi->msi_cpu, msi->msi_vector);
	if (bootverbose)
		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
		    msi->msi_cpu, msi->msi_vector);
	/* Retarget the rest of the group to consecutive new vectors. */
	for (i = 1; i < msi->msi_count; i++) {
		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
		sib->msi_cpu = apic_id;
		sib->msi_vector = vector + i;
		if (sib->msi_intsrc.is_handlers > 0)
			apic_enable_vector(sib->msi_cpu, sib->msi_vector);
		if (bootverbose)
			printf(
		    "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
			    sib->msi_irq, sib->msi_cpu, sib->msi_vector);
	}
	/* Tell the device to reprogram its MSI registers for the new target. */
	BUS_REMAP_INTR(device_get_parent(msi->msi_dev), msi->msi_dev,
	    msi->msi_irq);

	/*
	 * Free the old vector after the new one is established.  This is done
	 * to prevent races where we could miss an interrupt.
	 */
	if (msi->msi_intsrc.is_handlers > 0)
		apic_disable_vector(old_id, old_vector);
	apic_free_vector(old_id, old_vector, msi->msi_irq);
	for (i = 1; i < msi->msi_count; i++) {
		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
		if (sib->msi_intsrc.is_handlers > 0)
			apic_disable_vector(old_id, old_vector + i);
		apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
	}
	return (0);
}
304 
305 void
306 msi_init(void)
307 {
308 
309 	/* Check if we have a supported CPU. */
310 	switch (cpu_vendor_id) {
311 	case CPU_VENDOR_INTEL:
312 	case CPU_VENDOR_AMD:
313 		break;
314 	case CPU_VENDOR_CENTAUR:
315 		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
316 		    CPUID_TO_MODEL(cpu_id) >= 0xf)
317 			break;
318 		/* FALLTHROUGH */
319 	default:
320 		return;
321 	}
322 
323 #ifdef SMP
324 	if (msix_disable_migration == -1) {
325 		/* The default is to allow migration of MSI-X interrupts. */
326 		msix_disable_migration = 0;
327 	}
328 #endif
329 
330 	msi_enabled = 1;
331 	intr_register_pic(&msi_pic);
332 	mtx_init(&msi_lock, "msi", NULL, MTX_DEF);
333 }
334 
335 static void
336 msi_create_source(void)
337 {
338 	struct msi_intsrc *msi;
339 	u_int irq;
340 
341 	mtx_lock(&msi_lock);
342 	if (msi_last_irq >= NUM_MSI_INTS) {
343 		mtx_unlock(&msi_lock);
344 		return;
345 	}
346 	irq = msi_last_irq + FIRST_MSI_INT;
347 	msi_last_irq++;
348 	mtx_unlock(&msi_lock);
349 
350 	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
351 	msi->msi_intsrc.is_pic = &msi_pic;
352 	msi->msi_irq = irq;
353 	intr_register_source(&msi->msi_intsrc);
354 	nexus_add_irq(irq);
355 }
356 
357 /*
358  * Try to allocate 'count' interrupt sources with contiguous IDT values.
359  */
360 int
361 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
362 {
363 	struct msi_intsrc *msi, *fsrc;
364 	u_int cpu;
365 	int cnt, i, *mirqs, vector;
366 #ifdef ACPI_DMAR
367 	u_int cookies[count];
368 	int error;
369 #endif
370 
371 	if (!msi_enabled)
372 		return (ENXIO);
373 
374 	if (count > 1)
375 		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
376 	else
377 		mirqs = NULL;
378 again:
379 	mtx_lock(&msi_lock);
380 
381 	/* Try to find 'count' free IRQs. */
382 	cnt = 0;
383 	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
384 		msi = (struct msi_intsrc *)intr_lookup_source(i);
385 
386 		/* End of allocated sources, so break. */
387 		if (msi == NULL)
388 			break;
389 
390 		/* If this is a free one, save its IRQ in the array. */
391 		if (msi->msi_dev == NULL) {
392 			irqs[cnt] = i;
393 			cnt++;
394 			if (cnt == count)
395 				break;
396 		}
397 	}
398 
399 	/* Do we need to create some new sources? */
400 	if (cnt < count) {
401 		/* If we would exceed the max, give up. */
402 		if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) {
403 			mtx_unlock(&msi_lock);
404 			free(mirqs, M_MSI);
405 			return (ENXIO);
406 		}
407 		mtx_unlock(&msi_lock);
408 
409 		/* We need count - cnt more sources. */
410 		while (cnt < count) {
411 			msi_create_source();
412 			cnt++;
413 		}
414 		goto again;
415 	}
416 
417 	/* Ok, we now have the IRQs allocated. */
418 	KASSERT(cnt == count, ("count mismatch"));
419 
420 	/* Allocate 'count' IDT vectors. */
421 	cpu = intr_next_cpu();
422 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
423 	if (vector == 0) {
424 		mtx_unlock(&msi_lock);
425 		free(mirqs, M_MSI);
426 		return (ENOSPC);
427 	}
428 
429 #ifdef ACPI_DMAR
430 	mtx_unlock(&msi_lock);
431 	error = iommu_alloc_msi_intr(dev, cookies, count);
432 	mtx_lock(&msi_lock);
433 	if (error == EOPNOTSUPP)
434 		error = 0;
435 	if (error != 0) {
436 		for (i = 0; i < count; i++)
437 			apic_free_vector(cpu, vector + i, irqs[i]);
438 		free(mirqs, M_MSI);
439 		return (error);
440 	}
441 	for (i = 0; i < count; i++) {
442 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
443 		msi->msi_remap_cookie = cookies[i];
444 	}
445 #endif
446 
447 	/* Assign IDT vectors and make these messages owned by 'dev'. */
448 	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
449 	for (i = 0; i < count; i++) {
450 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
451 		msi->msi_cpu = cpu;
452 		msi->msi_dev = dev;
453 		msi->msi_vector = vector + i;
454 		if (bootverbose)
455 			printf(
456 		    "msi: routing MSI IRQ %d to local APIC %u vector %u\n",
457 			    msi->msi_irq, msi->msi_cpu, msi->msi_vector);
458 		msi->msi_first = fsrc;
459 		KASSERT(msi->msi_intsrc.is_handlers == 0,
460 		    ("dead MSI has handlers"));
461 	}
462 	fsrc->msi_count = count;
463 	fsrc->msi_maxcount = maxcount;
464 	if (count > 1)
465 		bcopy(irqs, mirqs, count * sizeof(*mirqs));
466 	fsrc->msi_irqs = mirqs;
467 	mtx_unlock(&msi_lock);
468 	return (0);
469 }
470 
/*
 * Release an MSI group previously set up by msi_alloc().  'irqs' must
 * list the entire group, with irqs[0] being the group's first message.
 * Each message's IDT vector is freed and the source returns to the
 * free pool (the sources themselves are never deallocated).  Returns
 * 0 on success, ENOENT if irqs[0] is unknown, EINVAL for an MSI-X
 * message or a partial group, and ENXIO for an unallocated message.
 */
int
msi_release(int *irqs, int count)
{
	struct msi_intsrc *msi, *first;
	int i;

	mtx_lock(&msi_lock);
	first = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
	if (first == NULL) {
		mtx_unlock(&msi_lock);
		return (ENOENT);
	}

	/* Make sure this isn't an MSI-X message. */
	if (first->msi_msix) {
		mtx_unlock(&msi_lock);
		return (EINVAL);
	}

	/* Make sure this message is allocated to a group. */
	if (first->msi_first == NULL) {
		mtx_unlock(&msi_lock);
		return (ENXIO);
	}

	/*
	 * Make sure this is the start of a group and that we are releasing
	 * the entire group.
	 */
	if (first->msi_first != first || first->msi_count != count) {
		mtx_unlock(&msi_lock);
		return (EINVAL);
	}
	KASSERT(first->msi_dev != NULL, ("unowned group"));

	/* Clear all the extra messages in the group. */
	for (i = 1; i < count; i++) {
		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
		KASSERT(msi->msi_first == first, ("message not in group"));
		KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
#ifdef ACPI_DMAR
		/*
		 * NOTE(review): this call is made with msi_lock held, yet
		 * the equivalent call for the first message below drops the
		 * lock around it — confirm whether iommu_unmap_msi_intr()
		 * may sleep and whether this path needs the same treatment.
		 */
		iommu_unmap_msi_intr(first->msi_dev, msi->msi_remap_cookie);
#endif
		msi->msi_first = NULL;
		msi->msi_dev = NULL;
		apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
		msi->msi_vector = 0;
	}

	/* Clear out the first message. */
#ifdef ACPI_DMAR
	mtx_unlock(&msi_lock);
	iommu_unmap_msi_intr(first->msi_dev, first->msi_remap_cookie);
	mtx_lock(&msi_lock);
#endif
	first->msi_first = NULL;
	first->msi_dev = NULL;
	apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
	first->msi_vector = 0;
	first->msi_count = 0;
	first->msi_maxcount = 0;
	/* msi_irqs is NULL for single-message groups; free(NULL) is a no-op. */
	free(first->msi_irqs, M_MSI);
	first->msi_irqs = NULL;

	mtx_unlock(&msi_lock);
	return (0);
}
538 
/*
 * Compute the MSI address/data pair that the device owning IRQ 'irq'
 * should program into its MSI registers.  For a plain MSI message the
 * values come from the first message of its group, since all messages
 * in a group share the address/data registers.  With interrupt
 * remapping (ACPI_DMAR) the pair comes from the IOMMU; otherwise (or
 * if the IOMMU reports EOPNOTSUPP) a raw Intel-format pair is built
 * from the CPU/vector via INTEL_ADDR()/INTEL_DATA().  Returns 0 on
 * success, ENOENT for an unknown IRQ, ENXIO for an unallocated or
 * ungrouped message, or an IOMMU error.
 */
int
msi_map(int irq, uint64_t *addr, uint32_t *data)
{
	struct msi_intsrc *msi;
	int error;
#ifdef ACPI_DMAR
	struct msi_intsrc *msi1;
	int i, k;
#endif

	mtx_lock(&msi_lock);
	msi = (struct msi_intsrc *)intr_lookup_source(irq);
	if (msi == NULL) {
		mtx_unlock(&msi_lock);
		return (ENOENT);
	}

	/* Make sure this message is allocated to a device. */
	if (msi->msi_dev == NULL) {
		mtx_unlock(&msi_lock);
		return (ENXIO);
	}

	/*
	 * If this message isn't an MSI-X message, make sure it's part
	 * of a group, and switch to the first message in the
	 * group.
	 */
	if (!msi->msi_msix) {
		if (msi->msi_first == NULL) {
			mtx_unlock(&msi_lock);
			return (ENXIO);
		}
		msi = msi->msi_first;
	}

#ifdef ACPI_DMAR
	/*
	 * Refresh the IOMMU remapping entries of the other messages in
	 * the MSI group as well.  Siblings are found by scanning the
	 * MSI IRQ range for sources whose msi_first points at this
	 * group; the scan stops after the remaining k siblings are seen.
	 */
	if (!msi->msi_msix) {
		for (k = msi->msi_count - 1, i = FIRST_MSI_INT; k > 0 &&
		    i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
			if (i == msi->msi_irq)
				continue;
			msi1 = (struct msi_intsrc *)intr_lookup_source(i);
			if (!msi1->msi_msix && msi1->msi_first == msi) {
				/* Drop the lock around the IOMMU call. */
				mtx_unlock(&msi_lock);
				iommu_map_msi_intr(msi1->msi_dev,
				    msi1->msi_cpu, msi1->msi_vector,
				    msi1->msi_remap_cookie, NULL, NULL);
				k--;
				mtx_lock(&msi_lock);
			}
		}
	}
	mtx_unlock(&msi_lock);
	error = iommu_map_msi_intr(msi->msi_dev, msi->msi_cpu,
	    msi->msi_vector, msi->msi_remap_cookie, addr, data);
#else
	mtx_unlock(&msi_lock);
	error = EOPNOTSUPP;
#endif
	/* No remapping support: build the raw Intel address/data pair. */
	if (error == EOPNOTSUPP) {
		*addr = INTEL_ADDR(msi);
		*data = INTEL_DATA(msi);
		error = 0;
	}
	return (error);
}
606 
607 int
608 msix_alloc(device_t dev, int *irq)
609 {
610 	struct msi_intsrc *msi;
611 	u_int cpu;
612 	int i, vector;
613 #ifdef ACPI_DMAR
614 	u_int cookie;
615 	int error;
616 #endif
617 
618 	if (!msi_enabled)
619 		return (ENXIO);
620 
621 again:
622 	mtx_lock(&msi_lock);
623 
624 	/* Find a free IRQ. */
625 	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
626 		msi = (struct msi_intsrc *)intr_lookup_source(i);
627 
628 		/* End of allocated sources, so break. */
629 		if (msi == NULL)
630 			break;
631 
632 		/* Stop at the first free source. */
633 		if (msi->msi_dev == NULL)
634 			break;
635 	}
636 
637 	/* Do we need to create a new source? */
638 	if (msi == NULL) {
639 		/* If we would exceed the max, give up. */
640 		if (i + 1 > FIRST_MSI_INT + NUM_MSI_INTS) {
641 			mtx_unlock(&msi_lock);
642 			return (ENXIO);
643 		}
644 		mtx_unlock(&msi_lock);
645 
646 		/* Create a new source. */
647 		msi_create_source();
648 		goto again;
649 	}
650 
651 	/* Allocate an IDT vector. */
652 	cpu = intr_next_cpu();
653 	vector = apic_alloc_vector(cpu, i);
654 	if (vector == 0) {
655 		mtx_unlock(&msi_lock);
656 		return (ENOSPC);
657 	}
658 
659 	msi->msi_dev = dev;
660 #ifdef ACPI_DMAR
661 	mtx_unlock(&msi_lock);
662 	error = iommu_alloc_msi_intr(dev, &cookie, 1);
663 	mtx_lock(&msi_lock);
664 	if (error == EOPNOTSUPP)
665 		error = 0;
666 	if (error != 0) {
667 		msi->msi_dev = NULL;
668 		apic_free_vector(cpu, vector, i);
669 		return (error);
670 	}
671 	msi->msi_remap_cookie = cookie;
672 #endif
673 
674 	if (bootverbose)
675 		printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
676 		    msi->msi_irq, cpu, vector);
677 
678 	/* Setup source. */
679 	msi->msi_cpu = cpu;
680 	msi->msi_first = msi;
681 	msi->msi_vector = vector;
682 	msi->msi_msix = 1;
683 	msi->msi_count = 1;
684 	msi->msi_maxcount = 1;
685 	msi->msi_irqs = NULL;
686 
687 	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
688 	mtx_unlock(&msi_lock);
689 
690 	*irq = i;
691 	return (0);
692 }
693 
/*
 * Release a single MSI-X message previously allocated by msix_alloc().
 * The IDT vector is freed and the source is returned to the free pool
 * (the source itself is never deallocated).  Returns 0 on success,
 * ENOENT if 'irq' is unknown, and EINVAL if it is not an MSI-X
 * message.
 */
int
msix_release(int irq)
{
	struct msi_intsrc *msi;

	mtx_lock(&msi_lock);
	msi = (struct msi_intsrc *)intr_lookup_source(irq);
	if (msi == NULL) {
		mtx_unlock(&msi_lock);
		return (ENOENT);
	}

	/* Make sure this is an MSI-X message. */
	if (!msi->msi_msix) {
		mtx_unlock(&msi_lock);
		return (EINVAL);
	}

	KASSERT(msi->msi_dev != NULL, ("unowned message"));

	/* Clear out the message. */
#ifdef ACPI_DMAR
	/* Drop the lock around the IOMMU call. */
	mtx_unlock(&msi_lock);
	iommu_unmap_msi_intr(msi->msi_dev, msi->msi_remap_cookie);
	mtx_lock(&msi_lock);
#endif
	msi->msi_first = NULL;
	msi->msi_dev = NULL;
	apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
	msi->msi_vector = 0;
	msi->msi_msix = 0;
	msi->msi_count = 0;
	msi->msi_maxcount = 0;

	mtx_unlock(&msi_lock);
	return (0);
}
731