xref: /freebsd/sys/x86/x86/msi.c (revision 3157ba21)
1 /*-
2  * Copyright (c) 2006 Yahoo!, Inc.
3  * All rights reserved.
4  * Written by: John Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of the author nor the names of any co-contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * Support for PCI Message Signalled Interrupts (MSI).  MSI interrupts on
33  * x86 are basically APIC messages that the northbridge delivers directly
34  * to the local APICs as if they had come from an I/O APIC.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/bus.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/sx.h>
47 #include <sys/systm.h>
48 #include <machine/apicreg.h>
49 #include <machine/cputypes.h>
50 #include <machine/md_var.h>
51 #include <machine/frame.h>
52 #include <machine/intr_machdep.h>
53 #include <machine/apicvar.h>
54 #include <machine/specialreg.h>
55 #include <dev/pci/pcivar.h>
56 
/*
 * Layout of the address word of an Intel-format MSI message.  Each field
 * mask is followed by the values that field may take.
 */
#define	MSI_INTEL_ADDR_DEST		0x000ff000	/* Bits 19:12. */
#define	MSI_INTEL_ADDR_RH		0x00000008	/* Bit 3. */
#define	MSI_INTEL_ADDR_RH_ON		0x00000008
#define	MSI_INTEL_ADDR_RH_OFF		0x00000000
#define	MSI_INTEL_ADDR_DM		0x00000004	/* Bit 2. */
#define	MSI_INTEL_ADDR_DM_PHYSICAL	0x00000000
#define	MSI_INTEL_ADDR_DM_LOGICAL	0x00000004

/*
 * Layout of the data word of an Intel-format MSI message.  Apart from
 * the level bit, every field reuses the matching IOART_* definition
 * (declared outside this file).
 */
#define	MSI_INTEL_DATA_TRGRMOD		IOART_TRGRMOD	/* Trigger mode. */
#define	MSI_INTEL_DATA_TRGREDG		IOART_TRGREDG
#define	MSI_INTEL_DATA_TRGRLVL		IOART_TRGRLVL
#define	MSI_INTEL_DATA_LEVEL		0x00004000	/* Level/polarity. */
#define	MSI_INTEL_DATA_DEASSERT		0x00000000
#define	MSI_INTEL_DATA_ASSERT		0x00004000
#define	MSI_INTEL_DATA_DELMOD		IOART_DELMOD	/* Delivery mode. */
#define	MSI_INTEL_DATA_DELFIXED		IOART_DELFIXED
#define	MSI_INTEL_DATA_DELLOPRI		IOART_DELLOPRI
#define	MSI_INTEL_DATA_DELSMI		IOART_DELSMI
#define	MSI_INTEL_DATA_DELNMI		IOART_DELNMI
#define	MSI_INTEL_DATA_DELINIT		IOART_DELINIT
#define	MSI_INTEL_DATA_DELEXINT		IOART_DELEXINT
#define	MSI_INTEL_DATA_INTVEC		IOART_INTVEC	/* Interrupt vector. */
81 
/*
 * Compose the MSI address and data words for a source.  The address
 * carries the source's local APIC ID shifted into place (bit 12 upward),
 * with the RH field off and physical destination mode selected; the data
 * word asks for an edge-triggered, fixed-delivery interrupt on the
 * source's IDT vector.  MSI_INTEL_ADDR_BASE is not defined in this file.
 * AMD64 systems seem to be compatible, so the same macros serve both.
 */
#define	INTEL_ADDR(msi)							\
	(MSI_INTEL_ADDR_BASE | ((msi)->msi_cpu << 12) |			\
	    MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
#define	INTEL_DATA(msi)							\
	(MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED |		\
	    (msi)->msi_vector)
91 
92 static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
93 
94 /*
95  * MSI sources are bunched into groups.  This is because MSI forces
96  * all of the messages to share the address and data registers and
97  * thus certain properties (such as the local APIC ID target on x86).
98  * Each group has a 'first' source that contains information global to
99  * the group.  These fields are marked with (g) below.
100  *
101  * Note that local APIC ID is kind of special.  Each message will be
102  * assigned an ID by the system; however, a group will use the ID from
103  * the first message.
104  *
105  * For MSI-X, each message is isolated.
106  */
107 struct msi_intsrc {
108 	struct intsrc msi_intsrc;
109 	device_t msi_dev;		/* Owning device. (g) */
110 	struct msi_intsrc *msi_first;	/* First source in group. */
111 	u_int msi_irq;			/* IRQ cookie. */
112 	u_int msi_msix;			/* MSI-X message. */
113 	u_int msi_vector:8;		/* IDT vector. */
114 	u_int msi_cpu:8;		/* Local APIC ID. (g) */
115 	u_int msi_count:8;		/* Messages in this group. (g) */
116 	u_int msi_maxcount:8;		/* Alignment for this group. (g) */
117 	int *msi_irqs;			/* Group's IRQ list. (g) */
118 };
119 
120 static void	msi_create_source(void);
121 static void	msi_enable_source(struct intsrc *isrc);
122 static void	msi_disable_source(struct intsrc *isrc, int eoi);
123 static void	msi_eoi_source(struct intsrc *isrc);
124 static void	msi_enable_intr(struct intsrc *isrc);
125 static void	msi_disable_intr(struct intsrc *isrc);
126 static int	msi_vector(struct intsrc *isrc);
127 static int	msi_source_pending(struct intsrc *isrc);
128 static int	msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
129 		    enum intr_polarity pol);
130 static int	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
131 
132 struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
133 		       msi_enable_intr, msi_disable_intr, msi_vector,
134 		       msi_source_pending, NULL, NULL, msi_config_intr,
135 		       msi_assign_cpu };
136 
137 static int msi_enabled;
138 static int msi_last_irq;
139 static struct mtx msi_lock;
140 
/*
 * PIC hook for enabling a single source.  Intentionally does nothing
 * for MSI sources.
 */
static void
msi_enable_source(struct intsrc *isrc)
{

	(void)isrc;
}
145 
146 static void
147 msi_disable_source(struct intsrc *isrc, int eoi)
148 {
149 
150 	if (eoi == PIC_EOI)
151 		lapic_eoi();
152 }
153 
/*
 * PIC hook for acknowledging an interrupt: issue an EOI to the local
 * APIC.  The source argument is not consulted.
 */
static void
msi_eoi_source(struct intsrc *isrc)
{

	(void)isrc;
	lapic_eoi();
}
160 
161 static void
162 msi_enable_intr(struct intsrc *isrc)
163 {
164 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
165 
166 	apic_enable_vector(msi->msi_cpu, msi->msi_vector);
167 }
168 
169 static void
170 msi_disable_intr(struct intsrc *isrc)
171 {
172 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
173 
174 	apic_disable_vector(msi->msi_cpu, msi->msi_vector);
175 }
176 
177 static int
178 msi_vector(struct intsrc *isrc)
179 {
180 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
181 
182 	return (msi->msi_irq);
183 }
184 
/*
 * PIC hook asking whether the source has an interrupt pending.  Always
 * reports "not pending" (0); the source argument is not consulted.
 */
static int
msi_source_pending(struct intsrc *isrc)
{

	(void)isrc;
	return (0);
}
191 
192 static int
193 msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
194     enum intr_polarity pol)
195 {
196 
197 	return (ENODEV);
198 }
199 
200 static int
201 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
202 {
203 	struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
204 	int old_vector;
205 	u_int old_id;
206 	int i, vector;
207 
208 	/*
209 	 * Only allow CPUs to be assigned to the first message for an
210 	 * MSI group.
211 	 */
212 	if (msi->msi_first != msi)
213 		return (EINVAL);
214 
215 	/* Store information to free existing irq. */
216 	old_vector = msi->msi_vector;
217 	old_id = msi->msi_cpu;
218 	if (old_id == apic_id)
219 		return (0);
220 
221 	/* Allocate IDT vectors on this cpu. */
222 	if (msi->msi_count > 1) {
223 		KASSERT(msi->msi_msix == 0, ("MSI-X message group"));
224 		vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
225 		    msi->msi_count, msi->msi_maxcount);
226 	} else
227 		vector = apic_alloc_vector(apic_id, msi->msi_irq);
228 	if (vector == 0)
229 		return (ENOSPC);
230 
231 	msi->msi_cpu = apic_id;
232 	msi->msi_vector = vector;
233 	if (msi->msi_intsrc.is_handlers > 0)
234 		apic_enable_vector(msi->msi_cpu, msi->msi_vector);
235 	if (bootverbose)
236 		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
237 		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
238 		    msi->msi_cpu, msi->msi_vector);
239 	for (i = 1; i < msi->msi_count; i++) {
240 		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
241 		sib->msi_cpu = apic_id;
242 		sib->msi_vector = vector + i;
243 		if (sib->msi_intsrc.is_handlers > 0)
244 			apic_enable_vector(sib->msi_cpu, sib->msi_vector);
245 		if (bootverbose)
246 			printf(
247 		    "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
248 			    sib->msi_irq, sib->msi_cpu, sib->msi_vector);
249 	}
250 	BUS_REMAP_INTR(device_get_parent(msi->msi_dev), msi->msi_dev,
251 	    msi->msi_irq);
252 
253 	/*
254 	 * Free the old vector after the new one is established.  This is done
255 	 * to prevent races where we could miss an interrupt.
256 	 */
257 	if (msi->msi_intsrc.is_handlers > 0)
258 		apic_disable_vector(old_id, old_vector);
259 	apic_free_vector(old_id, old_vector, msi->msi_irq);
260 	for (i = 1; i < msi->msi_count; i++) {
261 		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
262 		if (sib->msi_intsrc.is_handlers > 0)
263 			apic_disable_vector(old_id, old_vector + i);
264 		apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
265 	}
266 	return (0);
267 }
268 
269 void
270 msi_init(void)
271 {
272 
273 	/* Check if we have a supported CPU. */
274 	switch (cpu_vendor_id) {
275 	case CPU_VENDOR_INTEL:
276 	case CPU_VENDOR_AMD:
277 		break;
278 	case CPU_VENDOR_CENTAUR:
279 		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
280 		    CPUID_TO_MODEL(cpu_id) >= 0xf)
281 			break;
282 		/* FALLTHROUGH */
283 	default:
284 		return;
285 	}
286 
287 	msi_enabled = 1;
288 	intr_register_pic(&msi_pic);
289 	mtx_init(&msi_lock, "msi", NULL, MTX_DEF);
290 }
291 
292 static void
293 msi_create_source(void)
294 {
295 	struct msi_intsrc *msi;
296 	u_int irq;
297 
298 	mtx_lock(&msi_lock);
299 	if (msi_last_irq >= NUM_MSI_INTS) {
300 		mtx_unlock(&msi_lock);
301 		return;
302 	}
303 	irq = msi_last_irq + FIRST_MSI_INT;
304 	msi_last_irq++;
305 	mtx_unlock(&msi_lock);
306 
307 	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
308 	msi->msi_intsrc.is_pic = &msi_pic;
309 	msi->msi_irq = irq;
310 	intr_register_source(&msi->msi_intsrc);
311 	nexus_add_irq(irq);
312 }
313 
314 /*
315  * Try to allocate 'count' interrupt sources with contiguous IDT values.
316  */
317 int
318 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
319 {
320 	struct msi_intsrc *msi, *fsrc;
321 	u_int cpu;
322 	int cnt, i, *mirqs, vector;
323 
324 	if (!msi_enabled)
325 		return (ENXIO);
326 
327 	if (count > 1)
328 		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
329 	else
330 		mirqs = NULL;
331 again:
332 	mtx_lock(&msi_lock);
333 
334 	/* Try to find 'count' free IRQs. */
335 	cnt = 0;
336 	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
337 		msi = (struct msi_intsrc *)intr_lookup_source(i);
338 
339 		/* End of allocated sources, so break. */
340 		if (msi == NULL)
341 			break;
342 
343 		/* If this is a free one, save its IRQ in the array. */
344 		if (msi->msi_dev == NULL) {
345 			irqs[cnt] = i;
346 			cnt++;
347 			if (cnt == count)
348 				break;
349 		}
350 	}
351 
352 	/* Do we need to create some new sources? */
353 	if (cnt < count) {
354 		/* If we would exceed the max, give up. */
355 		if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) {
356 			mtx_unlock(&msi_lock);
357 			free(mirqs, M_MSI);
358 			return (ENXIO);
359 		}
360 		mtx_unlock(&msi_lock);
361 
362 		/* We need count - cnt more sources. */
363 		while (cnt < count) {
364 			msi_create_source();
365 			cnt++;
366 		}
367 		goto again;
368 	}
369 
370 	/* Ok, we now have the IRQs allocated. */
371 	KASSERT(cnt == count, ("count mismatch"));
372 
373 	/* Allocate 'count' IDT vectors. */
374 	cpu = intr_next_cpu();
375 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
376 	if (vector == 0) {
377 		mtx_unlock(&msi_lock);
378 		free(mirqs, M_MSI);
379 		return (ENOSPC);
380 	}
381 
382 	/* Assign IDT vectors and make these messages owned by 'dev'. */
383 	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
384 	for (i = 0; i < count; i++) {
385 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
386 		msi->msi_cpu = cpu;
387 		msi->msi_dev = dev;
388 		msi->msi_vector = vector + i;
389 		if (bootverbose)
390 			printf(
391 		    "msi: routing MSI IRQ %d to local APIC %u vector %u\n",
392 			    msi->msi_irq, msi->msi_cpu, msi->msi_vector);
393 		msi->msi_first = fsrc;
394 		KASSERT(msi->msi_intsrc.is_handlers == 0,
395 		    ("dead MSI has handlers"));
396 	}
397 	fsrc->msi_count = count;
398 	fsrc->msi_maxcount = maxcount;
399 	if (count > 1)
400 		bcopy(irqs, mirqs, count * sizeof(*mirqs));
401 	fsrc->msi_irqs = mirqs;
402 	mtx_unlock(&msi_lock);
403 
404 	return (0);
405 }
406 
407 int
408 msi_release(int *irqs, int count)
409 {
410 	struct msi_intsrc *msi, *first;
411 	int i;
412 
413 	mtx_lock(&msi_lock);
414 	first = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
415 	if (first == NULL) {
416 		mtx_unlock(&msi_lock);
417 		return (ENOENT);
418 	}
419 
420 	/* Make sure this isn't an MSI-X message. */
421 	if (first->msi_msix) {
422 		mtx_unlock(&msi_lock);
423 		return (EINVAL);
424 	}
425 
426 	/* Make sure this message is allocated to a group. */
427 	if (first->msi_first == NULL) {
428 		mtx_unlock(&msi_lock);
429 		return (ENXIO);
430 	}
431 
432 	/*
433 	 * Make sure this is the start of a group and that we are releasing
434 	 * the entire group.
435 	 */
436 	if (first->msi_first != first || first->msi_count != count) {
437 		mtx_unlock(&msi_lock);
438 		return (EINVAL);
439 	}
440 	KASSERT(first->msi_dev != NULL, ("unowned group"));
441 
442 	/* Clear all the extra messages in the group. */
443 	for (i = 1; i < count; i++) {
444 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
445 		KASSERT(msi->msi_first == first, ("message not in group"));
446 		KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
447 		msi->msi_first = NULL;
448 		msi->msi_dev = NULL;
449 		apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
450 		msi->msi_vector = 0;
451 	}
452 
453 	/* Clear out the first message. */
454 	first->msi_first = NULL;
455 	first->msi_dev = NULL;
456 	apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
457 	first->msi_vector = 0;
458 	first->msi_count = 0;
459 	first->msi_maxcount = 0;
460 	free(first->msi_irqs, M_MSI);
461 	first->msi_irqs = NULL;
462 
463 	mtx_unlock(&msi_lock);
464 	return (0);
465 }
466 
467 int
468 msi_map(int irq, uint64_t *addr, uint32_t *data)
469 {
470 	struct msi_intsrc *msi;
471 
472 	mtx_lock(&msi_lock);
473 	msi = (struct msi_intsrc *)intr_lookup_source(irq);
474 	if (msi == NULL) {
475 		mtx_unlock(&msi_lock);
476 		return (ENOENT);
477 	}
478 
479 	/* Make sure this message is allocated to a device. */
480 	if (msi->msi_dev == NULL) {
481 		mtx_unlock(&msi_lock);
482 		return (ENXIO);
483 	}
484 
485 	/*
486 	 * If this message isn't an MSI-X message, make sure it's part
487 	 * of a group, and switch to the first message in the
488 	 * group.
489 	 */
490 	if (!msi->msi_msix) {
491 		if (msi->msi_first == NULL) {
492 			mtx_unlock(&msi_lock);
493 			return (ENXIO);
494 		}
495 		msi = msi->msi_first;
496 	}
497 
498 	*addr = INTEL_ADDR(msi);
499 	*data = INTEL_DATA(msi);
500 	mtx_unlock(&msi_lock);
501 	return (0);
502 }
503 
504 int
505 msix_alloc(device_t dev, int *irq)
506 {
507 	struct msi_intsrc *msi;
508 	u_int cpu;
509 	int i, vector;
510 
511 	if (!msi_enabled)
512 		return (ENXIO);
513 
514 again:
515 	mtx_lock(&msi_lock);
516 
517 	/* Find a free IRQ. */
518 	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
519 		msi = (struct msi_intsrc *)intr_lookup_source(i);
520 
521 		/* End of allocated sources, so break. */
522 		if (msi == NULL)
523 			break;
524 
525 		/* Stop at the first free source. */
526 		if (msi->msi_dev == NULL)
527 			break;
528 	}
529 
530 	/* Do we need to create a new source? */
531 	if (msi == NULL) {
532 		/* If we would exceed the max, give up. */
533 		if (i + 1 > FIRST_MSI_INT + NUM_MSI_INTS) {
534 			mtx_unlock(&msi_lock);
535 			return (ENXIO);
536 		}
537 		mtx_unlock(&msi_lock);
538 
539 		/* Create a new source. */
540 		msi_create_source();
541 		goto again;
542 	}
543 
544 	/* Allocate an IDT vector. */
545 	cpu = intr_next_cpu();
546 	vector = apic_alloc_vector(cpu, i);
547 	if (vector == 0) {
548 		mtx_unlock(&msi_lock);
549 		return (ENOSPC);
550 	}
551 	if (bootverbose)
552 		printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
553 		    msi->msi_irq, cpu, vector);
554 
555 	/* Setup source. */
556 	msi->msi_cpu = cpu;
557 	msi->msi_dev = dev;
558 	msi->msi_first = msi;
559 	msi->msi_vector = vector;
560 	msi->msi_msix = 1;
561 	msi->msi_count = 1;
562 	msi->msi_maxcount = 1;
563 	msi->msi_irqs = NULL;
564 
565 	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
566 	mtx_unlock(&msi_lock);
567 
568 	*irq = i;
569 	return (0);
570 }
571 
572 int
573 msix_release(int irq)
574 {
575 	struct msi_intsrc *msi;
576 
577 	mtx_lock(&msi_lock);
578 	msi = (struct msi_intsrc *)intr_lookup_source(irq);
579 	if (msi == NULL) {
580 		mtx_unlock(&msi_lock);
581 		return (ENOENT);
582 	}
583 
584 	/* Make sure this is an MSI-X message. */
585 	if (!msi->msi_msix) {
586 		mtx_unlock(&msi_lock);
587 		return (EINVAL);
588 	}
589 
590 	KASSERT(msi->msi_dev != NULL, ("unowned message"));
591 
592 	/* Clear out the message. */
593 	msi->msi_first = NULL;
594 	msi->msi_dev = NULL;
595 	apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
596 	msi->msi_vector = 0;
597 	msi->msi_msix = 0;
598 	msi->msi_count = 0;
599 	msi->msi_maxcount = 0;
600 
601 	mtx_unlock(&msi_lock);
602 	return (0);
603 }
604