xref: /src/sys/x86/x86/msi.c (revision 02f29c1324cf5193c3aec181cb409917b541f7fe)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2006 Yahoo!, Inc.
5  * All rights reserved.
6  * Written by: John Baldwin <jhb@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Support for PCI Message Signalled Interrupts (MSI).  MSI interrupts on
35  * x86 are basically APIC messages that the northbridge delivers directly
36  * to the local APICs as if they had come from an I/O APIC.
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_acpi.h"
41 #include "opt_iommu.h"
42 
43 #include <sys/param.h>
44 #include <sys/bus.h>
45 #include <sys/kernel.h>
46 #include <sys/limits.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/mutex.h>
50 #include <sys/sx.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <x86/apicreg.h>
54 #include <machine/cputypes.h>
55 #include <machine/md_var.h>
56 #include <machine/frame.h>
57 #include <machine/intr_machdep.h>
58 #include <x86/apicvar.h>
59 #include <x86/iommu/iommu_intrmap.h>
60 #include <machine/specialreg.h>
61 #include <dev/pci/pcivar.h>
62 
63 /* Fields in address for Intel MSI messages. */
64 #define	MSI_INTEL_ADDR_DEST		0x000ff000	/* Destination ID. */
65 #define	MSI_INTEL_ADDR_RH		0x00000008	/* Redirection hint. */
66 # define MSI_INTEL_ADDR_RH_ON		0x00000008
67 # define MSI_INTEL_ADDR_RH_OFF		0x00000000
68 #define	MSI_INTEL_ADDR_DM		0x00000004	/* Destination mode. */
69 # define MSI_INTEL_ADDR_DM_PHYSICAL	0x00000000
70 # define MSI_INTEL_ADDR_DM_LOGICAL	0x00000004
71 
72 /* Fields in data for Intel MSI messages. */
73 #define	MSI_INTEL_DATA_TRGRMOD		IOART_TRGRMOD	/* Trigger mode. */
74 # define MSI_INTEL_DATA_TRGREDG		IOART_TRGREDG
75 # define MSI_INTEL_DATA_TRGRLVL		IOART_TRGRLVL
76 #define	MSI_INTEL_DATA_LEVEL		0x00004000	/* Polarity. */
77 # define MSI_INTEL_DATA_DEASSERT	0x00000000
78 # define MSI_INTEL_DATA_ASSERT		0x00004000
79 #define	MSI_INTEL_DATA_DELMOD		IOART_DELMOD	/* Delivery mode. */
80 # define MSI_INTEL_DATA_DELFIXED	IOART_DELFIXED
81 # define MSI_INTEL_DATA_DELLOPRI	IOART_DELLOPRI
82 # define MSI_INTEL_DATA_DELSMI		IOART_DELSMI
83 # define MSI_INTEL_DATA_DELNMI		IOART_DELNMI
84 # define MSI_INTEL_DATA_DELINIT		IOART_DELINIT
85 # define MSI_INTEL_DATA_DELEXINT	IOART_DELEXINT
86 #define	MSI_INTEL_DATA_INTVEC		IOART_INTVEC	/* Interrupt vector. */
87 
88 /*
 * Build the Intel MSI address and data values for a source.  AMD64 systems
 * seem to be compatible, so the same macros are used for both.
 *
 * INTEL_ADDR places msi_cpu at bit 12 of the address.  INTEL_ADDR_EXT
 * places the low 8 bits of msi_cpu at bit 12 and bits 8-14 of msi_cpu at
 * address bits 5-11.  Both select RH off and physical destination mode.
 * INTEL_DATA selects edge trigger and fixed delivery and carries the
 * source's msi_vector.
 */
92 #define	INTEL_ADDR(msi)							\
93 	(MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 |			\
94 	    MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
95 #define	INTEL_ADDR_EXT(msi)						\
96 	(MSI_INTEL_ADDR_BASE | ((msi)->msi_cpu & 0xff) << 12 |		\
97 	((msi)->msi_cpu & 0x7f00) >> 3 |				\
98 	    MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
99 #define	INTEL_DATA(msi)							\
100 	(MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector)
101 
102 static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
103 
104 /*
105  * MSI sources are bunched into groups.  This is because MSI forces
106  * all of the messages to share the address and data registers and
107  * thus certain properties (such as the local APIC ID target on x86).
108  * Each group has a 'first' source that contains information global to
109  * the group.  These fields are marked with (g) below.
110  *
111  * Note that local APIC ID is kind of special.  Each message will be
112  * assigned an ID by the system; however, a group will use the ID from
113  * the first message.
114  *
115  * For MSI-X, each message is isolated.
116  */
117 struct msi_intsrc {
118 	struct intsrc msi_intsrc;	/* Generic source; pointers to it are cast to this struct. */
119 	device_t msi_dev;		/* Owning device, NULL when free. (g) */
120 	struct msi_intsrc *msi_first;	/* First source in group, NULL when unallocated. */
121 	u_int *msi_irqs;		/* Group's IRQ list, NULL unless count > 1. (g) */
122 	u_int msi_irq;			/* IRQ cookie. */
123 	u_int msi_cpu;			/* Local APIC ID. (g) */
124 	u_int msi_remap_cookie;		/* IOMMU cookie. */
125 	u_int msi_vector:8;		/* IDT vector. */
126 	u_int msi_count:8;		/* Messages in this group. (g) */
127 	u_int msi_maxcount:8;		/* Alignment for this group. (g) */
128 	u_int msi_enabled:8;		/* Enabled messages in this group. (g) */
129 	bool msi_msix;			/* MSI-X message. */
130 };
131 
/* Forward declarations: the source constructor and the PIC methods below. */
132 static void	msi_create_source(void);
133 static void	msi_enable_source(struct intsrc *isrc);
134 static void	msi_disable_source(struct intsrc *isrc, int eoi);
135 static void	msi_eoi_source(struct intsrc *isrc);
136 static void	msi_enable_intr(struct intsrc *isrc);
137 static void	msi_disable_intr(struct intsrc *isrc);
138 static int	msi_vector(struct intsrc *isrc);
139 static int	msi_source_pending(struct intsrc *isrc);
140 static int	msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
141 		    enum intr_polarity pol);
142 static int	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
143 
/*
 * PIC method table shared by every MSI and MSI-X source; it is registered
 * by msi_init() and stored in each source by msi_create_source().  No
 * suspend, resume or reprogram-pin methods are provided.
 */
144 struct pic msi_pic = {
145 	.pic_enable_source = msi_enable_source,
146 	.pic_disable_source = msi_disable_source,
147 	.pic_eoi_source = msi_eoi_source,
148 	.pic_enable_intr = msi_enable_intr,
149 	.pic_disable_intr = msi_disable_intr,
150 	.pic_vector = msi_vector,
151 	.pic_source_pending = msi_source_pending,
152 	.pic_suspend = NULL,
153 	.pic_resume = NULL,
154 	.pic_config_intr = msi_config_intr,
155 	.pic_assign_cpu = msi_assign_cpu,
156 	.pic_reprogram_pin = NULL,
157 };
158 
/* First IRQ number of the MSI range; msi_init() sets it from num_io_irqs. */
159 u_int first_msi_irq;
160 SYSCTL_UINT(_machdep, OID_AUTO, first_msi_irq, CTLFLAG_RD, &first_msi_irq, 0,
161     "Number of first IRQ reserved for MSI and MSI-X interrupts");
162 
/* Size of the MSI IRQ range; msi_init() leaves MSI disabled when it is 0. */
163 u_int num_msi_irqs = 2048;
164 SYSCTL_UINT(_machdep, OID_AUTO, num_msi_irqs, CTLFLAG_RDTUN, &num_msi_irqs, 0,
165     "Number of IRQs reserved for MSI and MSI-X interrupts");
166 
167 #ifdef SMP
168 /**
169  * Xen hypervisors prior to 4.6.0 do not properly handle updates to
170  * enabled MSI-X table entries.  Allow migration of MSI-X interrupts
171  * to be disabled via a tunable. Values have the following meaning:
172  *
173  * -1: automatic detection by FreeBSD
174  *  0: enable migration
175  *  1: disable migration
176  */
177 int msix_disable_migration = -1;
178 SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN,
179     &msix_disable_migration, 0,
180     "Disable migration of MSI-X interrupts between CPUs");
181 #endif
182 
/* Set to 1 by msi_init() once the MSI PIC has been registered. */
183 static int msi_enabled;
/* Number of MSI sources created so far by msi_create_source(). */
184 static u_int msi_last_irq;
/* Held while sources are scanned, allocated and released in this file. */
185 static struct mtx msi_lock;
186 
/*
 * PIC enable_source method.  No action is taken for MSI sources.
 */
static void
msi_enable_source(struct intsrc *isrc)
{

	/* Nothing to do. */
}
191 
192 static void
msi_disable_source(struct intsrc * isrc,int eoi)193 msi_disable_source(struct intsrc *isrc, int eoi)
194 {
195 
196 	if (eoi == PIC_EOI)
197 		lapic_eoi();
198 }
199 
/*
 * PIC eoi_source method: acknowledge the interrupt at the local APIC.
 */
static void
msi_eoi_source(struct intsrc *isrc)
{

	/* The EOI does not depend on which source raised the interrupt. */
	lapic_eoi();
}
206 
207 static void
msi_enable_intr(struct intsrc * isrc)208 msi_enable_intr(struct intsrc *isrc)
209 {
210 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
211 
212 	msi = msi->msi_first;
213 	if (msi->msi_enabled == 0) {
214 		for (u_int i = 0; i < msi->msi_count; i++)
215 			apic_enable_vector(msi->msi_cpu, msi->msi_vector + i);
216 	}
217 	msi->msi_enabled++;
218 }
219 
220 static void
msi_disable_intr(struct intsrc * isrc)221 msi_disable_intr(struct intsrc *isrc)
222 {
223 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
224 
225 	msi = msi->msi_first;
226 
227 	/*
228 	 * Interrupt sources are always registered, but never unregistered.
229 	 * Handle the case where MSIs have all been unregistered.
230 	 */
231 	if (msi == NULL)
232 		return;
233 
234 	msi->msi_enabled--;
235 	if (msi->msi_enabled == 0) {
236 		for (u_int i = 0; i < msi->msi_count; i++)
237 			apic_disable_vector(msi->msi_cpu, msi->msi_vector + i);
238 	}
239 }
240 
241 static int
msi_vector(struct intsrc * isrc)242 msi_vector(struct intsrc *isrc)
243 {
244 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
245 
246 	return (msi->msi_irq);
247 }
248 
/*
 * PIC source_pending method.  Always reports that nothing is pending.
 */
static int
msi_source_pending(struct intsrc *isrc)
{
	const int pending = 0;

	return (pending);
}
255 
256 static int
msi_config_intr(struct intsrc * isrc,enum intr_trigger trig,enum intr_polarity pol)257 msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
258     enum intr_polarity pol)
259 {
260 
261 	return (ENODEV);
262 }
263 
264 static int
msi_assign_cpu(struct intsrc * isrc,u_int apic_id)265 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
266 {
267 	struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
268 	int old_vector;
269 	u_int old_id;
270 	int error, i, vector;
271 
272 	/*
273 	 * Only allow CPUs to be assigned to the first message for an
274 	 * MSI group.
275 	 */
276 	if (msi->msi_first != msi)
277 		return (EINVAL);
278 
279 #ifdef SMP
280 	if (msix_disable_migration && msi->msi_msix)
281 		return (EINVAL);
282 #endif
283 
284 	/* Store information to free existing irq. */
285 	old_vector = msi->msi_vector;
286 	old_id = msi->msi_cpu;
287 	if (old_id == apic_id)
288 		return (0);
289 
290 	/* Allocate IDT vectors on this cpu. */
291 	if (msi->msi_count > 1) {
292 		KASSERT(!msi->msi_msix, ("MSI-X message group"));
293 		vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
294 		    msi->msi_count, msi->msi_maxcount);
295 	} else
296 		vector = apic_alloc_vector(apic_id, msi->msi_irq);
297 	if (vector == 0)
298 		return (ENOSPC);
299 
300 	/* Must be set before BUS_REMAP_INTR as it may call back into MSI. */
301 	msi->msi_cpu = apic_id;
302 	msi->msi_vector = vector;
303 	if (msi->msi_enabled > 0) {
304 		for (i = 0; i < msi->msi_count; i++)
305 			apic_enable_vector(apic_id, vector + i);
306 	}
307 	error = BUS_REMAP_INTR(device_get_parent(msi->msi_dev), msi->msi_dev,
308 	    msi->msi_irq);
309 	if (error == 0) {
310 		if (bootverbose) {
311 			printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
312 			    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
313 			    msi->msi_cpu, msi->msi_vector);
314 		}
315 		for (i = 1; i < msi->msi_count; i++) {
316 			sib = (struct msi_intsrc *)intr_lookup_source(
317 			    msi->msi_irqs[i]);
318 			sib->msi_cpu = apic_id;
319 			sib->msi_vector = vector + i;
320 			if (bootverbose)
321 				printf("msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
322 				    sib->msi_irq, sib->msi_cpu,
323 				    sib->msi_vector);
324 		}
325 	} else {
326 		device_printf(msi->msi_dev,
327 		    "remap irq %u to APIC ID %u failed (error %d)\n",
328 		    msi->msi_irq, apic_id, error);
329 		msi->msi_cpu = old_id;
330 		msi->msi_vector = old_vector;
331 		old_id = apic_id;
332 		old_vector = vector;
333 	}
334 
335 	/*
336 	 * Free the old vector after the new one is established.  This is done
337 	 * to prevent races where we could miss an interrupt.  If BUS_REMAP_INTR
338 	 * failed then we disable and free the new, unused vector(s).
339 	 */
340 	if (msi->msi_enabled > 0) {
341 		for (i = 0; i < msi->msi_count; i++)
342 			apic_disable_vector(old_id, old_vector + i);
343 	}
344 	apic_free_vector(old_id, old_vector, msi->msi_irq);
345 	for (i = 1; i < msi->msi_count; i++)
346 		apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
347 	return (error);
348 }
349 
350 void
msi_init(void)351 msi_init(void)
352 {
353 
354 	/* Check if we have a supported CPU. */
355 	switch (cpu_vendor_id) {
356 	case CPU_VENDOR_INTEL:
357 	case CPU_VENDOR_AMD:
358 	case CPU_VENDOR_HYGON:
359 		break;
360 	case CPU_VENDOR_CENTAUR:
361 		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
362 		    CPUID_TO_MODEL(cpu_id) >= 0xf)
363 			break;
364 		/* FALLTHROUGH */
365 	default:
366 		return;
367 	}
368 
369 #ifdef SMP
370 	if (msix_disable_migration == -1) {
371 		/* The default is to allow migration of MSI-X interrupts. */
372 		msix_disable_migration = 0;
373 	}
374 #endif
375 
376 	if (num_msi_irqs == 0)
377 		return;
378 
379 	first_msi_irq = num_io_irqs;
380 	if (num_msi_irqs > UINT_MAX - first_msi_irq)
381 		panic("num_msi_irqs too high");
382 	num_io_irqs = first_msi_irq + num_msi_irqs;
383 
384 	msi_enabled = 1;
385 	intr_register_pic(&msi_pic);
386 	mtx_init(&msi_lock, "msi", NULL, MTX_DEF);
387 }
388 
389 static void
msi_create_source(void)390 msi_create_source(void)
391 {
392 	struct msi_intsrc *msi;
393 	u_int irq;
394 
395 	mtx_lock(&msi_lock);
396 	if (msi_last_irq >= num_msi_irqs) {
397 		mtx_unlock(&msi_lock);
398 		return;
399 	}
400 	irq = msi_last_irq + first_msi_irq;
401 	msi_last_irq++;
402 	mtx_unlock(&msi_lock);
403 
404 	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
405 	msi->msi_intsrc.is_pic = &msi_pic;
406 	msi->msi_irq = irq;
407 	intr_register_source(&msi->msi_intsrc);
408 	nexus_add_irq(irq);
409 }
410 
411 /*
412  * Try to allocate 'count' interrupt sources with contiguous IDT values.
413  */
414 int
msi_alloc(device_t dev,int count,int maxcount,int * irqs)415 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
416 {
417 	struct msi_intsrc *msi, *fsrc;
418 	u_int cpu, domain, *mirqs;
419 	int cnt, i, vector;
420 #ifdef IOMMU
421 	u_int cookies[count];
422 	int error;
423 #endif
424 
425 	if (!msi_enabled)
426 		return (ENXIO);
427 
428 	if (bus_get_domain(dev, &domain) != 0)
429 		domain = 0;
430 
431 	if (count > 1)
432 		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
433 	else
434 		mirqs = NULL;
435 again:
436 	mtx_lock(&msi_lock);
437 
438 	/* Try to find 'count' free IRQs. */
439 	cnt = 0;
440 	for (i = first_msi_irq; i < first_msi_irq + num_msi_irqs; i++) {
441 		msi = (struct msi_intsrc *)intr_lookup_source(i);
442 
443 		/* End of allocated sources, so break. */
444 		if (msi == NULL)
445 			break;
446 
447 		/* If this is a free one, save its IRQ in the array. */
448 		if (msi->msi_dev == NULL) {
449 			irqs[cnt] = i;
450 			cnt++;
451 			if (cnt == count)
452 				break;
453 		}
454 	}
455 
456 	/* Do we need to create some new sources? */
457 	if (cnt < count) {
458 		/* If we would exceed the max, give up. */
459 		if (i + (count - cnt) > first_msi_irq + num_msi_irqs) {
460 			mtx_unlock(&msi_lock);
461 			free(mirqs, M_MSI);
462 			return (ENXIO);
463 		}
464 		mtx_unlock(&msi_lock);
465 
466 		/* We need count - cnt more sources. */
467 		while (cnt < count) {
468 			msi_create_source();
469 			cnt++;
470 		}
471 		goto again;
472 	}
473 
474 	/* Ok, we now have the IRQs allocated. */
475 	KASSERT(cnt == count, ("count mismatch"));
476 
477 	/* Allocate 'count' IDT vectors. */
478 	cpu = intr_next_cpu(domain);
479 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
480 	if (vector == 0) {
481 		mtx_unlock(&msi_lock);
482 		free(mirqs, M_MSI);
483 		return (ENOSPC);
484 	}
485 
486 #ifdef IOMMU
487 	mtx_unlock(&msi_lock);
488 	error = iommu_alloc_msi_intr(dev, cookies, count);
489 	mtx_lock(&msi_lock);
490 	if (error == EOPNOTSUPP)
491 		error = 0;
492 	if (error != 0) {
493 		for (i = 0; i < count; i++)
494 			apic_free_vector(cpu, vector + i, irqs[i]);
495 		mtx_unlock(&msi_lock);
496 		free(mirqs, M_MSI);
497 		return (error);
498 	}
499 	for (i = 0; i < count; i++) {
500 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
501 		msi->msi_remap_cookie = cookies[i];
502 	}
503 #endif
504 
505 	/* Assign IDT vectors and make these messages owned by 'dev'. */
506 	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
507 	for (i = 0; i < count; i++) {
508 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
509 		msi->msi_cpu = cpu;
510 		msi->msi_dev = dev;
511 		msi->msi_vector = vector + i;
512 		if (bootverbose)
513 			printf(
514 		    "msi: routing MSI IRQ %d to local APIC %u vector %u\n",
515 			    msi->msi_irq, msi->msi_cpu, msi->msi_vector);
516 		msi->msi_first = fsrc;
517 		KASSERT(msi->msi_intsrc.is_handlers == 0,
518 		    ("dead MSI has handlers"));
519 	}
520 	fsrc->msi_count = count;
521 	fsrc->msi_maxcount = maxcount;
522 	if (count > 1)
523 		bcopy(irqs, mirqs, count * sizeof(*mirqs));
524 	fsrc->msi_irqs = mirqs;
525 	mtx_unlock(&msi_lock);
526 	return (0);
527 }
528 
529 int
msi_release(int * irqs,int count)530 msi_release(int *irqs, int count)
531 {
532 	struct msi_intsrc *msi, *first;
533 	int i;
534 
535 	mtx_lock(&msi_lock);
536 	first = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
537 	if (first == NULL) {
538 		mtx_unlock(&msi_lock);
539 		return (ENOENT);
540 	}
541 
542 	/* Make sure this isn't an MSI-X message. */
543 	if (first->msi_msix) {
544 		mtx_unlock(&msi_lock);
545 		return (EINVAL);
546 	}
547 
548 	/* Make sure this message is allocated to a group. */
549 	if (first->msi_first == NULL) {
550 		mtx_unlock(&msi_lock);
551 		return (ENXIO);
552 	}
553 
554 	/*
555 	 * Make sure this is the start of a group and that we are releasing
556 	 * the entire group.
557 	 */
558 	if (first->msi_first != first || first->msi_count != count) {
559 		mtx_unlock(&msi_lock);
560 		return (EINVAL);
561 	}
562 	KASSERT(first->msi_dev != NULL, ("unowned group"));
563 
564 	/* Clear all the extra messages in the group. */
565 	for (i = 1; i < count; i++) {
566 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
567 		KASSERT(msi->msi_first == first, ("message not in group"));
568 		KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
569 #ifdef IOMMU
570 		mtx_unlock(&msi_lock);
571 		iommu_unmap_msi_intr(first->msi_dev, msi->msi_remap_cookie);
572 		mtx_lock(&msi_lock);
573 #endif
574 		msi->msi_first = NULL;
575 		msi->msi_dev = NULL;
576 		apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
577 		msi->msi_vector = 0;
578 	}
579 
580 	/* Clear out the first message. */
581 #ifdef IOMMU
582 	mtx_unlock(&msi_lock);
583 	iommu_unmap_msi_intr(first->msi_dev, first->msi_remap_cookie);
584 	mtx_lock(&msi_lock);
585 #endif
586 	first->msi_first = NULL;
587 	first->msi_dev = NULL;
588 	apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
589 	first->msi_vector = 0;
590 	first->msi_count = 0;
591 	first->msi_maxcount = 0;
592 	free(first->msi_irqs, M_MSI);
593 	first->msi_irqs = NULL;
594 
595 	mtx_unlock(&msi_lock);
596 	return (0);
597 }
598 
599 int
msi_map(int irq,uint64_t * addr,uint32_t * data)600 msi_map(int irq, uint64_t *addr, uint32_t *data)
601 {
602 	struct msi_intsrc *msi;
603 	int error;
604 #ifdef IOMMU
605 	struct msi_intsrc *msi1;
606 	int i, k;
607 #endif
608 
609 	mtx_lock(&msi_lock);
610 	msi = (struct msi_intsrc *)intr_lookup_source(irq);
611 	if (msi == NULL) {
612 		mtx_unlock(&msi_lock);
613 		return (ENOENT);
614 	}
615 
616 	/* Make sure this message is allocated to a device. */
617 	if (msi->msi_dev == NULL) {
618 		mtx_unlock(&msi_lock);
619 		return (ENXIO);
620 	}
621 
622 	/*
623 	 * If this message isn't an MSI-X message, make sure it's part
624 	 * of a group, and switch to the first message in the
625 	 * group.
626 	 */
627 	if (!msi->msi_msix) {
628 		if (msi->msi_first == NULL) {
629 			mtx_unlock(&msi_lock);
630 			return (ENXIO);
631 		}
632 		msi = msi->msi_first;
633 	}
634 
635 #ifdef IOMMU
636 	if (!msi->msi_msix) {
637 		for (k = msi->msi_count - 1, i = first_msi_irq; k > 0 &&
638 		    i < first_msi_irq + num_msi_irqs; i++) {
639 			if (i == msi->msi_irq)
640 				continue;
641 			msi1 = (struct msi_intsrc *)intr_lookup_source(i);
642 			if (!msi1->msi_msix && msi1->msi_first == msi) {
643 				mtx_unlock(&msi_lock);
644 				iommu_map_msi_intr(msi1->msi_dev,
645 				    msi1->msi_cpu, msi1->msi_vector,
646 				    msi1->msi_remap_cookie, NULL, NULL);
647 				k--;
648 				mtx_lock(&msi_lock);
649 			}
650 		}
651 	}
652 	mtx_unlock(&msi_lock);
653 	error = iommu_map_msi_intr(msi->msi_dev, msi->msi_cpu,
654 	    msi->msi_vector, msi->msi_remap_cookie, addr, data);
655 #else
656 	mtx_unlock(&msi_lock);
657 	error = EOPNOTSUPP;
658 #endif
659 	if (error == EOPNOTSUPP &&
660 	    (msi->msi_cpu > 0x7fff ||
661 	     (msi->msi_cpu > 0xff && apic_ext_dest_id != 1))) {
662 		printf("%s: unsupported destination APIC ID %u\n", __func__,
663 		    msi->msi_cpu);
664 		error = EINVAL;
665 	}
666 	if (error == EOPNOTSUPP) {
667 		*addr = (apic_ext_dest_id == 1) ?
668 		    INTEL_ADDR_EXT(msi) : INTEL_ADDR(msi);
669 		*data = INTEL_DATA(msi);
670 		error = 0;
671 	}
672 	return (error);
673 }
674 
675 int
msix_alloc(device_t dev,int * irq)676 msix_alloc(device_t dev, int *irq)
677 {
678 	struct msi_intsrc *msi;
679 	u_int cpu, domain;
680 	int i, vector;
681 #ifdef IOMMU
682 	u_int cookie;
683 	int error;
684 #endif
685 
686 	if (!msi_enabled)
687 		return (ENXIO);
688 
689 	if (bus_get_domain(dev, &domain) != 0)
690 		domain = 0;
691 
692 again:
693 	mtx_lock(&msi_lock);
694 
695 	/* Find a free IRQ. */
696 	for (i = first_msi_irq; i < first_msi_irq + num_msi_irqs; i++) {
697 		msi = (struct msi_intsrc *)intr_lookup_source(i);
698 
699 		/* End of allocated sources, so break. */
700 		if (msi == NULL)
701 			break;
702 
703 		/* Stop at the first free source. */
704 		if (msi->msi_dev == NULL)
705 			break;
706 	}
707 
708 	/* Are all IRQs in use? */
709 	if (i == first_msi_irq + num_msi_irqs) {
710 		mtx_unlock(&msi_lock);
711 		return (ENXIO);
712 	}
713 
714 	/* Do we need to create a new source? */
715 	if (msi == NULL) {
716 		mtx_unlock(&msi_lock);
717 
718 		/* Create a new source. */
719 		msi_create_source();
720 		goto again;
721 	}
722 
723 	/* Allocate an IDT vector. */
724 	cpu = intr_next_cpu(domain);
725 	vector = apic_alloc_vector(cpu, i);
726 	if (vector == 0) {
727 		mtx_unlock(&msi_lock);
728 		return (ENOSPC);
729 	}
730 
731 	msi->msi_dev = dev;
732 #ifdef IOMMU
733 	mtx_unlock(&msi_lock);
734 	error = iommu_alloc_msi_intr(dev, &cookie, 1);
735 	mtx_lock(&msi_lock);
736 	if (error == EOPNOTSUPP)
737 		error = 0;
738 	if (error != 0) {
739 		msi->msi_dev = NULL;
740 		apic_free_vector(cpu, vector, i);
741 		mtx_unlock(&msi_lock);
742 		return (error);
743 	}
744 	msi->msi_remap_cookie = cookie;
745 #endif
746 
747 	if (bootverbose)
748 		printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
749 		    msi->msi_irq, cpu, vector);
750 
751 	/* Setup source. */
752 	msi->msi_cpu = cpu;
753 	msi->msi_first = msi;
754 	msi->msi_vector = vector;
755 	msi->msi_msix = true;
756 	msi->msi_count = 1;
757 	msi->msi_maxcount = 1;
758 	msi->msi_irqs = NULL;
759 
760 	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
761 	mtx_unlock(&msi_lock);
762 
763 	*irq = i;
764 	return (0);
765 }
766 
767 int
msix_release(int irq)768 msix_release(int irq)
769 {
770 	struct msi_intsrc *msi;
771 
772 	mtx_lock(&msi_lock);
773 	msi = (struct msi_intsrc *)intr_lookup_source(irq);
774 	if (msi == NULL) {
775 		mtx_unlock(&msi_lock);
776 		return (ENOENT);
777 	}
778 
779 	/* Make sure this is an MSI-X message. */
780 	if (!msi->msi_msix) {
781 		mtx_unlock(&msi_lock);
782 		return (EINVAL);
783 	}
784 
785 	KASSERT(msi->msi_dev != NULL, ("unowned message"));
786 
787 	/* Clear out the message. */
788 #ifdef IOMMU
789 	mtx_unlock(&msi_lock);
790 	iommu_unmap_msi_intr(msi->msi_dev, msi->msi_remap_cookie);
791 	mtx_lock(&msi_lock);
792 #endif
793 	msi->msi_first = NULL;
794 	msi->msi_dev = NULL;
795 	apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
796 	msi->msi_vector = 0;
797 	msi->msi_msix = false;
798 	msi->msi_count = 0;
799 	msi->msi_maxcount = 0;
800 
801 	mtx_unlock(&msi_lock);
802 	return (0);
803 }
804