1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2014 Intel Corp.
4  * Author: Jiang Liu <jiang.liu@linux.intel.com>
5  *
6  * This file is licensed under GPLv2.
7  *
8  * This file contains common code to support Message Signaled Interrupts for
9  * PCI compatible and non PCI compatible devices.
10  */
11 #include <linux/device.h>
12 #include <linux/irq.h>
13 #include <linux/irqdomain.h>
14 #include <linux/msi.h>
15 #include <linux/mutex.h>
16 #include <linux/pci.h>
17 #include <linux/slab.h>
18 #include <linux/seq_file.h>
19 #include <linux/sysfs.h>
20 #include <linux/types.h>
21 #include <linux/xarray.h>
22 
23 #include "internals.h"
24 
/**
 * struct msi_device_data - MSI per device data
 * @properties:		MSI properties which are interesting to drivers
 * @mutex:		Mutex protecting the MSI descriptor store
 * @__domains:		Internal data for per device MSI domains. The array is
 *			indexed by domain id and has MSI_MAX_DEVICE_IRQDOMAINS
 *			slots.
 * @__iter_idx:		Index to search the next entry for iterators. It is set
 *			to MSI_XA_MAX_INDEX when a search finds no entry and
 *			when the descriptor mutex is released.
 */
struct msi_device_data {
	unsigned long			properties;
	struct mutex			mutex;
	struct msi_dev_domain		__domains[MSI_MAX_DEVICE_IRQDOMAINS];
	unsigned long			__iter_idx;
};
38 
/**
 * struct msi_ctrl - MSI internal management control structure
 * @domid:	ID of the domain on which management operations should be done
 * @first:	First (hardware) slot index to operate on
 * @last:	Last (hardware) slot index to operate on
 * @nirqs:	The number of Linux interrupts to allocate. Can be larger
 *		than the range due to PCI/multi-MSI.
 *
 * The range is inclusive on both ends. msi_ctrl_valid() rejects a control
 * structure where @first is larger than @last or where either index is
 * outside of the hardware size of the domain.
 */
struct msi_ctrl {
	unsigned int			domid;
	unsigned int			first;
	unsigned int			last;
	unsigned int			nirqs;
};
53 
/*
 * Invalid Xarray index which is outside of any searchable range. It is
 * stored in msi_device_data::__iter_idx to invalidate the cached iterator
 * position.
 */
#define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
/* The maximum domain size, i.e. one slot for every index up to MSI_MAX_INDEX */
#define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)

/* Forward declarations of static helpers which are referenced ahead of their definition */
static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
static inline int msi_sysfs_create_group(struct device *dev);
static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
				   int nvec, msi_alloc_info_t *arg);
64 
65 /**
66  * msi_alloc_desc - Allocate an initialized msi_desc
67  * @dev:	Pointer to the device for which this is allocated
68  * @nvec:	The number of vectors used in this entry
69  * @affinity:	Optional pointer to an affinity mask array size of @nvec
70  *
71  * If @affinity is not %NULL then an affinity array[@nvec] is allocated
72  * and the affinity masks and flags from @affinity are copied.
73  *
74  * Return: pointer to allocated &msi_desc on success or %NULL on failure
75  */
76 static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
77 				       const struct irq_affinity_desc *affinity)
78 {
79 	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
80 
81 	if (!desc)
82 		return NULL;
83 
84 	desc->dev = dev;
85 	desc->nvec_used = nvec;
86 	if (affinity) {
87 		desc->affinity = kmemdup_array(affinity, nvec, sizeof(*desc->affinity), GFP_KERNEL);
88 		if (!desc->affinity) {
89 			kfree(desc);
90 			return NULL;
91 		}
92 	}
93 	return desc;
94 }
95 
96 static void msi_free_desc(struct msi_desc *desc)
97 {
98 	kfree(desc->affinity);
99 	kfree(desc);
100 }
101 
102 static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
103 			   unsigned int domid, unsigned int index)
104 {
105 	struct msi_device_data *md = dev->msi.data;
106 	struct xarray *xa = &md->__domains[domid].store;
107 	unsigned int hwsize;
108 	int ret;
109 
110 	hwsize = msi_domain_get_hwsize(dev, domid);
111 
112 	if (index == MSI_ANY_INDEX) {
113 		struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
114 		unsigned int index;
115 
116 		/* Let the xarray allocate a free index within the limit */
117 		ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
118 		if (ret)
119 			goto fail;
120 
121 		desc->msi_index = index;
122 		return 0;
123 	} else {
124 		if (index >= hwsize) {
125 			ret = -ERANGE;
126 			goto fail;
127 		}
128 
129 		desc->msi_index = index;
130 		ret = xa_insert(xa, index, desc, GFP_KERNEL);
131 		if (ret)
132 			goto fail;
133 		return 0;
134 	}
135 fail:
136 	msi_free_desc(desc);
137 	return ret;
138 }
139 
140 /**
141  * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
142  *				insert it at @init_desc->msi_index
143  *
144  * @dev:	Pointer to the device for which the descriptor is allocated
145  * @domid:	The id of the interrupt domain to which the desriptor is added
146  * @init_desc:	Pointer to an MSI descriptor to initialize the new descriptor
147  *
148  * Return: 0 on success or an appropriate failure code.
149  */
150 int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
151 			       struct msi_desc *init_desc)
152 {
153 	struct msi_desc *desc;
154 
155 	lockdep_assert_held(&dev->msi.data->mutex);
156 
157 	desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
158 	if (!desc)
159 		return -ENOMEM;
160 
161 	/* Copy type specific data to the new descriptor. */
162 	desc->pci = init_desc->pci;
163 
164 	return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
165 }
166 
167 static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
168 {
169 	switch (filter) {
170 	case MSI_DESC_ALL:
171 		return true;
172 	case MSI_DESC_NOTASSOCIATED:
173 		return !desc->irq;
174 	case MSI_DESC_ASSOCIATED:
175 		return !!desc->irq;
176 	}
177 	WARN_ON_ONCE(1);
178 	return false;
179 }
180 
181 static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
182 {
183 	unsigned int hwsize;
184 
185 	if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
186 			 (dev->msi.domain &&
187 			  !dev->msi.data->__domains[ctrl->domid].domain)))
188 		return false;
189 
190 	hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
191 	if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
192 			 ctrl->first >= hwsize ||
193 			 ctrl->last >= hwsize))
194 		return false;
195 	return true;
196 }
197 
198 static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
199 {
200 	struct msi_desc *desc;
201 	struct xarray *xa;
202 	unsigned long idx;
203 
204 	lockdep_assert_held(&dev->msi.data->mutex);
205 
206 	if (!msi_ctrl_valid(dev, ctrl))
207 		return;
208 
209 	xa = &dev->msi.data->__domains[ctrl->domid].store;
210 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
211 		xa_erase(xa, idx);
212 
213 		/* Leak the descriptor when it is still referenced */
214 		if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
215 			continue;
216 		msi_free_desc(desc);
217 	}
218 }
219 
220 /**
221  * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
222  * @dev:	Device for which to free the descriptors
223  * @domid:	Id of the domain to operate on
224  * @first:	Index to start freeing from (inclusive)
225  * @last:	Last index to be freed (inclusive)
226  */
227 void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
228 				     unsigned int first, unsigned int last)
229 {
230 	struct msi_ctrl ctrl = {
231 		.domid	= domid,
232 		.first	= first,
233 		.last	= last,
234 	};
235 
236 	msi_domain_free_descs(dev, &ctrl);
237 }
238 
239 /**
240  * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
241  * @dev:	Pointer to the device for which the descriptors are allocated
242  * @ctrl:	Allocation control struct
243  *
244  * Return: 0 on success or an appropriate failure code.
245  */
246 static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
247 {
248 	struct msi_desc *desc;
249 	unsigned int idx;
250 	int ret;
251 
252 	lockdep_assert_held(&dev->msi.data->mutex);
253 
254 	if (!msi_ctrl_valid(dev, ctrl))
255 		return -EINVAL;
256 
257 	for (idx = ctrl->first; idx <= ctrl->last; idx++) {
258 		desc = msi_alloc_desc(dev, 1, NULL);
259 		if (!desc)
260 			goto fail_mem;
261 		ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
262 		if (ret)
263 			goto fail;
264 	}
265 	return 0;
266 
267 fail_mem:
268 	ret = -ENOMEM;
269 fail:
270 	msi_domain_free_descs(dev, ctrl);
271 	return ret;
272 }
273 
274 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
275 {
276 	*msg = entry->msg;
277 }
278 
/*
 * Copy the cached MSI message of Linux interrupt @irq to @msg.
 *
 * NOTE(review): The descriptor returned by irq_get_msi_desc() is used
 * without a NULL check - presumably callers only pass interrupts which
 * have an MSI descriptor; confirm against the callers.
 */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
286 
287 static void msi_device_data_release(struct device *dev, void *res)
288 {
289 	struct msi_device_data *md = res;
290 	int i;
291 
292 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
293 		msi_remove_device_irq_domain(dev, i);
294 		WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
295 		xa_destroy(&md->__domains[i].store);
296 	}
297 	dev->msi.data = NULL;
298 }
299 
300 /**
301  * msi_setup_device_data - Setup MSI device data
302  * @dev:	Device for which MSI device data should be set up
303  *
304  * Return: 0 on success, appropriate error code otherwise
305  *
306  * This can be called more than once for @dev. If the MSI device data is
307  * already allocated the call succeeds. The allocated memory is
308  * automatically released when the device is destroyed.
309  */
310 int msi_setup_device_data(struct device *dev)
311 {
312 	struct msi_device_data *md;
313 	int ret, i;
314 
315 	if (dev->msi.data)
316 		return 0;
317 
318 	md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
319 	if (!md)
320 		return -ENOMEM;
321 
322 	ret = msi_sysfs_create_group(dev);
323 	if (ret) {
324 		devres_free(md);
325 		return ret;
326 	}
327 
328 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
329 		xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
330 
331 	/*
332 	 * If @dev::msi::domain is set and is a global MSI domain, copy the
333 	 * pointer into the domain array so all code can operate on domain
334 	 * ids. The NULL pointer check is required to keep the legacy
335 	 * architecture specific PCI/MSI support working.
336 	 */
337 	if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
338 		md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
339 
340 	mutex_init(&md->mutex);
341 	dev->msi.data = md;
342 	devres_add(dev, md);
343 	return 0;
344 }
345 
346 /**
347  * __msi_lock_descs - Lock the MSI descriptor storage of a device
348  * @dev:	Device to operate on
349  *
350  * Internal function for guard(msi_descs_lock). Don't use in code.
351  */
352 void __msi_lock_descs(struct device *dev)
353 {
354 	mutex_lock(&dev->msi.data->mutex);
355 }
356 EXPORT_SYMBOL_GPL(__msi_lock_descs);
357 
358 /**
359  * __msi_unlock_descs - Unlock the MSI descriptor storage of a device
360  * @dev:	Device to operate on
361  *
362  * Internal function for guard(msi_descs_lock). Don't use in code.
363  */
364 void __msi_unlock_descs(struct device *dev)
365 {
366 	/* Invalidate the index which was cached by the iterator */
367 	dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
368 	mutex_unlock(&dev->msi.data->mutex);
369 }
370 EXPORT_SYMBOL_GPL(__msi_unlock_descs);
371 
372 static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
373 				      enum msi_desc_filter filter)
374 {
375 	struct xarray *xa = &md->__domains[domid].store;
376 	struct msi_desc *desc;
377 
378 	xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
379 		if (msi_desc_match(desc, filter))
380 			return desc;
381 	}
382 	md->__iter_idx = MSI_XA_MAX_INDEX;
383 	return NULL;
384 }
385 
386 /**
387  * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
388  * @dev:	Device to operate on
389  * @domid:	The id of the interrupt domain which should be walked.
390  * @filter:	Descriptor state filter
391  *
392  * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
393  * must be invoked before the call.
394  *
395  * Return: Pointer to the first MSI descriptor matching the search
396  *	   criteria, NULL if none found.
397  */
398 struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
399 				       enum msi_desc_filter filter)
400 {
401 	struct msi_device_data *md = dev->msi.data;
402 
403 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
404 		return NULL;
405 
406 	lockdep_assert_held(&md->mutex);
407 
408 	md->__iter_idx = 0;
409 	return msi_find_desc(md, domid, filter);
410 }
411 EXPORT_SYMBOL_GPL(msi_domain_first_desc);
412 
413 /**
414  * msi_next_desc - Get the next MSI descriptor of a device
415  * @dev:	Device to operate on
416  * @domid:	The id of the interrupt domain which should be walked.
417  * @filter:	Descriptor state filter
418  *
419  * The first invocation of msi_next_desc() has to be preceeded by a
420  * successful invocation of __msi_first_desc(). Consecutive invocations are
421  * only valid if the previous one was successful. All these operations have
422  * to be done within the same MSI mutex held region.
423  *
424  * Return: Pointer to the next MSI descriptor matching the search
425  *	   criteria, NULL if none found.
426  */
427 struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
428 			       enum msi_desc_filter filter)
429 {
430 	struct msi_device_data *md = dev->msi.data;
431 
432 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
433 		return NULL;
434 
435 	lockdep_assert_held(&md->mutex);
436 
437 	if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
438 		return NULL;
439 
440 	md->__iter_idx++;
441 	return msi_find_desc(md, domid, filter);
442 }
443 EXPORT_SYMBOL_GPL(msi_next_desc);
444 
445 /**
446  * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain
447  * @dev:	Device to operate on
448  * @domid:	Domain ID of the interrupt domain associated to the device
449  * @index:	MSI interrupt index to look for (0-based)
450  *
451  * Return: The Linux interrupt number on success (> 0), 0 if not found
452  */
453 unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
454 {
455 	struct msi_desc *desc;
456 	bool pcimsi = false;
457 	struct xarray *xa;
458 
459 	if (!dev->msi.data)
460 		return 0;
461 
462 	if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
463 		return 0;
464 
465 	/* This check is only valid for the PCI default MSI domain */
466 	if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
467 		pcimsi = to_pci_dev(dev)->msi_enabled;
468 
469 	guard(msi_descs_lock)(dev);
470 	xa = &dev->msi.data->__domains[domid].store;
471 	desc = xa_load(xa, pcimsi ? 0 : index);
472 	if (desc && desc->irq) {
473 		/*
474 		 * PCI-MSI has only one descriptor for multiple interrupts.
475 		 * PCI-MSIX and platform MSI use a descriptor per
476 		 * interrupt.
477 		 */
478 		if (!pcimsi)
479 			return desc->irq;
480 		if (index < desc->nvec_used)
481 			return desc->irq + index;
482 	}
483 	return 0;
484 }
485 EXPORT_SYMBOL_GPL(msi_domain_get_virq);
486 
487 #ifdef CONFIG_SYSFS
/* The group has no static attributes, only the NULL terminator */
static struct attribute *msi_dev_attrs[] = {
	NULL
};

/*
 * Named attribute group "msi_irqs". msi_sysfs_populate_desc() adds one file
 * per interrupt to it and msi_sysfs_remove_desc() removes them again.
 */
static const struct attribute_group msi_irqs_group = {
	.name	= "msi_irqs",
	.attrs	= msi_dev_attrs,
};
496 
497 static inline int msi_sysfs_create_group(struct device *dev)
498 {
499 	return devm_device_add_group(dev, &msi_irqs_group);
500 }
501 
502 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
503 			     char *buf)
504 {
505 	/* MSI vs. MSIX is per device not per interrupt */
506 	bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
507 
508 	return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
509 }
510 
511 static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
512 {
513 	struct device_attribute *attrs = desc->sysfs_attrs;
514 	int i;
515 
516 	if (!attrs)
517 		return;
518 
519 	desc->sysfs_attrs = NULL;
520 	for (i = 0; i < desc->nvec_used; i++) {
521 		if (attrs[i].show)
522 			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
523 		kfree(attrs[i].attr.name);
524 	}
525 	kfree(attrs);
526 }
527 
528 static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
529 {
530 	struct device_attribute *attrs;
531 	int ret, i;
532 
533 	attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
534 	if (!attrs)
535 		return -ENOMEM;
536 
537 	desc->sysfs_attrs = attrs;
538 	for (i = 0; i < desc->nvec_used; i++) {
539 		sysfs_attr_init(&attrs[i].attr);
540 		attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
541 		if (!attrs[i].attr.name) {
542 			ret = -ENOMEM;
543 			goto fail;
544 		}
545 
546 		attrs[i].attr.mode = 0444;
547 		attrs[i].show = msi_mode_show;
548 
549 		ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
550 		if (ret) {
551 			attrs[i].show = NULL;
552 			goto fail;
553 		}
554 	}
555 	return 0;
556 
557 fail:
558 	msi_sysfs_remove_desc(dev, desc);
559 	return ret;
560 }
561 
562 #if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
563 /**
564  * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
565  * @dev:	The device (PCI, platform etc) which will get sysfs entries
566  */
567 int msi_device_populate_sysfs(struct device *dev)
568 {
569 	struct msi_desc *desc;
570 	int ret;
571 
572 	msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
573 		if (desc->sysfs_attrs)
574 			continue;
575 		ret = msi_sysfs_populate_desc(dev, desc);
576 		if (ret)
577 			return ret;
578 	}
579 	return 0;
580 }
581 
582 /**
583  * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
584  * @dev:		The device (PCI, platform etc) for which to remove
585  *			sysfs entries
586  */
587 void msi_device_destroy_sysfs(struct device *dev)
588 {
589 	struct msi_desc *desc;
590 
591 	msi_for_each_desc(desc, dev, MSI_DESC_ALL)
592 		msi_sysfs_remove_desc(dev, desc);
593 }
#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS || CONFIG_PCI_XEN */
595 #else /* CONFIG_SYSFS */
/* Without sysfs support there is nothing to create, populate or remove */
static inline int msi_sysfs_create_group(struct device *dev)
{
	return 0;
}

static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
{
	return 0;
}

static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
{
}
599 #endif /* !CONFIG_SYSFS */
600 
601 static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
602 {
603 	struct irq_domain *domain;
604 
605 	lockdep_assert_held(&dev->msi.data->mutex);
606 
607 	if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
608 		return NULL;
609 
610 	domain = dev->msi.data->__domains[domid].domain;
611 	if (!domain)
612 		return NULL;
613 
614 	if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
615 		return NULL;
616 
617 	return domain;
618 }
619 
620 static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
621 {
622 	struct msi_domain_info *info;
623 	struct irq_domain *domain;
624 
625 	domain = msi_get_device_domain(dev, domid);
626 	if (domain) {
627 		info = domain->host_data;
628 		return info->hwsize;
629 	}
630 	/* No domain, default to MSI_XA_DOMAIN_SIZE */
631 	return MSI_XA_DOMAIN_SIZE;
632 }
633 
634 static inline void irq_chip_write_msi_msg(struct irq_data *data,
635 					  struct msi_msg *msg)
636 {
637 	data->chip->irq_write_msi_msg(data, msg);
638 }
639 
640 static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
641 {
642 	struct msi_domain_info *info = domain->host_data;
643 
644 	/*
645 	 * If the MSI provider has messed with the second message and
646 	 * not advertized that it is level-capable, signal the breakage.
647 	 */
648 	WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
649 		  (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
650 		(msg[1].address_lo || msg[1].address_hi || msg[1].data));
651 }
652 
653 /**
654  * msi_domain_set_affinity - Generic affinity setter function for MSI domains
655  * @irq_data:	The irq data associated to the interrupt
656  * @mask:	The affinity mask to set
657  * @force:	Flag to enforce setting (disable online checks)
658  *
659  * Intended to be used by MSI interrupt controllers which are
660  * implemented with hierarchical domains.
661  *
662  * Return: IRQ_SET_MASK_* result code
663  */
664 int msi_domain_set_affinity(struct irq_data *irq_data,
665 			    const struct cpumask *mask, bool force)
666 {
667 	struct irq_data *parent = irq_data->parent_data;
668 	struct msi_msg msg[2] = { [1] = { }, };
669 	int ret;
670 
671 	ret = parent->chip->irq_set_affinity(parent, mask, force);
672 	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
673 		BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
674 		msi_check_level(irq_data->domain, msg);
675 		irq_chip_write_msi_msg(irq_data, msg);
676 	}
677 
678 	return ret;
679 }
680 
681 static int msi_domain_activate(struct irq_domain *domain,
682 			       struct irq_data *irq_data, bool early)
683 {
684 	struct msi_msg msg[2] = { [1] = { }, };
685 
686 	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
687 	msi_check_level(irq_data->domain, msg);
688 	irq_chip_write_msi_msg(irq_data, msg);
689 	return 0;
690 }
691 
692 static void msi_domain_deactivate(struct irq_domain *domain,
693 				  struct irq_data *irq_data)
694 {
695 	struct msi_msg msg[2];
696 
697 	memset(msg, 0, sizeof(msg));
698 	irq_chip_write_msi_msg(irq_data, msg);
699 }
700 
701 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
702 			    unsigned int nr_irqs, void *arg)
703 {
704 	struct msi_domain_info *info = domain->host_data;
705 	struct msi_domain_ops *ops = info->ops;
706 	irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
707 	int i, ret;
708 
709 	if (irq_find_mapping(domain, hwirq) > 0)
710 		return -EEXIST;
711 
712 	if (domain->parent) {
713 		ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
714 		if (ret < 0)
715 			return ret;
716 	}
717 
718 	for (i = 0; i < nr_irqs; i++) {
719 		ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
720 		if (ret < 0) {
721 			if (ops->msi_free) {
722 				for (i--; i >= 0; i--)
723 					ops->msi_free(domain, info, virq + i);
724 			}
725 			irq_domain_free_irqs_top(domain, virq, nr_irqs);
726 			return ret;
727 		}
728 	}
729 
730 	return 0;
731 }
732 
733 static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
734 			    unsigned int nr_irqs)
735 {
736 	struct msi_domain_info *info = domain->host_data;
737 	int i;
738 
739 	if (info->ops->msi_free) {
740 		for (i = 0; i < nr_irqs; i++)
741 			info->ops->msi_free(domain, info, virq + i);
742 	}
743 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
744 }
745 
746 static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec,
747 				irq_hw_number_t *hwirq, unsigned int *type)
748 {
749 	struct msi_domain_info *info = domain->host_data;
750 
751 	/*
752 	 * This will catch allocations through the regular irqdomain path except
753 	 * for MSI domains which really support this, e.g. MBIGEN.
754 	 */
755 	if (!info->ops->msi_translate)
756 		return -ENOTSUPP;
757 	return info->ops->msi_translate(domain, fwspec, hwirq, type);
758 }
759 
760 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
761 static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d,
762 				  struct irq_data *irqd, int ind)
763 {
764 	struct msi_desc *desc = irqd ? irq_data_get_msi_desc(irqd) : NULL;
765 
766 	if (!desc)
767 		return;
768 
769 	seq_printf(m, "\n%*saddress_hi: 0x%08x", ind + 1, "", desc->msg.address_hi);
770 	seq_printf(m, "\n%*saddress_lo: 0x%08x", ind + 1, "", desc->msg.address_lo);
771 	seq_printf(m, "\n%*smsg_data:   0x%08x\n", ind + 1, "", desc->msg.data);
772 }
773 #endif
774 
/*
 * The irq_domain callbacks which __msi_create_irq_domain() installs for the
 * MSI domains it creates. The debug_show callback is only available with
 * CONFIG_GENERIC_IRQ_DEBUGFS.
 */
static const struct irq_domain_ops msi_domain_ops = {
	.alloc		= msi_domain_alloc,
	.free		= msi_domain_free,
	.activate	= msi_domain_activate,
	.deactivate	= msi_domain_deactivate,
	.translate	= msi_domain_translate,
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
	.debug_show     = msi_domain_debug_show,
#endif
};
785 
786 static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
787 						msi_alloc_info_t *arg)
788 {
789 	return arg->hwirq;
790 }
791 
792 static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
793 				  int nvec, msi_alloc_info_t *arg)
794 {
795 	memset(arg, 0, sizeof(*arg));
796 	return 0;
797 }
798 
799 static void msi_domain_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg)
800 {
801 }
802 
803 static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
804 				    struct msi_desc *desc)
805 {
806 	arg->desc = desc;
807 }
808 
809 static int msi_domain_ops_init(struct irq_domain *domain,
810 			       struct msi_domain_info *info,
811 			       unsigned int virq, irq_hw_number_t hwirq,
812 			       msi_alloc_info_t *arg)
813 {
814 	irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
815 				      info->chip_data);
816 	if (info->handler && info->handler_name) {
817 		__irq_set_handler(virq, info->handler, 0, info->handler_name);
818 		if (info->handler_data)
819 			irq_set_handler_data(virq, info->handler_data);
820 	}
821 	return 0;
822 }
823 
/*
 * The default MSI domain ops. msi_domain_update_dom_ops() installs them as
 * a whole when a domain info provides no ops, and uses them to fill in
 * missing callbacks when MSI_FLAG_USE_DEF_DOM_OPS is set.
 */
static struct msi_domain_ops msi_domain_ops_default = {
	.get_hwirq		= msi_domain_ops_get_hwirq,
	.msi_init		= msi_domain_ops_init,
	.msi_prepare		= msi_domain_ops_prepare,
	.msi_teardown		= msi_domain_ops_teardown,
	.set_desc		= msi_domain_ops_set_desc,
};
831 
832 static void msi_domain_update_dom_ops(struct msi_domain_info *info)
833 {
834 	struct msi_domain_ops *ops = info->ops;
835 
836 	if (ops == NULL) {
837 		info->ops = &msi_domain_ops_default;
838 		return;
839 	}
840 
841 	if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
842 		return;
843 
844 	if (ops->get_hwirq == NULL)
845 		ops->get_hwirq = msi_domain_ops_default.get_hwirq;
846 	if (ops->msi_init == NULL)
847 		ops->msi_init = msi_domain_ops_default.msi_init;
848 	if (ops->msi_prepare == NULL)
849 		ops->msi_prepare = msi_domain_ops_default.msi_prepare;
850 	if (ops->msi_teardown == NULL)
851 		ops->msi_teardown = msi_domain_ops_default.msi_teardown;
852 	if (ops->set_desc == NULL)
853 		ops->set_desc = msi_domain_ops_default.set_desc;
854 }
855 
856 static void msi_domain_update_chip_ops(struct msi_domain_info *info)
857 {
858 	struct irq_chip *chip = info->chip;
859 
860 	BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
861 	if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY))
862 		chip->irq_set_affinity = msi_domain_set_affinity;
863 }
864 
865 static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
866 						  struct msi_domain_info *info,
867 						  unsigned int flags,
868 						  struct irq_domain *parent)
869 {
870 	struct irq_domain *domain;
871 
872 	if (info->hwsize > MSI_XA_DOMAIN_SIZE)
873 		return NULL;
874 
875 	/*
876 	 * Hardware size 0 is valid for backwards compatibility and for
877 	 * domains which are not backed by a hardware table. Grant the
878 	 * maximum index space.
879 	 */
880 	if (!info->hwsize)
881 		info->hwsize = MSI_XA_DOMAIN_SIZE;
882 
883 	msi_domain_update_dom_ops(info);
884 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
885 		msi_domain_update_chip_ops(info);
886 
887 	domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
888 					     fwnode, &msi_domain_ops, info);
889 
890 	if (domain) {
891 		irq_domain_update_bus_token(domain, info->bus_token);
892 		if (info->flags & MSI_FLAG_PARENT_PM_DEV)
893 			domain->pm_dev = parent->pm_dev;
894 	}
895 
896 	return domain;
897 }
898 
/**
 * msi_create_irq_domain - Create an MSI interrupt domain
 * @fwnode:	Optional fwnode of the interrupt controller
 * @info:	MSI domain info
 * @parent:	Parent irq domain
 *
 * Creates the domain without additional irq domain flags. @info can be
 * modified in the process.
 *
 * Return: pointer to the created &struct irq_domain or %NULL on failure
 */
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
					 struct msi_domain_info *info,
					 struct irq_domain *parent)
{
	const unsigned int no_flags = 0;

	return __msi_create_irq_domain(fwnode, info, no_flags, parent);
}
913 
914 /**
915  * msi_create_parent_irq_domain - Create an MSI-parent interrupt domain
916  * @info:		MSI irqdomain creation info
917  * @msi_parent_ops:	MSI parent callbacks and configuration
918  *
919  * Return: pointer to the created &struct irq_domain or %NULL on failure
920  */
921 struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info,
922 						const struct msi_parent_ops *msi_parent_ops)
923 {
924 	struct irq_domain *d;
925 
926 	info->hwirq_max		= max(info->hwirq_max, info->size);
927 	info->size		= info->hwirq_max;
928 	info->domain_flags	|= IRQ_DOMAIN_FLAG_MSI_PARENT;
929 	info->bus_token		= msi_parent_ops->bus_select_token;
930 
931 	d = irq_domain_instantiate(info);
932 	if (IS_ERR(d))
933 		return NULL;
934 
935 	d->msi_parent_ops = msi_parent_ops;
936 	return d;
937 }
938 EXPORT_SYMBOL_GPL(msi_create_parent_irq_domain);
939 
940 /**
941  * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
942  *				  in the domain hierarchy
943  * @dev:		The device for which the domain should be created
944  * @domain:		The domain in the hierarchy this op is being called on
945  * @msi_parent_domain:	The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
946  *			be created
947  * @msi_child_info:	The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
948  *			domain to be created
949  *
950  * Return: true on success, false otherwise
951  *
952  * This is the most complex problem of per device MSI domains and the
953  * underlying interrupt domain hierarchy:
954  *
955  * The device domain to be initialized requests the broadest feature set
956  * possible and the underlying domain hierarchy puts restrictions on it.
957  *
958  * That's trivial for a simple parent->child relationship, but it gets
959  * interesting with an intermediate domain: root->parent->child.  The
960  * intermediate 'parent' can expand the capabilities which the 'root'
961  * domain is providing. So that creates a classic hen and egg problem:
962  * Which entity is doing the restrictions/expansions?
963  *
964  * One solution is to let the root domain handle the initialization that's
965  * why there is the @domain and the @msi_parent_domain pointer.
966  */
967 bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
968 				  struct irq_domain *msi_parent_domain,
969 				  struct msi_domain_info *msi_child_info)
970 {
971 	struct irq_domain *parent = domain->parent;
972 
973 	if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
974 			 !parent->msi_parent_ops->init_dev_msi_info))
975 		return false;
976 
977 	return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
978 							 msi_child_info);
979 }
980 
/**
 * msi_create_device_irq_domain - Create a device MSI interrupt domain
 * @dev:		Pointer to the device
 * @domid:		Domain id
 * @template:		MSI domain info bundle used as template
 * @hwsize:		Maximum number of MSI table entries (0 if unknown or unlimited)
 * @domain_data:	Optional pointer to domain specific data which is set in
 *			msi_domain_info::data
 * @chip_data:		Optional pointer to chip specific data which is set in
 *			msi_domain_info::chip_data
 *
 * Return: True on success, false otherwise
 *
 * There is no firmware node required for this interface because the per
 * device domains are software constructs which are actually closer to the
 * hardware reality than any firmware can describe them.
 *
 * The domain name and the irq chip name for a MSI device domain are
 * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
 *
 * $PREFIX:   Optional prefix provided by the underlying MSI parent domain
 *	      via msi_parent_ops::prefix. If that pointer is NULL the prefix
 *	      is empty.
 * $CHIPNAME: The name of the irq_chip in @template
 * $DEVNAME:  The name of the device
 *
 * This results in understandable chip names and hardware interrupt numbers
 * in e.g. /proc/interrupts
 *
 * PCI-MSI-0000:00:1c.0     0-edge  Parent domain has no prefix
 * IR-PCI-MSI-0000:00:1c.4  0-edge  Same with interrupt remapping prefix 'IR-'
 *
 * IR-PCI-MSIX-0000:3d:00.0 0-edge  Hardware interrupt numbers reflect
 * IR-PCI-MSIX-0000:3d:00.0 1-edge  the real MSI-X index on that device
 * IR-PCI-MSIX-0000:3d:00.0 2-edge
 *
 * On IMS domains the hardware interrupt number is either a table entry
 * index or a purely software managed index but it is guaranteed to be
 * unique.
 *
 * The domain pointer is stored in @dev::msi::data::__domains[]. All
 * subsequent operations on the domain depend on the domain id.
 *
 * The domain is automatically freed when the device is removed via devres
 * in the context of @dev::msi::data freeing, but it can also be
 * independently removed via @msi_remove_device_irq_domain().
 */
bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
				  const struct msi_domain_template *template,
				  unsigned int hwsize, void *domain_data,
				  void *chip_data)
{
	struct irq_domain *domain, *parent = dev->msi.domain;
	const struct msi_parent_ops *pops;
	struct fwnode_handle *fwnode;

	/* A device domain can only be stacked on top of a MSI parent domain */
	if (!irq_domain_is_msi_parent(parent))
		return false;

	/* @domid indexes dev->msi.data->__domains[] below */
	if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
		return false;

	/*
	 * Work on a private copy of the template. The copy is released
	 * automatically on every early return until ownership is handed
	 * over at the end of the function.
	 */
	struct msi_domain_template *bundle __free(kfree) =
		kmemdup(template, sizeof(*bundle), GFP_KERNEL);
	if (!bundle)
		return false;

	/* Point the info at the chip, ops and alloc info embedded in the copy */
	bundle->info.hwsize = hwsize;
	bundle->info.chip = &bundle->chip;
	bundle->info.ops = &bundle->ops;
	bundle->info.data = domain_data;
	bundle->info.chip_data = chip_data;
	bundle->info.alloc_data = &bundle->alloc_info;

	/*
	 * Compose "$(PREFIX)$(CHIPNAME)-$(DEVNAME)" and use the result as
	 * the irq chip name as well.
	 */
	pops = parent->msi_parent_ops;
	snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
		 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
	bundle->chip.name = bundle->name;

	/*
	 * Using the device firmware node is required for wire to MSI
	 * device domains so that the existing firmware results in a domain
	 * match.
	 * All other device domains like PCI/MSI use the named firmware
	 * node as they are not guaranteed to have a fwnode. They are never
	 * looked up and always handled in the context of the device.
	 */
	struct fwnode_handle *fwnode_alloced __free(irq_domain_free_fwnode) = NULL;

	/* Only a node allocated here is tracked for automatic cleanup */
	if (!(bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE))
		fwnode = fwnode_alloced = irq_domain_alloc_named_fwnode(bundle->name);
	else
		fwnode = dev->fwnode;

	if (!fwnode)
		return false;

	/* A non-zero return value is treated as failure */
	if (msi_setup_device_data(dev))
		return false;

	/* The lock guard stays in effect until the function returns */
	guard(msi_descs_lock)(dev);
	/* Refuse to overwrite a domain which is already installed for @domid */
	if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
		return false;

	/* The MSI parent is handed in as both @domain and @msi_parent_domain */
	if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
		return false;

	domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
	if (!domain)
		return false;

	domain->dev = dev;
	dev->msi.data->__domains[domid].domain = domain;

	/* On failure unhook the domain from the device again and remove it */
	if (msi_domain_prepare_irqs(domain, dev, hwsize, &bundle->alloc_info)) {
		dev->msi.data->__domains[domid].domain = NULL;
		irq_domain_remove(domain);
		return false;
	}

	/* @bundle and @fwnode_alloced are now in use. Prevent cleanup */
	retain_and_null_ptr(bundle);
	retain_and_null_ptr(fwnode_alloced);
	return true;
}
1106 
1107 /**
1108  * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1109  * @dev:	Pointer to the device
1110  * @domid:	Domain id
1111  */
1112 void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1113 {
1114 	struct fwnode_handle *fwnode = NULL;
1115 	struct msi_domain_info *info;
1116 	struct irq_domain *domain;
1117 
1118 	guard(msi_descs_lock)(dev);
1119 	domain = msi_get_device_domain(dev, domid);
1120 	if (!domain || !irq_domain_is_msi_device(domain))
1121 		return;
1122 
1123 	dev->msi.data->__domains[domid].domain = NULL;
1124 	info = domain->host_data;
1125 
1126 	info->ops->msi_teardown(domain, info->alloc_data);
1127 
1128 	if (irq_domain_is_msi_device(domain))
1129 		fwnode = domain->fwnode;
1130 	irq_domain_remove(domain);
1131 	irq_domain_free_fwnode(fwnode);
1132 	kfree(container_of(info, struct msi_domain_template, info));
1133 }
1134 
1135 /**
1136  * msi_match_device_irq_domain - Match a device irq domain against a bus token
1137  * @dev:	Pointer to the device
1138  * @domid:	Domain id
1139  * @bus_token:	Bus token to match against the domain bus token
1140  *
1141  * Return: True if device domain exists and bus tokens match.
1142  */
1143 bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1144 				 enum irq_domain_bus_token bus_token)
1145 {
1146 	struct msi_domain_info *info;
1147 	struct irq_domain *domain;
1148 
1149 	guard(msi_descs_lock)(dev);
1150 	domain = msi_get_device_domain(dev, domid);
1151 	if (domain && irq_domain_is_msi_device(domain)) {
1152 		info = domain->host_data;
1153 		return info->bus_token == bus_token;
1154 	}
1155 	return false;
1156 }
1157 
1158 static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1159 				   int nvec, msi_alloc_info_t *arg)
1160 {
1161 	struct msi_domain_info *info = domain->host_data;
1162 	struct msi_domain_ops *ops = info->ops;
1163 
1164 	return ops->msi_prepare(domain, dev, nvec, arg);
1165 }
1166 
1167 /*
1168  * Carefully check whether the device can use reservation mode. If
1169  * reservation mode is enabled then the early activation will assign a
1170  * dummy vector to the device. If the PCI/MSI device does not support
1171  * masking of the entry then this can result in spurious interrupts when
1172  * the device driver is not absolutely careful. But even then a malfunction
1173  * of the hardware could result in a spurious interrupt on the dummy vector
1174  * and render the device unusable. If the entry can be masked then the core
1175  * logic will prevent the spurious interrupt and reservation mode can be
1176  * used. For now reservation mode is restricted to PCI/MSI.
1177  */
1178 static bool msi_check_reservation_mode(struct irq_domain *domain,
1179 				       struct msi_domain_info *info,
1180 				       struct device *dev)
1181 {
1182 	struct msi_desc *desc;
1183 
1184 	switch(domain->bus_token) {
1185 	case DOMAIN_BUS_PCI_MSI:
1186 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1187 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1188 	case DOMAIN_BUS_VMD_MSI:
1189 		break;
1190 	default:
1191 		return false;
1192 	}
1193 
1194 	if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1195 		return false;
1196 
1197 	if (info->flags & MSI_FLAG_NO_MASK)
1198 		return false;
1199 
1200 	/*
1201 	 * Checking the first MSI descriptor is sufficient. MSIX supports
1202 	 * masking and MSI does so when the can_mask attribute is set.
1203 	 */
1204 	desc = msi_first_desc(dev, MSI_DESC_ALL);
1205 	return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1206 }
1207 
1208 static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1209 			       int allocated)
1210 {
1211 	switch(domain->bus_token) {
1212 	case DOMAIN_BUS_PCI_MSI:
1213 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1214 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1215 	case DOMAIN_BUS_VMD_MSI:
1216 		if (IS_ENABLED(CONFIG_PCI_MSI))
1217 			break;
1218 		fallthrough;
1219 	default:
1220 		return -ENOSPC;
1221 	}
1222 
1223 	/* Let a failed PCI multi MSI allocation retry */
1224 	if (desc->nvec_used > 1)
1225 		return 1;
1226 
1227 	/* If there was a successful allocation let the caller know */
1228 	return allocated ? allocated : -ENOSPC;
1229 }
1230 
1231 #define VIRQ_CAN_RESERVE	0x01
1232 #define VIRQ_ACTIVATE		0x02
1233 
1234 static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1235 {
1236 	struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1237 	int ret;
1238 
1239 	if (!(vflags & VIRQ_CAN_RESERVE)) {
1240 		irqd_clr_can_reserve(irqd);
1241 
1242 		/*
1243 		 * If the interrupt is managed but no CPU is available to
1244 		 * service it, shut it down until better times. Note that
1245 		 * we only do this on the !RESERVE path as x86 (the only
1246 		 * architecture using this flag) deals with this in a
1247 		 * different way by using a catch-all vector.
1248 		 */
1249 		if ((vflags & VIRQ_ACTIVATE) &&
1250 		    irqd_affinity_is_managed(irqd) &&
1251 		    !cpumask_intersects(irq_data_get_affinity_mask(irqd),
1252 					cpu_online_mask)) {
1253 			    irqd_set_managed_shutdown(irqd);
1254 			    return 0;
1255 		    }
1256 	}
1257 
1258 	if (!(vflags & VIRQ_ACTIVATE))
1259 		return 0;
1260 
1261 	ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
1262 	if (ret)
1263 		return ret;
1264 	/*
1265 	 * If the interrupt uses reservation mode, clear the activated bit
1266 	 * so request_irq() will assign the final vector.
1267 	 */
1268 	if (vflags & VIRQ_CAN_RESERVE)
1269 		irqd_clr_activated(irqd);
1270 	return 0;
1271 }
1272 
1273 static int populate_alloc_info(struct irq_domain *domain, struct device *dev,
1274 			       unsigned int nirqs, msi_alloc_info_t *arg)
1275 {
1276 	struct msi_domain_info *info = domain->host_data;
1277 
1278 	/*
1279 	 * If the caller has provided a template alloc info, use that. Once
1280 	 * all users of msi_create_irq_domain() have been eliminated, this
1281 	 * should be the only source of allocation information, and the
1282 	 * prepare call below should be finally removed.
1283 	 */
1284 	if (!info->alloc_data)
1285 		return msi_domain_prepare_irqs(domain, dev, nirqs, arg);
1286 
1287 	*arg = *info->alloc_data;
1288 	return 0;
1289 }
1290 
1291 static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1292 				   struct msi_ctrl *ctrl)
1293 {
1294 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1295 	struct msi_domain_info *info = domain->host_data;
1296 	struct msi_domain_ops *ops = info->ops;
1297 	unsigned int vflags = 0, allocated = 0;
1298 	msi_alloc_info_t arg = { };
1299 	struct msi_desc *desc;
1300 	unsigned long idx;
1301 	int i, ret, virq;
1302 
1303 	ret = populate_alloc_info(domain, dev, ctrl->nirqs, &arg);
1304 	if (ret)
1305 		return ret;
1306 
1307 	/*
1308 	 * This flag is set by the PCI layer as we need to activate
1309 	 * the MSI entries before the PCI layer enables MSI in the
1310 	 * card. Otherwise the card latches a random msi message.
1311 	 */
1312 	if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1313 		vflags |= VIRQ_ACTIVATE;
1314 
1315 	/*
1316 	 * Interrupt can use a reserved vector and will not occupy
1317 	 * a real device vector until the interrupt is requested.
1318 	 */
1319 	if (msi_check_reservation_mode(domain, info, dev))
1320 		vflags |= VIRQ_CAN_RESERVE;
1321 
1322 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1323 		if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
1324 			continue;
1325 
1326 		/* This should return -ECONFUSED... */
1327 		if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
1328 			return -EINVAL;
1329 
1330 		if (ops->prepare_desc)
1331 			ops->prepare_desc(domain, &arg, desc);
1332 
1333 		ops->set_desc(&arg, desc);
1334 
1335 		virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
1336 					       dev_to_node(dev), &arg, false,
1337 					       desc->affinity);
1338 		if (virq < 0)
1339 			return msi_handle_pci_fail(domain, desc, allocated);
1340 
1341 		for (i = 0; i < desc->nvec_used; i++) {
1342 			irq_set_msi_desc_off(virq, i, desc);
1343 			irq_debugfs_copy_devname(virq + i, dev);
1344 			ret = msi_init_virq(domain, virq + i, vflags);
1345 			if (ret)
1346 				return ret;
1347 		}
1348 		if (info->flags & MSI_FLAG_DEV_SYSFS) {
1349 			ret = msi_sysfs_populate_desc(dev, desc);
1350 			if (ret)
1351 				return ret;
1352 		}
1353 		allocated++;
1354 	}
1355 	return 0;
1356 }
1357 
1358 static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1359 					     struct msi_domain_info *info,
1360 					     struct msi_ctrl *ctrl)
1361 {
1362 	if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1363 		return 0;
1364 
1365 	return msi_domain_add_simple_msi_descs(dev, ctrl);
1366 }
1367 
1368 static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1369 {
1370 	struct msi_domain_info *info;
1371 	struct msi_domain_ops *ops;
1372 	struct irq_domain *domain;
1373 	int ret;
1374 
1375 	if (!msi_ctrl_valid(dev, ctrl))
1376 		return -EINVAL;
1377 
1378 	domain = msi_get_device_domain(dev, ctrl->domid);
1379 	if (!domain)
1380 		return -ENODEV;
1381 
1382 	info = domain->host_data;
1383 
1384 	ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
1385 	if (ret)
1386 		return ret;
1387 
1388 	ops = info->ops;
1389 	if (ops->domain_alloc_irqs)
1390 		return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
1391 
1392 	return __msi_domain_alloc_irqs(dev, domain, ctrl);
1393 }
1394 
/*
 * Allocate the interrupts described by @ctrl. When the allocation returns
 * a non-zero value, msi_domain_free_locked() is invoked for the same @ctrl
 * before that value is handed back to the caller.
 */
static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
{
	int ret;

	ret = __msi_domain_alloc_locked(dev, ctrl);
	if (!ret)
		return 0;

	msi_domain_free_locked(dev, ctrl);
	return ret;
}
1403 
1404 /**
1405  * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
1406  * @dev:	Pointer to device struct of the device for which the interrupts
1407  *		are allocated
1408  * @domid:	Id of the interrupt domain to operate on
1409  * @first:	First index to allocate (inclusive)
1410  * @last:	Last index to allocate (inclusive)
1411  *
1412  * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1413  * pair. Use this for MSI irqdomains which implement their own descriptor
1414  * allocation/free.
1415  *
1416  * Return: %0 on success or an error code.
1417  */
1418 int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
1419 				       unsigned int first, unsigned int last)
1420 {
1421 	struct msi_ctrl ctrl = {
1422 		.domid	= domid,
1423 		.first	= first,
1424 		.last	= last,
1425 		.nirqs	= last + 1 - first,
1426 	};
1427 
1428 	return msi_domain_alloc_locked(dev, &ctrl);
1429 }
1430 
1431 /**
1432  * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
1433  * @dev:	Pointer to device struct of the device for which the interrupts
1434  *		are allocated
1435  * @domid:	Id of the interrupt domain to operate on
1436  * @first:	First index to allocate (inclusive)
1437  * @last:	Last index to allocate (inclusive)
1438  *
1439  * Return: %0 on success or an error code.
1440  */
1441 int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
1442 				unsigned int first, unsigned int last)
1443 {
1444 
1445 	guard(msi_descs_lock)(dev);
1446 	return msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
1447 }
1448 EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
1449 
1450 /**
1451  * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1452  *
1453  * @dev:	Pointer to device struct of the device for which the interrupts
1454  *		are allocated
1455  * @domid:	Id of the interrupt domain to operate on
1456  * @nirqs:	The number of interrupts to allocate
1457  *
1458  * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1459  * for all unassigned ones. That function is to be used for MSI domain usage where
1460  * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1461  *
1462  * Return: %0 on success or an error code.
1463  */
1464 int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
1465 {
1466 	struct msi_ctrl ctrl = {
1467 		.domid	= domid,
1468 		.first	= 0,
1469 		.last	= msi_domain_get_hwsize(dev, domid) - 1,
1470 		.nirqs	= nirqs,
1471 	};
1472 
1473 	return msi_domain_alloc_locked(dev, &ctrl);
1474 }
1475 
1476 static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid,
1477 						unsigned int index,
1478 						const struct irq_affinity_desc *affdesc,
1479 						union msi_instance_cookie *icookie)
1480 {
1481 	struct msi_ctrl ctrl = { .domid	= domid, .nirqs = 1, };
1482 	struct irq_domain *domain;
1483 	struct msi_map map = { };
1484 	struct msi_desc *desc;
1485 	int ret;
1486 
1487 	domain = msi_get_device_domain(dev, domid);
1488 	if (!domain) {
1489 		map.index = -ENODEV;
1490 		return map;
1491 	}
1492 
1493 	desc = msi_alloc_desc(dev, 1, affdesc);
1494 	if (!desc) {
1495 		map.index = -ENOMEM;
1496 		return map;
1497 	}
1498 
1499 	if (icookie)
1500 		desc->data.icookie = *icookie;
1501 
1502 	ret = msi_insert_desc(dev, desc, domid, index);
1503 	if (ret) {
1504 		map.index = ret;
1505 		return map;
1506 	}
1507 
1508 	ctrl.first = ctrl.last = desc->msi_index;
1509 
1510 	ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
1511 	if (ret) {
1512 		map.index = ret;
1513 		msi_domain_free_locked(dev, &ctrl);
1514 	} else {
1515 		map.index = desc->msi_index;
1516 		map.virq = desc->irq;
1517 	}
1518 	return map;
1519 }
1520 
1521 /**
1522  * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1523  *			     a given index - or at the next free index
1524  *
1525  * @dev:	Pointer to device struct of the device for which the interrupts
1526  *		are allocated
1527  * @domid:	Id of the interrupt domain to operate on
1528  * @index:	Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1529  *		uses the next free index.
1530  * @affdesc:	Optional pointer to an interrupt affinity descriptor structure
1531  * @icookie:	Optional pointer to a domain specific per instance cookie. If
1532  *		non-NULL the content of the cookie is stored in msi_desc::data.
1533  *		Must be NULL for MSI-X allocations
1534  *
1535  * This requires a MSI interrupt domain which lets the core code manage the
1536  * MSI descriptors.
1537  *
1538  * Return: struct msi_map
1539  *
1540  *	On success msi_map::index contains the allocated index number and
1541  *	msi_map::virq the corresponding Linux interrupt number
1542  *
1543  *	On failure msi_map::index contains the error code and msi_map::virq
1544  *	is %0.
1545  */
1546 struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1547 				       const struct irq_affinity_desc *affdesc,
1548 				       union msi_instance_cookie *icookie)
1549 {
1550 	guard(msi_descs_lock)(dev);
1551 	return __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
1552 }
1553 
1554 /**
1555  * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain
1556  * @domain:	The domain to allocate on
1557  * @hwirq:	The hardware interrupt number to allocate for
1558  * @type:	The interrupt type
1559  *
1560  * This weirdness supports wire to MSI controllers like MBIGEN.
1561  *
1562  * @hwirq is the hardware interrupt number which is handed in from
1563  * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but
1564  * sized in firmware, the hardware interrupt number cannot be used as MSI
1565  * index. For the underlying irq chip the MSI index is irrelevant and
1566  * all it needs is the hardware interrupt number.
1567  *
1568  * To handle this the MSI index is allocated with MSI_ANY_INDEX and the
1569  * hardware interrupt number is stored along with the type information in
1570  * msi_desc::cookie so the underlying interrupt chip and domain code can
1571  * retrieve it.
1572  *
1573  * Return: The Linux interrupt number (> 0) or an error code
1574  */
1575 int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
1576 				  unsigned int type)
1577 {
1578 	unsigned int domid = MSI_DEFAULT_DOMAIN;
1579 	union msi_instance_cookie icookie = { };
1580 	struct device *dev = domain->dev;
1581 	struct msi_map map = { };
1582 
1583 	if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1584 		return -EINVAL;
1585 
1586 	icookie.value = ((u64)type << 32) | hwirq;
1587 
1588 	guard(msi_descs_lock)(dev);
1589 	if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
1590 		map.index = -EINVAL;
1591 	else
1592 		map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
1593 	return map.index >= 0 ? map.virq : map.index;
1594 }
1595 
1596 static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1597 				   struct msi_ctrl *ctrl)
1598 {
1599 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1600 	struct msi_domain_info *info = domain->host_data;
1601 	struct irq_data *irqd;
1602 	struct msi_desc *desc;
1603 	unsigned long idx;
1604 	int i;
1605 
1606 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1607 		/* Only handle MSI entries which have an interrupt associated */
1608 		if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
1609 			continue;
1610 
1611 		/* Make sure all interrupts are deactivated */
1612 		for (i = 0; i < desc->nvec_used; i++) {
1613 			irqd = irq_domain_get_irq_data(domain, desc->irq + i);
1614 			if (irqd && irqd_is_activated(irqd))
1615 				irq_domain_deactivate_irq(irqd);
1616 		}
1617 
1618 		irq_domain_free_irqs(desc->irq, desc->nvec_used);
1619 		if (info->flags & MSI_FLAG_DEV_SYSFS)
1620 			msi_sysfs_remove_desc(dev, desc);
1621 		desc->irq = 0;
1622 	}
1623 }
1624 
1625 static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1626 {
1627 	struct msi_domain_info *info;
1628 	struct msi_domain_ops *ops;
1629 	struct irq_domain *domain;
1630 
1631 	if (!msi_ctrl_valid(dev, ctrl))
1632 		return;
1633 
1634 	domain = msi_get_device_domain(dev, ctrl->domid);
1635 	if (!domain)
1636 		return;
1637 
1638 	info = domain->host_data;
1639 	ops = info->ops;
1640 
1641 	if (ops->domain_free_irqs)
1642 		ops->domain_free_irqs(domain, dev);
1643 	else
1644 		__msi_domain_free_irqs(dev, domain, ctrl);
1645 
1646 	if (ops->msi_post_free)
1647 		ops->msi_post_free(domain, dev);
1648 
1649 	if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1650 		msi_domain_free_descs(dev, ctrl);
1651 }
1652 
1653 /**
1654  * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1655  *				       associated to @dev with msi_lock held
1656  * @dev:	Pointer to device struct of the device for which the interrupts
1657  *		are freed
1658  * @domid:	Id of the interrupt domain to operate on
1659  * @first:	First index to free (inclusive)
1660  * @last:	Last index to free (inclusive)
1661  */
1662 void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1663 				       unsigned int first, unsigned int last)
1664 {
1665 	struct msi_ctrl ctrl = {
1666 		.domid	= domid,
1667 		.first	= first,
1668 		.last	= last,
1669 	};
1670 	msi_domain_free_locked(dev, &ctrl);
1671 }
1672 
1673 /**
1674  * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain
1675  *				associated to @dev
1676  * @dev:	Pointer to device struct of the device for which the interrupts
1677  *		are freed
1678  * @domid:	Id of the interrupt domain to operate on
1679  * @first:	First index to free (inclusive)
1680  * @last:	Last index to free (inclusive)
1681  */
1682 void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
1683 				unsigned int first, unsigned int last)
1684 {
1685 	guard(msi_descs_lock)(dev);
1686 	msi_domain_free_irqs_range_locked(dev, domid, first, last);
1687 }
1688 EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
1689 
/**
 * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
 *				     associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
 * pair. Use this for MSI irqdomains which implement their own vector
 * allocation.
 */
void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
{
	unsigned int hwsize = msi_domain_get_hwsize(dev, domid);

	/* Cover the whole index space of the domain */
	msi_domain_free_irqs_range_locked(dev, domid, 0, hwsize - 1);
}
1706 
1707 /**
1708  * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
1709  *			      associated to a device
1710  * @dev:	Pointer to device struct of the device for which the interrupts
1711  *		are freed
1712  * @domid:	The id of the domain to operate on
1713  */
1714 void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
1715 {
1716 	guard(msi_descs_lock)(dev);
1717 	msi_domain_free_irqs_all_locked(dev, domid);
1718 }
1719 
1720 /**
1721  * msi_device_domain_free_wired - Free a wired interrupt in @domain
1722  * @domain:	The domain to free the interrupt on
1723  * @virq:	The Linux interrupt number to free
1724  *
1725  * This is the counterpart of msi_device_domain_alloc_wired() for the
1726  * weird wired to MSI converting domains.
1727  */
1728 void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
1729 {
1730 	struct msi_desc *desc = irq_get_msi_desc(virq);
1731 	struct device *dev = domain->dev;
1732 
1733 	if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1734 		return;
1735 
1736 	guard(msi_descs_lock)(dev);
1737 	if (WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain))
1738 		return;
1739 	msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
1740 					  desc->msi_index);
1741 }
1742 
1743 /**
1744  * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1745  * @domain:	The interrupt domain to retrieve data from
1746  *
1747  * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1748  */
1749 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1750 {
1751 	return (struct msi_domain_info *)domain->host_data;
1752 }
1753 
1754 /**
1755  * msi_device_has_isolated_msi - True if the device has isolated MSI
1756  * @dev: The device to check
1757  *
1758  * Isolated MSI means that HW modeled by an irq_domain on the path from the
1759  * initiating device to the CPU will validate that the MSI message specifies an
1760  * interrupt number that the device is authorized to trigger. This must block
1761  * devices from triggering interrupts they are not authorized to trigger.
1762  * Currently authorization means the MSI vector is one assigned to the device.
1763  *
1764  * This is interesting for securing VFIO use cases where a rouge MSI (eg created
1765  * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
1766  * impact outside its security domain, eg userspace triggering interrupts on
1767  * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
1768  * triggering interrupts on another VM.
1769  */
1770 bool msi_device_has_isolated_msi(struct device *dev)
1771 {
1772 	struct irq_domain *domain = dev_get_msi_domain(dev);
1773 
1774 	for (; domain; domain = domain->parent)
1775 		if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
1776 			return true;
1777 	return arch_is_isolated_msi();
1778 }
1779 EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
1780