/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _X86_POSTED_INTR_H
#define _X86_POSTED_INTR_H

#include <asm/cmpxchg.h>
#include <asm/rwonce.h>
#include <asm/irq_vectors.h>

#include <linux/bitmap.h>

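/*
 * Notification bits in the PID control field: ON (Outstanding Notification)
 * and SN (Suppress Notification).
 */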
#define POSTED_INTR_ON  0
#define POSTED_INTR_SN  1

#define PID_TABLE_ENTRY_VALID 1

#define NR_PIR_VECTORS	256
#define NR_PIR_WORDS	(NR_PIR_VECTORS / BITS_PER_LONG)

/* Posted-Interrupt Descriptor */
struct pi_desc {
	unsigned long pir[NR_PIR_WORDS];     /* Posted interrupt requested */
	union {
		struct {
			u16	notifications; /* Suppress and outstanding bits */
			u8	nv;
			u8	rsvd_2;
			u32	ndst;
		};
		u64 control;
	};
	u32 rsvd[6];
} __aligned(64);

/*
 * De-multiplexing posted interrupts is on the performance path, so the code
 * below is written to optimize cache performance based on the following
 * considerations:
 * 1. The posted interrupt descriptor (PID) fits in a cache line that is
 *    frequently accessed by both the CPU and the IOMMU.
 * 2. During software processing of posted interrupts, the CPU needs to do
 *    natural-width reads and xchgs to check and clear the posted interrupt
 *    request (PIR), a 256-bit field within the PID.
 * 3. On the other side, the IOMMU does atomic swaps of the entire PID cache
 *    line when posting interrupts and setting control bits.
 * 4. The CPU can access the cache line an order of magnitude faster than the
 *    IOMMU.
 * 5. Each time the IOMMU posts an interrupt to the PIR, it evicts the PID
 *    cache line. The cache line states after each operation are as follows,
 *    assuming a 64-bit kernel:
 *    CPU		IOMMU			PID cache line state
 *    ---------------------------------------------------------------
 *    read64					exclusive
 *    lock xchg64				modified
 *			post/atomic swap	invalid
 *    ---------------------------------------------------------------
 *
 * To reduce L1 data cache misses, it is important to avoid contention with
 * the IOMMU's interrupt posting/atomic swap. Therefore, a copy of the PIR is
 * used when processing posted interrupts in software, e.g. to dispatch
 * interrupt handlers for posted MSIs, or to move interrupts from the PIR to
 * the vIRR in KVM.
 *
 * In addition, the code tries to keep the cache line state as consistent as
 * possible. E.g. when making a copy and clearing the PIR (assuming non-zero
 * PIR bits are present in the entire PIR), it does:
 *		read, read, read, read, xchg, xchg, xchg, xchg
 * instead of:
 *		read, xchg, read, xchg, read, xchg, read, xchg
 */
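/*
 * pi_harvest_pir - atomically move any pending bits out of a PIR
 * @pir: the live PIR, which may still be receiving postings
 * @pir_vals: buffer that receives a snapshot of the harvested bits
 *
 * Read all PIR words first and only then xchg-clear the non-zero ones, per
 * the access pattern described above.  Returns true if any bits were pending.
 */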
static __always_inline bool pi_harvest_pir(unsigned long *pir,
					   unsigned long *pir_vals)
{
	unsigned long pending = 0;
	int i;

	for (i = 0; i < NR_PIR_WORDS; i++) {
		pir_vals[i] = READ_ONCE(pir[i]);
		pending |= pir_vals[i];
	}

	if (!pending)
		return false;

	for (i = 0; i < NR_PIR_WORDS; i++) {
		if (!pir_vals[i])
			continue;

		pir_vals[i] = arch_xchg(&pir[i], 0);
	}

	return true;
}

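/*
 * Atomic helpers.  The ON/SN control bits and the PIR are shared with the
 * interrupt-posting hardware, so software must update them with locked
 * bitops.
 */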
static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
	return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
}

static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
{
	return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
}

static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
{
	return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
}

static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
{
	return test_and_set_bit(vector, pi_desc->pir);
}

static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
{
	return bitmap_empty(pi_desc->pir, NR_VECTORS);
}

static inline void pi_set_sn(struct pi_desc *pi_desc)
{
	set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
}

static inline void pi_set_on(struct pi_desc *pi_desc)
{
	set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
}

static inline void pi_clear_on(struct pi_desc *pi_desc)
{
	clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
}

static inline void pi_clear_sn(struct pi_desc *pi_desc)
{
	clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
}

static inline bool pi_test_on(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
}

static inline bool pi_test_sn(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
}

static inline bool pi_test_pir(int vector, struct pi_desc *pi_desc)
{
	return test_bit(vector, (unsigned long *)pi_desc->pir);
}

/* Non-atomic helpers; safe only when the PID can't be modified concurrently. */
static inline void __pi_set_sn(struct pi_desc *pi_desc)
{
	pi_desc->notifications |= BIT(POSTED_INTR_SN);
}

static inline void __pi_clear_sn(struct pi_desc *pi_desc)
{
	pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
}

#ifdef CONFIG_X86_POSTED_MSI
/*
 * Not all external vectors are subject to interrupt remapping, e.g. the
 * IOMMU's own interrupts.  They are not distinguished here, since those
 * vector bits in the PIR will always be zero.
 */
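/* Check whether @vector is pending in this CPU's posted MSI descriptor. */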
static inline bool pi_pending_this_cpu(unsigned int vector)
{
	struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc);

	if (WARN_ON_ONCE(vector >= NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR))
		return false;

	return test_bit(vector, pid->pir);
}

extern void intel_posted_msi_init(void);
#else
static inline bool pi_pending_this_cpu(unsigned int vector) { return false; }

static inline void intel_posted_msi_init(void) { }
#endif /* CONFIG_X86_POSTED_MSI */

#endif /* _X86_POSTED_INTR_H */