xref: /linux/include/linux/generic_pt/iommu.h (revision f1d26d72f01556c787b1291729aa7a2ce37656a8)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
4  */
5 #ifndef __GENERIC_PT_IOMMU_H
6 #define __GENERIC_PT_IOMMU_H
7 
8 #include <linux/generic_pt/common.h>
9 #include <linux/iommu.h>
10 #include <linux/mm_types.h>
11 
12 struct iommu_iotlb_gather;
13 struct pt_iommu_ops;
14 struct pt_iommu_driver_ops;
15 struct iommu_dirty_bitmap;
16 
/**
 * DOC: IOMMU Radix Page Table
 *
 * The IOMMU implementation of the Generic Page Table provides an ops struct
 * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and
 * the generic map/unmap interface.
 *
 * This interface uses a caller provided locking approach. The caller must have
 * a VA range lock concept that prevents concurrent threads from calling ops on
 * the same VA. Generally the range lock must be at least as large as a single
 * map call.
 */
29 
30 /**
31  * struct pt_iommu - Base structure for IOMMU page tables
32  *
33  * The format-specific struct will include this as the first member.
34  */
35 struct pt_iommu {
36 	/**
37 	 * @domain: The core IOMMU domain. The driver should use a union to
38 	 * overlay this memory with its previously existing domain struct to
39 	 * create an alias.
40 	 */
41 	struct iommu_domain domain;
42 
43 	/**
44 	 * @ops: Function pointers to access the API
45 	 */
46 	const struct pt_iommu_ops *ops;
47 
48 	/**
49 	 * @driver_ops: Function pointers provided by the HW driver to help
50 	 * manage HW details like caches.
51 	 */
52 	const struct pt_iommu_driver_ops *driver_ops;
53 
54 	/**
55 	 * @nid: Node ID to use for table memory allocations. The IOMMU driver
56 	 * may want to set the NID to the device's NID, if there are multiple
57 	 * table walkers.
58 	 */
59 	int nid;
60 
61 	/**
62 	 * @iommu_device: Device pointer used for any DMA cache flushing when
63 	 * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the
64 	 * page table which must have dma ops that perform cache flushing.
65 	 */
66 	struct device *iommu_device;
67 };
68 
iommupt_from_domain(struct iommu_domain * domain)69 static inline struct pt_iommu *iommupt_from_domain(struct iommu_domain *domain)
70 {
71 	if (!IS_ENABLED(CONFIG_IOMMU_PT) || !domain->is_iommupt)
72 		return NULL;
73 	return container_of(domain, struct pt_iommu, domain);
74 }
75 
76 /**
77  * struct pt_iommu_info - Details about the IOMMU page table
78  *
79  * Returned from pt_iommu_ops->get_info()
80  */
81 struct pt_iommu_info {
82 	/**
83 	 * @pgsize_bitmap: A bitmask where each set bit indicates
84 	 * a page size that can be natively stored in the page table.
85 	 */
86 	u64 pgsize_bitmap;
87 };
88 
89 struct pt_iommu_ops {
90 	/**
91 	 * @map_range: Install translation for an IOVA range
92 	 * @iommu_table: Table to manipulate
93 	 * @iova: IO virtual address to start
94 	 * @paddr: Physical/Output address to start
95 	 * @len: Length of the range starting from @iova
96 	 * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO
97 	 * @gfp: GFP flags for any memory allocations
98 	 *
99 	 * The range starting at IOVA will have paddr installed into it. The
100 	 * rage is automatically segmented into optimally sized table entries,
101 	 * and can have any valid alignment.
102 	 *
103 	 * On error the caller will probably want to invoke unmap on the range
104 	 * from iova up to the amount indicated by @mapped to return the table
105 	 * back to an unchanged state.
106 	 *
107 	 * Context: The caller must hold a write range lock that includes
108 	 * the whole range.
109 	 *
110 	 * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA
111 	 * that were mapped are added to @mapped, @mapped is not zerod first.
112 	 */
113 	int (*map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
114 			 phys_addr_t paddr, dma_addr_t len, unsigned int prot,
115 			 gfp_t gfp, size_t *mapped);
116 
117 	/**
118 	 * @unmap_range: Make a range of IOVA empty/not present
119 	 * @iommu_table: Table to manipulate
120 	 * @iova: IO virtual address to start
121 	 * @len: Length of the range starting from @iova
122 	 * @iotlb_gather: Gather struct that must be flushed on return
123 	 *
124 	 * unmap_range() will remove a translation created by map_range(). It
125 	 * cannot subdivide a mapping created by map_range(), so it should be
126 	 * called with IOVA ranges that match those passed to map_pages. The
127 	 * IOVA range can aggregate contiguous map_range() calls so long as no
128 	 * individual range is split.
129 	 *
130 	 * Context: The caller must hold a write range lock that includes
131 	 * the whole range.
132 	 *
133 	 * Returns: Number of bytes of VA unmapped. iova + res will be the
134 	 * point unmapping stopped.
135 	 */
136 	size_t (*unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
137 			      dma_addr_t len,
138 			      struct iommu_iotlb_gather *iotlb_gather);
139 
140 	/**
141 	 * @set_dirty: Make the iova write dirty
142 	 * @iommu_table: Table to manipulate
143 	 * @iova: IO virtual address to start
144 	 *
145 	 * This is only used by iommufd testing. It makes the iova dirty so that
146 	 * read_and_clear_dirty() will see it as dirty. Unlike all the other ops
147 	 * this one is safe to call without holding any locking. It may return
148 	 * -EAGAIN if there is a race.
149 	 */
150 	int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);
151 
152 	/**
153 	 * @get_info: Return the pt_iommu_info structure
154 	 * @iommu_table: Table to query
155 	 *
156 	 * Return some basic static information about the page table.
157 	 */
158 	void (*get_info)(struct pt_iommu *iommu_table,
159 			 struct pt_iommu_info *info);
160 
161 	/**
162 	 * @deinit: Undo a format specific init operation
163 	 * @iommu_table: Table to destroy
164 	 *
165 	 * Release all of the memory. The caller must have already removed the
166 	 * table from all HW access and all caches.
167 	 */
168 	void (*deinit)(struct pt_iommu *iommu_table);
169 };
170 
171 /**
172  * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
173  *
174  * The IOMMU driver should implement these using container_of(iommu_table) to
175  * get to it's iommu_domain derived structure. All ops can be called in atomic
176  * contexts as they are buried under DMA API calls.
177  */
178 struct pt_iommu_driver_ops {
179 	/**
180 	 * @change_top: Update the top of table pointer
181 	 * @iommu_table: Table to operate on
182 	 * @top_paddr: New CPU physical address of the top pointer
183 	 * @top_level: IOMMU PT level of the new top
184 	 *
185 	 * Called under the get_top_lock() spinlock. The driver must update all
186 	 * HW references to this domain with a new top address and
187 	 * configuration. On return mappings placed in the new top must be
188 	 * reachable by the HW.
189 	 *
190 	 * top_level encodes the level in IOMMU PT format, level 0 is the
191 	 * smallest page size increasing from there. This has to be translated
192 	 * to any HW specific format. During this call the new top will not be
193 	 * visible to any other API.
194 	 *
195 	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
196 	 * enabled.
197 	 */
198 	void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
199 			   unsigned int top_level);
200 
201 	/**
202 	 * @get_top_lock: lock to hold when changing the table top
203 	 * @iommu_table: Table to operate on
204 	 *
205 	 * Return a lock to hold when changing the table top page table from
206 	 * being stored in HW. The lock will be held prior to calling
207 	 * change_top() and released once the top is fully visible.
208 	 *
209 	 * Typically this would be a lock that protects the iommu_domain's
210 	 * attachment list.
211 	 *
212 	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
213 	 * enabled.
214 	 */
215 	spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
216 };
217 
pt_iommu_deinit(struct pt_iommu * iommu_table)218 static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
219 {
220 	/*
221 	 * It is safe to call pt_iommu_deinit() before an init, or if init
222 	 * fails. The ops pointer will only become non-NULL if deinit needs to be
223 	 * run.
224 	 */
225 	if (iommu_table->ops)
226 		iommu_table->ops->deinit(iommu_table);
227 }
228 
229 /**
230  * struct pt_iommu_cfg - Common configuration values for all formats
231  */
232 struct pt_iommu_cfg {
233 	/**
234 	 * @features: Features required. Only these features will be turned on.
235 	 * The feature list should reflect what the IOMMU HW is capable of.
236 	 */
237 	unsigned int features;
238 	/**
239 	 * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will
240 	 * imply the top level of the table.
241 	 */
242 	u8 hw_max_vasz_lg2;
243 	/**
244 	 * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format
245 	 * might select a lower maximum OA.
246 	 */
247 	u8 hw_max_oasz_lg2;
248 };
249 
250 /* Generate the exported function signatures from iommu_pt.h */
251 #define IOMMU_PROTOTYPES(fmt)                                                  \
252 	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
253 						  dma_addr_t iova);            \
254 	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
255 		struct iommu_domain *domain, unsigned long iova, size_t size,  \
256 		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
257 	int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table,                \
258 				  const struct pt_iommu_##fmt##_cfg *cfg,      \
259 				  gfp_t gfp);                                  \
260 	void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table,            \
261 				      struct pt_iommu_##fmt##_hw_info *info)
262 #define IOMMU_FORMAT(fmt, member)       \
263 	struct pt_iommu_##fmt {         \
264 		struct pt_iommu iommu;  \
265 		struct pt_##fmt member; \
266 	};                              \
267 	IOMMU_PROTOTYPES(fmt)
268 
269 /*
270  * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
271  * iommu_pt
272  */
273 #define IOMMU_PT_DOMAIN_OPS(fmt)                        \
274 	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys
275 #define IOMMU_PT_DIRTY_OPS(fmt) \
276 	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
277 
278 /*
279  * The driver should setup its domain struct like
280  *	union {
281  *		struct iommu_domain domain;
282  *		struct pt_iommu_xxx xx;
283  *	};
284  * PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
285  *
286  * Which creates an alias between driver_domain.domain and
287  * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
288  * driver_domain.domain users.
289  */
290 #define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \
291 	static_assert(offsetof(s, pt_iommu_memb.domain) ==   \
292 		      offsetof(s, domain_memb))
293 
294 struct pt_iommu_amdv1_cfg {
295 	struct pt_iommu_cfg common;
296 	unsigned int starting_level;
297 };
298 
299 struct pt_iommu_amdv1_hw_info {
300 	u64 host_pt_root;
301 	u8 mode;
302 };
303 
304 IOMMU_FORMAT(amdv1, amdpt);
305 
306 /* amdv1_mock is used by the iommufd selftest */
307 #define pt_iommu_amdv1_mock pt_iommu_amdv1
308 #define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
309 struct pt_iommu_amdv1_mock_hw_info;
310 IOMMU_PROTOTYPES(amdv1_mock);
311 
312 struct pt_iommu_vtdss_cfg {
313 	struct pt_iommu_cfg common;
314 	/* 4 is a 57 bit 5 level table */
315 	unsigned int top_level;
316 };
317 
318 struct pt_iommu_vtdss_hw_info {
319 	u64 ssptptr;
320 	u8 aw;
321 };
322 
323 IOMMU_FORMAT(vtdss, vtdss_pt);
324 
325 struct pt_iommu_riscv_64_cfg {
326 	struct pt_iommu_cfg common;
327 };
328 
329 struct pt_iommu_riscv_64_hw_info {
330 	u64 ppn;
331 	u8 fsc_iosatp_mode;
332 };
333 
334 IOMMU_FORMAT(riscv_64, riscv_64pt);
335 
336 struct pt_iommu_x86_64_cfg {
337 	struct pt_iommu_cfg common;
338 	/* 4 is a 57 bit 5 level table */
339 	unsigned int top_level;
340 };
341 
342 struct pt_iommu_x86_64_hw_info {
343 	u64 gcr3_pt;
344 	u8 levels;
345 };
346 
347 IOMMU_FORMAT(x86_64, x86_64_pt);
348 
349 #undef IOMMU_PROTOTYPES
350 #undef IOMMU_FORMAT
351 #endif
352