// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */

#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"

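/*
 * Report the raw ID register values of the physical SMMU (IDR0-IDR5, IIDR,
 * AIDR) to userspace via the iommufd HW info query so a VMM can construct a
 * matching vSMMU. The caller frees the returned buffer.
 */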
void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct iommu_hw_info_arm_smmuv3 *info;
	u32 __iomem *base_idr;
	unsigned int i;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	base_idr = master->smmu->base + ARM_SMMU_IDR0;
	for (i = 0; i <= 5; i++)
		info->idr[i] = readl_relaxed(base_idr + i);
	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);

	*length = sizeof(*info);
	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;

	return info;
}

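/*
 * Build the physical STE for the vSTE's S1 (CD table) configuration: start
 * from the kernel-owned S2 translation, force CFG to the nested S1+S2
 * encoding, then merge in the remaining fields of the userspace-provided
 * vSTE.
 */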
static void arm_smmu_make_nested_cd_table_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	arm_smmu_make_s2_domain_ste(
		target, master, nested_domain->vsmmu->s2_parent, ats_enabled);

	target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
				      FIELD_PREP(STRTAB_STE_0_CFG,
						 STRTAB_STE_0_CFG_NESTED));
	target->data[0] |= nested_domain->ste[0] &
			   ~cpu_to_le64(STRTAB_STE_0_CFG);
	target->data[1] |= nested_domain->ste[1];
	/* Merge events for DoS mitigations on eventq */
	target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}

/*
 * Create a physical STE from the virtual STE that userspace provided when it
 * created the nested domain. Using the vSTE userspace can request:
 * - Non-valid STE
 * - Abort STE
 * - Bypass STE (install the S2, no CD table)
 * - CD table STE (install the S2 and the userspace CD table)
 */
static void arm_smmu_make_nested_domain_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));

	/*
	 * Userspace can request a non-valid STE through the nesting interface.
	 * We relay that into an abort physical STE with the intention that
	 * C_BAD_STE for this SID can be generated to userspace.
	 */
	if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
		cfg = STRTAB_STE_0_CFG_ABORT;

	switch (cfg) {
	case STRTAB_STE_0_CFG_S1_TRANS:
		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
						  ats_enabled);
		break;
	case STRTAB_STE_0_CFG_BYPASS:
		arm_smmu_make_s2_domain_ste(target, master,
					    nested_domain->vsmmu->s2_parent,
					    ats_enabled);
		break;
	case STRTAB_STE_0_CFG_ABORT:
	default:
		arm_smmu_make_abort_ste(target);
		break;
	}
}

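/*
 * Look up the virtual SID that userspace assigned to this device on the vSMMU
 * and stage a vmaster in the attach state. It only becomes visible to the
 * event path once arm_smmu_attach_commit_vmaster() installs it.
 */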
int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
				    struct arm_smmu_nested_domain *nested_domain)
{
	struct arm_smmu_vmaster *vmaster;
	unsigned long vsid;
	int ret;

	iommu_group_mutex_assert(state->master->dev);

	ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
					 state->master->dev, &vsid);
	if (ret)
		return ret;

	vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
	if (!vmaster)
		return -ENOMEM;
	vmaster->vsmmu = nested_domain->vsmmu;
	vmaster->vsid = vsid;
	state->vmaster = vmaster;

	return 0;
}

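/*
 * Swap in the new vmaster under streams_mutex so that the event reporting
 * path, which holds the same mutex, never sees a freed or stale pointer.
 */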
void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
{
	struct arm_smmu_master *master = state->master;

	mutex_lock(&master->smmu->streams_mutex);
	kfree(master->vmaster);
	master->vmaster = state->vmaster;
	mutex_unlock(&master->smmu->streams_mutex);
}

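/* Detach any vmaster by committing a NULL pointer in its place. */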
void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
{
	struct arm_smmu_attach_state state = { .master = master };

	arm_smmu_attach_commit_vmaster(&state);
}

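/*
 * Attach a device to a nested domain. The domain must belong to the same
 * physical SMMU as the device, and the attach is refused while any SSIDs are
 * in use on the CD table. The merged STE is built and installed under
 * arm_smmu_asid_lock.
 */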
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
				      struct device *dev)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = iommu_get_domain_for_dev(dev),
		.ssid = IOMMU_NO_PASID,
	};
	struct arm_smmu_ste ste;
	int ret;

	if (nested_domain->vsmmu->smmu != master->smmu)
		return -EINVAL;
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	mutex_lock(&arm_smmu_asid_lock);
	/*
	 * The VM has to control the actual ATS state at the PCI device because
	 * we forward the invalidations directly from the VM. If the VM doesn't
	 * think ATS is on, it will not generate ATC flushes and the ATC will
	 * become incoherent. Since we can't access the actual virtual PCI ATS
	 * config bit here, base this off the EATS value in the STE. If EATS is
	 * set then the VM must generate ATC flushes.
	 */
	state.disable_ats = !nested_domain->enable_ats;
	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
		return ret;
	}

	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
					state.ats_enabled);
	arm_smmu_install_ste_for_dev(master, &ste);
	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);
	return 0;
}

static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	kfree(to_smmu_nested_domain(domain));
}

static const struct iommu_domain_ops arm_smmu_nested_ops = {
	.attach_dev = arm_smmu_attach_dev_nested,
	.free = arm_smmu_domain_nested_free,
};

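/*
 * Check that the vSTE from userspace only sets fields that are allowed for
 * nesting: CFG must be abort, bypass or S1, and EATS may only request full
 * ATS or abort. A non-valid vSTE is normalized to zero. *enable_ats is set
 * when an S1 vSTE requests ATS.
 */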
static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
				  bool *enable_ats)
{
	unsigned int eats;
	unsigned int cfg;

	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
		memset(arg->ste, 0, sizeof(arg->ste));
		return 0;
	}

	/* EIO is reserved for invalid STE data. */
	if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
	    (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
		return -EIO;

	cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
	if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
	    cfg != STRTAB_STE_0_CFG_S1_TRANS)
		return -EIO;

	/*
	 * Only Full ATS or ATS UR is supported. The EATS field will be set by
	 * arm_smmu_make_nested_domain_ste().
	 */
	eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
	arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
	if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
		return -EIO;

	if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
		*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
	return 0;
}

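/*
 * Allocate an IOMMU_DOMAIN_NESTED domain wrapping the vSTE that userspace
 * passed in. Only the validated STE words are kept; the EATS field is owned
 * by the kernel and stripped here.
 */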
static struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_nested_domain *nested_domain;
	struct iommu_hwpt_arm_smmuv3 arg;
	bool enable_ats = false;
	int ret;

	if (flags)
		return ERR_PTR(-EOPNOTSUPP);

	ret = iommu_copy_struct_from_user(&arg, user_data,
					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
	if (ret)
		return ERR_PTR(ret);

	ret = arm_smmu_validate_vste(&arg, &enable_ats);
	if (ret)
		return ERR_PTR(ret);

	nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
	if (!nested_domain)
		return ERR_PTR(-ENOMEM);

	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
	nested_domain->domain.ops = &arm_smmu_nested_ops;
	nested_domain->enable_ats = enable_ats;
	nested_domain->vsmmu = vsmmu;
	nested_domain->ste[0] = arg.ste[0];
	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);

	return &nested_domain->domain;
}

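/*
 * Translate a virtual StreamID from a guest command into the physical
 * StreamID of the device that userspace bound to that vSID on this vSMMU.
 */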
static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
	struct arm_smmu_master *master;
	struct device *dev;
	int ret = 0;

	xa_lock(&vsmmu->core.vdevs);
	dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
	if (!dev) {
		ret = -EIO;
		goto unlock;
	}
	master = dev_iommu_priv_get(dev);

	/* At this moment, iommufd only supports PCI devices that have one SID */
	if (sid)
		*sid = master->streams[0].id;
unlock:
	xa_unlock(&vsmmu->core.vdevs);
	return ret;
}

/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
	union {
		u64 cmd[2];
		struct iommu_viommu_arm_smmuv3_invalidate ucmd;
	};
};

/*
 * Convert, in place, the raw invalidation command into an internal format that
 * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
 * stored in CPU endian.
 *
 * Enforce the VMID or SID on the command.
 */
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
				      struct arm_vsmmu_invalidation_cmd *cmd)
{
	/* Commands are le64 stored in u64 */
	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);

	switch (cmd->cmd[0] & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Convert to NH_ALL */
		cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		cmd->cmd[1] = 0;
		break;
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		break;
	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL: {
		u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);

		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
			return -EIO;
		cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
		break;
	}
	default:
		return -EIO;
	}
	return 0;
}

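/*
 * Copy the guest's raw invalidation commands out of userspace, rewrite the
 * VMID/SID in each one, and issue them to the physical command queue in
 * batches. array->entry_num is updated so userspace can tell how many
 * commands were processed.
 */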
static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
				      struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu = vsmmu->smmu;
	struct arm_vsmmu_invalidation_cmd *last;
	struct arm_vsmmu_invalidation_cmd *cmds;
	struct arm_vsmmu_invalidation_cmd *cur;
	struct arm_vsmmu_invalidation_cmd *end;
	int ret;

	cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;
	cur = cmds;
	end = cmds + array->entry_num;

	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
	ret = iommu_copy_struct_from_full_user_array(
		cmds, sizeof(*cmds), array,
		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
	if (ret)
		goto out;

	last = cmds;
	while (cur != end) {
		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
		if (ret)
			goto out;

		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
		cur++;
		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
			continue;

		/* FIXME always uses the main cmdq rather than trying to group by type */
		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
						  cur - last, true);
		if (ret) {
			cur--;
			goto out;
		}
		last = cur;
	}
out:
	array->entry_num = cur - cmds;
	kfree(cmds);
	return ret;
}

static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};

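/*
 * Create the vSMMU object backing a guest's virtual SMMUv3 instance. The
 * physical SMMU must support nesting and must be able to keep the VM from
 * bypassing cache coherency, and the S2 parent domain must come from the same
 * SMMU instance as the device.
 */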
struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
				       struct iommu_domain *parent,
				       struct iommufd_ctx *ictx,
				       unsigned int viommu_type)
{
	struct arm_smmu_device *smmu =
		iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent);
	struct arm_vsmmu *vsmmu;

	if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return ERR_PTR(-EOPNOTSUPP);

	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
		return ERR_PTR(-EOPNOTSUPP);

	if (s2_parent->smmu != master->smmu)
		return ERR_PTR(-EINVAL);

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * Must support some way to prevent the VM from bypassing the cache
	 * because VFIO currently does not do any cache maintenance. canwbs
	 * indicates the device is fully coherent and no cache maintenance is
	 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
	 * things non-coherent using the memattr, but No-Snoop behavior is not
	 * affected.
	 */
	if (!arm_smmu_master_canwbs(master) &&
	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
		return ERR_PTR(-EOPNOTSUPP);

	vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
				     &arm_vsmmu_ops);
	if (IS_ERR(vsmmu))
		return ERR_CAST(vsmmu);

	vsmmu->smmu = smmu;
	vsmmu->s2_parent = s2_parent;
	/* FIXME Move VMID allocation from the S2 domain allocation to here */
	vsmmu->vmid = s2_parent->s2_cfg.vmid;

	return &vsmmu->core;
}

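/*
 * Forward a physical event queue record to userspace through the vEVENTQ,
 * substituting the device's virtual SID for the physical one.
 */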
int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
{
	struct iommu_vevent_arm_smmuv3 vevt;
	int i;

	lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);

	vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
				  FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
	for (i = 1; i < EVTQ_ENT_DWORDS; i++)
		vevt.evt[i] = cpu_to_le64(evt[i]);

	return iommufd_viommu_report_event(&vmaster->vsmmu->core,
					   IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
					   sizeof(vevt));
}

MODULE_IMPORT_NS("IOMMUFD");