// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */

#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"

void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct iommu_hw_info_arm_smmuv3 *info;
	u32 __iomem *base_idr;
	unsigned int i;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	base_idr = master->smmu->base + ARM_SMMU_IDR0;
	for (i = 0; i <= 5; i++)
		info->idr[i] = readl_relaxed(base_idr + i);
	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);

	*length = sizeof(*info);
	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;

	return info;
}
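/*
 * Illustrative sketch (not part of the driver): a VMM that fetched a
 * struct iommu_hw_info_arm_smmuv3 via IOMMU_GET_HW_INFO could decode the
 * raw ID registers reported above roughly as below. The helper name is
 * made up for the example; the bit positions (SMMU_IDR0.S2P is bit 0,
 * SMMU_IDR0.S1P is bit 1) come from the SMMUv3 architecture.
 *
 *	static bool vmm_smmu_has_stage1(const struct iommu_hw_info_arm_smmuv3 *info)
 *	{
 *		// idr[0] mirrors SMMU_IDR0; bit 1 (S1P) advertises stage-1 support
 *		return info->idr[0] & (1U << 1);
 *	}
 *
 * A guest-facing vSMMU model would typically sanitize these values before
 * exposing them to the guest rather than forwarding every feature bit
 * unmodified.
 */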
static void arm_smmu_make_nested_cd_table_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	arm_smmu_make_s2_domain_ste(
		target, master, nested_domain->vsmmu->s2_parent, ats_enabled);

	target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
				      FIELD_PREP(STRTAB_STE_0_CFG,
						 STRTAB_STE_0_CFG_NESTED));
	target->data[0] |= nested_domain->ste[0] &
			   ~cpu_to_le64(STRTAB_STE_0_CFG);
	target->data[1] |= nested_domain->ste[1];
	/* Merge events for DoS mitigations on eventq */
	target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}

/*
 * Create a physical STE from the virtual STE that userspace provided when it
 * created the nested domain. Using the vSTE userspace can request:
 * - Non-valid STE
 * - Abort STE
 * - Bypass STE (install the S2, no CD table)
 * - CD table STE (install the S2 and the userspace CD table)
 */
static void arm_smmu_make_nested_domain_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));

	/*
	 * Userspace can request a non-valid STE through the nesting interface.
	 * We relay that into an abort physical STE with the intention that
	 * C_BAD_STE for this SID can be generated to userspace.
	 */
	if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
		cfg = STRTAB_STE_0_CFG_ABORT;

	switch (cfg) {
	case STRTAB_STE_0_CFG_S1_TRANS:
		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
						  ats_enabled);
		break;
	case STRTAB_STE_0_CFG_BYPASS:
		arm_smmu_make_s2_domain_ste(target, master,
					    nested_domain->vsmmu->s2_parent,
					    ats_enabled);
		break;
	case STRTAB_STE_0_CFG_ABORT:
	default:
		arm_smmu_make_abort_ste(target);
		break;
	}
}
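/*
 * Illustrative sketch (not part of the driver): the vSTE consumed above is
 * just the two low STE words, passed in from userspace via
 * struct iommu_hwpt_arm_smmuv3::ste[]. Assuming the field macros from
 * arm-smmu-v3.h, a VMM could encode the two interesting cases roughly as:
 *
 *	__le64 vste[2];
 *
 *	// Bypass: valid STE, Config = bypass, word 1 unused
 *	vste[0] = cpu_to_le64(STRTAB_STE_0_V |
 *			      FIELD_PREP(STRTAB_STE_0_CFG,
 *					 STRTAB_STE_0_CFG_BYPASS));
 *	vste[1] = 0;
 *
 *	// CD table: valid STE, Config = S1 translate, plus the CD table
 *	// pointer and EATS=0b01 when the guest has ATS enabled.
 *	// guest_cd_table_ipa is a placeholder for the guest-physical address
 *	// of the guest's CD table (an IPA, translated through the S2).
 *	vste[0] = cpu_to_le64(STRTAB_STE_0_V |
 *			      FIELD_PREP(STRTAB_STE_0_CFG,
 *					 STRTAB_STE_0_CFG_S1_TRANS) |
 *			      (guest_cd_table_ipa & STRTAB_STE_0_S1CTXPTR_MASK));
 *	vste[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
 *					 STRTAB_STE_1_EATS_TRANS));
 *
 * Anything outside the STRTAB_STE_0/1_NESTING_ALLOWED masks is rejected by
 * arm_smmu_validate_vste() further down, so only a whitelisted subset of the
 * guest's STE ever reaches the hardware.
 */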
int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
				    struct arm_smmu_nested_domain *nested_domain)
{
	struct arm_smmu_vmaster *vmaster;
	unsigned long vsid;
	int ret;

	iommu_group_mutex_assert(state->master->dev);

	ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
					 state->master->dev, &vsid);
	if (ret)
		return ret;

	vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
	if (!vmaster)
		return -ENOMEM;
	vmaster->vsmmu = nested_domain->vsmmu;
	vmaster->vsid = vsid;
	state->vmaster = vmaster;

	return 0;
}

void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
{
	struct arm_smmu_master *master = state->master;

	mutex_lock(&master->smmu->streams_mutex);
	kfree(master->vmaster);
	master->vmaster = state->vmaster;
	mutex_unlock(&master->smmu->streams_mutex);
}

void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
{
	struct arm_smmu_attach_state state = { .master = master };

	arm_smmu_attach_commit_vmaster(&state);
}

static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
				      struct device *dev)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = iommu_get_domain_for_dev(dev),
		.ssid = IOMMU_NO_PASID,
	};
	struct arm_smmu_ste ste;
	int ret;

	if (nested_domain->vsmmu->smmu != master->smmu)
		return -EINVAL;
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	mutex_lock(&arm_smmu_asid_lock);
	/*
	 * The VM has to control the actual ATS state at the PCI device because
	 * we forward the invalidations directly from the VM. If the VM doesn't
	 * think ATS is on it will not generate ATC flushes and the ATC will
	 * become incoherent. Since we can't access the actual virtual PCI ATS
	 * config bit here, base this on the EATS value in the STE. If EATS is
	 * set then the VM must generate ATC flushes.
	 */
	state.disable_ats = !nested_domain->enable_ats;
	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
		return ret;
	}

	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
					state.ats_enabled);
	arm_smmu_install_ste_for_dev(master, &ste);
	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);
	return 0;
}

static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	kfree(to_smmu_nested_domain(domain));
}

static const struct iommu_domain_ops arm_smmu_nested_ops = {
	.attach_dev = arm_smmu_attach_dev_nested,
	.free = arm_smmu_domain_nested_free,
};

static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
				  bool *enable_ats)
{
	unsigned int eats;
	unsigned int cfg;

	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
		memset(arg->ste, 0, sizeof(arg->ste));
		return 0;
	}

	/* EIO is reserved for invalid STE data. */
	if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
	    (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
		return -EIO;

	cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
	if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
	    cfg != STRTAB_STE_0_CFG_S1_TRANS)
		return -EIO;

	/*
	 * Only Full ATS or ATS UR is supported. The EATS field will be set by
	 * arm_smmu_make_nested_domain_ste().
	 */
	eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
	arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
	if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
		return -EIO;

	if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
		*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
	return 0;
}

static struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_nested_domain *nested_domain;
	struct iommu_hwpt_arm_smmuv3 arg;
	bool enable_ats = false;
	int ret;

	if (flags)
		return ERR_PTR(-EOPNOTSUPP);

	ret = iommu_copy_struct_from_user(&arg, user_data,
					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
	if (ret)
		return ERR_PTR(ret);

	ret = arm_smmu_validate_vste(&arg, &enable_ats);
	if (ret)
		return ERR_PTR(ret);

	nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
	if (!nested_domain)
		return ERR_PTR(-ENOMEM);

	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
	nested_domain->domain.ops = &arm_smmu_nested_ops;
	nested_domain->enable_ats = enable_ats;
	nested_domain->vsmmu = vsmmu;
	nested_domain->ste[0] = arg.ste[0];
	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);

	return &nested_domain->domain;
}

static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
	struct arm_smmu_master *master;
	struct device *dev;
	int ret = 0;

	xa_lock(&vsmmu->core.vdevs);
	dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
	if (!dev) {
		ret = -EIO;
		goto unlock;
	}
	master = dev_iommu_priv_get(dev);

	/* At present, iommufd only supports PCI devices with a single SID */
	if (sid)
		*sid = master->streams[0].id;
unlock:
	xa_unlock(&vsmmu->core.vdevs);
	return ret;
}
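/*
 * Illustrative sketch (not part of the driver): the vSID looked up above only
 * exists because userspace previously bound the physical device to a virtual
 * Stream ID on this vIOMMU with IOMMU_VDEVICE_ALLOC. Roughly, and assuming
 * the field names in include/uapi/linux/iommufd.h (check the header for the
 * authoritative layout; iommufd, viommu_id, dev_id and guest_sid are all
 * placeholders for objects the VMM already holds):
 *
 *	struct iommu_vdevice_alloc vdev = {
 *		.size = sizeof(vdev),
 *		.viommu_id = viommu_id,	// vIOMMU from IOMMU_VIOMMU_ALLOC
 *		.dev_id = dev_id,	// iommufd ID of the passthrough device
 *		.virt_id = guest_sid,	// Stream ID the guest uses for it
 *	};
 *	ioctl(iommufd, IOMMU_VDEVICE_ALLOC, &vdev);
 *
 * After that, iommufd_viommu_get_vdev_id() and iommufd_viommu_find_dev() can
 * translate between the guest's Stream ID and the physical master.
 */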
/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
	union {
		u64 cmd[2];
		struct iommu_viommu_arm_smmuv3_invalidate ucmd;
	};
};

/*
 * Convert, in place, the raw invalidation command into an internal format that
 * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
 * stored in CPU endian.
 *
 * Enforce the VMID or SID on the command.
 */
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
				      struct arm_vsmmu_invalidation_cmd *cmd)
{
	/* Commands are le64 stored in u64 */
	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);

	switch (cmd->cmd[0] & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Convert to NH_ALL */
		cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		cmd->cmd[1] = 0;
		break;
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		break;
	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL: {
		u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);

		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
			return -EIO;
		cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
		break;
	}
	default:
		return -EIO;
	}
	return 0;
}
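/*
 * Illustrative sketch (not part of the driver): the commands converted above
 * arrive from userspace as little-endian pairs in
 * struct iommu_viommu_arm_smmuv3_invalidate, exactly as the guest wrote them
 * to its virtual command queue. Using the kernel's field macros purely for
 * brevity (guest_asid is a placeholder for the ASID being invalidated), a
 * forwarded guest CMD_TLBI_NH_ASID would look roughly like:
 *
 *	struct iommu_viommu_arm_smmuv3_invalidate inv = {
 *		.cmd = {
 *			cpu_to_le64(FIELD_PREP(CMDQ_0_OP, CMDQ_OP_TLBI_NH_ASID) |
 *				    FIELD_PREP(CMDQ_TLBI_0_ASID, guest_asid)),
 *			cpu_to_le64(0),
 *		},
 *	};
 *
 * arm_vsmmu_convert_user_cmd() then overwrites the VMID field with the
 * vSMMU's real VMID (and, for CFGI/ATC_INV commands, rewrites the guest
 * Stream ID to the physical one), so a malicious or buggy guest cannot
 * invalidate another VM's translations.
 */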
static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
				      struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu = vsmmu->smmu;
	struct arm_vsmmu_invalidation_cmd *last;
	struct arm_vsmmu_invalidation_cmd *cmds;
	struct arm_vsmmu_invalidation_cmd *cur;
	struct arm_vsmmu_invalidation_cmd *end;
	int ret;

	cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;
	cur = cmds;
	end = cmds + array->entry_num;

	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
	ret = iommu_copy_struct_from_full_user_array(
		cmds, sizeof(*cmds), array,
		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
	if (ret)
		goto out;

	last = cmds;
	while (cur != end) {
		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
		if (ret)
			goto out;

		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
		cur++;
		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
			continue;

		/* FIXME always uses the main cmdq rather than trying to group by type */
		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
						  cur - last, true);
		if (ret) {
			cur--;
			goto out;
		}
		last = cur;
	}
out:
	array->entry_num = cur - cmds;
	kfree(cmds);
	return ret;
}

static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};

struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
				       struct iommu_domain *parent,
				       struct iommufd_ctx *ictx,
				       unsigned int viommu_type)
{
	struct arm_smmu_device *smmu =
		iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent);
	struct arm_vsmmu *vsmmu;

	if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return ERR_PTR(-EOPNOTSUPP);

	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
		return ERR_PTR(-EOPNOTSUPP);

	if (s2_parent->smmu != master->smmu)
		return ERR_PTR(-EINVAL);

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * Must support some way to prevent the VM from bypassing the cache
	 * because VFIO currently does not do any cache maintenance. canwbs
	 * indicates the device is fully coherent and no cache maintenance is
	 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
	 * things non-coherent using the memattr, but No-Snoop behavior is not
	 * affected.
	 */
	if (!arm_smmu_master_canwbs(master) &&
	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
		return ERR_PTR(-EOPNOTSUPP);

	vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
				     &arm_vsmmu_ops);
	if (IS_ERR(vsmmu))
		return ERR_CAST(vsmmu);

	vsmmu->smmu = smmu;
	vsmmu->s2_parent = s2_parent;
	/* FIXME Move VMID allocation from the S2 domain allocation to here */
	vsmmu->vmid = s2_parent->s2_cfg.vmid;

	return &vsmmu->core;
}

int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
{
	struct iommu_vevent_arm_smmuv3 vevt;
	int i;

	lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);

	vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
				  FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
	for (i = 1; i < EVTQ_ENT_DWORDS; i++)
		vevt.evt[i] = cpu_to_le64(evt[i]);

	return iommufd_viommu_report_event(&vmaster->vsmmu->core,
					   IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
					   sizeof(vevt));
}

MODULE_IMPORT_NS("IOMMUFD");