1 /* 2 * s390 vfio-pci interfaces 3 * 4 * Copyright 2020 IBM Corp. 5 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com> 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or (at 8 * your option) any later version. See the COPYING file in the top-level 9 * directory. 10 */ 11 12 #include "qemu/osdep.h" 13 14 #include <sys/ioctl.h> 15 #include <linux/vfio.h> 16 #include <linux/vfio_zdev.h> 17 18 #include "trace.h" 19 #include "hw/s390x/s390-pci-bus.h" 20 #include "hw/s390x/s390-pci-clp.h" 21 #include "hw/s390x/s390-pci-vfio.h" 22 #include "hw/vfio/pci.h" 23 #include "hw/vfio/vfio-container.h" 24 #include "hw/vfio/vfio-helpers.h" 25 26 /* 27 * Get the current DMA available count from vfio. Returns true if vfio is 28 * limiting DMA requests, false otherwise. The current available count read 29 * from vfio is returned in avail. 30 */ 31 bool s390_pci_update_dma_avail(int fd, unsigned int *avail) 32 { 33 uint32_t argsz = sizeof(struct vfio_iommu_type1_info); 34 g_autofree struct vfio_iommu_type1_info *info = g_malloc0(argsz); 35 36 assert(avail); 37 38 /* 39 * If the specified argsz is not large enough to contain all capabilities 40 * it will be updated upon return from the ioctl. Retry until we have 41 * a big enough buffer to hold the entire capability chain. 42 */ 43 retry: 44 info->argsz = argsz; 45 46 if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) { 47 return false; 48 } 49 50 if (info->argsz > argsz) { 51 argsz = info->argsz; 52 info = g_realloc(info, argsz); 53 goto retry; 54 } 55 56 /* If the capability exists, update with the current value */ 57 return vfio_get_info_dma_avail(info, avail); 58 } 59 60 S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, 61 S390PCIBusDevice *pbdev) 62 { 63 S390PCIDMACount *cnt; 64 uint32_t avail; 65 VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); 66 int id; 67 68 assert(vpdev); 69 70 if (!vpdev->vbasedev.group) { 71 return NULL; 72 } 73 74 id = vpdev->vbasedev.group->container->fd; 75 76 if (!s390_pci_update_dma_avail(id, &avail)) { 77 return NULL; 78 } 79 80 QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { 81 if (cnt->id == id) { 82 cnt->users++; 83 return cnt; 84 } 85 } 86 87 cnt = g_new0(S390PCIDMACount, 1); 88 cnt->id = id; 89 cnt->users = 1; 90 cnt->avail = avail; 91 QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); 92 pbdev->iommu->max_dma_limit = avail; 93 return cnt; 94 } 95 96 void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) 97 { 98 assert(cnt); 99 100 cnt->users--; 101 if (cnt->users == 0) { 102 QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); 103 } 104 } 105 106 static void s390_pci_read_base(S390PCIBusDevice *pbdev, 107 struct vfio_device_info *info) 108 { 109 struct vfio_info_cap_header *hdr; 110 struct vfio_device_info_cap_zpci_base *cap; 111 VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); 112 uint64_t vfio_size; 113 114 hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); 115 116 /* If capability not provided, just leave the defaults in place */ 117 if (hdr == NULL) { 118 trace_s390_pci_clp_cap(vpci->vbasedev.name, 119 VFIO_DEVICE_INFO_CAP_ZPCI_BASE); 120 return; 121 } 122 cap = (void *) hdr; 123 124 pbdev->zpci_fn.sdma = cap->start_dma; 125 pbdev->zpci_fn.edma = cap->end_dma; 126 pbdev->zpci_fn.pchid = cap->pchid; 127 pbdev->zpci_fn.vfn = cap->vfn; 128 pbdev->zpci_fn.pfgid = cap->gid; 129 /* The following values remain 0 until we support other FMB formats */ 130 pbdev->zpci_fn.fmbl = 0; 131 pbdev->zpci_fn.pft = 0; 132 /* Store function type separately for type-specific behavior */ 133 pbdev->pft = cap->pft; 134 135 /* 136 * If the device is a passthrough ISM device, disallow relaxed 137 * translation. 138 */ 139 if (pbdev->pft == ZPCI_PFT_ISM) { 140 pbdev->rtr_avail = false; 141 } 142 143 /* 144 * If appropriate, reduce the size of the supported DMA aperture reported 145 * to the guest based upon the vfio DMA limit. This is applicable for 146 * devices that are guaranteed to not use relaxed translation. If the 147 * device is capable of relaxed translation then we must advertise the 148 * full aperture. In this case, if translation is used then we will 149 * rely on the vfio DMA limit counting and use RPCIT CC1 / status 16 150 * to request that the guest free DMA mappings as necessary. 151 */ 152 if (!pbdev->rtr_avail) { 153 vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; 154 if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) { 155 pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; 156 } 157 } 158 } 159 160 static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, 161 uint32_t *fh) 162 { 163 struct vfio_info_cap_header *hdr; 164 struct vfio_device_info_cap_zpci_base *cap; 165 VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); 166 167 hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); 168 169 /* Can only get the host fh with version 2 or greater */ 170 if (hdr == NULL || hdr->version < 2) { 171 trace_s390_pci_clp_cap(vpci->vbasedev.name, 172 VFIO_DEVICE_INFO_CAP_ZPCI_BASE); 173 return false; 174 } 175 cap = (void *) hdr; 176 177 *fh = cap->fh; 178 return true; 179 } 180 181 static void s390_pci_read_group(S390PCIBusDevice *pbdev, 182 struct vfio_device_info *info) 183 { 184 struct vfio_info_cap_header *hdr; 185 struct vfio_device_info_cap_zpci_group *cap; 186 S390pciState *s = s390_get_phb(); 187 ClpRspQueryPciGrp *resgrp; 188 VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); 189 uint8_t start_gid = pbdev->zpci_fn.pfgid; 190 191 hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); 192 193 /* 194 * If capability not provided or the underlying hostdev is simulated, just 195 * use the default group. 196 */ 197 if (hdr == NULL || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) { 198 trace_s390_pci_clp_cap(vpci->vbasedev.name, 199 VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); 200 pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; 201 pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP); 202 return; 203 } 204 cap = (void *) hdr; 205 206 /* 207 * For an intercept device, let's use an existing simulated group if one 208 * one was already created for other intercept devices in this group. 209 * If not, create a new simulated group if any are still available. 210 * If all else fails, just fall back on the default group. 211 */ 212 if (!pbdev->interp) { 213 pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid); 214 if (pbdev->pci_group) { 215 /* Use existing simulated group */ 216 pbdev->zpci_fn.pfgid = pbdev->pci_group->id; 217 return; 218 } else { 219 if (s->next_sim_grp == ZPCI_DEFAULT_FN_GRP) { 220 /* All out of simulated groups, use default */ 221 trace_s390_pci_clp_cap(vpci->vbasedev.name, 222 VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); 223 pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; 224 pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP); 225 return; 226 } else { 227 /* We can assign a new simulated group */ 228 pbdev->zpci_fn.pfgid = s->next_sim_grp; 229 s->next_sim_grp++; 230 /* Fall through to create the new sim group using CLP info */ 231 } 232 } 233 } 234 235 /* See if the PCI group is already defined, create if not */ 236 pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid); 237 238 if (!pbdev->pci_group) { 239 pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, start_gid); 240 241 resgrp = &pbdev->pci_group->zpci_group; 242 if (pbdev->rtr_avail) { 243 resgrp->fr |= CLP_RSP_QPCIG_MASK_RTR; 244 } 245 if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) { 246 resgrp->fr |= CLP_RSP_QPCIG_MASK_REFRESH; 247 } 248 resgrp->dasm = cap->dasm; 249 resgrp->msia = cap->msi_addr; 250 resgrp->mui = cap->mui; 251 resgrp->i = cap->noi; 252 if (pbdev->interp && hdr->version >= 2) { 253 resgrp->maxstbl = cap->imaxstbl; 254 } else { 255 resgrp->maxstbl = cap->maxstbl; 256 } 257 resgrp->version = cap->version; 258 resgrp->dtsm = ZPCI_DTSM; 259 } 260 } 261 262 static void s390_pci_read_util(S390PCIBusDevice *pbdev, 263 struct vfio_device_info *info) 264 { 265 struct vfio_info_cap_header *hdr; 266 struct vfio_device_info_cap_zpci_util *cap; 267 VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); 268 269 hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL); 270 271 /* If capability not provided, just leave the defaults in place */ 272 if (hdr == NULL) { 273 trace_s390_pci_clp_cap(vpci->vbasedev.name, 274 VFIO_DEVICE_INFO_CAP_ZPCI_UTIL); 275 return; 276 } 277 cap = (void *) hdr; 278 279 if (cap->size > CLP_UTIL_STR_LEN) { 280 trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size, 281 VFIO_DEVICE_INFO_CAP_ZPCI_UTIL); 282 return; 283 } 284 285 pbdev->zpci_fn.flags |= CLP_RSP_QPCI_MASK_UTIL; 286 memcpy(pbdev->zpci_fn.util_str, cap->util_str, CLP_UTIL_STR_LEN); 287 } 288 289 static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, 290 struct vfio_device_info *info) 291 { 292 struct vfio_info_cap_header *hdr; 293 struct vfio_device_info_cap_zpci_pfip *cap; 294 VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); 295 296 hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP); 297 298 /* If capability not provided, just leave the defaults in place */ 299 if (hdr == NULL) { 300 trace_s390_pci_clp_cap(vpci->vbasedev.name, 301 VFIO_DEVICE_INFO_CAP_ZPCI_PFIP); 302 return; 303 } 304 cap = (void *) hdr; 305 306 if (cap->size > CLP_PFIP_NR_SEGMENTS) { 307 trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size, 308 VFIO_DEVICE_INFO_CAP_ZPCI_PFIP); 309 return; 310 } 311 312 memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); 313 } 314 315 static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) 316 { 317 VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); 318 319 return vfio_get_device_info(vfio_pci->vbasedev.fd); 320 } 321 322 /* 323 * Get the host function handle from the vfio CLP capabilities chain. Returns 324 * true if a fh value was placed into the provided buffer. Returns false 325 * if a fh could not be obtained (ioctl failed or capability version does 326 * not include the fh) 327 */ 328 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) 329 { 330 g_autofree struct vfio_device_info *info = NULL; 331 332 assert(fh); 333 334 info = get_device_info(pbdev); 335 if (!info) { 336 return false; 337 } 338 339 return get_host_fh(pbdev, info, fh); 340 } 341 342 /* 343 * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for 344 * capabilities that contain information about CLP features provided by the 345 * underlying host. 346 * On entry, defaults have already been placed into the guest CLP response 347 * buffers. On exit, defaults will have been overwritten for any CLP features 348 * found in the capability chain; defaults will remain for any CLP features not 349 * found in the chain. 350 */ 351 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) 352 { 353 g_autofree struct vfio_device_info *info = NULL; 354 355 info = get_device_info(pbdev); 356 if (!info) { 357 return; 358 } 359 360 /* 361 * Find the CLP features provided and fill in the guest CLP responses. 362 * Always call s390_pci_read_base first as information from this could 363 * determine which function group is used in s390_pci_read_group. 364 * For any feature not found, the default values will remain in the CLP 365 * response. 366 */ 367 s390_pci_read_base(pbdev, info); 368 s390_pci_read_group(pbdev, info); 369 s390_pci_read_util(pbdev, info); 370 s390_pci_read_pfip(pbdev, info); 371 } 372