1 /*
2 * s390 vfio-pci interfaces
3 *
4 * Copyright 2020 IBM Corp.
5 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or (at
8 * your option) any later version. See the COPYING file in the top-level
9 * directory.
10 */
11
12 #include "qemu/osdep.h"
13
14 #include <sys/ioctl.h>
15 #include <linux/vfio.h>
16 #include <linux/vfio_zdev.h>
17
18 #include "trace.h"
19 #include "hw/s390x/s390-pci-bus.h"
20 #include "hw/s390x/s390-pci-clp.h"
21 #include "hw/s390x/s390-pci-vfio.h"
22 #include "hw/vfio/pci.h"
23 #include "hw/vfio/vfio-container.h"
24 #include "hw/vfio/vfio-helpers.h"
25
26 /*
27 * Get the current DMA available count from vfio. Returns true if vfio is
28 * limiting DMA requests, false otherwise. The current available count read
29 * from vfio is returned in avail.
30 */
s390_pci_update_dma_avail(int fd,unsigned int * avail)31 bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
32 {
33 uint32_t argsz = sizeof(struct vfio_iommu_type1_info);
34 g_autofree struct vfio_iommu_type1_info *info = g_malloc0(argsz);
35
36 assert(avail);
37
38 /*
39 * If the specified argsz is not large enough to contain all capabilities
40 * it will be updated upon return from the ioctl. Retry until we have
41 * a big enough buffer to hold the entire capability chain.
42 */
43 retry:
44 info->argsz = argsz;
45
46 if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) {
47 return false;
48 }
49
50 if (info->argsz > argsz) {
51 argsz = info->argsz;
52 info = g_realloc(info, argsz);
53 goto retry;
54 }
55
56 /* If the capability exists, update with the current value */
57 return vfio_get_info_dma_avail(info, avail);
58 }
59
s390_pci_start_dma_count(S390pciState * s,S390PCIBusDevice * pbdev)60 S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
61 S390PCIBusDevice *pbdev)
62 {
63 S390PCIDMACount *cnt;
64 uint32_t avail;
65 VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
66 int id;
67
68 assert(vpdev);
69
70 if (!vpdev->vbasedev.group) {
71 return NULL;
72 }
73
74 id = vpdev->vbasedev.group->container->fd;
75
76 if (!s390_pci_update_dma_avail(id, &avail)) {
77 return NULL;
78 }
79
80 QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
81 if (cnt->id == id) {
82 cnt->users++;
83 return cnt;
84 }
85 }
86
87 cnt = g_new0(S390PCIDMACount, 1);
88 cnt->id = id;
89 cnt->users = 1;
90 cnt->avail = avail;
91 QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
92 pbdev->iommu->max_dma_limit = avail;
93 return cnt;
94 }
95
/*
 * Drop one user from a DMA-limit tracking entry obtained from
 * s390_pci_start_dma_count().
 *
 * When the last user goes away the entry is unlinked from
 * s->zpci_dma_limit and released; the caller must not touch cnt again
 * after that point.  cnt must not be NULL.
 */
void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
{
    assert(cnt);

    cnt->users--;
    if (cnt->users == 0) {
        QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
        /*
         * The entry was allocated with g_new0() in
         * s390_pci_start_dma_count(); once it is off the list it has to be
         * freed here or it is leaked.
         */
        g_free(cnt);
    }
}
105
/*
 * Fill the guest-visible function information in pbdev->zpci_fn from the
 * VFIO_DEVICE_INFO_CAP_ZPCI_BASE capability found in info.
 *
 * If the capability is absent a trace event is emitted and the defaults
 * already present in pbdev are left untouched.
 */
static void s390_pci_read_base(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_device_info_cap_zpci_base *base;
    struct vfio_info_cap_header *hdr;
    uint64_t limit_bytes;

    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
    if (!hdr) {
        /* Capability not provided: keep the defaults */
        trace_s390_pci_clp_cap(vpci->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
        return;
    }
    base = (void *) hdr;

    pbdev->zpci_fn.sdma = base->start_dma;
    pbdev->zpci_fn.edma = base->end_dma;
    pbdev->zpci_fn.pchid = base->pchid;
    pbdev->zpci_fn.vfn = base->vfn;
    pbdev->zpci_fn.pfgid = base->gid;

    /* These stay 0 until other FMB formats are supported */
    pbdev->zpci_fn.fmbl = 0;
    pbdev->zpci_fn.pft = 0;

    /* The real function type is kept aside for type-specific behavior */
    pbdev->pft = base->pft;

    /* A passthrough ISM device must not use relaxed translation */
    if (pbdev->pft == ZPCI_PFT_ISM) {
        pbdev->rtr_avail = false;
    }

    /*
     * A device that may use relaxed translation must be shown the full
     * aperture.  If translation ends up being used for it, the vfio DMA
     * limit counting and RPCIT CC1 / status 16 are relied upon to ask the
     * guest to free DMA mappings as necessary.
     */
    if (pbdev->rtr_avail) {
        return;
    }

    /*
     * The device is guaranteed not to use relaxed translation, so the
     * aperture reported to the guest can be shrunk to what the vfio DMA
     * limit allows, when that limit is known and smaller than the host
     * aperture.
     */
    limit_bytes = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
    if (limit_bytes == 0) {
        return;
    }
    if (limit_bytes < base->end_dma - base->start_dma + 1) {
        pbdev->zpci_fn.edma = base->start_dma + limit_bytes - 1;
    }
}
159
/*
 * Extract the host function handle from the
 * VFIO_DEVICE_INFO_CAP_ZPCI_BASE capability in info.
 *
 * Returns true and stores the handle in *fh when the capability is present
 * with version 2 or later.  Otherwise emits a trace event, leaves *fh
 * untouched and returns false.
 */
static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
                        uint32_t *fh)
{
    VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_info_cap_header *hdr =
        vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);

    /* The fh field only exists from capability version 2 onwards */
    if (hdr != NULL && hdr->version >= 2) {
        struct vfio_device_info_cap_zpci_base *base = (void *) hdr;

        *fh = base->fh;
        return true;
    }

    trace_s390_pci_clp_cap(vpci->vbasedev.name,
                           VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
    return false;
}
180
/*
 * Select the PCI function group for pbdev and, when a new group has to be
 * created, populate it from the VFIO_DEVICE_INFO_CAP_ZPCI_GROUP capability
 * found in info.
 *
 * Relies on pbdev->zpci_fn.pfgid having been set beforehand; that value is
 * remembered as the starting group id before any reassignment.
 */
static void s390_pci_read_group(S390PCIBusDevice *pbdev,
                                struct vfio_device_info *info)
{
    VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    S390pciState *s = s390_get_phb();
    uint8_t start_gid = pbdev->zpci_fn.pfgid;
    struct vfio_device_info_cap_zpci_group *grp_cap;
    struct vfio_info_cap_header *hdr;
    ClpRspQueryPciGrp *grp;

    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);

    /*
     * Without the capability, or when the underlying host device is itself
     * simulated, simply use the default group.
     */
    if (hdr == NULL || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) {
        trace_s390_pci_clp_cap(vpci->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
        pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
        pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
        return;
    }
    grp_cap = (void *) hdr;

    /*
     * Intercept devices use a simulated group: an existing one if one was
     * already created for other intercept devices in this host group, else
     * a new one if any are still available, else the default group.
     */
    if (!pbdev->interp) {
        pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid);
        if (pbdev->pci_group) {
            /* Use existing simulated group */
            pbdev->zpci_fn.pfgid = pbdev->pci_group->id;
            return;
        }

        if (s->next_sim_grp == ZPCI_DEFAULT_FN_GRP) {
            /* All out of simulated groups, use default */
            trace_s390_pci_clp_cap(vpci->vbasedev.name,
                                   VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
            pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
            pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
            return;
        }

        /*
         * Claim the next simulated group id; the group itself is created
         * below from the CLP info.
         */
        pbdev->zpci_fn.pfgid = s->next_sim_grp++;
    }

    /* Nothing more to do if the PCI group is already defined */
    pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid);
    if (pbdev->pci_group) {
        return;
    }

    /* Otherwise create it and fill in the group response from the host */
    pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, start_gid);
    grp = &pbdev->pci_group->zpci_group;

    if (pbdev->rtr_avail) {
        grp->fr |= CLP_RSP_QPCIG_MASK_RTR;
    }
    if (grp_cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) {
        grp->fr |= CLP_RSP_QPCIG_MASK_REFRESH;
    }
    grp->dasm = grp_cap->dasm;
    grp->msia = grp_cap->msi_addr;
    grp->mui = grp_cap->mui;
    grp->i = grp_cap->noi;
    /* imaxstbl is only read for interpreted devices with capability v2+ */
    grp->maxstbl = (pbdev->interp && hdr->version >= 2) ? grp_cap->imaxstbl
                                                        : grp_cap->maxstbl;
    grp->version = grp_cap->version;
    grp->dtsm = ZPCI_DTSM;
}
261
/*
 * Copy the utility string from the VFIO_DEVICE_INFO_CAP_ZPCI_UTIL
 * capability in info into pbdev->zpci_fn and flag it as present.
 *
 * The defaults are left in place, with a trace event, when the capability
 * is missing or reports a size larger than CLP_UTIL_STR_LEN.
 *
 * NOTE(review): the copy always moves CLP_UTIL_STR_LEN bytes, even when
 * cap->size is smaller - confirm the host always supplies a full-length
 * string.
 */
static void s390_pci_read_util(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_device_info_cap_zpci_util *util;
    struct vfio_info_cap_header *hdr =
        vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);

    if (!hdr) {
        /* Capability not provided: keep the defaults */
        trace_s390_pci_clp_cap(vpci->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
        return;
    }
    util = (void *) hdr;

    if (util->size > CLP_UTIL_STR_LEN) {
        /* Host string does not fit into the CLP response field */
        trace_s390_pci_clp_cap_size(vpci->vbasedev.name, util->size,
                                    VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
        return;
    }

    pbdev->zpci_fn.flags |= CLP_RSP_QPCI_MASK_UTIL;
    memcpy(pbdev->zpci_fn.util_str, util->util_str, CLP_UTIL_STR_LEN);
}
288
/*
 * Copy the pfip data from the VFIO_DEVICE_INFO_CAP_ZPCI_PFIP capability
 * in info into pbdev->zpci_fn.pfip.
 *
 * The defaults are left in place, with a trace event, when the capability
 * is missing or reports a size larger than CLP_PFIP_NR_SEGMENTS.
 *
 * NOTE(review): the copy always moves CLP_PFIP_NR_SEGMENTS bytes, even when
 * cap->size is smaller - confirm the host always supplies that many.
 */
static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_device_info_cap_zpci_pfip *pfip_cap;
    struct vfio_info_cap_header *hdr =
        vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);

    if (!hdr) {
        /* Capability not provided: keep the defaults */
        trace_s390_pci_clp_cap(vpci->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
        return;
    }
    pfip_cap = (void *) hdr;

    if (pfip_cap->size > CLP_PFIP_NR_SEGMENTS) {
        /* Host data does not fit into the CLP response field */
        trace_s390_pci_clp_cap_size(vpci->vbasedev.name, pfip_cap->size,
                                    VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
        return;
    }

    memcpy(pbdev->zpci_fn.pfip, pfip_cap->pfip, CLP_PFIP_NR_SEGMENTS);
}
314
/*
 * Fetch the vfio device info for the vfio-pci device behind pbdev by
 * handing its device fd to vfio_get_device_info().  The callers in this
 * file treat a NULL result as failure and release the buffer via
 * g_autofree.
 */
static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev)
{
    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    VFIODevice *vbasedev = &vdev->vbasedev;

    return vfio_get_device_info(vbasedev->fd);
}
321
322 /*
323 * Get the host function handle from the vfio CLP capabilities chain. Returns
324 * true if a fh value was placed into the provided buffer. Returns false
325 * if a fh could not be obtained (ioctl failed or capability version does
326 * not include the fh)
327 */
s390_pci_get_host_fh(S390PCIBusDevice * pbdev,uint32_t * fh)328 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
329 {
330 g_autofree struct vfio_device_info *info = NULL;
331
332 assert(fh);
333
334 info = get_device_info(pbdev);
335 if (!info) {
336 return false;
337 }
338
339 return get_host_fh(pbdev, info, fh);
340 }
341
342 /*
343 * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for
344 * capabilities that contain information about CLP features provided by the
345 * underlying host.
346 * On entry, defaults have already been placed into the guest CLP response
347 * buffers. On exit, defaults will have been overwritten for any CLP features
348 * found in the capability chain; defaults will remain for any CLP features not
349 * found in the chain.
350 */
s390_pci_get_clp_info(S390PCIBusDevice * pbdev)351 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
352 {
353 g_autofree struct vfio_device_info *info = NULL;
354
355 info = get_device_info(pbdev);
356 if (!info) {
357 return;
358 }
359
360 /*
361 * Find the CLP features provided and fill in the guest CLP responses.
362 * Always call s390_pci_read_base first as information from this could
363 * determine which function group is used in s390_pci_read_group.
364 * For any feature not found, the default values will remain in the CLP
365 * response.
366 */
367 s390_pci_read_base(pbdev, info);
368 s390_pci_read_group(pbdev, info);
369 s390_pci_read_util(pbdev, info);
370 s390_pci_read_pfip(pbdev, info);
371 }
372