xref: /qemu/hw/s390x/s390-pci-vfio.c (revision cc3d262aa93a42e19c38f6acb6d0f6012a71eb4b)
1 /*
2  * s390 vfio-pci interfaces
3  *
4  * Copyright 2020 IBM Corp.
5  * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or (at
8  * your option) any later version. See the COPYING file in the top-level
9  * directory.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include <sys/ioctl.h>
15 #include <linux/vfio.h>
16 #include <linux/vfio_zdev.h>
17 
18 #include "trace.h"
19 #include "hw/s390x/s390-pci-bus.h"
20 #include "hw/s390x/s390-pci-clp.h"
21 #include "hw/s390x/s390-pci-vfio.h"
22 #include "hw/vfio/pci.h"
23 #include "hw/vfio/vfio-common.h"
24 
25 /*
26  * Get the current DMA available count from vfio.  Returns true if vfio is
27  * limiting DMA requests, false otherwise.  The current available count read
28  * from vfio is returned in avail.
29  */
30 bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
31 {
32     uint32_t argsz = sizeof(struct vfio_iommu_type1_info);
33     g_autofree struct vfio_iommu_type1_info *info = g_malloc0(argsz);
34 
35     assert(avail);
36 
37     /*
38      * If the specified argsz is not large enough to contain all capabilities
39      * it will be updated upon return from the ioctl.  Retry until we have
40      * a big enough buffer to hold the entire capability chain.
41      */
42 retry:
43     info->argsz = argsz;
44 
45     if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) {
46         return false;
47     }
48 
49     if (info->argsz > argsz) {
50         argsz = info->argsz;
51         info = g_realloc(info, argsz);
52         goto retry;
53     }
54 
55     /* If the capability exists, update with the current value */
56     return vfio_get_info_dma_avail(info, avail);
57 }
58 
59 S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
60                                           S390PCIBusDevice *pbdev)
61 {
62     S390PCIDMACount *cnt;
63     uint32_t avail;
64     VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
65     int id;
66 
67     assert(vpdev);
68 
69     if (!vpdev->vbasedev.group) {
70         return NULL;
71     }
72 
73     id = vpdev->vbasedev.group->container->fd;
74 
75     if (!s390_pci_update_dma_avail(id, &avail)) {
76         return NULL;
77     }
78 
79     QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
80         if (cnt->id  == id) {
81             cnt->users++;
82             return cnt;
83         }
84     }
85 
86     cnt = g_new0(S390PCIDMACount, 1);
87     cnt->id = id;
88     cnt->users = 1;
89     cnt->avail = avail;
90     QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
91     pbdev->iommu->max_dma_limit = avail;
92     return cnt;
93 }
94 
95 void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
96 {
97     assert(cnt);
98 
99     cnt->users--;
100     if (cnt->users == 0) {
101         QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
102     }
103 }
104 
105 static void s390_pci_read_base(S390PCIBusDevice *pbdev,
106                                struct vfio_device_info *info)
107 {
108     struct vfio_info_cap_header *hdr;
109     struct vfio_device_info_cap_zpci_base *cap;
110     VFIOPCIDevice *vpci =  container_of(pbdev->pdev, VFIOPCIDevice, pdev);
111     uint64_t vfio_size;
112 
113     hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
114 
115     /* If capability not provided, just leave the defaults in place */
116     if (hdr == NULL) {
117         trace_s390_pci_clp_cap(vpci->vbasedev.name,
118                                VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
119         return;
120     }
121     cap = (void *) hdr;
122 
123     pbdev->zpci_fn.sdma = cap->start_dma;
124     pbdev->zpci_fn.edma = cap->end_dma;
125     pbdev->zpci_fn.pchid = cap->pchid;
126     pbdev->zpci_fn.vfn = cap->vfn;
127     pbdev->zpci_fn.pfgid = cap->gid;
128     /* The following values remain 0 until we support other FMB formats */
129     pbdev->zpci_fn.fmbl = 0;
130     pbdev->zpci_fn.pft = 0;
131     /* Store function type separately for type-specific behavior */
132     pbdev->pft = cap->pft;
133 
134     /*
135      * If the device is a passthrough ISM device, disallow relaxed
136      * translation.
137      */
138     if (pbdev->pft == ZPCI_PFT_ISM) {
139         pbdev->rtr_avail = false;
140     }
141 
142     /*
143      * If appropriate, reduce the size of the supported DMA aperture reported
144      * to the guest based upon the vfio DMA limit.  This is applicable for
145      * devices that are guaranteed to not use relaxed translation.  If the
146      * device is capable of relaxed translation then we must advertise the
147      * full aperture.  In this case, if translation is used then we will
148      * rely on the vfio DMA limit counting and use RPCIT CC1 / status 16
149      * to request that the guest free DMA mappings as necessary.
150      */
151     if (!pbdev->rtr_avail) {
152         vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
153         if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
154             pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
155         }
156     }
157 }
158 
159 static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
160                         uint32_t *fh)
161 {
162     struct vfio_info_cap_header *hdr;
163     struct vfio_device_info_cap_zpci_base *cap;
164     VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
165 
166     hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
167 
168     /* Can only get the host fh with version 2 or greater */
169     if (hdr == NULL || hdr->version < 2) {
170         trace_s390_pci_clp_cap(vpci->vbasedev.name,
171                                VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
172         return false;
173     }
174     cap = (void *) hdr;
175 
176     *fh = cap->fh;
177     return true;
178 }
179 
180 static void s390_pci_read_group(S390PCIBusDevice *pbdev,
181                                 struct vfio_device_info *info)
182 {
183     struct vfio_info_cap_header *hdr;
184     struct vfio_device_info_cap_zpci_group *cap;
185     S390pciState *s = s390_get_phb();
186     ClpRspQueryPciGrp *resgrp;
187     VFIOPCIDevice *vpci =  container_of(pbdev->pdev, VFIOPCIDevice, pdev);
188     uint8_t start_gid = pbdev->zpci_fn.pfgid;
189 
190     hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
191 
192     /*
193      * If capability not provided or the underlying hostdev is simulated, just
194      * use the default group.
195      */
196     if (hdr == NULL || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) {
197         trace_s390_pci_clp_cap(vpci->vbasedev.name,
198                                VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
199         pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
200         pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
201         return;
202     }
203     cap = (void *) hdr;
204 
205     /*
206      * For an intercept device, let's use an existing simulated group if one
207      * one was already created for other intercept devices in this group.
208      * If not, create a new simulated group if any are still available.
209      * If all else fails, just fall back on the default group.
210      */
211     if (!pbdev->interp) {
212         pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid);
213         if (pbdev->pci_group) {
214             /* Use existing simulated group */
215             pbdev->zpci_fn.pfgid = pbdev->pci_group->id;
216             return;
217         } else {
218             if (s->next_sim_grp == ZPCI_DEFAULT_FN_GRP) {
219                 /* All out of simulated groups, use default */
220                 trace_s390_pci_clp_cap(vpci->vbasedev.name,
221                                        VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
222                 pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
223                 pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
224                 return;
225             } else {
226                 /* We can assign a new simulated group */
227                 pbdev->zpci_fn.pfgid = s->next_sim_grp;
228                 s->next_sim_grp++;
229                 /* Fall through to create the new sim group using CLP info */
230             }
231         }
232     }
233 
234     /* See if the PCI group is already defined, create if not */
235     pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid);
236 
237     if (!pbdev->pci_group) {
238         pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, start_gid);
239 
240         resgrp = &pbdev->pci_group->zpci_group;
241         if (pbdev->rtr_avail) {
242             resgrp->fr |= CLP_RSP_QPCIG_MASK_RTR;
243         }
244         if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) {
245             resgrp->fr |= CLP_RSP_QPCIG_MASK_REFRESH;
246         }
247         resgrp->dasm = cap->dasm;
248         resgrp->msia = cap->msi_addr;
249         resgrp->mui = cap->mui;
250         resgrp->i = cap->noi;
251         if (pbdev->interp && hdr->version >= 2) {
252             resgrp->maxstbl = cap->imaxstbl;
253         } else {
254             resgrp->maxstbl = cap->maxstbl;
255         }
256         resgrp->version = cap->version;
257         resgrp->dtsm = ZPCI_DTSM;
258     }
259 }
260 
261 static void s390_pci_read_util(S390PCIBusDevice *pbdev,
262                                struct vfio_device_info *info)
263 {
264     struct vfio_info_cap_header *hdr;
265     struct vfio_device_info_cap_zpci_util *cap;
266     VFIOPCIDevice *vpci =  container_of(pbdev->pdev, VFIOPCIDevice, pdev);
267 
268     hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
269 
270     /* If capability not provided, just leave the defaults in place */
271     if (hdr == NULL) {
272         trace_s390_pci_clp_cap(vpci->vbasedev.name,
273                                VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
274         return;
275     }
276     cap = (void *) hdr;
277 
278     if (cap->size > CLP_UTIL_STR_LEN) {
279         trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size,
280                                     VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
281         return;
282     }
283 
284     pbdev->zpci_fn.flags |= CLP_RSP_QPCI_MASK_UTIL;
285     memcpy(pbdev->zpci_fn.util_str, cap->util_str, CLP_UTIL_STR_LEN);
286 }
287 
288 static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
289                                struct vfio_device_info *info)
290 {
291     struct vfio_info_cap_header *hdr;
292     struct vfio_device_info_cap_zpci_pfip *cap;
293     VFIOPCIDevice *vpci =  container_of(pbdev->pdev, VFIOPCIDevice, pdev);
294 
295     hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
296 
297     /* If capability not provided, just leave the defaults in place */
298     if (hdr == NULL) {
299         trace_s390_pci_clp_cap(vpci->vbasedev.name,
300                                VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
301         return;
302     }
303     cap = (void *) hdr;
304 
305     if (cap->size > CLP_PFIP_NR_SEGMENTS) {
306         trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size,
307                                     VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
308         return;
309     }
310 
311     memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS);
312 }
313 
314 static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev)
315 {
316     VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
317 
318     return vfio_get_device_info(vfio_pci->vbasedev.fd);
319 }
320 
321 /*
322  * Get the host function handle from the vfio CLP capabilities chain.  Returns
323  * true if a fh value was placed into the provided buffer.  Returns false
324  * if a fh could not be obtained (ioctl failed or capability version does
325  * not include the fh)
326  */
327 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
328 {
329     g_autofree struct vfio_device_info *info = NULL;
330 
331     assert(fh);
332 
333     info = get_device_info(pbdev);
334     if (!info) {
335         return false;
336     }
337 
338     return get_host_fh(pbdev, info, fh);
339 }
340 
341 /*
342  * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for
343  * capabilities that contain information about CLP features provided by the
344  * underlying host.
345  * On entry, defaults have already been placed into the guest CLP response
346  * buffers.  On exit, defaults will have been overwritten for any CLP features
347  * found in the capability chain; defaults will remain for any CLP features not
348  * found in the chain.
349  */
350 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
351 {
352     g_autofree struct vfio_device_info *info = NULL;
353 
354     info = get_device_info(pbdev);
355     if (!info) {
356         return;
357     }
358 
359     /*
360      * Find the CLP features provided and fill in the guest CLP responses.
361      * Always call s390_pci_read_base first as information from this could
362      * determine which function group is used in s390_pci_read_group.
363      * For any feature not found, the default values will remain in the CLP
364      * response.
365      */
366     s390_pci_read_base(pbdev, info);
367     s390_pci_read_group(pbdev, info);
368     s390_pci_read_util(pbdev, info);
369     s390_pci_read_pfip(pbdev, info);
370 
371     return;
372 }
373