xref: /qemu/hw/s390x/s390-pci-vfio.c (revision 090c9641882da217e40936c98742749e4cc94130)
1 /*
2  * s390 vfio-pci interfaces
3  *
4  * Copyright 2020 IBM Corp.
5  * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or (at
8  * your option) any later version. See the COPYING file in the top-level
9  * directory.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include <sys/ioctl.h>
15 #include <linux/vfio.h>
16 #include <linux/vfio_zdev.h>
17 
18 #include "trace.h"
19 #include "hw/s390x/s390-pci-bus.h"
20 #include "hw/s390x/s390-pci-clp.h"
21 #include "hw/s390x/s390-pci-vfio.h"
22 #include "hw/vfio/pci.h"
23 #include "hw/vfio/vfio-container.h"
24 #include "hw/vfio/vfio-helpers.h"
25 
26 /*
27  * Get the current DMA available count from vfio.  Returns true if vfio is
28  * limiting DMA requests, false otherwise.  The current available count read
29  * from vfio is returned in avail.
30  */
s390_pci_update_dma_avail(int fd,unsigned int * avail)31 bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
32 {
33     uint32_t argsz = sizeof(struct vfio_iommu_type1_info);
34     g_autofree struct vfio_iommu_type1_info *info = g_malloc0(argsz);
35 
36     assert(avail);
37 
38     /*
39      * If the specified argsz is not large enough to contain all capabilities
40      * it will be updated upon return from the ioctl.  Retry until we have
41      * a big enough buffer to hold the entire capability chain.
42      */
43 retry:
44     info->argsz = argsz;
45 
46     if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) {
47         return false;
48     }
49 
50     if (info->argsz > argsz) {
51         argsz = info->argsz;
52         info = g_realloc(info, argsz);
53         goto retry;
54     }
55 
56     /* If the capability exists, update with the current value */
57     return vfio_get_info_dma_avail(info, avail);
58 }
59 
s390_pci_start_dma_count(S390pciState * s,S390PCIBusDevice * pbdev)60 S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
61                                           S390PCIBusDevice *pbdev)
62 {
63     S390PCIDMACount *cnt;
64     uint32_t avail;
65     VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
66     int id;
67 
68     assert(vpdev);
69 
70     if (!vpdev->vbasedev.group) {
71         return NULL;
72     }
73 
74     id = vpdev->vbasedev.group->container->fd;
75 
76     if (!s390_pci_update_dma_avail(id, &avail)) {
77         return NULL;
78     }
79 
80     QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
81         if (cnt->id  == id) {
82             cnt->users++;
83             return cnt;
84         }
85     }
86 
87     cnt = g_new0(S390PCIDMACount, 1);
88     cnt->id = id;
89     cnt->users = 1;
90     cnt->avail = avail;
91     QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
92     pbdev->iommu->max_dma_limit = avail;
93     return cnt;
94 }
95 
/*
 * Drop one user from a DMA limit counter obtained from
 * s390_pci_start_dma_count().
 *
 * When the last user goes away the counter is unlinked from
 * s->zpci_dma_limit and its memory is released, so the caller must not
 * touch cnt again after this call.
 */
void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
{
    assert(cnt);

    cnt->users--;
    if (cnt->users == 0) {
        QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
        /*
         * The counter was allocated with g_new0() when it was created;
         * nothing else references it once it is off the list, so free it
         * here to avoid leaking it.
         */
        g_free(cnt);
    }
}
105 
/*
 * Fill in the guest-visible function information from the vfio zPCI base
 * capability.  If the capability is missing from the chain, the values
 * already present in pbdev are left as they are.
 */
static void s390_pci_read_base(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_device_info_cap_zpci_base *base;
    struct vfio_info_cap_header *hdr;
    uint64_t dma_limit_bytes;
    uint64_t aperture;

    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
    if (!hdr) {
        /* Capability not provided, keep the defaults */
        trace_s390_pci_clp_cap(vdev->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
        return;
    }
    base = (struct vfio_device_info_cap_zpci_base *)hdr;

    pbdev->zpci_fn.sdma = base->start_dma;
    pbdev->zpci_fn.edma = base->end_dma;
    pbdev->zpci_fn.pchid = base->pchid;
    pbdev->zpci_fn.vfn = base->vfn;
    pbdev->zpci_fn.pfgid = base->gid;
    /* These stay 0 until other FMB formats are supported */
    pbdev->zpci_fn.fmbl = 0;
    pbdev->zpci_fn.pft = 0;
    /* Keep the real function type on the side for type-specific handling */
    pbdev->pft = base->pft;

    /* A passthrough ISM device must not use relaxed translation */
    if (pbdev->pft == ZPCI_PFT_ISM) {
        pbdev->rtr_avail = false;
    }

    /*
     * A device that may use relaxed translation must be shown the full DMA
     * aperture; in that case the vfio DMA limit counting and RPCIT CC1 /
     * status 16 are relied upon to make the guest free mappings as needed.
     */
    if (pbdev->rtr_avail) {
        return;
    }

    /*
     * The device is guaranteed not to use relaxed translation, so the
     * aperture reported to the guest can be shrunk to what the vfio DMA
     * limit allows.
     * NOTE(review): the shift is evaluated in the type of max_dma_limit;
     * confirm that field is 64 bits wide so large limits are not truncated.
     */
    dma_limit_bytes = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
    aperture = base->end_dma - base->start_dma + 1;
    if (dma_limit_bytes == 0 || dma_limit_bytes >= aperture) {
        return;
    }

    pbdev->zpci_fn.edma = base->start_dma + dma_limit_bytes - 1;
}
159 
/*
 * Extract the host function handle from the zPCI base capability in info.
 *
 * Returns true and stores the handle in *fh when the capability is present
 * with version 2 or later; otherwise emits a trace event and returns false
 * without touching *fh.
 */
static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
                        uint32_t *fh)
{
    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_info_cap_header *hdr =
        vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);

    /* The fh field only exists from capability version 2 onwards */
    if (!hdr || hdr->version < 2) {
        trace_s390_pci_clp_cap(vdev->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
        return false;
    }

    *fh = ((struct vfio_device_info_cap_zpci_base *)hdr)->fh;
    return true;
}
180 
/*
 * Select (and if necessary create) the PCI function group for pbdev, using
 * the vfio zPCI group capability when it is available.
 *
 * Must run after s390_pci_read_base(), since the group id it stored in
 * pbdev->zpci_fn.pfgid is the starting point here.  On return
 * pbdev->zpci_fn.pfgid and pbdev->pci_group describe the chosen group.
 */
static void s390_pci_read_group(S390PCIBusDevice *pbdev,
                                struct vfio_device_info *info)
{
    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    S390pciState *phb = s390_get_phb();
    /* Remember the group id we started with before it gets remapped */
    uint8_t host_gid = pbdev->zpci_fn.pfgid;
    struct vfio_device_info_cap_zpci_group *grp;
    struct vfio_info_cap_header *hdr;
    ClpRspQueryPciGrp *rsp;

    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);

    /*
     * Without the capability, or when the underlying host device is itself
     * simulated, the default group is used.
     */
    if (!hdr || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) {
        goto use_default_group;
    }
    grp = (struct vfio_device_info_cap_zpci_group *)hdr;

    /*
     * For an intercept device (pbdev->interp is false), reuse a simulated
     * group that was already created for other intercept devices of the
     * same host group.  Failing that, claim the next free simulated group
     * id, and if none are left fall back on the default group.
     */
    if (!pbdev->interp) {
        pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid);
        if (pbdev->pci_group) {
            /* An existing simulated group matches */
            pbdev->zpci_fn.pfgid = pbdev->pci_group->id;
            return;
        }

        if (phb->next_sim_grp == ZPCI_DEFAULT_FN_GRP) {
            /* No simulated group ids remain */
            goto use_default_group;
        }

        /* Take a new simulated group id; it is populated from CLP below */
        pbdev->zpci_fn.pfgid = phb->next_sim_grp;
        phb->next_sim_grp++;
    }

    /* Nothing more to do if the group has already been defined */
    pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid);
    if (pbdev->pci_group) {
        return;
    }

    /* Define the group and fill its CLP response from the capability */
    pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, host_gid);
    rsp = &pbdev->pci_group->zpci_group;

    if (pbdev->rtr_avail) {
        rsp->fr |= CLP_RSP_QPCIG_MASK_RTR;
    }
    if (grp->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) {
        rsp->fr |= CLP_RSP_QPCIG_MASK_REFRESH;
    }
    rsp->dasm = grp->dasm;
    rsp->msia = grp->msi_addr;
    rsp->mui = grp->mui;
    rsp->i = grp->noi;
    /* imaxstbl is only taken for interpreted devices with capability v2+ */
    if (pbdev->interp && hdr->version >= 2) {
        rsp->maxstbl = grp->imaxstbl;
    } else {
        rsp->maxstbl = grp->maxstbl;
    }
    rsp->version = grp->version;
    rsp->dtsm = ZPCI_DTSM;
    return;

use_default_group:
    trace_s390_pci_clp_cap(vdev->vbasedev.name,
                           VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
    pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
    pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
}
261 
/*
 * Copy the utility string from the vfio zPCI util capability into the
 * guest's CLP function response and flag it as valid.
 *
 * Nothing is changed if the capability is absent or if it reports a string
 * longer than CLP_UTIL_STR_LEN; a trace event is emitted in both cases.
 */
static void s390_pci_read_util(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_device_info_cap_zpci_util *cap;
    VFIOPCIDevice *vpci =  container_of(pbdev->pdev, VFIOPCIDevice, pdev);

    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);

    /* If capability not provided, just leave the defaults in place */
    if (hdr == NULL) {
        trace_s390_pci_clp_cap(vpci->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
        return;
    }
    cap = (void *) hdr;

    /* Refuse a string that would not fit into the response buffer */
    if (cap->size > CLP_UTIL_STR_LEN) {
        trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size,
                                    VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
        return;
    }

    pbdev->zpci_fn.flags |= CLP_RSP_QPCI_MASK_UTIL;
    /*
     * Copy only as many bytes as the capability says it carries.  Copying a
     * fixed CLP_UTIL_STR_LEN bytes would read past the end of the capability
     * whenever it reports a shorter string.  Any bytes of util_str beyond
     * cap->size keep their previous contents.
     */
    memcpy(pbdev->zpci_fn.util_str, cap->util_str, cap->size);
}
288 
/*
 * Copy the pfip data from the vfio zPCI pfip capability into the guest's
 * CLP function response.
 *
 * Nothing is changed if the capability is absent or if it reports more than
 * CLP_PFIP_NR_SEGMENTS entries; a trace event is emitted in both cases.
 */
static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_device_info_cap_zpci_pfip *cap;
    VFIOPCIDevice *vpci =  container_of(pbdev->pdev, VFIOPCIDevice, pdev);

    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);

    /* If capability not provided, just leave the defaults in place */
    if (hdr == NULL) {
        trace_s390_pci_clp_cap(vpci->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
        return;
    }
    cap = (void *) hdr;

    /* Refuse data that would not fit into the response buffer */
    if (cap->size > CLP_PFIP_NR_SEGMENTS) {
        trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size,
                                    VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
        return;
    }

    /*
     * Copy only as many bytes as the capability says it carries.  Copying a
     * fixed CLP_PFIP_NR_SEGMENTS bytes would read past the end of the
     * capability whenever it reports fewer.  Any bytes of pfip beyond
     * cap->size keep their previous contents.
     */
    memcpy(pbdev->zpci_fn.pfip, cap->pfip, cap->size);
}
314 
/*
 * Fetch the vfio device info for the vfio-pci device behind pbdev by
 * passing its device fd to vfio_get_device_info().  The result is returned
 * unchanged; callers in this file check it for NULL and release it with
 * g_autofree.
 */
static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev)
{
    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    int device_fd = vdev->vbasedev.fd;

    return vfio_get_device_info(device_fd);
}
321 
322 /*
323  * Get the host function handle from the vfio CLP capabilities chain.  Returns
324  * true if a fh value was placed into the provided buffer.  Returns false
325  * if a fh could not be obtained (ioctl failed or capability version does
326  * not include the fh)
327  */
s390_pci_get_host_fh(S390PCIBusDevice * pbdev,uint32_t * fh)328 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
329 {
330     g_autofree struct vfio_device_info *info = NULL;
331 
332     assert(fh);
333 
334     info = get_device_info(pbdev);
335     if (!info) {
336         return false;
337     }
338 
339     return get_host_fh(pbdev, info, fh);
340 }
341 
342 /*
343  * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for
344  * capabilities that contain information about CLP features provided by the
345  * underlying host.
346  * On entry, defaults have already been placed into the guest CLP response
347  * buffers.  On exit, defaults will have been overwritten for any CLP features
348  * found in the capability chain; defaults will remain for any CLP features not
349  * found in the chain.
350  */
s390_pci_get_clp_info(S390PCIBusDevice * pbdev)351 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
352 {
353     g_autofree struct vfio_device_info *info = NULL;
354 
355     info = get_device_info(pbdev);
356     if (!info) {
357         return;
358     }
359 
360     /*
361      * Find the CLP features provided and fill in the guest CLP responses.
362      * Always call s390_pci_read_base first as information from this could
363      * determine which function group is used in s390_pci_read_group.
364      * For any feature not found, the default values will remain in the CLP
365      * response.
366      */
367     s390_pci_read_base(pbdev, info);
368     s390_pci_read_group(pbdev, info);
369     s390_pci_read_util(pbdev, info);
370     s390_pci_read_pfip(pbdev, info);
371 }
372