1 /* 2 * pcie_sriov.c: 3 * 4 * Implementation of SR/IOV emulation support. 5 * 6 * Copyright (c) 2015-2017 Knut Omang <knut.omang@oracle.com> 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 * 11 */ 12 13 #include "qemu/osdep.h" 14 #include "hw/pci/pci_device.h" 15 #include "hw/pci/pcie.h" 16 #include "hw/pci/pci_bus.h" 17 #include "hw/qdev-properties.h" 18 #include "qemu/error-report.h" 19 #include "qemu/range.h" 20 #include "qapi/error.h" 21 #include "trace.h" 22 23 static PCIDevice *register_vf(PCIDevice *pf, int devfn, 24 const char *name, uint16_t vf_num); 25 static void unregister_vfs(PCIDevice *dev); 26 27 bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, 28 const char *vfname, uint16_t vf_dev_id, 29 uint16_t init_vfs, uint16_t total_vfs, 30 uint16_t vf_offset, uint16_t vf_stride, 31 Error **errp) 32 { 33 uint8_t *cfg = dev->config + offset; 34 uint8_t *wmask; 35 36 if (total_vfs) { 37 uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI); 38 uint16_t first_vf_devfn = dev->devfn + vf_offset; 39 uint16_t last_vf_devfn = first_vf_devfn + vf_stride * (total_vfs - 1); 40 41 if ((!ari_cap && PCI_SLOT(dev->devfn) != PCI_SLOT(last_vf_devfn)) || 42 last_vf_devfn >= PCI_DEVFN_MAX) { 43 error_setg(errp, "VF function number overflows"); 44 return false; 45 } 46 } 47 48 pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, 49 offset, PCI_EXT_CAP_SRIOV_SIZEOF); 50 dev->exp.sriov_cap = offset; 51 dev->exp.sriov_pf.num_vfs = 0; 52 dev->exp.sriov_pf.vfname = g_strdup(vfname); 53 dev->exp.sriov_pf.vf = NULL; 54 55 pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); 56 pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride); 57 58 /* 59 * Mandatory page sizes to support. 60 * Device implementations can call pcie_sriov_pf_add_sup_pgsize() 61 * to set more bits: 62 */ 63 pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, SRIOV_SUP_PGSIZE_MINREQ); 64 65 /* 66 * Default is to use 4K pages, software can modify it 67 * to any of the supported bits 68 */ 69 pci_set_word(cfg + PCI_SRIOV_SYS_PGSIZE, 0x1); 70 71 /* Set up device ID and initial/total number of VFs available */ 72 pci_set_word(cfg + PCI_SRIOV_VF_DID, vf_dev_id); 73 pci_set_word(cfg + PCI_SRIOV_INITIAL_VF, init_vfs); 74 pci_set_word(cfg + PCI_SRIOV_TOTAL_VF, total_vfs); 75 pci_set_word(cfg + PCI_SRIOV_NUM_VF, 0); 76 77 /* Write enable control bits */ 78 wmask = dev->wmask + offset; 79 pci_set_word(wmask + PCI_SRIOV_CTRL, 80 PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI); 81 pci_set_word(wmask + PCI_SRIOV_NUM_VF, 0xffff); 82 pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553); 83 84 qdev_prop_set_bit(&dev->qdev, "multifunction", true); 85 86 return true; 87 } 88 89 void pcie_sriov_pf_exit(PCIDevice *dev) 90 { 91 unregister_vfs(dev); 92 g_free((char *)dev->exp.sriov_pf.vfname); 93 dev->exp.sriov_pf.vfname = NULL; 94 } 95 96 void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, 97 uint8_t type, dma_addr_t size) 98 { 99 uint32_t addr; 100 uint64_t wmask; 101 uint16_t sriov_cap = dev->exp.sriov_cap; 102 103 assert(sriov_cap > 0); 104 assert(region_num >= 0); 105 assert(region_num < PCI_NUM_REGIONS); 106 assert(region_num != PCI_ROM_SLOT); 107 108 wmask = ~(size - 1); 109 addr = sriov_cap + PCI_SRIOV_BAR + region_num * 4; 110 111 pci_set_long(dev->config + addr, type); 112 if (!(type & PCI_BASE_ADDRESS_SPACE_IO) && 113 type & PCI_BASE_ADDRESS_MEM_TYPE_64) { 114 pci_set_quad(dev->wmask + addr, wmask); 115 pci_set_quad(dev->cmask + addr, ~0ULL); 116 } else { 117 pci_set_long(dev->wmask + addr, wmask & 0xffffffff); 118 pci_set_long(dev->cmask + addr, 0xffffffff); 119 } 120 dev->exp.sriov_pf.vf_bar_type[region_num] = type; 121 } 122 123 void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, 124 MemoryRegion *memory) 125 { 126 PCIIORegion *r; 127 PCIBus *bus = pci_get_bus(dev); 128 uint8_t type; 129 pcibus_t size = memory_region_size(memory); 130 131 assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ 132 assert(region_num >= 0); 133 assert(region_num < PCI_NUM_REGIONS); 134 type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; 135 136 if (!is_power_of_2(size)) { 137 error_report("%s: PCI region size must be a power" 138 " of two - type=0x%x, size=0x%"FMT_PCIBUS, 139 __func__, type, size); 140 exit(1); 141 } 142 143 r = &dev->io_regions[region_num]; 144 r->memory = memory; 145 r->address_space = 146 type & PCI_BASE_ADDRESS_SPACE_IO 147 ? bus->address_space_io 148 : bus->address_space_mem; 149 r->size = size; 150 r->type = type; 151 152 r->addr = pci_bar_address(dev, region_num, r->type, r->size); 153 if (r->addr != PCI_BAR_UNMAPPED) { 154 memory_region_add_subregion_overlap(r->address_space, 155 r->addr, r->memory, 1); 156 } 157 } 158 159 static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, 160 uint16_t vf_num) 161 { 162 PCIDevice *dev = pci_new(devfn, name); 163 dev->exp.sriov_vf.pf = pf; 164 dev->exp.sriov_vf.vf_number = vf_num; 165 PCIBus *bus = pci_get_bus(pf); 166 Error *local_err = NULL; 167 168 qdev_realize(&dev->qdev, &bus->qbus, &local_err); 169 if (local_err) { 170 error_report_err(local_err); 171 return NULL; 172 } 173 174 /* set vid/did according to sr/iov spec - they are not used */ 175 pci_config_set_vendor_id(dev->config, 0xffff); 176 pci_config_set_device_id(dev->config, 0xffff); 177 178 return dev; 179 } 180 181 static void register_vfs(PCIDevice *dev) 182 { 183 uint16_t num_vfs; 184 uint16_t i; 185 uint16_t sriov_cap = dev->exp.sriov_cap; 186 uint16_t vf_offset = 187 pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); 188 uint16_t vf_stride = 189 pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); 190 int32_t devfn = dev->devfn + vf_offset; 191 192 assert(sriov_cap > 0); 193 num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); 194 if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { 195 return; 196 } 197 198 dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); 199 200 trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), 201 PCI_FUNC(dev->devfn), num_vfs); 202 for (i = 0; i < num_vfs; i++) { 203 dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, 204 dev->exp.sriov_pf.vfname, i); 205 if (!dev->exp.sriov_pf.vf[i]) { 206 num_vfs = i; 207 break; 208 } 209 devfn += vf_stride; 210 } 211 dev->exp.sriov_pf.num_vfs = num_vfs; 212 } 213 214 static void unregister_vfs(PCIDevice *dev) 215 { 216 uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; 217 uint16_t i; 218 219 trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), 220 PCI_FUNC(dev->devfn), num_vfs); 221 for (i = 0; i < num_vfs; i++) { 222 PCIDevice *vf = dev->exp.sriov_pf.vf[i]; 223 object_unparent(OBJECT(vf)); 224 object_unref(OBJECT(vf)); 225 } 226 g_free(dev->exp.sriov_pf.vf); 227 dev->exp.sriov_pf.vf = NULL; 228 dev->exp.sriov_pf.num_vfs = 0; 229 } 230 231 void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, 232 uint32_t val, int len) 233 { 234 uint32_t off; 235 uint16_t sriov_cap = dev->exp.sriov_cap; 236 237 if (!sriov_cap || address < sriov_cap) { 238 return; 239 } 240 off = address - sriov_cap; 241 if (off >= PCI_EXT_CAP_SRIOV_SIZEOF) { 242 return; 243 } 244 245 trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn), 246 PCI_FUNC(dev->devfn), off, val, len); 247 248 if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { 249 if (dev->exp.sriov_pf.num_vfs) { 250 if (!(val & PCI_SRIOV_CTRL_VFE)) { 251 unregister_vfs(dev); 252 } 253 } else { 254 if (val & PCI_SRIOV_CTRL_VFE) { 255 register_vfs(dev); 256 } 257 } 258 } 259 } 260 261 262 /* Reset SR/IOV */ 263 void pcie_sriov_pf_reset(PCIDevice *dev) 264 { 265 uint16_t sriov_cap = dev->exp.sriov_cap; 266 if (!sriov_cap) { 267 return; 268 } 269 270 pci_set_word(dev->config + sriov_cap + PCI_SRIOV_CTRL, 0); 271 unregister_vfs(dev); 272 273 pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0); 274 275 /* 276 * Default is to use 4K pages, software can modify it 277 * to any of the supported bits 278 */ 279 pci_set_word(dev->config + sriov_cap + PCI_SRIOV_SYS_PGSIZE, 0x1); 280 281 for (uint16_t i = 0; i < PCI_NUM_REGIONS; i++) { 282 pci_set_quad(dev->config + sriov_cap + PCI_SRIOV_BAR + i * 4, 283 dev->exp.sriov_pf.vf_bar_type[i]); 284 } 285 } 286 287 /* Add optional supported page sizes to the mask of supported page sizes */ 288 void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) 289 { 290 uint8_t *cfg = dev->config + dev->exp.sriov_cap; 291 uint8_t *wmask = dev->wmask + dev->exp.sriov_cap; 292 293 uint16_t sup_pgsize = pci_get_word(cfg + PCI_SRIOV_SUP_PGSIZE); 294 295 sup_pgsize |= opt_sup_pgsize; 296 297 /* 298 * Make sure the new bits are set, and that system page size 299 * also can be set to any of the new values according to spec: 300 */ 301 pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, sup_pgsize); 302 pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, sup_pgsize); 303 } 304 305 306 uint16_t pcie_sriov_vf_number(PCIDevice *dev) 307 { 308 assert(pci_is_vf(dev)); 309 return dev->exp.sriov_vf.vf_number; 310 } 311 312 PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) 313 { 314 return dev->exp.sriov_vf.pf; 315 } 316 317 PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) 318 { 319 assert(!pci_is_vf(dev)); 320 if (n < dev->exp.sriov_pf.num_vfs) { 321 return dev->exp.sriov_pf.vf[n]; 322 } 323 return NULL; 324 } 325 326 uint16_t pcie_sriov_num_vfs(PCIDevice *dev) 327 { 328 return dev->exp.sriov_pf.num_vfs; 329 } 330