1 /*
2 * Copyright (c) 2021-2024 Oracle and/or its affiliates.
3 *
4 * This work is licensed under the terms of the GNU GPL, version 2 or later.
5 * See the COPYING file in the top-level directory.
6 */
7
8 #include "qemu/osdep.h"
9 #include "hw/vfio/vfio-device.h"
10 #include "hw/vfio/vfio-cpr.h"
11 #include "hw/vfio/pci.h"
12 #include "hw/pci/msix.h"
13 #include "hw/pci/msi.h"
14 #include "migration/cpr.h"
15 #include "qapi/error.h"
16 #include "system/runstate.h"
17
vfio_cpr_reboot_notifier(NotifierWithReturn * notifier,MigrationEvent * e,Error ** errp)18 int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
19 MigrationEvent *e, Error **errp)
20 {
21 if (e->type == MIG_EVENT_PRECOPY_SETUP &&
22 !runstate_check(RUN_STATE_SUSPENDED) && !vm_get_suspended()) {
23
24 error_setg(errp,
25 "VFIO device only supports cpr-reboot for runstate suspended");
26
27 return -1;
28 }
29 return 0;
30 }
31
/*
 * Build the per-device key under which a vector's fd is saved in CPR state:
 * "<device name>_<kind>"; the vector number is passed separately to the
 * cpr_*_fd helpers.  Caller owns the returned string (use g_autofree).
 */
#define STRDUP_VECTOR_FD_NAME(vdev, name)   \
    g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))
34
/* Preserve vector @nr's eventfd @fd in CPR state under the device's key. */
void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
                             int fd)
{
    g_autofree char *key = STRDUP_VECTOR_FD_NAME(vdev, name);

    cpr_save_fd(key, nr, fd);
}
41
/*
 * Look up vector @nr's preserved eventfd in CPR state.
 * Returns the fd, or a negative value if none was saved.
 */
int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
{
    g_autofree char *key = STRDUP_VECTOR_FD_NAME(vdev, name);

    return cpr_find_fd(key, nr);
}
47
/* Drop vector @nr's preserved eventfd from CPR state. */
void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
{
    g_autofree char *key = STRDUP_VECTOR_FD_NAME(vdev, name);

    cpr_delete_fd(key, nr);
}
53
/*
 * Rebuild MSI/MSI-X interrupt state after a CPR restart by reclaiming the
 * eventfds that were preserved in CPR state (see vfio_cpr_save_vector_fd),
 * so the device's interrupt wiring is restored without reprogramming it.
 */
static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
                                   bool msix)
{
    int i, fd;
    bool pending = false;
    PCIDevice *pdev = &vdev->pdev;

    vdev->nr_vectors = nr_vectors;
    vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
    vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;

    /* Batch virq additions; they take effect at the commit call below. */
    vfio_pci_prepare_kvm_msi_virq_batch(vdev);

    for (i = 0; i < nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        /* QEMU-side eventfd: if one was preserved, re-attach its handler. */
        fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
        if (fd >= 0) {
            vfio_pci_vector_init(vdev, i);
            vfio_pci_msi_set_handler(vdev, i);
        }

        /* KVM-bypass eventfd: re-add the virq, or mark the vector unused. */
        if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
            vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix);
        } else {
            vdev->msi_vectors[i].virq = -1;
        }

        /* Track masked-but-pending MSI-X vectors so the PBA reflects them. */
        if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
            set_bit(i, vdev->msix->pending);
            pending = true;
        }
    }

    vfio_pci_commit_kvm_msi_virq_batch(vdev);

    if (msix) {
        /* Expose the PBA region only while some masked vector is pending. */
        memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
    }
}
94
95 /*
96 * The kernel may change non-emulated config bits. Exclude them from the
97 * changed-bits check in get_pci_config_device.
98 */
vfio_cpr_pci_pre_load(void * opaque)99 static int vfio_cpr_pci_pre_load(void *opaque)
100 {
101 VFIOPCIDevice *vdev = opaque;
102 PCIDevice *pdev = &vdev->pdev;
103 int size = MIN(pci_config_size(pdev), vdev->config_size);
104 int i;
105
106 for (i = 0; i < size; i++) {
107 pdev->cmask[i] &= vdev->emulated_config_bits[i];
108 }
109
110 return 0;
111 }
112
/*
 * Restore the device's interrupt mode after incoming CPR state is loaded:
 * reclaim MSI-X or MSI vectors if either is enabled, otherwise re-enable
 * legacy INTx when the device reports an interrupt pin.
 * Returns 0 on success, -1 if INTx could not be re-enabled.
 */
static int vfio_cpr_pci_post_load(void *opaque, int version_id)
{
    VFIOPCIDevice *vdev = opaque;
    PCIDevice *pdev = &vdev->pdev;
    int nr_vectors;

    if (msix_enabled(pdev)) {
        /* Notifiers must be in place before the vectors are claimed. */
        vfio_pci_msix_set_notifiers(vdev);
        nr_vectors = vdev->msix->entries;
        vfio_cpr_claim_vectors(vdev, nr_vectors, true);

    } else if (msi_enabled(pdev)) {
        nr_vectors = msi_nr_vectors_allocated(pdev);
        vfio_cpr_claim_vectors(vdev, nr_vectors, false);

    } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
        /* Neither MSI nor MSI-X: fall back to legacy INTx. */
        Error *local_err = NULL;
        if (!vfio_pci_intx_enable(vdev, &local_err)) {
            error_report_err(local_err);
            return -1;
        }
    }

    return 0;
}
138
pci_msix_present(void * opaque,int version_id)139 static bool pci_msix_present(void *opaque, int version_id)
140 {
141 PCIDevice *pdev = opaque;
142
143 return msix_present(pdev);
144 }
145
/*
 * Subsection describing the INTx state that must survive a CPR restart:
 * the pending flag plus the KVM IRQ route (mode and irq number).
 */
static const VMStateDescription vfio_intx_vmstate = {
    .name = "vfio-cpr-intx",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(pending, VFIOINTx),
        VMSTATE_UINT32(route.mode, VFIOINTx),
        VMSTATE_INT32(route.irq, VFIOINTx),
        VMSTATE_END_OF_LIST()
    }
};
157
/* Embed a VFIOINTx struct field into a parent vmstate via vfio_intx_vmstate. */
#define VMSTATE_VFIO_INTX(_field, _state) {                         \
    .name       = (stringify(_field)),                              \
    .size       = sizeof(VFIOINTx),                                 \
    .vmsd       = &vfio_intx_vmstate,                               \
    .flags      = VMS_STRUCT,                                       \
    .offset     = vmstate_offset_value(_state, _field, VFIOINTx),   \
}
165
/*
 * Top-level vmstate for a VFIO PCI device across CPR: the PCI config space,
 * MSI-X state (only when present), and INTx state.  Only loaded for
 * incoming CPR migration (cpr_incoming_needed); pre/post-load hooks adjust
 * the config-compare mask and restore the interrupt mode.
 */
const VMStateDescription vfio_cpr_pci_vmstate = {
    .name = "vfio-cpr-pci",
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = vfio_cpr_pci_pre_load,
    .post_load = vfio_cpr_pci_post_load,
    .needed = cpr_incoming_needed,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
        VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present),
        VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
        VMSTATE_END_OF_LIST()
    }
};
180
181 static NotifierWithReturn kvm_close_notifier;
182
vfio_cpr_kvm_close_notifier(NotifierWithReturn * notifier,MigrationEvent * e,Error ** errp)183 static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier,
184 MigrationEvent *e,
185 Error **errp)
186 {
187 if (e->type == MIG_EVENT_PRECOPY_DONE) {
188 vfio_kvm_device_close();
189 }
190 return 0;
191 }
192
vfio_cpr_add_kvm_notifier(void)193 void vfio_cpr_add_kvm_notifier(void)
194 {
195 if (!kvm_close_notifier.notify) {
196 migration_add_notifier_mode(&kvm_close_notifier,
197 vfio_cpr_kvm_close_notifier,
198 MIG_MODE_CPR_TRANSFER);
199 }
200 }
201