1 /*
2 * Multifd common functions
3 *
4 * Copyright (c) 2019-2020 Red Hat Inc
5 *
6 * Authors:
7 * Juan Quintela <quintela@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #ifndef QEMU_MIGRATION_MULTIFD_H
14 #define QEMU_MIGRATION_MULTIFD_H
15
16 #include "exec/target_page.h"
17 #include "ram.h"
18
/*
 * Forward declarations.  The structures themselves are defined further
 * down in this header; naming them early lets declarations that come
 * before the definitions (e.g. multifd_get_recv_data()) use pointers to
 * them.
 */
typedef struct MultiFDRecvData MultiFDRecvData;
typedef struct MultiFDSendData MultiFDSendData;
21
/* Kind of synchronisation that can be requested via multifd_send_sync_main() */
typedef enum {
    /* Nothing to synchronise */
    MULTIFD_SYNC_NONE = 0,
    /* The sender threads sync locally; no message is pushed to the wire */
    MULTIFD_SYNC_LOCAL = 1,
    /*
     * On top of syncing the sender threads, a MULTIFD_FLAG_SYNC message is
     * pushed to the wire on every iochannel (i.e. a remote sync).
     *
     * A remote sync has to be paired with a follow up
     * RAM_SAVE_FLAG_EOS / RAM_SAVE_FLAG_MULTIFD_FLUSH message on the main
     * channel.
     */
    MULTIFD_SYNC_ALL = 2,
} MultiFDSyncReq;
37
/*
 * Core multifd entry points (multifd_send_* for the sending side,
 * multifd_recv_* for the receiving side).  Only the prototypes are
 * visible in this header; see the implementation for the exact
 * semantics of each call.
 */
bool multifd_send_setup(void);
void multifd_send_shutdown(void);
void multifd_send_channel_created(void);
int multifd_recv_setup(Error **errp);
void multifd_recv_cleanup(void);
void multifd_recv_shutdown(void);
bool multifd_recv_all_channels_created(void);
void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
/* @req selects the kind of sync, see MultiFDSyncReq */
int multifd_send_sync_main(MultiFDSyncReq req);
/* NOTE(review): presumably queues the page at @offset of @block - confirm */
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
bool multifd_recv(void);
MultiFDRecvData *multifd_get_recv_data(void);
51
/* Multiple fd's */

/*
 * NOTE(review): presumably the values stored in the magic and version
 * fields of MultiFDPacketHdr_t - confirm against the packet fill/unfill
 * code.
 */
#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

/*
 * Multifd packet flags.
 *
 * Bit usage, as given by the definitions below:
 *   bit 0     - MULTIFD_FLAG_SYNC
 *   bits 1..5 - compression method (MULTIFD_FLAG_COMPRESSION_MASK)
 *   bit 6     - MULTIFD_FLAG_DEVICE_STATE
 */
#define MULTIFD_FLAG_SYNC (1 << 0)

/* We reserve 5 bits for compression methods */
#define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1)
/*
 * We need to stay compatible: before compression was introduced this
 * value was 0, so "no compression" remains 0.  Each of the other methods
 * below uses exactly one bit inside the mask.
 */
#define MULTIFD_FLAG_NOCOMP (0 << 1)
#define MULTIFD_FLAG_ZLIB (1 << 1)
#define MULTIFD_FLAG_ZSTD (2 << 1)
#define MULTIFD_FLAG_QPL (4 << 1)
#define MULTIFD_FLAG_UADK (8 << 1)
#define MULTIFD_FLAG_QATZIP (16 << 1)

/*
 * If set it means that this packet contains device state
 * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t).
 */
#define MULTIFD_FLAG_DEVICE_STATE (32 << 1)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)
78
/*
 * Header placed at the start of both packet layouts defined in this file:
 * MultiFDPacket_t and MultiFDPacketDeviceState_t embed it as their first
 * member.  The structure is packed, so the three 32-bit fields follow each
 * other without padding.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    /* NOTE(review): presumably a combination of MULTIFD_FLAG_* - confirm */
    uint32_t flags;
} __attribute__((packed)) MultiFDPacketHdr_t;
84
/*
 * Packet describing RAM pages.  It starts with the common header and ends
 * with a flexible array member, so its size depends on how many entries
 * of offset[] are allocated.  The structure is packed: field order and
 * widths must not be changed.
 */
typedef struct {
    MultiFDPacketHdr_t hdr;

    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    /* non zero pages */
    uint32_t normal_pages;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    /* zero pages */
    uint32_t zero_pages;
    uint32_t unused32[1];    /* Reserved for future use */
    uint64_t unused64[3];    /* Reserved for future use */
    /* NOTE(review): presumably the name of the RAMBlock involved - confirm */
    char ramblock[256];
    /*
     * This array contains one entry per page, in this order:
     *  - normal pages (initial normal_pages entries)
     *  - zero pages (following zero_pages entries)
     */
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
107
/*
 * Packet layout used for device state rather than RAM data (see the
 * comment on MULTIFD_FLAG_DEVICE_STATE).  It starts with the common header
 * and, being packed, must keep its field order and widths.
 */
typedef struct {
    MultiFDPacketHdr_t hdr;

    /* NOTE(review): idstr/instance_id presumably identify the device - confirm */
    char idstr[256];
    uint32_t instance_id;

    /* size of the next packet that contains the actual data */
    uint32_t next_packet_size;
} __attribute__((packed)) MultiFDPacketDeviceState_t;
117
/*
 * RAM payload: a set of pages that all belong to one RAMBlock, each
 * described by an entry in the offset array.
 */
typedef struct {
    /* number of used pages */
    uint32_t num;
    /* number of normal pages */
    uint32_t normal_num;
    /*
     * Pointer to the ramblock.  NOTE: it's caller's responsibility to make
     * sure the pointer is always valid!
     */
    RAMBlock *block;
    /* offset array of each page, managed by multifd */
    ram_addr_t *offset;
} MultiFDPages_t;
131
/*
 * Receive-side data descriptor, handed out by multifd_get_recv_data().
 * NOTE(review): the exact meaning of opaque/size is not visible in this
 * header - presumably a destination buffer and its length; confirm
 * against the users.
 */
struct MultiFDRecvData {
    void *opaque;
    size_t size;
    /* for preadv */
    off_t file_offset;
};
138
/*
 * Device state payload: an id string, an instance id and a data buffer
 * together with its length.
 * NOTE(review): ownership of idstr and buf is not visible here; see
 * multifd_send_data_clear_device_state() for how they are released.
 */
typedef struct {
    char *idstr;
    uint32_t instance_id;
    char *buf;
    /* length of buf */
    size_t buf_len;
} MultiFDDeviceState_t;
145
/* Discriminator stored in MultiFDSendData.type */
typedef enum {
    /* no payload is set */
    MULTIFD_PAYLOAD_NONE = 0,
    /* RAM pages */
    MULTIFD_PAYLOAD_RAM = 1,
    /* device state */
    MULTIFD_PAYLOAD_DEVICE_STATE = 2,
} MultiFDPayloadType;
151
/*
 * Container for the possible payloads.  Note that this is a struct, not a
 * union: the RAM and the device state members each have their own
 * storage and do not overlap.
 */
typedef struct MultiFDPayload {
    MultiFDPages_t ram;
    MultiFDDeviceState_t device_state;
} MultiFDPayload;
156
/*
 * Unit of data on the sending side: a payload container plus a type tag.
 * The tag is read by multifd_payload_empty() and
 * multifd_payload_device_state() and written by
 * multifd_set_payload_type().
 */
struct MultiFDSendData {
    /* MULTIFD_PAYLOAD_NONE while no payload is set */
    MultiFDPayloadType type;
    MultiFDPayload u;
};
161
/* Returns true when @data has no payload type set (MULTIFD_PAYLOAD_NONE) */
static inline bool multifd_payload_empty(MultiFDSendData *data)
{
    const MultiFDPayloadType current = data->type;

    return current == MULTIFD_PAYLOAD_NONE;
}
166
/* Returns true when @data is tagged as MULTIFD_PAYLOAD_DEVICE_STATE */
static inline bool multifd_payload_device_state(MultiFDSendData *data)
{
    const MultiFDPayloadType current = data->type;

    return current == MULTIFD_PAYLOAD_DEVICE_STATE;
}
171
/*
 * Tag @data with payload type @type.
 *
 * Both preconditions are enforced with assert(): @data must not have a
 * payload type set yet, and @type must be a real payload type, i.e. this
 * helper cannot be used to reset @data back to MULTIFD_PAYLOAD_NONE.
 */
static inline void multifd_set_payload_type(MultiFDSendData *data,
                                            MultiFDPayloadType type)
{
    /* only an empty slot may be claimed */
    assert(multifd_payload_empty(data));
    /* "none" is not a type that can be set through this helper */
    assert(type != MULTIFD_PAYLOAD_NONE);

    data->type = type;
}
180
/*
 * State of one multifd channel on the sending side.  This is the structure
 * handed to the send_* hooks of MultiFDMethods.
 */
typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* NOTE(review): presumably set once @thread has been created - confirm */
    bool thread_created;
    /* NOTE(review): presumably the thread used for TLS setup - confirm */
    QemuThread tls_thread;
    bool tls_thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;
    /* multifd flags for sending ram; may be used to pass flags to the QIOChannel */
    int write_flags;

    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;

    /* multifd flags for each packet */
    uint32_t flags;
    /*
     * The sender thread has work to do if either of below field is set.
     *
     * @pending_job:  a job is pending
     * @pending_sync: a sync request is pending
     *
     * For both of these fields, they're only set by the requesters, and
     * cleared by the multifd sender threads.
     */
    bool pending_job;
    MultiFDSyncReq pending_sync;

    /* NOTE(review): presumably the data this channel has to send - confirm */
    MultiFDSendData *data;

    /* thread local variables. No locking required */

    /* pointers to the possible packet types */
    MultiFDPacket_t *packet;
    MultiFDPacketDeviceState_t *packet_device_state;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets sent through this channel */
    uint64_t packets_sent;
    /* buffers to send */
    struct iovec *iov;
    /* number of iovs used */
    uint32_t iovs_num;
    /* used for compression methods */
    void *compress_data;
} MultiFDSendParams;
238
/*
 * State of one multifd channel on the receiving side.  This is the
 * structure handed to the recv_* hooks of MultiFDMethods.
 */
typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* NOTE(review): presumably set once @thread has been created - confirm */
    bool thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;

    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* sem where to wait for more work */
    QemuSemaphore sem;

    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* should this thread finish */
    bool quit;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* NOTE(review): an int here, while the send side uses a bool - confirm usage */
    int pending_job;
    MultiFDRecvData *data;

    /* thread local variables. No locking required */

    /* pointers to the possible packet types */
    MultiFDPacket_t *packet;
    MultiFDPacketDeviceState_t *packet_dev_state;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets received through this channel */
    uint64_t packets_recved;
    /* ramblock */
    RAMBlock *block;
    /* ramblock host address */
    uint8_t *host;
    /* buffers to recv */
    struct iovec *iov;
    /* Pages that are not zero */
    ram_addr_t *normal;
    /* num of non zero pages */
    uint32_t normal_num;
    /* Pages that are zero */
    ram_addr_t *zero;
    /* num of zero pages */
    uint32_t zero_num;
    /* used for de-compression methods */
    void *compress_data;
    /* Flags for the QIOChannel */
    int read_flags;
} MultiFDRecvParams;
299
/*
 * Table of hooks implementing one multifd method.  A table is registered
 * for a given method number with multifd_register_ops().
 */
typedef struct {
    /*
     * The send_setup, send_cleanup, send_prepare are only called on
     * the QEMU instance at the migration source.
     */

    /*
     * Setup for sending side. Called once per channel during channel
     * setup phase.
     *
     * Must allocate p->iov. If packets are in use (default), one
     * extra iovec must be allocated for the packet header. Any memory
     * allocated in this hook must be released at send_cleanup.
     *
     * p->write_flags may be used for passing flags to the QIOChannel.
     *
     * p->compress_data may be used by compression methods to store
     * compression data.
     */
    int (*send_setup)(MultiFDSendParams *p, Error **errp);

    /*
     * Cleanup for sending side. Called once per channel during
     * channel cleanup phase.
     */
    void (*send_cleanup)(MultiFDSendParams *p, Error **errp);

    /*
     * Prepare the send packet. Called as a result of multifd_send()
     * on the client side, with p pointing to the MultiFDSendParams of
     * a channel that is currently idle.
     *
     * Must populate p->iov with the data to be sent, increment
     * p->iovs_num to match the amount of iovecs used and set
     * p->next_packet_size with the amount of data currently present
     * in p->iov.
     *
     * Must indicate whether this is a compression packet by setting
     * p->flags.
     *
     * As a last step, if packets are in use (default), must prepare
     * the packet by calling multifd_send_fill_packet().
     */
    int (*send_prepare)(MultiFDSendParams *p, Error **errp);

    /*
     * The recv_setup, recv_cleanup, recv are only called on the QEMU
     * instance at the migration destination.
     */

    /*
     * Setup for receiving side. Called once per channel during
     * channel setup phase. May be empty.
     *
     * May allocate data structures for the receiving of data. May use
     * p->iov. Compression methods may use p->compress_data.
     */
    int (*recv_setup)(MultiFDRecvParams *p, Error **errp);

    /*
     * Cleanup for receiving side. Called once per channel during
     * channel cleanup phase. May be empty.
     *
     * Unlike send_cleanup, this hook takes no Error ** argument.
     */
    void (*recv_cleanup)(MultiFDRecvParams *p);

    /*
     * Data receive method. Called as a result of multifd_recv() on
     * the client side, with p pointing to the MultiFDRecvParams of a
     * channel that is currently idle. Only called if there is data
     * available to receive.
     *
     * Must validate p->flags according to what was set at
     * send_prepare.
     *
     * Must read the data from the QIOChannel p->c.
     */
    int (*recv)(MultiFDRecvParams *p, Error **errp);
} MultiFDMethods;
378
/*
 * Helpers for multifd method implementations and for users of the send
 * path.  Only the prototypes are visible in this header; see the
 * implementation for the exact semantics.
 */

/* Registers the hook table @ops for method number @method */
void multifd_register_ops(int method, const MultiFDMethods *ops);
/* To be called as the last step of send_prepare when packets are in use */
void multifd_send_fill_packet(MultiFDSendParams *p);
bool multifd_send_prepare_common(MultiFDSendParams *p);
void multifd_send_zero_page_detect(MultiFDSendParams *p);
void multifd_recv_zero_page_process(MultiFDRecvParams *p);

void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
/*
 * NOTE(review): takes a pointer to the caller's MultiFDSendData pointer,
 * so the callee is able to replace it - confirm against the
 * implementation.
 */
bool multifd_send(MultiFDSendData **send_data);
MultiFDSendData *multifd_send_data_alloc(void);
void multifd_send_data_clear(MultiFDSendData *data);
void multifd_send_data_free(MultiFDSendData *data);
390
/* Page size used for multifd RAM: the target page size, as a uint32_t */
static inline uint32_t multifd_ram_page_size(void)
{
    uint32_t page_size = qemu_target_page_size();

    return page_size;
}
395
multifd_ram_page_count(void)396 static inline uint32_t multifd_ram_page_count(void)
397 {
398 return MULTIFD_PACKET_SIZE / qemu_target_page_size();
399 }
400
/*
 * RAM specific part of multifd.  Only the prototypes are visible in this
 * header; see the implementation for the exact semantics.
 */
void multifd_ram_save_setup(void);
void multifd_ram_save_cleanup(void);
int multifd_ram_flush_and_sync(QEMUFile *f);
bool multifd_ram_sync_per_round(void);
bool multifd_ram_sync_per_section(void);
/* Allocation / release helpers for a MultiFDPages_t payload */
void multifd_ram_payload_alloc(MultiFDPages_t *pages);
void multifd_ram_payload_free(MultiFDPages_t *pages);
void multifd_ram_fill_packet(MultiFDSendParams *p);
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);

/* Device state specific part of multifd */
void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state);

void multifd_device_state_send_setup(void);
void multifd_device_state_send_cleanup(void);

void multifd_device_state_send_prepare(MultiFDSendParams *p);
417
418 #endif
419