xref: /qemu/migration/multifd.h (revision 15606965400b8f3038d6e85cfe5956d5a6ac33a1)
1 /*
2  * Multifd common functions
3  *
4  * Copyright (c) 2019-2020 Red Hat Inc
5  *
6  * Authors:
7  *  Juan Quintela <quintela@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #ifndef QEMU_MIGRATION_MULTIFD_H
14 #define QEMU_MIGRATION_MULTIFD_H
15 
16 #include "exec/target_page.h"
17 #include "ram.h"
18 
/*
 * Forward declarations so the prototypes below can use pointers to these
 * types; both structs are fully defined further down in this header.
 */
19 typedef struct MultiFDRecvData MultiFDRecvData;
20 typedef struct MultiFDSendData MultiFDSendData;
21 
/*
 * Kind of synchronization being requested from the multifd sender threads.
 *
 * This is the argument type of multifd_send_sync_main() and the type of
 * MultiFDSendParams.pending_sync.
 */
22 typedef enum {
23     /* No sync request */
24     MULTIFD_SYNC_NONE = 0,
25     /* Sync locally on the sender threads without pushing messages */
26     MULTIFD_SYNC_LOCAL,
27     /*
28      * Sync not only on the sender threads, but also push MULTIFD_FLAG_SYNC
29      * message to the wire for each iochannel (which is for a remote sync).
30      *
31      * When remote sync is used, need to be paired with a follow up
32      * RAM_SAVE_FLAG_EOS / RAM_SAVE_FLAG_MULTIFD_FLUSH message on the main
33      * channel.
34      */
35     MULTIFD_SYNC_ALL,
36 } MultiFDSyncReq;
37 
/*
 * Multifd channel lifecycle and data-path entry points.
 *
 * Only the prototypes are visible here; the definitions live outside this
 * header.
 * NOTE(review): the success/failure convention of the bool- and int-returning
 * functions is not stated in this header -- confirm against the
 * implementation before relying on it.
 */
38 bool multifd_send_setup(void);
39 void multifd_send_shutdown(void);
40 void multifd_send_channel_created(void);
41 int multifd_recv_setup(Error **errp);
42 void multifd_recv_cleanup(void);
43 void multifd_recv_shutdown(void);
44 bool multifd_recv_all_channels_created(void);
45 void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
46 void multifd_recv_sync_main(void);
/* @req selects the kind of sync, see MultiFDSyncReq */
47 int multifd_send_sync_main(MultiFDSyncReq req);
48 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
49 bool multifd_recv(void);
50 MultiFDRecvData *multifd_get_recv_data(void);
51 
/*
 * Multifd packet flags.
 *
 * Bit layout, as defined by the values below:
 *   bit 0      MULTIFD_FLAG_SYNC
 *   bits 1-5   compression method (MULTIFD_FLAG_COMPRESSION_MASK, 0x3e)
 *   bit 6      MULTIFD_FLAG_DEVICE_STATE
 */
52 /* Sync flag (not a compression flag; see MULTIFD_SYNC_ALL) */
53 #define MULTIFD_FLAG_SYNC (1 << 0)
54 
55 /* We reserve 5 bits for compression methods */
56 #define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1)
57 /* we need to be compatible. Before compression value was 0 */
58 #define MULTIFD_FLAG_NOCOMP (0 << 1)
/*
 * Each method below occupies its own bit inside the 5-bit field, so all
 * five reserved bits are currently in use.
 */
59 #define MULTIFD_FLAG_ZLIB (1 << 1)
60 #define MULTIFD_FLAG_ZSTD (2 << 1)
61 #define MULTIFD_FLAG_QPL (4 << 1)
62 #define MULTIFD_FLAG_UADK (8 << 1)
63 #define MULTIFD_FLAG_QATZIP (16 << 1)
64 
65 /*
66  * If set it means that this packet contains device state
67  * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t).
 *
 * The value (32 << 1) is the first bit above the compression mask.
68  */
69 #define MULTIFD_FLAG_DEVICE_STATE (32 << 1)
70 
/*
 * Size in bytes (512 KiB) used by multifd_ram_page_count() to derive how
 * many target pages fit in one packet.
 */
71 /* This value needs to be a multiple of qemu_target_page_size() */
72 #define MULTIFD_PACKET_SIZE (512 * 1024)
73 
/*
 * Header common to both packet layouts: it is the first member of
 * MultiFDPacket_t and of MultiFDPacketDeviceState_t.
 *
 * Packed, three 32-bit fields (12 bytes); field order must not change.
 */
74 typedef struct {
75     uint32_t magic;
76     uint32_t version;
    /* MULTIFD_FLAG_* bits */
77     uint32_t flags;
78 } __attribute__((packed)) MultiFDPacketHdr_t;
79 
/*
 * Packet layout for RAM data (the layout used when
 * MULTIFD_FLAG_DEVICE_STATE is not set in hdr.flags).
 *
 * Packed structure ending in a flexible array member; field order and
 * widths must not change.
 */
80 typedef struct {
81     MultiFDPacketHdr_t hdr;
82 
83     /* maximum number of allocated pages */
84     uint32_t pages_alloc;
85     /* non zero pages */
86     uint32_t normal_pages;
87     /* size of the next packet that contains pages */
88     uint32_t next_packet_size;
89     uint64_t packet_num;
90     /* zero pages */
91     uint32_t zero_pages;
92     uint32_t unused32[1];    /* Reserved for future use */
93     uint64_t unused64[3];    /* Reserved for future use */
94     char ramblock[256];
95     /*
96      * This array contains the pointers to:
97      *  - normal pages (initial normal_pages entries)
98      *  - zero pages (following zero_pages entries)
     *
     * NOTE(review): the entries are 64-bit values in a field named
     * "offset", so "pointers" presumably means page offsets rather than
     * host addresses -- confirm in multifd_ram_fill_packet().
99      */
100     uint64_t offset[];
101 } __attribute__((packed)) MultiFDPacket_t;
102 
/*
 * Packet layout for device state (the layout used when
 * MULTIFD_FLAG_DEVICE_STATE is set in hdr.flags).
 *
 * Packed; field order and widths must not change.
 */
103 typedef struct {
104     MultiFDPacketHdr_t hdr;
105 
    /* NOTE(review): presumably identifies the device, together with
     * instance_id -- confirm against the sender side. */
106     char idstr[256];
107     uint32_t instance_id;
108 
109     /* size of the next packet that contains the actual data */
110     uint32_t next_packet_size;
111 } __attribute__((packed)) MultiFDPacketDeviceState_t;
112 
/*
 * RAM payload: a set of pages from one ramblock.
 *
 * This is the "ram" member of MultiFDPayload.
 */
113 typedef struct {
114     /* number of used pages */
115     uint32_t num;
116     /* number of normal pages */
117     uint32_t normal_num;
118     /*
119      * Pointer to the ramblock.  NOTE: it's caller's responsibility to make
120      * sure the pointer is always valid!
121      */
122     RAMBlock *block;
123     /* offset array of each page, managed by multifd */
124     ram_addr_t *offset;
125 } MultiFDPages_t;
126 
/*
 * Receive-side data descriptor; a pointer to one is returned by
 * multifd_get_recv_data() and one is referenced from
 * MultiFDRecvParams.data.
 */
127 struct MultiFDRecvData {
    /* NOTE(review): meaning of opaque/size is not visible in this header;
     * presumably a destination buffer and its length -- confirm. */
128     void *opaque;
129     size_t size;
130     /* for preadv */
131     off_t file_offset;
132 };
133 
/*
 * Device state payload.
 *
 * This is the "device_state" member of MultiFDPayload.
 * NOTE(review): idstr/instance_id presumably correspond to the same-named
 * fields of MultiFDPacketDeviceState_t, and buf/buf_len to the data that
 * follows the packet -- confirm in the implementation.
 */
134 typedef struct {
135     char *idstr;
136     uint32_t instance_id;
137     char *buf;
138     size_t buf_len;
139 } MultiFDDeviceState_t;
140 
/*
 * Tag stored in MultiFDSendData.type saying which payload, if any, is
 * currently set.
 */
141 typedef enum {
    /* No payload; first enumerator, so its value is 0 */
142     MULTIFD_PAYLOAD_NONE,
    /* RAM pages (MultiFDPayload.ram) */
143     MULTIFD_PAYLOAD_RAM,
    /* Device state (MultiFDPayload.device_state) */
144     MULTIFD_PAYLOAD_DEVICE_STATE,
145 } MultiFDPayloadType;
146 
/*
 * Storage for the possible payloads of a MultiFDSendData.
 *
 * This is a struct, not a union: each member has its own storage.
 * MultiFDSendData.type records which payload is currently set.
 */
147 typedef struct MultiFDPayload {
148     MultiFDPages_t ram;
149     MultiFDDeviceState_t device_state;
150 } MultiFDPayload;
151 
/*
 * Unit of work handed to the send side: a payload type tag plus the
 * payload storage.
 */
152 struct MultiFDSendData {
    /* MULTIFD_PAYLOAD_NONE when empty; set via multifd_set_payload_type() */
153     MultiFDPayloadType type;
154     MultiFDPayload u;
155 };
156 
157 static inline bool multifd_payload_empty(MultiFDSendData *data)
158 {
159     return data->type == MULTIFD_PAYLOAD_NONE;
160 }
161 
162 static inline bool multifd_payload_device_state(MultiFDSendData *data)
163 {
164     return data->type == MULTIFD_PAYLOAD_DEVICE_STATE;
165 }
166 
/*
 * Tag @data with the payload type it is about to carry.
 *
 * Asserts that @data is currently empty (type MULTIFD_PAYLOAD_NONE) and
 * that @type is not MULTIFD_PAYLOAD_NONE, then stores @type in data->type.
 * Both preconditions are enforced with assert().
 */
167 static inline void multifd_set_payload_type(MultiFDSendData *data,
168                                             MultiFDPayloadType type)
169 {
170     assert(multifd_payload_empty(data));
171     assert(type != MULTIFD_PAYLOAD_NONE);
172 
173     data->type = type;
174 }
175 
/*
 * Per-channel state of a multifd send channel.
 *
 * The fields are grouped, in order, into: fields written only at
 * creation/deletion time, the semaphores, the job/sync request fields
 * shared between requesters and the sender thread, and finally the fields
 * local to the sender thread.
 */
176 typedef struct {
177     /* Fields are only written at creating/deletion time */
178     /* No lock required for them, they are read only */
179 
180     /* channel number */
181     uint8_t id;
182     /* channel thread name */
183     char *name;
184     /* channel thread id */
185     QemuThread thread;
186     bool thread_created;
187     QemuThread tls_thread;
188     bool tls_thread_created;
189     /* communication channel */
190     QIOChannel *c;
191     /* packet allocated len */
192     uint32_t packet_len;
193     /* multifd flags for sending ram */
194     int write_flags;
195 
196     /* sem where to wait for more work */
197     QemuSemaphore sem;
198     /* syncs main thread and channels */
199     QemuSemaphore sem_sync;
200 
201     /* multifd flags for each packet */
202     uint32_t flags;
203     /*
204      * The sender thread has work to do if either of below field is set.
205      *
206      * @pending_job:  a job is pending
207      * @pending_sync: a sync request is pending
208      *
209      * For both of these fields, they're only set by the requesters, and
210      * cleared by the multifd sender threads.
211      */
212     bool pending_job;
213     MultiFDSyncReq pending_sync;
214 
    /* payload to send, see struct MultiFDSendData */
215     MultiFDSendData *data;
216 
217     /* thread local variables. No locking required */
218 
219     /* pointers to the possible packet types */
220     MultiFDPacket_t *packet;
221     MultiFDPacketDeviceState_t *packet_device_state;
222     /* size of the next packet that contains pages */
223     uint32_t next_packet_size;
224     /* packets sent through this channel */
225     uint64_t packets_sent;
226     /* buffers to send */
227     struct iovec *iov;
228     /* number of iovs used */
229     uint32_t iovs_num;
230     /* used for compression methods */
231     void *compress_data;
232 }  MultiFDSendParams;
233 
/*
 * Per-channel state of a multifd receive channel.
 *
 * The fields are grouped, in order, into: fields written only at
 * creation/deletion time, the semaphores, the fields protected by @mutex,
 * and finally the fields local to the receive thread.
 */
234 typedef struct {
235     /* Fields are only written at creating/deletion time */
236     /* No lock required for them, they are read only */
237 
238     /* channel number */
239     uint8_t id;
240     /* channel thread name */
241     char *name;
242     /* channel thread id */
243     QemuThread thread;
244     bool thread_created;
245     /* communication channel */
246     QIOChannel *c;
247     /* packet allocated len */
248     uint32_t packet_len;
249 
250     /* syncs main thread and channels */
251     QemuSemaphore sem_sync;
252     /* sem where to wait for more work */
253     QemuSemaphore sem;
254 
255     /* this mutex protects the following parameters */
256     QemuMutex mutex;
257     /* should this thread finish */
258     bool quit;
259     /* multifd flags for each packet */
260     uint32_t flags;
261     /* global number of generated multifd packets */
262     uint64_t packet_num;
    /* NOTE(review): an int here, whereas MultiFDSendParams.pending_job is
     * a bool -- confirm whether values other than 0/1 are used. */
263     int pending_job;
    /* see struct MultiFDRecvData */
264     MultiFDRecvData *data;
265 
266     /* thread local variables. No locking required */
267 
268     /* pointers to the possible packet types */
269     MultiFDPacket_t *packet;
    /* (named packet_device_state in MultiFDSendParams) */
270     MultiFDPacketDeviceState_t *packet_dev_state;
271     /* size of the next packet that contains pages */
272     uint32_t next_packet_size;
273     /* packets received through this channel */
274     uint64_t packets_recved;
275     /* ramblock */
276     RAMBlock *block;
277     /* ramblock host address */
278     uint8_t *host;
279     /* buffers to recv */
280     struct iovec *iov;
281     /* Pages that are not zero */
282     ram_addr_t *normal;
283     /* num of non zero pages */
284     uint32_t normal_num;
285     /* Pages that are zero */
286     ram_addr_t *zero;
287     /* num of zero pages */
288     uint32_t zero_num;
289     /* used for de-compression methods */
290     void *compress_data;
291     /* Flags for the QIOChannel */
292     int read_flags;
293 } MultiFDRecvParams;
294 
/*
 * Table of hooks implementing one multifd method; a table is registered
 * with multifd_register_ops().
 *
 * The send_* hooks take a MultiFDSendParams, the recv* hooks take a
 * MultiFDRecvParams.
 */
295 typedef struct {
296     /*
297      * The send_setup, send_cleanup, send_prepare are only called on
298      * the QEMU instance at the migration source.
299      */
300 
301     /*
302      * Setup for sending side. Called once per channel during channel
303      * setup phase.
304      *
305      * Must allocate p->iov. If packets are in use (default), one
306      * extra iovec must be allocated for the packet header. Any memory
307      * allocated in this hook must be released at send_cleanup.
308      *
309      * p->write_flags may be used for passing flags to the QIOChannel.
310      *
311      * p->compress_data may be used by compression methods to store
312      * compression data.
313      */
314     int (*send_setup)(MultiFDSendParams *p, Error **errp);
315 
316     /*
317      * Cleanup for sending side. Called once per channel during
318      * channel cleanup phase.
319      */
320     void (*send_cleanup)(MultiFDSendParams *p, Error **errp);
321 
322     /*
323      * Prepare the send packet. Called as a result of multifd_send()
324      * on the client side, with p pointing to the MultiFDSendParams of
325      * a channel that is currently idle.
326      *
327      * Must populate p->iov with the data to be sent, increment
328      * p->iovs_num to match the amount of iovecs used and set
329      * p->next_packet_size with the amount of data currently present
330      * in p->iov.
331      *
332      * Must indicate whether this is a compression packet by setting
333      * p->flags.
334      *
335      * As a last step, if packets are in use (default), must prepare
336      * the packet by calling multifd_send_fill_packet().
337      */
338     int (*send_prepare)(MultiFDSendParams *p, Error **errp);
339 
340     /*
341      * The recv_setup, recv_cleanup, recv are only called on the QEMU
342      * instance at the migration destination.
343      */
344 
345     /*
346      * Setup for receiving side. Called once per channel during
347      * channel setup phase. May be empty.
348      *
349      * May allocate data structures for the receiving of data. May use
350      * p->iov. Compression methods may use p->compress_data.
351      */
352     int (*recv_setup)(MultiFDRecvParams *p, Error **errp);
353 
354     /*
355      * Cleanup for receiving side. Called once per channel during
356      * channel cleanup phase. May be empty.
357      */
358     void (*recv_cleanup)(MultiFDRecvParams *p);
359 
360     /*
361      * Data receive method. Called as a result of multifd_recv() on
362      * the client side, with p pointing to the MultiFDRecvParams of a
363      * channel that is currently idle. Only called if there is data
364      * available to receive.
365      *
366      * Must validate p->flags according to what was set at
367      * send_prepare.
368      *
369      * Must read the data from the QIOChannel p->c.
370      */
371     int (*recv)(MultiFDRecvParams *p, Error **errp);
372 } MultiFDMethods;
373 
/*
 * Method registration and helpers for MultiFDMethods implementations.
 *
 * Only the prototypes are visible here.
 * NOTE(review): @method is presumably the compression method identifier
 * that selects the ops table -- confirm at the definition.
 */
374 void multifd_register_ops(int method, const MultiFDMethods *ops);
/* Referenced by the send_prepare hook documentation in MultiFDMethods */
375 void multifd_send_fill_packet(MultiFDSendParams *p);
376 bool multifd_send_prepare_common(MultiFDSendParams *p);
377 void multifd_send_zero_page_detect(MultiFDSendParams *p);
378 void multifd_recv_zero_page_process(MultiFDRecvParams *p);
379 
380 void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
/*
 * MultiFDSendData submission and lifetime helpers.
 * NOTE(review): multifd_send() takes a pointer to the caller's pointer,
 * so it can presumably replace it -- confirm ownership rules at the
 * definition.
 */
381 bool multifd_send(MultiFDSendData **send_data);
382 MultiFDSendData *multifd_send_data_alloc(void);
383 void multifd_send_data_clear(MultiFDSendData *data);
384 void multifd_send_data_free(MultiFDSendData *data);
385 
/*
 * Page size used for multifd RAM payloads: the value returned by
 * qemu_target_page_size(), converted to uint32_t.
 */
static inline uint32_t multifd_ram_page_size(void)
{
    uint32_t page_bytes = qemu_target_page_size();

    return page_bytes;
}
390 
391 static inline uint32_t multifd_ram_page_count(void)
392 {
393     return MULTIFD_PACKET_SIZE / qemu_target_page_size();
394 }
395 
/*
 * RAM payload entry points.
 *
 * Only the prototypes are visible here; see the definitions for the
 * exact semantics and return conventions.
 */
396 void multifd_ram_save_setup(void);
397 void multifd_ram_save_cleanup(void);
398 int multifd_ram_flush_and_sync(QEMUFile *f);
399 bool multifd_ram_sync_per_round(void);
400 bool multifd_ram_sync_per_section(void);
/* Allocation and release of the resources of a MultiFDPages_t */
401 void multifd_ram_payload_alloc(MultiFDPages_t *pages);
402 void multifd_ram_payload_free(MultiFDPages_t *pages);
403 void multifd_ram_fill_packet(MultiFDSendParams *p);
404 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);
405 
/* Device state payload entry points */
406 void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state);
407 
408 void multifd_device_state_send_setup(void);
409 void multifd_device_state_send_cleanup(void);
410 
411 void multifd_device_state_send_prepare(MultiFDSendParams *p);
412 
413 #endif
414