Lines matching "disable" and "attr"
4 * Copyright IBM, Corp. 2010-2013
5 * Copyright Red Hat, Inc. 2015-2016
13 * later. See the COPYING file in the top-level directory.
23 #include "migration-stats.h"
24 #include "qemu-file.h"
26 #include "qemu/error-report.h"
27 #include "qemu/main-loop.h"
52 * This is only for non-live state being migrated.
78 * A work request ID is 64-bits and we split up these bits
81 * bits 0-15 : type of control message, 2^16
82 * bits 16-29: ram block index, 2^14
83 * bits 30-63: ram block chunk number, 2^34
94 ((1UL << RDMA_WRID_BLOCK_SHIFT) - 1UL)
97 (~RDMA_WRID_TYPE_MASK & ((1UL << RDMA_WRID_CHUNK_SHIFT) - 1UL))
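A minimal sketch of composing such an ID from the shifts and masks above (the helper name is hypothetical; BLOCK_SHIFT = 16 and CHUNK_SHIFT = 30 are assumed from the bit ranges in the comment):

static uint64_t wrid_make(uint64_t type, uint64_t block, uint64_t chunk)
{
    /* type in bits 0-15, block index in bits 16-29, chunk in bits 30-63 */
    return type | (block << RDMA_WRID_BLOCK_SHIFT)
                | (chunk << RDMA_WRID_CHUNK_SHIFT);
}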
134 RDMA_CONTROL_QEMU_FILE, /* QEMUFile-transmitted bytes */
141 RDMA_CONTROL_UNREGISTER_REQUEST, /* dynamic UN-registration */
158 * Negotiate RDMA capabilities during connection-setup time.
167 cap->version = htonl(cap->version); in caps_to_network()
168 cap->flags = htonl(cap->flags); in caps_to_network()
173 cap->version = ntohl(cap->version); in network_to_caps()
174 cap->flags = ntohl(cap->flags); in network_to_caps()
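A hedged sketch of how these helpers pair with connection setup: the capabilities are byte-swapped and carried as CM private data, matching the memcpy from cm_event->param.conn.private_data further down (the version constant and retry count are assumptions):

RDMACapabilities cap = {
    .version = RDMA_CONTROL_VERSION_CURRENT,   /* assumed constant name */
    .flags   = pin_all ? RDMA_CAPABILITY_PIN_ALL : 0,
};
struct rdma_conn_param conn_param = {
    .retry_count      = 5,                     /* assumed */
    .private_data     = &cap,
    .private_data_len = sizeof(cap),
};
caps_to_network(&cap);                         /* host -> network order */
rdma_connect(rdma->cm_id, &conn_param);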
190 struct ibv_mr **pmr; /* MRs for chunk-level registration */
191 struct ibv_mr *mr; /* MR for non-chunk-level registration */
192 uint32_t *remote_keys; /* rkeys for chunk-level registration */
193 uint32_t remote_rkey; /* rkeys for non-chunk-level registration */
204 * This gets transmitted by the dest during connection-time
262 db->remote_host_addr = htonll(db->remote_host_addr); in dest_block_to_network()
263 db->offset = htonll(db->offset); in dest_block_to_network()
264 db->length = htonll(db->length); in dest_block_to_network()
265 db->remote_rkey = htonl(db->remote_rkey); in dest_block_to_network()
270 db->remote_host_addr = ntohll(db->remote_host_addr); in network_to_dest_block()
271 db->offset = ntohll(db->offset); in network_to_dest_block()
272 db->length = ntohll(db->length); in network_to_dest_block()
273 db->remote_rkey = ntohl(db->remote_rkey); in network_to_dest_block()
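The 64-bit htonll()/ntohll() used above are not standard socket API; a plausible definition, built from two 32-bit htonl() calls (this file's own helper is not among the matched lines, so this is a reconstruction):

static uint64_t htonll(uint64_t v)
{
    union { uint32_t lv[2]; uint64_t llv; } u;
    u.lv[0] = htonl(v >> 32);            /* high word goes first on the wire */
    u.lv[1] = htonl(v & 0xFFFFFFFFULL);
    return u.llv;
}
/* ntohll() performs the inverse with ntohl(). */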
278 * the RAMBlock descriptions at connection-time.
323 * infiniband-specific variables for opening the device
327 * cm_id->verbs, cm_id->channel, and cm_id->qp.
380 #define TYPE_QIO_CHANNEL_RDMA "qio-channel-rdma"
406 control->type = htonl(control->type); in control_to_network()
407 control->len = htonl(control->len); in control_to_network()
408 control->repeat = htonl(control->repeat); in control_to_network()
413 control->type = ntohl(control->type); in network_to_control()
414 control->len = ntohl(control->len); in network_to_control()
415 control->repeat = ntohl(control->repeat); in network_to_control()
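For reference, a sketch of the header being swapped here, inferred only from the three fields converted above (the real struct may carry additional padding):

typedef struct QEMU_PACKED {
    uint32_t len;    /* length of the data portion that follows */
    uint32_t type;   /* one of the RDMA_CONTROL_* values */
    uint32_t repeat; /* number of commands of the same type in the data */
} RDMAControlHeader;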
436 if (rdma->errored && !rdma->error_reported) { in rdma_errored()
439 rdma->error_reported = true; in rdma_errored()
441 return rdma->errored; in rdma_errored()
447 local_block = &rdma->local_ram_blocks.block[reg->current_index]; in register_to_network()
449 if (local_block->is_ram_block) { in register_to_network()
454 reg->key.current_addr -= local_block->offset; in register_to_network()
455 reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; in register_to_network()
457 reg->key.current_addr = htonll(reg->key.current_addr); in register_to_network()
458 reg->current_index = htonl(reg->current_index); in register_to_network()
459 reg->chunks = htonll(reg->chunks); in register_to_network()
464 reg->key.current_addr = ntohll(reg->key.current_addr); in network_to_register()
465 reg->current_index = ntohl(reg->current_index); in network_to_register()
466 reg->chunks = ntohll(reg->chunks); in network_to_register()
478 comp->value = htonl(comp->value); in compress_to_network()
480 * comp->offset as passed in is an address in the local ram_addr_t in compress_to_network()
483 comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; in compress_to_network()
484 comp->offset += rdma->dest_blocks[comp->block_idx].offset; in compress_to_network()
485 comp->block_idx = htonl(comp->block_idx); in compress_to_network()
486 comp->offset = htonll(comp->offset); in compress_to_network()
487 comp->length = htonll(comp->length); in compress_to_network()
492 comp->value = ntohl(comp->value); in network_to_compress()
493 comp->block_idx = ntohl(comp->block_idx); in network_to_compress()
494 comp->offset = ntohll(comp->offset); in network_to_compress()
495 comp->length = ntohll(comp->length); in network_to_compress()
511 result->rkey = htonl(result->rkey); in result_to_network()
512 result->host_addr = htonll(result->host_addr); in result_to_network()
517 result->rkey = ntohl(result->rkey); in network_to_result()
518 result->host_addr = ntohll(result->host_addr); in network_to_result()
531 return ((uintptr_t) host - (uintptr_t) start) >> RDMA_REG_CHUNK_SHIFT; in ram_chunk_index()
537 return (uint8_t *)(uintptr_t)(rdma_ram_block->local_host_addr + in ram_chunk_start()
547 if (result > (rdma_ram_block->local_host_addr + rdma_ram_block->length)) { in ram_chunk_end()
548 result = rdma_ram_block->local_host_addr + rdma_ram_block->length; in ram_chunk_end()
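A worked example of the chunk math, assuming RDMA_REG_CHUNK_SHIFT is 20 (1 MiB chunks):

uint64_t idx = 0x380000UL >> RDMA_REG_CHUNK_SHIFT;   /* 3.5 MiB in -> chunk 3 */
/* A 3.5 MiB block therefore needs ram_chunk_index(start, start + length) + 1
 * == 4 chunks, which is the nb_chunks computation in rdma_add_block() below. */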
558 RDMALocalBlocks *local = &rdma->local_ram_blocks; in rdma_add_block()
560 RDMALocalBlock *old = local->block; in rdma_add_block()
562 local->block = g_new0(RDMALocalBlock, local->nb_blocks + 1); in rdma_add_block()
564 if (local->nb_blocks) { in rdma_add_block()
565 if (rdma->blockmap) { in rdma_add_block()
566 for (int x = 0; x < local->nb_blocks; x++) { in rdma_add_block()
567 g_hash_table_remove(rdma->blockmap, in rdma_add_block()
569 g_hash_table_insert(rdma->blockmap, in rdma_add_block()
571 &local->block[x]); in rdma_add_block()
574 memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks); in rdma_add_block()
578 block = &local->block[local->nb_blocks]; in rdma_add_block()
580 block->block_name = g_strdup(block_name); in rdma_add_block()
581 block->local_host_addr = host_addr; in rdma_add_block()
582 block->offset = block_offset; in rdma_add_block()
583 block->length = length; in rdma_add_block()
584 block->index = local->nb_blocks; in rdma_add_block()
585 block->src_index = ~0U; /* Filled in by the receipt of the block list */ in rdma_add_block()
586 block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL; in rdma_add_block()
587 block->transit_bitmap = bitmap_new(block->nb_chunks); in rdma_add_block()
588 bitmap_clear(block->transit_bitmap, 0, block->nb_chunks); in rdma_add_block()
589 block->unregister_bitmap = bitmap_new(block->nb_chunks); in rdma_add_block()
590 bitmap_clear(block->unregister_bitmap, 0, block->nb_chunks); in rdma_add_block()
591 block->remote_keys = g_new0(uint32_t, block->nb_chunks); in rdma_add_block()
593 block->is_ram_block = local->init ? false : true; in rdma_add_block()
595 if (rdma->blockmap) { in rdma_add_block()
596 g_hash_table_insert(rdma->blockmap, (void *)(uintptr_t)block_offset, block); in rdma_add_block()
599 trace_rdma_add_block(block_name, local->nb_blocks, in rdma_add_block()
600 (uintptr_t) block->local_host_addr, in rdma_add_block()
601 block->offset, block->length, in rdma_add_block()
602 (uintptr_t) (block->local_host_addr + block->length), in rdma_add_block()
603 BITS_TO_LONGS(block->nb_chunks) * in rdma_add_block()
605 block->nb_chunks); in rdma_add_block()
607 local->nb_blocks++; in rdma_add_block()
632 RDMALocalBlocks *local = &rdma->local_ram_blocks; in qemu_rdma_init_ram_blocks()
635 assert(rdma->blockmap == NULL); in qemu_rdma_init_ram_blocks()
639 trace_qemu_rdma_init_ram_blocks(local->nb_blocks); in qemu_rdma_init_ram_blocks()
640 rdma->dest_blocks = g_new0(RDMADestBlock, in qemu_rdma_init_ram_blocks()
641 rdma->local_ram_blocks.nb_blocks); in qemu_rdma_init_ram_blocks()
642 local->init = true; in qemu_rdma_init_ram_blocks()
651 RDMALocalBlocks *local = &rdma->local_ram_blocks; in rdma_delete_block()
652 RDMALocalBlock *old = local->block; in rdma_delete_block()
654 if (rdma->blockmap) { in rdma_delete_block()
655 g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset); in rdma_delete_block()
657 if (block->pmr) { in rdma_delete_block()
658 for (int j = 0; j < block->nb_chunks; j++) { in rdma_delete_block()
659 if (!block->pmr[j]) { in rdma_delete_block()
662 ibv_dereg_mr(block->pmr[j]); in rdma_delete_block()
663 rdma->total_registrations--; in rdma_delete_block()
665 g_free(block->pmr); in rdma_delete_block()
666 block->pmr = NULL; in rdma_delete_block()
669 if (block->mr) { in rdma_delete_block()
670 ibv_dereg_mr(block->mr); in rdma_delete_block()
671 rdma->total_registrations--; in rdma_delete_block()
672 block->mr = NULL; in rdma_delete_block()
675 g_free(block->transit_bitmap); in rdma_delete_block()
676 block->transit_bitmap = NULL; in rdma_delete_block()
678 g_free(block->unregister_bitmap); in rdma_delete_block()
679 block->unregister_bitmap = NULL; in rdma_delete_block()
681 g_free(block->remote_keys); in rdma_delete_block()
682 block->remote_keys = NULL; in rdma_delete_block()
684 g_free(block->block_name); in rdma_delete_block()
685 block->block_name = NULL; in rdma_delete_block()
687 if (rdma->blockmap) { in rdma_delete_block()
688 for (int x = 0; x < local->nb_blocks; x++) { in rdma_delete_block()
689 g_hash_table_remove(rdma->blockmap, in rdma_delete_block()
694 if (local->nb_blocks > 1) { in rdma_delete_block()
696 local->block = g_new0(RDMALocalBlock, local->nb_blocks - 1); in rdma_delete_block()
698 if (block->index) { in rdma_delete_block()
699 memcpy(local->block, old, sizeof(RDMALocalBlock) * block->index); in rdma_delete_block()
702 if (block->index < (local->nb_blocks - 1)) { in rdma_delete_block()
703 memcpy(local->block + block->index, old + (block->index + 1), in rdma_delete_block()
705 (local->nb_blocks - (block->index + 1))); in rdma_delete_block()
706 for (int x = block->index; x < local->nb_blocks - 1; x++) { in rdma_delete_block()
707 local->block[x].index--; in rdma_delete_block()
711 assert(block == local->block); in rdma_delete_block()
712 local->block = NULL; in rdma_delete_block()
715 trace_rdma_delete_block(block, (uintptr_t)block->local_host_addr, in rdma_delete_block()
716 block->offset, block->length, in rdma_delete_block()
717 (uintptr_t)(block->local_host_addr + block->length), in rdma_delete_block()
718 BITS_TO_LONGS(block->nb_chunks) * in rdma_delete_block()
719 sizeof(unsigned long) * 8, block->nb_chunks); in rdma_delete_block()
723 local->nb_blocks--; in rdma_delete_block()
725 if (local->nb_blocks && rdma->blockmap) { in rdma_delete_block()
726 for (int x = 0; x < local->nb_blocks; x++) { in rdma_delete_block()
727 g_hash_table_insert(rdma->blockmap, in rdma_delete_block()
728 (void *)(uintptr_t)local->block[x].offset, in rdma_delete_block()
729 &local->block[x]); in rdma_delete_block()
747 verbs->device->name, in qemu_rdma_dump_id()
748 verbs->device->dev_name, in qemu_rdma_dump_id()
749 verbs->device->dev_path, in qemu_rdma_dump_id()
750 verbs->device->ibdev_path, in qemu_rdma_dump_id()
765 inet_ntop(AF_INET6, &id->route.addr.addr.ibaddr.sgid, sgid, sizeof sgid); in qemu_rdma_dump_gid()
766 inet_ntop(AF_INET6, &id->route.addr.addr.ibaddr.dgid, dgid, sizeof dgid); in qemu_rdma_dump_gid()
783 if (rdma->host == NULL || !strcmp(rdma->host, "")) { in qemu_rdma_resolve_host()
785 return -1; in qemu_rdma_resolve_host()
789 rdma->channel = rdma_create_event_channel(); in qemu_rdma_resolve_host()
790 if (!rdma->channel) { in qemu_rdma_resolve_host()
792 return -1; in qemu_rdma_resolve_host()
796 ret = rdma_create_id(rdma->channel, &rdma->cm_id, NULL, RDMA_PS_TCP); in qemu_rdma_resolve_host()
802 snprintf(port_str, 16, "%d", rdma->port); in qemu_rdma_resolve_host()
805 ret = rdma_getaddrinfo(rdma->host, port_str, NULL, &res); in qemu_rdma_resolve_host()
808 rdma->host); in qemu_rdma_resolve_host()
813 for (struct rdma_addrinfo *e = res; e != NULL; e = e->ai_next) { in qemu_rdma_resolve_host()
815 inet_ntop(e->ai_family, in qemu_rdma_resolve_host()
816 &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip); in qemu_rdma_resolve_host()
817 trace_qemu_rdma_resolve_host_trying(rdma->host, ip); in qemu_rdma_resolve_host()
819 ret = rdma_resolve_addr(rdma->cm_id, NULL, e->ai_dst_addr, in qemu_rdma_resolve_host()
827 error_setg(errp, "RDMA ERROR: could not resolve address %s", rdma->host); in qemu_rdma_resolve_host()
832 qemu_rdma_dump_gid("source_resolve_addr", rdma->cm_id); in qemu_rdma_resolve_host()
834 ret = rdma_get_cm_event(rdma->channel, &cm_event); in qemu_rdma_resolve_host()
840 if (cm_event->event != RDMA_CM_EVENT_ADDR_RESOLVED) { in qemu_rdma_resolve_host()
843 rdma_event_str(cm_event->event)); in qemu_rdma_resolve_host()
850 ret = rdma_resolve_route(rdma->cm_id, RDMA_RESOLVE_TIMEOUT_MS); in qemu_rdma_resolve_host()
856 ret = rdma_get_cm_event(rdma->channel, &cm_event); in qemu_rdma_resolve_host()
861 if (cm_event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) { in qemu_rdma_resolve_host()
864 rdma_event_str(cm_event->event)); in qemu_rdma_resolve_host()
869 rdma->verbs = rdma->cm_id->verbs; in qemu_rdma_resolve_host()
870 qemu_rdma_dump_id("source_resolve_host", rdma->cm_id->verbs); in qemu_rdma_resolve_host()
871 qemu_rdma_dump_gid("source_resolve_host", rdma->cm_id); in qemu_rdma_resolve_host()
875 rdma_destroy_id(rdma->cm_id); in qemu_rdma_resolve_host()
876 rdma->cm_id = NULL; in qemu_rdma_resolve_host()
878 rdma_destroy_event_channel(rdma->channel); in qemu_rdma_resolve_host()
879 rdma->channel = NULL; in qemu_rdma_resolve_host()
880 return -1; in qemu_rdma_resolve_host()
889 rdma->pd = ibv_alloc_pd(rdma->verbs); in qemu_rdma_alloc_pd_cq()
890 if (!rdma->pd) { in qemu_rdma_alloc_pd_cq()
892 return -1; in qemu_rdma_alloc_pd_cq()
896 rdma->recv_comp_channel = ibv_create_comp_channel(rdma->verbs); in qemu_rdma_alloc_pd_cq()
897 if (!rdma->recv_comp_channel) { in qemu_rdma_alloc_pd_cq()
905 rdma->recv_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3), in qemu_rdma_alloc_pd_cq()
906 NULL, rdma->recv_comp_channel, 0); in qemu_rdma_alloc_pd_cq()
907 if (!rdma->recv_cq) { in qemu_rdma_alloc_pd_cq()
913 rdma->send_comp_channel = ibv_create_comp_channel(rdma->verbs); in qemu_rdma_alloc_pd_cq()
914 if (!rdma->send_comp_channel) { in qemu_rdma_alloc_pd_cq()
919 rdma->send_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3), in qemu_rdma_alloc_pd_cq()
920 NULL, rdma->send_comp_channel, 0); in qemu_rdma_alloc_pd_cq()
921 if (!rdma->send_cq) { in qemu_rdma_alloc_pd_cq()
929 if (rdma->pd) { in qemu_rdma_alloc_pd_cq()
930 ibv_dealloc_pd(rdma->pd); in qemu_rdma_alloc_pd_cq()
932 if (rdma->recv_comp_channel) { in qemu_rdma_alloc_pd_cq()
933 ibv_destroy_comp_channel(rdma->recv_comp_channel); in qemu_rdma_alloc_pd_cq()
935 if (rdma->send_comp_channel) { in qemu_rdma_alloc_pd_cq()
936 ibv_destroy_comp_channel(rdma->send_comp_channel); in qemu_rdma_alloc_pd_cq()
938 if (rdma->recv_cq) { in qemu_rdma_alloc_pd_cq()
939 ibv_destroy_cq(rdma->recv_cq); in qemu_rdma_alloc_pd_cq()
940 rdma->recv_cq = NULL; in qemu_rdma_alloc_pd_cq()
942 rdma->pd = NULL; in qemu_rdma_alloc_pd_cq()
943 rdma->recv_comp_channel = NULL; in qemu_rdma_alloc_pd_cq()
944 rdma->send_comp_channel = NULL; in qemu_rdma_alloc_pd_cq()
945 return -1; in qemu_rdma_alloc_pd_cq()
954 struct ibv_qp_init_attr attr = { 0 }; in qemu_rdma_alloc_qp() local
956 attr.cap.max_send_wr = RDMA_SIGNALED_SEND_MAX; in qemu_rdma_alloc_qp()
957 attr.cap.max_recv_wr = 3; in qemu_rdma_alloc_qp()
958 attr.cap.max_send_sge = 1; in qemu_rdma_alloc_qp()
959 attr.cap.max_recv_sge = 1; in qemu_rdma_alloc_qp()
960 attr.send_cq = rdma->send_cq; in qemu_rdma_alloc_qp()
961 attr.recv_cq = rdma->recv_cq; in qemu_rdma_alloc_qp()
962 attr.qp_type = IBV_QPT_RC; in qemu_rdma_alloc_qp()
964 if (rdma_create_qp(rdma->cm_id, rdma->pd, &attr) < 0) { in qemu_rdma_alloc_qp()
965 return -1; in qemu_rdma_alloc_qp()
968 rdma->qp = rdma->cm_id->qp; in qemu_rdma_alloc_qp()
972 /* Check whether On-Demand Paging is supported by RDMA device */
975 struct ibv_device_attr_ex attr = {0}; in rdma_support_odp() local
977 if (ibv_query_device_ex(dev, NULL, &attr)) { in rdma_support_odp()
981 if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) { in rdma_support_odp()
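A hedged sketch of how this probe pairs with registration, mirroring the ENOTSUP retry visible in qemu_rdma_reg_whole_ram_blocks() below (the wrapper name is hypothetical):

static struct ibv_mr *reg_mr_odp_fallback(struct ibv_pd *pd,
                                          struct ibv_context *verbs,
                                          void *addr, size_t len, int access)
{
    struct ibv_mr *mr = ibv_reg_mr(pd, addr, len, access);
    if (!mr && errno == ENOTSUP && rdma_support_odp(verbs)) {
        access |= IBV_ACCESS_ON_DEMAND;  /* retry with On-Demand Paging */
        mr = ibv_reg_mr(pd, addr, len, access);
    }
    return mr;
}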
1013 RDMALocalBlocks *local = &rdma->local_ram_blocks; in qemu_rdma_reg_whole_ram_blocks()
1015 for (i = 0; i < local->nb_blocks; i++) { in qemu_rdma_reg_whole_ram_blocks()
1018 local->block[i].mr = in qemu_rdma_reg_whole_ram_blocks()
1019 ibv_reg_mr(rdma->pd, in qemu_rdma_reg_whole_ram_blocks()
1020 local->block[i].local_host_addr, in qemu_rdma_reg_whole_ram_blocks()
1021 local->block[i].length, access in qemu_rdma_reg_whole_ram_blocks()
1029 if (!local->block[i].mr && in qemu_rdma_reg_whole_ram_blocks()
1030 errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { in qemu_rdma_reg_whole_ram_blocks()
1033 local->block[i].mr = in qemu_rdma_reg_whole_ram_blocks()
1034 ibv_reg_mr(rdma->pd, in qemu_rdma_reg_whole_ram_blocks()
1035 local->block[i].local_host_addr, in qemu_rdma_reg_whole_ram_blocks()
1036 local->block[i].length, access); in qemu_rdma_reg_whole_ram_blocks()
1037 trace_qemu_rdma_register_odp_mr(local->block[i].block_name); in qemu_rdma_reg_whole_ram_blocks()
1039 if (local->block[i].mr) { in qemu_rdma_reg_whole_ram_blocks()
1040 qemu_rdma_advise_prefetch_mr(rdma->pd, in qemu_rdma_reg_whole_ram_blocks()
1041 (uintptr_t)local->block[i].local_host_addr, in qemu_rdma_reg_whole_ram_blocks()
1042 local->block[i].length, in qemu_rdma_reg_whole_ram_blocks()
1043 local->block[i].mr->lkey, in qemu_rdma_reg_whole_ram_blocks()
1044 local->block[i].block_name, in qemu_rdma_reg_whole_ram_blocks()
1049 if (!local->block[i].mr) { in qemu_rdma_reg_whole_ram_blocks()
1054 rdma->total_registrations++; in qemu_rdma_reg_whole_ram_blocks()
1060 for (i--; i >= 0; i--) { in qemu_rdma_reg_whole_ram_blocks()
1061 ibv_dereg_mr(local->block[i].mr); in qemu_rdma_reg_whole_ram_blocks()
1062 local->block[i].mr = NULL; in qemu_rdma_reg_whole_ram_blocks()
1063 rdma->total_registrations--; in qemu_rdma_reg_whole_ram_blocks()
1066 return -1; in qemu_rdma_reg_whole_ram_blocks()
1085 RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap, in qemu_rdma_search_ram_block()
1088 assert(current_addr >= block->offset); in qemu_rdma_search_ram_block()
1089 assert((current_addr + length) <= (block->offset + block->length)); in qemu_rdma_search_ram_block()
1091 *block_index = block->index; in qemu_rdma_search_ram_block()
1092 *chunk_index = ram_chunk_index(block->local_host_addr, in qemu_rdma_search_ram_block()
1093 block->local_host_addr + (current_addr - block->offset)); in qemu_rdma_search_ram_block()
1108 if (block->mr) { in qemu_rdma_register_and_get_keys()
1110 *lkey = block->mr->lkey; in qemu_rdma_register_and_get_keys()
1113 *rkey = block->mr->rkey; in qemu_rdma_register_and_get_keys()
1119 if (!block->pmr) { in qemu_rdma_register_and_get_keys()
1120 block->pmr = g_new0(struct ibv_mr *, block->nb_chunks); in qemu_rdma_register_and_get_keys()
1128 if (!block->pmr[chunk]) { in qemu_rdma_register_and_get_keys()
1129 uint64_t len = chunk_end - chunk_start; in qemu_rdma_register_and_get_keys()
1135 block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); in qemu_rdma_register_and_get_keys()
1142 if (!block->pmr[chunk] && in qemu_rdma_register_and_get_keys()
1143 errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { in qemu_rdma_register_and_get_keys()
1146 block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); in qemu_rdma_register_and_get_keys()
1147 trace_qemu_rdma_register_odp_mr(block->block_name); in qemu_rdma_register_and_get_keys()
1149 if (block->pmr[chunk]) { in qemu_rdma_register_and_get_keys()
1150 qemu_rdma_advise_prefetch_mr(rdma->pd, (uintptr_t)chunk_start, in qemu_rdma_register_and_get_keys()
1151 len, block->pmr[chunk]->lkey, in qemu_rdma_register_and_get_keys()
1152 block->block_name, rkey); in qemu_rdma_register_and_get_keys()
1157 if (!block->pmr[chunk]) { in qemu_rdma_register_and_get_keys()
1158 return -1; in qemu_rdma_register_and_get_keys()
1160 rdma->total_registrations++; in qemu_rdma_register_and_get_keys()
1163 *lkey = block->pmr[chunk]->lkey; in qemu_rdma_register_and_get_keys()
1166 *rkey = block->pmr[chunk]->rkey; in qemu_rdma_register_and_get_keys()
1177 rdma->wr_data[idx].control_mr = ibv_reg_mr(rdma->pd, in qemu_rdma_reg_control()
1178 rdma->wr_data[idx].control, RDMA_CONTROL_MAX_BUFFER, in qemu_rdma_reg_control()
1180 if (rdma->wr_data[idx].control_mr) { in qemu_rdma_reg_control()
1181 rdma->total_registrations++; in qemu_rdma_reg_control()
1184 return -1; in qemu_rdma_reg_control()
1188 * Perform a non-optimized memory unregistration after every transfer
1189 * for demonstration purposes, only if pin-all is not requested.
1193 - for bit clearing
1194 - and for receipt of unregister messages
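Each queued wr_id below encodes both the block index and the chunk; they are recovered with the masks from the top of the file (RDMA_WRID_CHUNK_MASK is assumed to be the complement of the two masks shown earlier):

uint64_t index = (wr_id & RDMA_WRID_BLOCK_MASK) >> RDMA_WRID_BLOCK_SHIFT;
uint64_t chunk = (wr_id & RDMA_WRID_CHUNK_MASK) >> RDMA_WRID_CHUNK_SHIFT;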
1202 while (rdma->unregistrations[rdma->unregister_current]) { in qemu_rdma_unregister_waiting()
1204 uint64_t wr_id = rdma->unregistrations[rdma->unregister_current]; in qemu_rdma_unregister_waiting()
1210 &(rdma->local_ram_blocks.block[index]); in qemu_rdma_unregister_waiting()
1220 rdma->unregister_current); in qemu_rdma_unregister_waiting()
1222 rdma->unregistrations[rdma->unregister_current] = 0; in qemu_rdma_unregister_waiting()
1223 rdma->unregister_current++; in qemu_rdma_unregister_waiting()
1225 if (rdma->unregister_current == RDMA_SIGNALED_SEND_MAX) { in qemu_rdma_unregister_waiting()
1226 rdma->unregister_current = 0; in qemu_rdma_unregister_waiting()
1231 * Unregistration is speculative (because migration is single-threaded in qemu_rdma_unregister_waiting()
1237 clear_bit(chunk, block->unregister_bitmap); in qemu_rdma_unregister_waiting()
1239 if (test_bit(chunk, block->transit_bitmap)) { in qemu_rdma_unregister_waiting()
1246 ret = ibv_dereg_mr(block->pmr[chunk]); in qemu_rdma_unregister_waiting()
1247 block->pmr[chunk] = NULL; in qemu_rdma_unregister_waiting()
1248 block->remote_keys[chunk] = 0; in qemu_rdma_unregister_waiting()
1253 return -1; in qemu_rdma_unregister_waiting()
1255 rdma->total_registrations--; in qemu_rdma_unregister_waiting()
1263 return -1; in qemu_rdma_unregister_waiting()
1303 return -1; in qemu_rdma_poll()
1309 return -1; in qemu_rdma_poll()
1312 if (rdma->control_ready_expected && in qemu_rdma_poll()
1314 trace_qemu_rdma_poll_recv(wr_id - RDMA_WRID_RECV_CONTROL, wr_id, in qemu_rdma_poll()
1315 rdma->nb_sent); in qemu_rdma_poll()
1316 rdma->control_ready_expected = 0; in qemu_rdma_poll()
1324 RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]); in qemu_rdma_poll()
1326 trace_qemu_rdma_poll_write(wr_id, rdma->nb_sent, in qemu_rdma_poll()
1327 index, chunk, block->local_host_addr, in qemu_rdma_poll()
1328 (void *)(uintptr_t)block->remote_host_addr); in qemu_rdma_poll()
1330 clear_bit(chunk, block->transit_bitmap); in qemu_rdma_poll()
1332 if (rdma->nb_sent > 0) { in qemu_rdma_poll()
1333 rdma->nb_sent--; in qemu_rdma_poll()
1336 trace_qemu_rdma_poll_other(wr_id, rdma->nb_sent); in qemu_rdma_poll()
1348 * Returns 0 on success, non-0 on error.
1359 if (rdma->migration_started_on_destination && in qemu_rdma_wait_comp_channel()
1360 migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE) { in qemu_rdma_wait_comp_channel()
1361 yield_until_fd_readable(comp_channel->fd); in qemu_rdma_wait_comp_channel()
1370 while (!rdma->errored && !rdma->received_error) { in qemu_rdma_wait_comp_channel()
1372 pfds[0].fd = comp_channel->fd; in qemu_rdma_wait_comp_channel()
1376 pfds[1].fd = rdma->channel->fd; in qemu_rdma_wait_comp_channel()
1389 if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { in qemu_rdma_wait_comp_channel()
1390 return -1; in qemu_rdma_wait_comp_channel()
1393 if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || in qemu_rdma_wait_comp_channel()
1394 cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { in qemu_rdma_wait_comp_channel()
1396 return -1; in qemu_rdma_wait_comp_channel()
1405 default: /* Error of some type - in qemu_rdma_wait_comp_channel()
1408 return -1; in qemu_rdma_wait_comp_channel()
1411 if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { in qemu_rdma_wait_comp_channel()
1413 return -1; in qemu_rdma_wait_comp_channel()
1418 if (rdma->received_error) { in qemu_rdma_wait_comp_channel()
1419 return -1; in qemu_rdma_wait_comp_channel()
1421 return -rdma->errored; in qemu_rdma_wait_comp_channel()
1426 return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_comp_channel : in to_channel()
1427 rdma->recv_comp_channel; in to_channel()
1432 return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_cq : rdma->recv_cq; in to_cq()
1460 return -1; in qemu_rdma_block_for_wrid()
1466 return -1; in qemu_rdma_block_for_wrid()
1532 rdma->errored = true; in qemu_rdma_block_for_wrid()
1533 return -1; in qemu_rdma_block_for_wrid()
1545 RDMAWorkRequestData *wr = &rdma->wr_data[RDMA_WRID_CONTROL]; in qemu_rdma_post_send_control()
1548 .addr = (uintptr_t)(wr->control), in qemu_rdma_post_send_control()
1549 .length = head->len + sizeof(RDMAControlHeader), in qemu_rdma_post_send_control()
1550 .lkey = wr->control_mr->lkey, in qemu_rdma_post_send_control()
1560 trace_qemu_rdma_post_send_control(control_desc(head->type)); in qemu_rdma_post_send_control()
1565 * (not RAM in a performance-critical path), then it's OK for now. in qemu_rdma_post_send_control()
1570 assert(head->len <= RDMA_CONTROL_MAX_BUFFER - sizeof(*head)); in qemu_rdma_post_send_control()
1571 memcpy(wr->control, head, sizeof(RDMAControlHeader)); in qemu_rdma_post_send_control()
1572 control_to_network((void *) wr->control); in qemu_rdma_post_send_control()
1575 memcpy(wr->control + sizeof(RDMAControlHeader), buf, head->len); in qemu_rdma_post_send_control()
1579 ret = ibv_post_send(rdma->qp, &send_wr, &bad_wr); in qemu_rdma_post_send_control()
1583 return -1; in qemu_rdma_post_send_control()
1589 return -1; in qemu_rdma_post_send_control()
1604 .addr = (uintptr_t)(rdma->wr_data[idx].control), in qemu_rdma_post_recv_control()
1606 .lkey = rdma->wr_data[idx].control_mr->lkey, in qemu_rdma_post_recv_control()
1616 if (ibv_post_recv(rdma->qp, &recv_wr, &bad_wr)) { in qemu_rdma_post_recv_control()
1618 return -1; in qemu_rdma_post_recv_control()
1637 return -1; in qemu_rdma_exchange_get_response()
1640 network_to_control((void *) rdma->wr_data[idx].control); in qemu_rdma_exchange_get_response()
1641 memcpy(head, rdma->wr_data[idx].control, sizeof(RDMAControlHeader)); in qemu_rdma_exchange_get_response()
1646 trace_qemu_rdma_exchange_get_response_none(control_desc(head->type), in qemu_rdma_exchange_get_response()
1647 head->type); in qemu_rdma_exchange_get_response()
1648 } else if (head->type != expecting || head->type == RDMA_CONTROL_ERROR) { in qemu_rdma_exchange_get_response()
1652 control_desc(head->type), head->type, head->len); in qemu_rdma_exchange_get_response()
1653 if (head->type == RDMA_CONTROL_ERROR) { in qemu_rdma_exchange_get_response()
1654 rdma->received_error = true; in qemu_rdma_exchange_get_response()
1656 return -1; in qemu_rdma_exchange_get_response()
1658 if (head->len > RDMA_CONTROL_MAX_BUFFER - sizeof(*head)) { in qemu_rdma_exchange_get_response()
1659 error_setg(errp, "too long length: %d", head->len); in qemu_rdma_exchange_get_response()
1660 return -1; in qemu_rdma_exchange_get_response()
1662 if (sizeof(*head) + head->len != byte_len) { in qemu_rdma_exchange_get_response()
1664 head->len, byte_len); in qemu_rdma_exchange_get_response()
1665 return -1; in qemu_rdma_exchange_get_response()
1682 rdma->wr_data[idx].control_len = head->len; in qemu_rdma_move_header()
1683 rdma->wr_data[idx].control_curr = in qemu_rdma_move_header()
1684 rdma->wr_data[idx].control + sizeof(RDMAControlHeader); in qemu_rdma_move_header()
1688 * This is an 'atomic' high-level operation to deliver a single, unified
1689 * control-channel message.
1698 * instead piggy-backing on the acknowledgement.
1713 if (rdma->control_ready_expected) { in qemu_rdma_exchange_send()
1720 return -1; in qemu_rdma_exchange_send()
1730 return -1; in qemu_rdma_exchange_send()
1739 return -1; in qemu_rdma_exchange_send()
1748 return -1; in qemu_rdma_exchange_send()
1759 return -1; in qemu_rdma_exchange_send()
1763 trace_qemu_rdma_exchange_send_waiting(control_desc(resp->type)); in qemu_rdma_exchange_send()
1765 resp->type, RDMA_WRID_DATA, in qemu_rdma_exchange_send()
1769 return -1; in qemu_rdma_exchange_send()
1776 trace_qemu_rdma_exchange_send_received(control_desc(resp->type)); in qemu_rdma_exchange_send()
1779 rdma->control_ready_expected = 1; in qemu_rdma_exchange_send()
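A hedged usage sketch of the piggy-backed response (the parameter order and the REGISTER enum names are assumptions): a chunk-registration request whose reply arrives with the acknowledgement:

RDMARegister reg = { .current_index = index, .chunks = 1 };
RDMAControlHeader head = { .type = RDMA_CONTROL_REGISTER_REQUEST,
                           .len = sizeof(reg), .repeat = 1 };
RDMAControlHeader resp = { .type = RDMA_CONTROL_REGISTER_RESULT };
int reg_result_idx;

if (qemu_rdma_exchange_send(rdma, &head, (uint8_t *)&reg,
                            &resp, &reg_result_idx, NULL, errp) < 0) {
    return -1;
}
/* The reply payload now sits at rdma->wr_data[reg_result_idx].control_curr. */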
1785 * This is an 'atomic' high-level operation to receive a single, unified
1786 * control-channel message.
1804 return -1; in qemu_rdma_exchange_recv()
1814 return -1; in qemu_rdma_exchange_recv()
1824 return -1; in qemu_rdma_exchange_recv()
1833 * If we're using dynamic registration on the dest-side, we have to
1846 RDMALocalBlock *block = &(rdma->local_ram_blocks.block[current_index]); in qemu_rdma_write_one()
1856 sge.addr = (uintptr_t)(block->local_host_addr + in qemu_rdma_write_one()
1857 (current_addr - block->offset)); in qemu_rdma_write_one()
1860 chunk = ram_chunk_index(block->local_host_addr, in qemu_rdma_write_one()
1864 if (block->is_ram_block) { in qemu_rdma_write_one()
1868 chunks--; in qemu_rdma_write_one()
1871 chunks = block->length / (1UL << RDMA_REG_CHUNK_SHIFT); in qemu_rdma_write_one()
1873 if (chunks && ((block->length % (1UL << RDMA_REG_CHUNK_SHIFT)) == 0)) { in qemu_rdma_write_one()
1874 chunks--; in qemu_rdma_write_one()
1885 while (test_bit(chunk, block->transit_bitmap)) { in qemu_rdma_write_one()
1888 sge.addr, length, rdma->nb_sent, block->nb_chunks); in qemu_rdma_write_one()
1896 current_index, chunk, sge.addr, length, rdma->nb_sent); in qemu_rdma_write_one()
1897 return -1; in qemu_rdma_write_one()
1901 if (!rdma->pin_all || !block->is_ram_block) { in qemu_rdma_write_one()
1902 if (!block->remote_keys[chunk]) { in qemu_rdma_write_one()
1928 return -1; in qemu_rdma_write_one()
1950 if (block->is_ram_block) { in qemu_rdma_write_one()
1964 return -1; in qemu_rdma_write_one()
1972 return -1; in qemu_rdma_write_one()
1976 rdma->wr_data[reg_result_idx].control_curr; in qemu_rdma_write_one()
1980 trace_qemu_rdma_write_one_recvregres(block->remote_keys[chunk], in qemu_rdma_write_one()
1981 reg_result->rkey, chunk); in qemu_rdma_write_one()
1983 block->remote_keys[chunk] = reg_result->rkey; in qemu_rdma_write_one()
1984 block->remote_host_addr = reg_result->host_addr; in qemu_rdma_write_one()
1991 return -1; in qemu_rdma_write_one()
1995 send_wr.wr.rdma.rkey = block->remote_keys[chunk]; in qemu_rdma_write_one()
1997 send_wr.wr.rdma.rkey = block->remote_rkey; in qemu_rdma_write_one()
2003 return -1; in qemu_rdma_write_one()
2020 send_wr.wr.rdma.remote_addr = block->remote_host_addr + in qemu_rdma_write_one()
2021 (current_addr - block->offset); in qemu_rdma_write_one()
2028 * per the specification they are positive - no idea why. in qemu_rdma_write_one()
2030 ret = ibv_post_send(rdma->qp, &send_wr, &bad_wr); in qemu_rdma_write_one()
2038 return -1; in qemu_rdma_write_one()
2046 return -1; in qemu_rdma_write_one()
2049 set_bit(chunk, block->transit_bitmap); in qemu_rdma_write_one()
2062 rdma->total_writes++; in qemu_rdma_write_one()
2077 if (!rdma->current_length) { in qemu_rdma_write_flush()
2081 ret = qemu_rdma_write_one(rdma, rdma->current_index, rdma->current_addr, in qemu_rdma_write_flush()
2082 rdma->current_length, errp); in qemu_rdma_write_flush()
2085 return -1; in qemu_rdma_write_flush()
2089 rdma->nb_sent++; in qemu_rdma_write_flush()
2090 trace_qemu_rdma_write_flush(rdma->nb_sent); in qemu_rdma_write_flush()
2093 rdma->current_length = 0; in qemu_rdma_write_flush()
2094 rdma->current_addr = 0; in qemu_rdma_write_flush()
2106 if (rdma->current_index < 0) { in qemu_rdma_buffer_mergeable()
2110 if (rdma->current_chunk < 0) { in qemu_rdma_buffer_mergeable()
2114 block = &(rdma->local_ram_blocks.block[rdma->current_index]); in qemu_rdma_buffer_mergeable()
2115 host_addr = block->local_host_addr + (offset - block->offset); in qemu_rdma_buffer_mergeable()
2116 chunk_end = ram_chunk_end(block, rdma->current_chunk); in qemu_rdma_buffer_mergeable()
2118 if (rdma->current_length == 0) { in qemu_rdma_buffer_mergeable()
2125 if (offset != (rdma->current_addr + rdma->current_length)) { in qemu_rdma_buffer_mergeable()
2129 if (offset < block->offset) { in qemu_rdma_buffer_mergeable()
2133 if ((offset + len) > (block->offset + block->length)) { in qemu_rdma_buffer_mergeable()
2159 uint64_t index = rdma->current_index; in qemu_rdma_write()
2160 uint64_t chunk = rdma->current_chunk; in qemu_rdma_write()
2165 return -1; in qemu_rdma_write()
2167 rdma->current_length = 0; in qemu_rdma_write()
2168 rdma->current_addr = current_addr; in qemu_rdma_write()
2172 rdma->current_index = index; in qemu_rdma_write()
2173 rdma->current_chunk = chunk; in qemu_rdma_write()
2177 rdma->current_length += len; in qemu_rdma_write()
2180 if (rdma->current_length >= RDMA_MERGE_MAX) { in qemu_rdma_write()
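A hedged sketch of the coalescing behaviour (the exact qemu_rdma_write() signature and the RDMA_MERGE_MAX value are assumptions): adjacent writes accumulate into one pending transfer that is posted only on flush or when the merge limit is reached:

/* Two adjacent 4 KiB pages become a single 8 KiB RDMA write: */
qemu_rdma_write(rdma, block_offset, 0,    4096, errp);   /* starts a merge */
qemu_rdma_write(rdma, block_offset, 4096, 4096, errp);   /* extends it */
qemu_rdma_write_flush(rdma, errp);                       /* posts one work request */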
2191 if (rdma->cm_id && rdma->connected) { in qemu_rdma_cleanup()
2192 if ((rdma->errored || in qemu_rdma_cleanup()
2193 migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) && in qemu_rdma_cleanup()
2194 !rdma->received_error) { in qemu_rdma_cleanup()
2205 rdma_disconnect(rdma->cm_id); in qemu_rdma_cleanup()
2207 rdma->connected = false; in qemu_rdma_cleanup()
2210 if (rdma->channel) { in qemu_rdma_cleanup()
2211 qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); in qemu_rdma_cleanup()
2213 g_free(rdma->dest_blocks); in qemu_rdma_cleanup()
2214 rdma->dest_blocks = NULL; in qemu_rdma_cleanup()
2217 if (rdma->wr_data[i].control_mr) { in qemu_rdma_cleanup()
2218 rdma->total_registrations--; in qemu_rdma_cleanup()
2219 ibv_dereg_mr(rdma->wr_data[i].control_mr); in qemu_rdma_cleanup()
2221 rdma->wr_data[i].control_mr = NULL; in qemu_rdma_cleanup()
2224 if (rdma->local_ram_blocks.block) { in qemu_rdma_cleanup()
2225 while (rdma->local_ram_blocks.nb_blocks) { in qemu_rdma_cleanup()
2226 rdma_delete_block(rdma, &rdma->local_ram_blocks.block[0]); in qemu_rdma_cleanup()
2230 if (rdma->qp) { in qemu_rdma_cleanup()
2231 rdma_destroy_qp(rdma->cm_id); in qemu_rdma_cleanup()
2232 rdma->qp = NULL; in qemu_rdma_cleanup()
2234 if (rdma->recv_cq) { in qemu_rdma_cleanup()
2235 ibv_destroy_cq(rdma->recv_cq); in qemu_rdma_cleanup()
2236 rdma->recv_cq = NULL; in qemu_rdma_cleanup()
2238 if (rdma->send_cq) { in qemu_rdma_cleanup()
2239 ibv_destroy_cq(rdma->send_cq); in qemu_rdma_cleanup()
2240 rdma->send_cq = NULL; in qemu_rdma_cleanup()
2242 if (rdma->recv_comp_channel) { in qemu_rdma_cleanup()
2243 ibv_destroy_comp_channel(rdma->recv_comp_channel); in qemu_rdma_cleanup()
2244 rdma->recv_comp_channel = NULL; in qemu_rdma_cleanup()
2246 if (rdma->send_comp_channel) { in qemu_rdma_cleanup()
2247 ibv_destroy_comp_channel(rdma->send_comp_channel); in qemu_rdma_cleanup()
2248 rdma->send_comp_channel = NULL; in qemu_rdma_cleanup()
2250 if (rdma->pd) { in qemu_rdma_cleanup()
2251 ibv_dealloc_pd(rdma->pd); in qemu_rdma_cleanup()
2252 rdma->pd = NULL; in qemu_rdma_cleanup()
2254 if (rdma->cm_id) { in qemu_rdma_cleanup()
2255 rdma_destroy_id(rdma->cm_id); in qemu_rdma_cleanup()
2256 rdma->cm_id = NULL; in qemu_rdma_cleanup()
2260 if (rdma->listen_id) { in qemu_rdma_cleanup()
2261 if (!rdma->is_return_path) { in qemu_rdma_cleanup()
2262 rdma_destroy_id(rdma->listen_id); in qemu_rdma_cleanup()
2264 rdma->listen_id = NULL; in qemu_rdma_cleanup()
2266 if (rdma->channel) { in qemu_rdma_cleanup()
2267 if (!rdma->is_return_path) { in qemu_rdma_cleanup()
2268 rdma_destroy_event_channel(rdma->channel); in qemu_rdma_cleanup()
2270 rdma->channel = NULL; in qemu_rdma_cleanup()
2274 if (rdma->channel) { in qemu_rdma_cleanup()
2275 rdma_destroy_event_channel(rdma->channel); in qemu_rdma_cleanup()
2276 rdma->channel = NULL; in qemu_rdma_cleanup()
2278 g_free(rdma->host); in qemu_rdma_cleanup()
2279 rdma->host = NULL; in qemu_rdma_cleanup()
2291 rdma->pin_all = pin_all; in qemu_rdma_source_init()
2312 rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal); in qemu_rdma_source_init()
2313 for (int i = 0; i < rdma->local_ram_blocks.nb_blocks; i++) { in qemu_rdma_source_init()
2314 g_hash_table_insert(rdma->blockmap, in qemu_rdma_source_init()
2315 (void *)(uintptr_t)rdma->local_ram_blocks.block[i].offset, in qemu_rdma_source_init()
2316 &rdma->local_ram_blocks.block[i]); in qemu_rdma_source_init()
2332 return -1; in qemu_rdma_source_init()
2341 .fd = rdma->channel->fd, in qemu_get_cm_event_timeout()
2352 return -1; in qemu_get_cm_event_timeout()
2356 return -1; in qemu_get_cm_event_timeout()
2358 if (rdma_get_cm_event(rdma->channel, cm_event) < 0) { in qemu_get_cm_event_timeout()
2360 return -1; in qemu_get_cm_event_timeout()
2366 return -1; in qemu_get_cm_event_timeout()
2389 if (rdma->pin_all) { in qemu_rdma_connect()
2401 ret = rdma_connect(rdma->cm_id, &conn_param); in qemu_rdma_connect()
2411 ret = rdma_get_cm_event(rdma->channel, &cm_event); in qemu_rdma_connect()
2421 if (cm_event->event != RDMA_CM_EVENT_ESTABLISHED) { in qemu_rdma_connect()
2426 rdma->connected = true; in qemu_rdma_connect()
2428 memcpy(&cap, cm_event->param.conn.private_data, sizeof(cap)); in qemu_rdma_connect()
2433 * and disable them otherwise. in qemu_rdma_connect()
2435 if (rdma->pin_all && !(cap.flags & RDMA_CAPABILITY_PIN_ALL)) { in qemu_rdma_connect()
2438 rdma->pin_all = false; in qemu_rdma_connect()
2441 trace_qemu_rdma_connect_pin_all_outcome(rdma->pin_all); in qemu_rdma_connect()
2445 rdma->control_ready_expected = 1; in qemu_rdma_connect()
2446 rdma->nb_sent = 0; in qemu_rdma_connect()
2451 return -1; in qemu_rdma_connect()
2464 rdma->wr_data[i].control_len = 0; in qemu_rdma_dest_init()
2465 rdma->wr_data[i].control_curr = NULL; in qemu_rdma_dest_init()
2468 if (!rdma->host || !rdma->host[0]) { in qemu_rdma_dest_init()
2470 rdma->errored = true; in qemu_rdma_dest_init()
2471 return -1; in qemu_rdma_dest_init()
2474 rdma->channel = rdma_create_event_channel(); in qemu_rdma_dest_init()
2475 if (!rdma->channel) { in qemu_rdma_dest_init()
2477 rdma->errored = true; in qemu_rdma_dest_init()
2478 return -1; in qemu_rdma_dest_init()
2482 ret = rdma_create_id(rdma->channel, &listen_id, NULL, RDMA_PS_TCP); in qemu_rdma_dest_init()
2488 snprintf(port_str, 16, "%d", rdma->port); in qemu_rdma_dest_init()
2491 ret = rdma_getaddrinfo(rdma->host, port_str, NULL, &res); in qemu_rdma_dest_init()
2494 rdma->host); in qemu_rdma_dest_init()
2506 for (e = res; e != NULL; e = e->ai_next) { in qemu_rdma_dest_init()
2508 inet_ntop(e->ai_family, in qemu_rdma_dest_init()
2509 &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip); in qemu_rdma_dest_init()
2510 trace_qemu_rdma_dest_init_trying(rdma->host, ip); in qemu_rdma_dest_init()
2511 ret = rdma_bind_addr(listen_id, e->ai_dst_addr); in qemu_rdma_dest_init()
2524 rdma->listen_id = listen_id; in qemu_rdma_dest_init()
2531 rdma_destroy_event_channel(rdma->channel); in qemu_rdma_dest_init()
2532 rdma->channel = NULL; in qemu_rdma_dest_init()
2533 rdma->errored = true; in qemu_rdma_dest_init()
2534 return -1; in qemu_rdma_dest_init()
2542 rdma_return_path->wr_data[i].control_len = 0; in qemu_rdma_return_path_dest_init()
2543 rdma_return_path->wr_data[i].control_curr = NULL; in qemu_rdma_return_path_dest_init()
2547 rdma_return_path->channel = rdma->channel; in qemu_rdma_return_path_dest_init()
2548 rdma_return_path->listen_id = rdma->listen_id; in qemu_rdma_return_path_dest_init()
2550 rdma->return_path = rdma_return_path; in qemu_rdma_return_path_dest_init()
2551 rdma_return_path->return_path = rdma; in qemu_rdma_return_path_dest_init()
2552 rdma_return_path->is_return_path = true; in qemu_rdma_return_path_dest_init()
2560 rdma->current_index = -1; in qemu_rdma_data_init()
2561 rdma->current_chunk = -1; in qemu_rdma_data_init()
2563 rdma->host = g_strdup(saddr->host); in qemu_rdma_data_init()
2564 rdma->port = atoi(saddr->port); in qemu_rdma_data_init()
2588 rdma = qatomic_rcu_read(&rioc->rdmaout); in qio_channel_rdma_writev()
2592 return -1; in qio_channel_rdma_writev()
2595 if (rdma->errored) { in qio_channel_rdma_writev()
2598 return -1; in qio_channel_rdma_writev()
2607 rdma->errored = true; in qio_channel_rdma_writev()
2608 return -1; in qio_channel_rdma_writev()
2618 remaining -= len; in qio_channel_rdma_writev()
2627 rdma->errored = true; in qio_channel_rdma_writev()
2628 return -1; in qio_channel_rdma_writev()
2644 if (rdma->wr_data[idx].control_len) { in qemu_rdma_fill()
2645 trace_qemu_rdma_fill(rdma->wr_data[idx].control_len, size); in qemu_rdma_fill()
2647 len = MIN(size, rdma->wr_data[idx].control_len); in qemu_rdma_fill()
2648 memcpy(buf, rdma->wr_data[idx].control_curr, len); in qemu_rdma_fill()
2649 rdma->wr_data[idx].control_curr += len; in qemu_rdma_fill()
2650 rdma->wr_data[idx].control_len -= len; in qemu_rdma_fill()
2677 rdma = qatomic_rcu_read(&rioc->rdmain); in qio_channel_rdma_readv()
2681 return -1; in qio_channel_rdma_readv()
2684 if (rdma->errored) { in qio_channel_rdma_readv()
2687 return -1; in qio_channel_rdma_readv()
2701 want -= len; in qio_channel_rdma_readv()
2721 rdma->errored = true; in qio_channel_rdma_readv()
2722 return -1; in qio_channel_rdma_readv()
2730 want -= len; in qio_channel_rdma_readv()
2753 return -1; in qemu_rdma_drain_cq()
2756 while (rdma->nb_sent) { in qemu_rdma_drain_cq()
2759 return -1; in qemu_rdma_drain_cq()
2774 /* XXX we should make readv/writev actually honour this :-) */ in qio_channel_rdma_set_blocking()
2775 rioc->blocking = blocking; in qio_channel_rdma_set_blocking()
2794 *timeout = -1; in qio_channel_rdma_source_prepare()
2797 if (rsource->condition == G_IO_IN) { in qio_channel_rdma_source_prepare()
2798 rdma = qatomic_rcu_read(&rsource->rioc->rdmain); in qio_channel_rdma_source_prepare()
2800 rdma = qatomic_rcu_read(&rsource->rioc->rdmaout); in qio_channel_rdma_source_prepare()
2808 if (rdma->wr_data[0].control_len) { in qio_channel_rdma_source_prepare()
2813 return cond & rsource->condition; in qio_channel_rdma_source_prepare()
2824 if (rsource->condition == G_IO_IN) { in qio_channel_rdma_source_check()
2825 rdma = qatomic_rcu_read(&rsource->rioc->rdmain); in qio_channel_rdma_source_check()
2827 rdma = qatomic_rcu_read(&rsource->rioc->rdmaout); in qio_channel_rdma_source_check()
2835 if (rdma->wr_data[0].control_len) { in qio_channel_rdma_source_check()
2840 return cond & rsource->condition; in qio_channel_rdma_source_check()
2854 if (rsource->condition == G_IO_IN) { in qio_channel_rdma_source_dispatch()
2855 rdma = qatomic_rcu_read(&rsource->rioc->rdmain); in qio_channel_rdma_source_dispatch()
2857 rdma = qatomic_rcu_read(&rsource->rioc->rdmaout); in qio_channel_rdma_source_dispatch()
2865 if (rdma->wr_data[0].control_len) { in qio_channel_rdma_source_dispatch()
2870 return (*func)(QIO_CHANNEL(rsource->rioc), in qio_channel_rdma_source_dispatch()
2871 (cond & rsource->condition), in qio_channel_rdma_source_dispatch()
2880 object_unref(OBJECT(ssource->rioc)); in qio_channel_rdma_source_finalize()
2901 ssource->rioc = rioc; in qio_channel_rdma_create_watch()
2904 ssource->condition = condition; in qio_channel_rdma_create_watch()
2918 aio_set_fd_handler(read_ctx, rioc->rdmain->recv_comp_channel->fd, in qio_channel_rdma_set_aio_fd_handler()
2920 aio_set_fd_handler(read_ctx, rioc->rdmain->send_comp_channel->fd, in qio_channel_rdma_set_aio_fd_handler()
2923 aio_set_fd_handler(write_ctx, rioc->rdmaout->recv_comp_channel->fd, in qio_channel_rdma_set_aio_fd_handler()
2925 aio_set_fd_handler(write_ctx, rioc->rdmaout->send_comp_channel->fd, in qio_channel_rdma_set_aio_fd_handler()
2939 if (rcu->rdmain) { in qio_channel_rdma_close_rcu()
2940 qemu_rdma_cleanup(rcu->rdmain); in qio_channel_rdma_close_rcu()
2943 if (rcu->rdmaout) { in qio_channel_rdma_close_rcu()
2944 qemu_rdma_cleanup(rcu->rdmaout); in qio_channel_rdma_close_rcu()
2947 g_free(rcu->rdmain); in qio_channel_rdma_close_rcu()
2948 g_free(rcu->rdmaout); in qio_channel_rdma_close_rcu()
2961 rdmain = rioc->rdmain; in qio_channel_rdma_close()
2963 qatomic_rcu_set(&rioc->rdmain, NULL); in qio_channel_rdma_close()
2966 rdmaout = rioc->rdmaout; in qio_channel_rdma_close()
2968 qatomic_rcu_set(&rioc->rdmaout, NULL); in qio_channel_rdma_close()
2971 rcu->rdmain = rdmain; in qio_channel_rdma_close()
2972 rcu->rdmaout = rdmaout; in qio_channel_rdma_close()
2988 rdmain = qatomic_rcu_read(&rioc->rdmain); in qio_channel_rdma_shutdown()
2989 rdmaout = qatomic_rcu_read(&rioc->rdmaout); in qio_channel_rdma_shutdown()
2994 rdmain->errored = true; in qio_channel_rdma_shutdown()
2999 rdmaout->errored = true; in qio_channel_rdma_shutdown()
3005 rdmain->errored = true; in qio_channel_rdma_shutdown()
3008 rdmaout->errored = true; in qio_channel_rdma_shutdown()
3030 * @pages_sent : User-specified pointer to indicate how many pages were
3043 rdma = qatomic_rcu_read(&rioc->rdmaout); in qemu_rdma_save_page()
3046 return -1; in qemu_rdma_save_page()
3050 return -1; in qemu_rdma_save_page()
3075 ret = qemu_rdma_poll(rdma, rdma->recv_cq, &wr_id_in, NULL); in qemu_rdma_save_page()
3091 ret = qemu_rdma_poll(rdma, rdma->send_cq, &wr_id_in, NULL); in qemu_rdma_save_page()
3108 rdma->errored = true; in qemu_rdma_save_page()
3109 return -1; in qemu_rdma_save_page()
3135 if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { in rdma_cm_poll_handler()
3140 if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || in rdma_cm_poll_handler()
3141 cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { in rdma_cm_poll_handler()
3142 if (!rdma->errored && in rdma_cm_poll_handler()
3143 migration_incoming_get_current()->state != in rdma_cm_poll_handler()
3145 error_report("receive cm event, cm event is %d", cm_event->event); in rdma_cm_poll_handler()
3146 rdma->errored = true; in rdma_cm_poll_handler()
3147 if (rdma->return_path) { in rdma_cm_poll_handler()
3148 rdma->return_path->errored = true; in rdma_cm_poll_handler()
3152 if (mis->loadvm_co) { in rdma_cm_poll_handler()
3153 qemu_coroutine_enter(mis->loadvm_co); in rdma_cm_poll_handler()
3175 ret = rdma_get_cm_event(rdma->channel, &cm_event); in qemu_rdma_accept()
3180 if (cm_event->event != RDMA_CM_EVENT_CONNECT_REQUEST) { in qemu_rdma_accept()
3185 isock->host = g_strdup(rdma->host); in qemu_rdma_accept()
3186 isock->port = g_strdup_printf("%d", rdma->port); in qemu_rdma_accept()
3193 && !rdma->is_return_path) { in qemu_rdma_accept()
3203 memcpy(&cap, cm_event->param.conn.private_data, sizeof(cap)); in qemu_rdma_accept()
3224 rdma->pin_all = true; in qemu_rdma_accept()
3227 rdma->cm_id = cm_event->id; in qemu_rdma_accept()
3228 verbs = cm_event->id->verbs; in qemu_rdma_accept()
3232 trace_qemu_rdma_accept_pin_state(rdma->pin_all); in qemu_rdma_accept()
3238 if (!rdma->verbs) { in qemu_rdma_accept()
3239 rdma->verbs = verbs; in qemu_rdma_accept()
3240 } else if (rdma->verbs != verbs) { in qemu_rdma_accept()
3241 error_report("ibv context not matching %p, %p!", rdma->verbs, in qemu_rdma_accept()
3272 && !rdma->is_return_path) { in qemu_rdma_accept()
3273 qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, in qemu_rdma_accept()
3275 (void *)(intptr_t)rdma->return_path); in qemu_rdma_accept()
3277 qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, in qemu_rdma_accept()
3281 ret = rdma_accept(rdma->cm_id, &conn_param); in qemu_rdma_accept()
3287 ret = rdma_get_cm_event(rdma->channel, &cm_event); in qemu_rdma_accept()
3293 if (cm_event->event != RDMA_CM_EVENT_ESTABLISHED) { in qemu_rdma_accept()
3300 rdma->connected = true; in qemu_rdma_accept()
3308 qemu_rdma_dump_gid("dest_connect", rdma->cm_id); in qemu_rdma_accept()
3313 rdma->errored = true; in qemu_rdma_accept()
3316 return -1; in qemu_rdma_accept()
3321 unsigned int a_index = ((const RDMALocalBlock *)a)->src_index; in dest_ram_sort_func()
3322 unsigned int b_index = ((const RDMALocalBlock *)b)->src_index; in dest_ram_sort_func()
3324 return (a_index < b_index) ? -1 : (a_index != b_index); in dest_ram_sort_func()
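Spelled out, the compact comparator returns the three values qsort() expects:

/* a_index <  b_index -> -1
 * a_index == b_index ->  0  (a_index != b_index is false)
 * a_index >  b_index ->  1  (a_index != b_index is true)  */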
3368 rdma = qatomic_rcu_read(&rioc->rdmain); in rdma_registration_handle()
3371 return -1; in rdma_registration_handle()
3375 return -1; in rdma_registration_handle()
3378 local = &rdma->local_ram_blocks; in rdma_registration_handle()
3397 comp = (RDMACompress *) rdma->wr_data[idx].control_curr; in rdma_registration_handle()
3400 trace_rdma_registration_handle_compress(comp->length, in rdma_registration_handle()
3401 comp->block_idx, in rdma_registration_handle()
3402 comp->offset); in rdma_registration_handle()
3403 if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) { in rdma_registration_handle()
3405 (unsigned int)comp->block_idx, in rdma_registration_handle()
3406 rdma->local_ram_blocks.nb_blocks); in rdma_registration_handle()
3409 block = &(rdma->local_ram_blocks.block[comp->block_idx]); in rdma_registration_handle()
3411 host_addr = block->local_host_addr + in rdma_registration_handle()
3412 (comp->offset - block->offset); in rdma_registration_handle()
3413 if (comp->value) { in rdma_registration_handle()
3414 error_report("rdma: Zero page with non-zero (%d) value", in rdma_registration_handle()
3415 comp->value); in rdma_registration_handle()
3418 ram_handle_zero(host_addr, comp->length); in rdma_registration_handle()
3432 qsort(rdma->local_ram_blocks.block, in rdma_registration_handle()
3433 rdma->local_ram_blocks.nb_blocks, in rdma_registration_handle()
3435 for (int i = 0; i < local->nb_blocks; i++) { in rdma_registration_handle()
3436 local->block[i].index = i; in rdma_registration_handle()
3439 if (rdma->pin_all) { in rdma_registration_handle()
3453 for (int i = 0; i < local->nb_blocks; i++) { in rdma_registration_handle()
3454 rdma->dest_blocks[i].remote_host_addr = in rdma_registration_handle()
3455 (uintptr_t)(local->block[i].local_host_addr); in rdma_registration_handle()
3457 if (rdma->pin_all) { in rdma_registration_handle()
3458 rdma->dest_blocks[i].remote_rkey = local->block[i].mr->rkey; in rdma_registration_handle()
3461 rdma->dest_blocks[i].offset = local->block[i].offset; in rdma_registration_handle()
3462 rdma->dest_blocks[i].length = local->block[i].length; in rdma_registration_handle()
3464 dest_block_to_network(&rdma->dest_blocks[i]); in rdma_registration_handle()
3466 local->block[i].block_name, in rdma_registration_handle()
3467 local->block[i].offset, in rdma_registration_handle()
3468 local->block[i].length, in rdma_registration_handle()
3469 local->block[i].local_host_addr, in rdma_registration_handle()
3470 local->block[i].src_index); in rdma_registration_handle()
3473 blocks.len = rdma->local_ram_blocks.nb_blocks in rdma_registration_handle()
3478 (uint8_t *) rdma->dest_blocks, &blocks, in rdma_registration_handle()
3491 registers = (RDMARegister *) rdma->wr_data[idx].control_curr; in rdma_registration_handle()
3503 reg->current_index, reg->key.current_addr, reg->chunks); in rdma_registration_handle()
3505 if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) { in rdma_registration_handle()
3507 (unsigned int)reg->current_index, in rdma_registration_handle()
3508 rdma->local_ram_blocks.nb_blocks); in rdma_registration_handle()
3511 block = &(rdma->local_ram_blocks.block[reg->current_index]); in rdma_registration_handle()
3512 if (block->is_ram_block) { in rdma_registration_handle()
3513 if (block->offset > reg->key.current_addr) { in rdma_registration_handle()
3516 block->block_name, block->offset, in rdma_registration_handle()
3517 reg->key.current_addr); in rdma_registration_handle()
3520 host_addr = (block->local_host_addr + in rdma_registration_handle()
3521 (reg->key.current_addr - block->offset)); in rdma_registration_handle()
3522 chunk = ram_chunk_index(block->local_host_addr, in rdma_registration_handle()
3525 chunk = reg->key.chunk; in rdma_registration_handle()
3526 host_addr = block->local_host_addr + in rdma_registration_handle()
3527 (reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT)); in rdma_registration_handle()
3529 if (host_addr < (void *)block->local_host_addr) { in rdma_registration_handle()
3532 block->block_name, reg->key.chunk); in rdma_registration_handle()
3537 chunk_end = ram_chunk_end(block, chunk + reg->chunks); in rdma_registration_handle()
3538 /* avoid "-Waddress-of-packed-member" warning */ in rdma_registration_handle()
3546 reg_result->rkey = tmp_rkey; in rdma_registration_handle()
3548 reg_result->host_addr = (uintptr_t)block->local_host_addr; in rdma_registration_handle()
3550 trace_rdma_registration_handle_register_rkey(reg_result->rkey); in rdma_registration_handle()
3566 registers = (RDMARegister *) rdma->wr_data[idx].control_curr; in rdma_registration_handle()
3573 reg->current_index, reg->key.chunk); in rdma_registration_handle()
3575 block = &(rdma->local_ram_blocks.block[reg->current_index]); in rdma_registration_handle()
3577 ret = ibv_dereg_mr(block->pmr[reg->key.chunk]); in rdma_registration_handle()
3578 block->pmr[reg->key.chunk] = NULL; in rdma_registration_handle()
3586 rdma->total_registrations--; in rdma_registration_handle()
3588 trace_rdma_registration_handle_unregister_success(reg->key.chunk); in rdma_registration_handle()
3608 rdma->errored = true; in rdma_registration_handle()
3609 return -1; in rdma_registration_handle()
3621 int found = -1; in rdma_block_notification_handle()
3629 RDMAContext *rdma = qatomic_rcu_read(&rioc->rdmain); in rdma_block_notification_handle()
3632 return -1; in rdma_block_notification_handle()
3636 for (curr = 0; curr < rdma->local_ram_blocks.nb_blocks; curr++) { in rdma_block_notification_handle()
3637 if (!strcmp(rdma->local_ram_blocks.block[curr].block_name, name)) { in rdma_block_notification_handle()
3643 if (found == -1) { in rdma_block_notification_handle()
3645 return -1; in rdma_block_notification_handle()
3648 rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index; in rdma_block_notification_handle()
3649 trace_rdma_block_notification_handle(name, rdma->next_src_index); in rdma_block_notification_handle()
3650 rdma->next_src_index++; in rdma_block_notification_handle()
3663 RDMAContext *rdma = qatomic_rcu_read(&rioc->rdmaout); in rdma_registration_start()
3665 return -1; in rdma_registration_start()
3669 return -1; in rdma_registration_start()
3695 rdma = qatomic_rcu_read(&rioc->rdmaout); in rdma_registration_stop()
3697 return -1; in rdma_registration_stop()
3701 return -1; in rdma_registration_stop()
3713 RDMALocalBlocks *local = &rdma->local_ram_blocks; in rdma_registration_stop()
3728 &reg_result_idx, rdma->pin_all ? in rdma_registration_stop()
3733 return -1; in rdma_registration_stop()
3741 * (dynamic chunk registration disabled - pin everything with one rkey.) in rdma_registration_stop()
3743 * (dynamic chunk registration enabled - pin individual chunks.) in rdma_registration_stop()
3750 if (local->nb_blocks != nb_dest_blocks) { in rdma_registration_stop()
3752 local->nb_blocks, nb_dest_blocks); in rdma_registration_stop()
3755 rdma->errored = true; in rdma_registration_stop()
3756 return -1; in rdma_registration_stop()
3760 memcpy(rdma->dest_blocks, in rdma_registration_stop()
3761 rdma->wr_data[reg_result_idx].control_curr, resp.len); in rdma_registration_stop()
3763 network_to_dest_block(&rdma->dest_blocks[i]); in rdma_registration_stop()
3766 if (rdma->dest_blocks[i].length != local->block[i].length) { in rdma_registration_stop()
3769 local->block[i].block_name, i, in rdma_registration_stop()
3770 local->block[i].length, in rdma_registration_stop()
3771 rdma->dest_blocks[i].length); in rdma_registration_stop()
3772 rdma->errored = true; in rdma_registration_stop()
3773 return -1; in rdma_registration_stop()
3775 local->block[i].remote_host_addr = in rdma_registration_stop()
3776 rdma->dest_blocks[i].remote_host_addr; in rdma_registration_stop()
3777 local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey; in rdma_registration_stop()
3793 rdma->errored = true; in rdma_registration_stop()
3794 return -1; in rdma_registration_stop()
3800 if (rioc->rdmain) { in qio_channel_rdma_finalize()
3801 qemu_rdma_cleanup(rioc->rdmain); in qio_channel_rdma_finalize()
3802 g_free(rioc->rdmain); in qio_channel_rdma_finalize()
3803 rioc->rdmain = NULL; in qio_channel_rdma_finalize()
3805 if (rioc->rdmaout) { in qio_channel_rdma_finalize()
3806 qemu_rdma_cleanup(rioc->rdmaout); in qio_channel_rdma_finalize()
3807 g_free(rioc->rdmaout); in qio_channel_rdma_finalize()
3808 rioc->rdmaout = NULL; in qio_channel_rdma_finalize()
3817 ioc_klass->io_writev = qio_channel_rdma_writev; in qio_channel_rdma_class_init()
3818 ioc_klass->io_readv = qio_channel_rdma_readv; in qio_channel_rdma_class_init()
3819 ioc_klass->io_set_blocking = qio_channel_rdma_set_blocking; in qio_channel_rdma_class_init()
3820 ioc_klass->io_close = qio_channel_rdma_close; in qio_channel_rdma_class_init()
3821 ioc_klass->io_create_watch = qio_channel_rdma_create_watch; in qio_channel_rdma_class_init()
3822 ioc_klass->io_set_aio_fd_handler = qio_channel_rdma_set_aio_fd_handler; in qio_channel_rdma_class_init()
3823 ioc_klass->io_shutdown = qio_channel_rdma_shutdown; in qio_channel_rdma_class_init()
3845 rioc->file = qemu_file_new_input(QIO_CHANNEL(rioc)); in rdma_new_input()
3846 rioc->rdmain = rdma; in rdma_new_input()
3847 rioc->rdmaout = rdma->return_path; in rdma_new_input()
3849 return rioc->file; in rdma_new_input()
3856 rioc->file = qemu_file_new_output(QIO_CHANNEL(rioc)); in rdma_new_output()
3857 rioc->rdmaout = rdma; in rdma_new_output()
3858 rioc->rdmain = rdma->return_path; in rdma_new_output()
3860 return rioc->file; in rdma_new_output()
3876 if (rdma->is_return_path) { in rdma_accept_incoming_migration()
3887 rdma->migration_started_on_destination = 1; in rdma_accept_incoming_migration()
3902 error_setg(errp, "RDMA: cannot disable RAM discard"); in rdma_start_incoming_migration()
3918 ret = rdma_listen(rdma->listen_id, 5); in rdma_start_incoming_migration()
3926 s->rdma_migration = true; in rdma_start_incoming_migration()
3927 qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, in rdma_start_incoming_migration()
3935 g_free(rdma->host); in rdma_start_incoming_migration()
3950 error_setg(errp, "RDMA: cannot disable RAM discard"); in rdma_start_outgoing_migration()
3993 rdma->return_path = rdma_return_path; in rdma_start_outgoing_migration()
3994 rdma_return_path->return_path = rdma; in rdma_start_outgoing_migration()
3995 rdma_return_path->is_return_path = true; in rdma_start_outgoing_migration()
4000 s->to_dst_file = rdma_new_output(rdma); in rdma_start_outgoing_migration()
4001 s->rdma_migration = true; in rdma_start_outgoing_migration()