/*
 * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "vmxnet_tx_pkt.h"
#include "net/eth.h"
#include "qemu-common.h"
#include "qemu/iov.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "net/net.h"
#include "exec/cpu-common.h"

/* Fixed slots of the vec[] scatter-gather list; payload fragments follow */
enum {
    VMXNET_TX_PKT_VHDR_FRAG = 0,
    VMXNET_TX_PKT_L2HDR_FRAG,
    VMXNET_TX_PKT_L3HDR_FRAG,
    VMXNET_TX_PKT_PL_START_FRAG
};

/* TX packet private context */
struct VmxnetTxPkt {
    struct virtio_net_hdr virt_hdr;
    bool has_virt_hdr;

    struct iovec *raw;
    uint32_t raw_frags;
    uint32_t max_raw_frags;

    struct iovec *vec;

    uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];

    uint32_t payload_len;

    uint32_t payload_frags;
    uint32_t max_payload_frags;

    uint16_t hdr_len;
    eth_pkt_types_e packet_type;
    uint8_t l4proto;
};

void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags,
                        bool has_virt_hdr)
{
    struct VmxnetTxPkt *p = g_malloc0(sizeof *p);

    p->vec = g_malloc((sizeof *p->vec) *
                      (max_frags + VMXNET_TX_PKT_PL_START_FRAG));

    p->raw = g_malloc((sizeof *p->raw) * max_frags);

    p->max_payload_frags = max_frags;
    p->max_raw_frags = max_frags;
    p->has_virt_hdr = has_virt_hdr;
    p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
    p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_len =
        p->has_virt_hdr ? sizeof p->virt_hdr : 0;
    p->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
    p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL;
    p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len = 0;

    *pkt = p;
}

void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt)
{
    if (pkt) {
        g_free(pkt->vec);
        g_free(pkt->raw);
        g_free(pkt);
    }
}

void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t ph_raw_csum;
    uint8_t gso_type;
    struct ip_header *ip_hdr;

    assert(pkt);

    gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type &&
        VIRTIO_NET_HDR_GSO_UDP != gso_type) {
        return;
    }

    ip_hdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;

    if (pkt->payload_len + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
                                 pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len);

    /* Calculate IP header checksum */
    ip_hdr->ip_sum = 0;
    csum = net_raw_checksum((uint8_t *)ip_hdr,
                            pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len);
    ip_hdr->ip_sum = cpu_to_be16(csum);

    /* Calculate IP pseudo header checksum and store it at csum_offset */
    ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len);
    csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum));
    iov_from_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}

static void vmxnet_tx_pkt_calculate_hdr_len(struct VmxnetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len +
                   pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len;
}

static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt)
{
    struct iovec *l2_hdr, *l3_hdr;
    size_t bytes_read;
    size_t full_ip6hdr_len;
    uint16_t l3_proto;

    assert(pkt);

    l2_hdr = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG];
    l3_hdr = &pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG];

    bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
                            ETH_MAX_L2_HDR_LEN);
    if (bytes_read < ETH_MAX_L2_HDR_LEN) {
        l2_hdr->iov_len = 0;
        return false;
    } else {
        l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
    }

    l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len);

    switch (l3_proto) {
    case ETH_P_IP:
        l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN);

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, sizeof(struct ip_header));

        if (bytes_read < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);
        pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p;

        /* copy optional IPv4 header data */
        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
                                l2_hdr->iov_len + sizeof(struct ip_header),
                                l3_hdr->iov_base + sizeof(struct ip_header),
                                l3_hdr->iov_len - sizeof(struct ip_header));
        if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }
        break;

    case ETH_P_IPV6:
        if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                &pkt->l4proto, &full_ip6hdr_len)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_base = g_malloc(full_ip6hdr_len);

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, full_ip6hdr_len);

        if (bytes_read < full_ip6hdr_len) {
            l3_hdr->iov_len = 0;
            return false;
        } else {
            l3_hdr->iov_len = full_ip6hdr_len;
        }
        break;

    default:
        l3_hdr->iov_len = 0;
        break;
    }

    vmxnet_tx_pkt_calculate_hdr_len(pkt);
    pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
    return true;
}
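
/*
 * Map the payload portion of the raw guest fragments into the vec[]
 * scatter-gather list, skipping the hdr_len bytes already pulled out into
 * the L2/L3 header fragments by vmxnet_tx_pkt_parse_headers().
 */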
static bool vmxnet_tx_pkt_rebuild_payload(struct VmxnetTxPkt *pkt)
{
    size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;

    pkt->payload_frags = iov_copy(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, payload_len);

    if (pkt->payload_frags != (uint32_t) -1) {
        pkt->payload_len = payload_len;
        return true;
    } else {
        return false;
    }
}

bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt)
{
    return vmxnet_tx_pkt_parse_headers(pkt) &&
           vmxnet_tx_pkt_rebuild_payload(pkt);
}

struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt)
{
    assert(pkt);
    return &pkt->virt_hdr;
}

static uint8_t vmxnet_tx_pkt_get_gso_type(struct VmxnetTxPkt *pkt,
                                          bool tso_enable)
{
    uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
    uint16_t l3_proto;

    l3_proto = eth_get_l3_proto(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base,
                                pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len);

    if (!tso_enable) {
        goto func_exit;
    }

    rc = eth_get_gso_type(l3_proto, pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base,
                          pkt->l4proto);

func_exit:
    return rc;
}
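
/*
 * Fill in the virtio-net header for the parsed packet: GSO type, header
 * length and segment size when TSO is requested, and checksum start/offset
 * when checksum offload is requested.
 */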
void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable,
                                 bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;
    assert(pkt);

    /* Checksumming has to be enabled if TSO is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = vmxnet_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        iov_to_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                   0, &l4hdr, sizeof(l4hdr));
        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
        break;

    default:
        assert(false);
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            break;
        }
    }
}

void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan)
{
    bool is_new;
    assert(pkt);

    eth_setup_vlan_headers(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base,
                           vlan, &is_new);

    /* update L2 header length if a VLAN tag was actually inserted */
    if (is_new) {
        pkt->hdr_len += sizeof(struct vlan_header);
        pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len +=
            sizeof(struct vlan_header);
    }
}

bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa,
                                    size_t len)
{
    hwaddr mapped_len = 0;
    struct iovec *ventry;
    assert(pkt);
    assert(pkt->max_raw_frags > pkt->raw_frags);

    if (!len) {
        return true;
    }

    ventry = &pkt->raw[pkt->raw_frags];
    mapped_len = len;

    ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false);
    ventry->iov_len = mapped_len;
    pkt->raw_frags += !!ventry->iov_base;

    if ((ventry->iov_base == NULL) || (len != mapped_len)) {
        return false;
    }

    return true;
}

eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt)
{
    assert(pkt);

    return pkt->packet_type;
}

size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt)
{
    assert(pkt);

    return pkt->hdr_len + pkt->payload_len;
}

void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt)
{
#ifdef VMXNET_TX_PKT_DEBUG
    assert(pkt);

    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %zu, "
           "l3hdr_len: %zu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
           pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len,
           pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}
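
/*
 * Drop all per-packet state: free the copied L3 header, unmap the guest
 * memory fragments mapped by vmxnet_tx_pkt_add_raw_fragment() and clear all
 * lengths so the context can be reused for the next packet.
 */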
void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt)
{
    int i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    g_free(pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base);
    pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL;

    assert(pkt->vec);
    for (i = VMXNET_TX_PKT_L2HDR_FRAG;
         i < pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG; i++) {
        pkt->vec[i].iov_len = 0;
    }
    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    assert(pkt->raw);
    for (i = 0; i < pkt->raw_frags; i++) {
        assert(pkt->raw[i].iov_base);
        cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len,
                                  false, pkt->raw[i].iov_len);
        pkt->raw[i].iov_len = 0;
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->packet_type = 0;
    pkt->l4proto = 0;
}

static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt)
{
    struct iovec *iov = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG];
    uint32_t csum_cntr;
    uint16_t csum = 0;
    /* num of iovec without vhdr */
    uint32_t iov_len = pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG - 1;
    uint16_t csl;
    struct ip_header *iphdr;
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csl = pkt->payload_len;

    /* data checksum */
    csum_cntr =
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl);
    /* add pseudo header to csum */
    iphdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;
    csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}

enum {
    VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS = 0,
    VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS,
    VMXNET_TX_PKT_FRAGMENT_HEADER_NUM
};

#define VMXNET_MAX_FRAG_SG_LIST (64)

static size_t vmxnet_tx_pkt_fetch_fragment(struct VmxnetTxPkt *pkt,
    int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    *dst_idx = VMXNET_TX_PKT_FRAGMENT_HEADER_NUM;

    while (fetched < pkt->virt_hdr.gso_size) {

        /* no more place in fragment iov */
        if (*dst_idx == VMXNET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG)) {
            break;
        }

        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
                                    pkt->virt_hdr.gso_size - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}
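
/*
 * Software segmentation fallback for backends without virtio-net header
 * support: the payload is cut into chunks of at most gso_size bytes, each
 * chunk is prepended with the packet's L2/L3 headers, the IPv4 fragmentation
 * fields and header checksum are rewritten per chunk, and every fragment is
 * sent separately.
 */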
static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt,
    NetClientState *nc)
{
    struct iovec fragment[VMXNET_MAX_FRAG_SG_LIST];
    size_t fragment_len = 0;
    bool more_frags = false;

    /* some pointers for shorter code */
    void *l2_iov_base, *l3_iov_base;
    size_t l2_iov_len, l3_iov_len;
    int src_idx = VMXNET_TX_PKT_PL_START_FRAG, dst_idx;
    size_t src_offset = 0;
    size_t fragment_offset = 0;

    l2_iov_base = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base;
    l2_iov_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len;
    l3_iov_base = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;
    l3_iov_len = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len;

    /* Copy headers */
    fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base;
    fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len;
    fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base;
    fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;

    /* Put as much data as possible and send */
    do {
        fragment_len = vmxnet_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset,
                                                    fragment, &dst_idx);

        more_frags = (fragment_offset + fragment_len < pkt->payload_len);

        eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
                                    l3_iov_len, fragment_len, fragment_offset,
                                    more_frags);

        eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);

        qemu_sendv_packet(nc, fragment, dst_idx);

        fragment_offset += fragment_len;

    } while (more_frags);

    return true;
}

bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc)
{
    assert(pkt);

    if (!pkt->has_virt_hdr &&
        pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
        vmxnet_tx_pkt_do_sw_csum(pkt);
    }

    /*
     * The underlying infrastructure does not support IP datagrams longer
     * than 64K, so drop such packets without even trying to send them.
     */
    if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN -
            pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (pkt->has_virt_hdr ||
        pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        qemu_sendv_packet(nc, pkt->vec,
                          pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG);
        return true;
    }

    return vmxnet_tx_pkt_do_sw_fragmentation(pkt, nc);
}
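
/*
 * Illustrative usage sketch (not compiled; shown only to document the
 * intended call flow). Names such as max_frags, backend_has_vnet_hdr,
 * more_guest_descriptors, desc_pa, desc_len, tso_requested, csum_requested,
 * gso_size and nc stand in for device-specific state; actual callers (e.g.
 * the vmxnet3 emulation) may order some of these steps differently:
 *
 *     struct VmxnetTxPkt *pkt;
 *
 *     vmxnet_tx_pkt_init(&pkt, max_frags, backend_has_vnet_hdr);
 *
 *     // per guest TX request
 *     vmxnet_tx_pkt_reset(pkt);
 *     while (more_guest_descriptors) {
 *         vmxnet_tx_pkt_add_raw_fragment(pkt, desc_pa, desc_len);
 *     }
 *     if (vmxnet_tx_pkt_parse(pkt)) {
 *         vmxnet_tx_pkt_build_vheader(pkt, tso_requested, csum_requested,
 *                                     gso_size);
 *         vmxnet_tx_pkt_send(pkt, nc);
 *     }
 *
 *     vmxnet_tx_pkt_uninit(pkt);
 */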