1 /* 2 * QEMU RX packets abstractions 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2 or later. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "trace.h" 20 #include "net_rx_pkt.h" 21 #include "net/checksum.h" 22 #include "net/tap.h" 23 24 struct NetRxPkt { 25 struct virtio_net_hdr virt_hdr; 26 uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)]; 27 struct iovec *vec; 28 uint16_t vec_len_total; 29 uint16_t vec_len; 30 uint32_t tot_len; 31 uint16_t tci; 32 size_t ehdr_buf_len; 33 eth_pkt_types_e packet_type; 34 35 /* Analysis results */ 36 bool isip4; 37 bool isip6; 38 bool isudp; 39 bool istcp; 40 41 size_t l3hdr_off; 42 size_t l4hdr_off; 43 size_t l5hdr_off; 44 45 eth_ip6_hdr_info ip6hdr_info; 46 eth_ip4_hdr_info ip4hdr_info; 47 eth_l4_hdr_info l4hdr_info; 48 }; 49 50 void net_rx_pkt_init(struct NetRxPkt **pkt) 51 { 52 struct NetRxPkt *p = g_malloc0(sizeof *p); 53 p->vec = NULL; 54 p->vec_len_total = 0; 55 *pkt = p; 56 } 57 58 void net_rx_pkt_uninit(struct NetRxPkt *pkt) 59 { 60 if (pkt->vec_len_total != 0) { 61 g_free(pkt->vec); 62 } 63 64 g_free(pkt); 65 } 66 67 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) 68 { 69 assert(pkt); 70 return &pkt->virt_hdr; 71 } 72 73 static inline void 74 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, 75 int new_iov_len) 76 { 77 if (pkt->vec_len_total < new_iov_len) { 78 g_free(pkt->vec); 79 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); 80 pkt->vec_len_total = new_iov_len; 81 } 82 } 83 84 static void 85 net_rx_pkt_pull_data(struct NetRxPkt *pkt, 86 const struct iovec *iov, int iovcnt, 87 size_t ploff) 88 { 89 uint32_t pllen = iov_size(iov, iovcnt) - ploff; 90 91 if (pkt->ehdr_buf_len) { 92 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); 93 94 pkt->vec[0].iov_base = pkt->ehdr_buf; 95 pkt->vec[0].iov_len = pkt->ehdr_buf_len; 96 97 pkt->tot_len = pllen + pkt->ehdr_buf_len; 98 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, 99 iov, iovcnt, ploff, pllen) + 1; 100 } else { 101 net_rx_pkt_iovec_realloc(pkt, iovcnt); 102 103 pkt->tot_len = pllen; 104 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, 105 iov, iovcnt, ploff, pkt->tot_len); 106 } 107 108 eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, 109 &pkt->isudp, &pkt->istcp, 110 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 111 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 112 113 trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, 114 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); 115 } 116 117 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, 118 const struct iovec *iov, int iovcnt, 119 size_t iovoff, bool strip_vlan) 120 { 121 uint16_t tci = 0; 122 uint16_t ploff = iovoff; 123 assert(pkt); 124 125 if (strip_vlan) { 126 pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, 127 &ploff, &tci); 128 } else { 129 pkt->ehdr_buf_len = 0; 130 } 131 132 pkt->tci = tci; 133 134 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 135 } 136 137 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, 138 const struct iovec *iov, int iovcnt, 139 size_t iovoff, bool strip_vlan, 140 uint16_t vet) 141 { 142 uint16_t tci = 0; 143 uint16_t ploff = iovoff; 144 assert(pkt); 145 146 if (strip_vlan) { 147 pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, 148 pkt->ehdr_buf, 149 &ploff, &tci); 150 } else { 151 pkt->ehdr_buf_len = 0; 152 } 153 154 pkt->tci = tci; 155 156 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 157 } 158 159 void net_rx_pkt_dump(struct NetRxPkt *pkt) 160 { 161 #ifdef NET_RX_PKT_DEBUG 162 assert(pkt); 163 164 printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n", 165 pkt->tot_len, pkt->ehdr_buf_len, pkt->tci); 166 #endif 167 } 168 169 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, 170 eth_pkt_types_e packet_type) 171 { 172 assert(pkt); 173 174 pkt->packet_type = packet_type; 175 176 } 177 178 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt) 179 { 180 assert(pkt); 181 182 return pkt->packet_type; 183 } 184 185 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) 186 { 187 assert(pkt); 188 189 return pkt->tot_len; 190 } 191 192 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, 193 size_t len) 194 { 195 const struct iovec iov = { 196 .iov_base = (void *)data, 197 .iov_len = len 198 }; 199 200 assert(pkt); 201 202 eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, 203 &pkt->isudp, &pkt->istcp, 204 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 205 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 206 } 207 208 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, 209 bool *isip4, bool *isip6, 210 bool *isudp, bool *istcp) 211 { 212 assert(pkt); 213 214 *isip4 = pkt->isip4; 215 *isip6 = pkt->isip6; 216 *isudp = pkt->isudp; 217 *istcp = pkt->istcp; 218 } 219 220 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) 221 { 222 assert(pkt); 223 return pkt->l3hdr_off; 224 } 225 226 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) 227 { 228 assert(pkt); 229 return pkt->l4hdr_off; 230 } 231 232 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) 233 { 234 assert(pkt); 235 return pkt->l5hdr_off; 236 } 237 238 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) 239 { 240 return &pkt->ip6hdr_info; 241 } 242 243 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) 244 { 245 return &pkt->ip4hdr_info; 246 } 247 248 eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) 249 { 250 return &pkt->l4hdr_info; 251 } 252 253 static inline void 254 _net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, 255 void *ptr, size_t size) 256 { 257 memcpy(&rss_input[*bytes_written], ptr, size); 258 trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); 259 *bytes_written += size; 260 } 261 262 static inline void 263 _net_rx_rss_prepare_ip4(uint8_t *rss_input, 264 struct NetRxPkt *pkt, 265 size_t *bytes_written) 266 { 267 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; 268 269 _net_rx_rss_add_chunk(rss_input, bytes_written, 270 &ip4_hdr->ip_src, sizeof(uint32_t)); 271 272 _net_rx_rss_add_chunk(rss_input, bytes_written, 273 &ip4_hdr->ip_dst, sizeof(uint32_t)); 274 } 275 276 static inline void 277 _net_rx_rss_prepare_ip6(uint8_t *rss_input, 278 struct NetRxPkt *pkt, 279 bool ipv6ex, size_t *bytes_written) 280 { 281 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; 282 283 _net_rx_rss_add_chunk(rss_input, bytes_written, 284 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src 285 : &ip6info->ip6_hdr.ip6_src, 286 sizeof(struct in6_address)); 287 288 _net_rx_rss_add_chunk(rss_input, bytes_written, 289 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst 290 : &ip6info->ip6_hdr.ip6_dst, 291 sizeof(struct in6_address)); 292 } 293 294 static inline void 295 _net_rx_rss_prepare_tcp(uint8_t *rss_input, 296 struct NetRxPkt *pkt, 297 size_t *bytes_written) 298 { 299 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; 300 301 _net_rx_rss_add_chunk(rss_input, bytes_written, 302 &tcphdr->th_sport, sizeof(uint16_t)); 303 304 _net_rx_rss_add_chunk(rss_input, bytes_written, 305 &tcphdr->th_dport, sizeof(uint16_t)); 306 } 307 308 static inline void 309 _net_rx_rss_prepare_udp(uint8_t *rss_input, 310 struct NetRxPkt *pkt, 311 size_t *bytes_written) 312 { 313 struct udp_header *udphdr = &pkt->l4hdr_info.hdr.udp; 314 315 _net_rx_rss_add_chunk(rss_input, bytes_written, 316 &udphdr->uh_sport, sizeof(uint16_t)); 317 318 _net_rx_rss_add_chunk(rss_input, bytes_written, 319 &udphdr->uh_dport, sizeof(uint16_t)); 320 } 321 322 uint32_t 323 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, 324 NetRxPktRssType type, 325 uint8_t *key) 326 { 327 uint8_t rss_input[36]; 328 size_t rss_length = 0; 329 uint32_t rss_hash = 0; 330 net_toeplitz_key key_data; 331 332 switch (type) { 333 case NetPktRssIpV4: 334 assert(pkt->isip4); 335 trace_net_rx_pkt_rss_ip4(); 336 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 337 break; 338 case NetPktRssIpV4Tcp: 339 assert(pkt->isip4); 340 assert(pkt->istcp); 341 trace_net_rx_pkt_rss_ip4_tcp(); 342 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 343 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 344 break; 345 case NetPktRssIpV6Tcp: 346 assert(pkt->isip6); 347 assert(pkt->istcp); 348 trace_net_rx_pkt_rss_ip6_tcp(); 349 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 350 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 351 break; 352 case NetPktRssIpV6: 353 assert(pkt->isip6); 354 trace_net_rx_pkt_rss_ip6(); 355 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 356 break; 357 case NetPktRssIpV6Ex: 358 assert(pkt->isip6); 359 trace_net_rx_pkt_rss_ip6_ex(); 360 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 361 break; 362 case NetPktRssIpV6TcpEx: 363 assert(pkt->isip6); 364 assert(pkt->istcp); 365 trace_net_rx_pkt_rss_ip6_ex_tcp(); 366 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 367 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 368 break; 369 case NetPktRssIpV4Udp: 370 assert(pkt->isip4); 371 assert(pkt->isudp); 372 trace_net_rx_pkt_rss_ip4_udp(); 373 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 374 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length); 375 break; 376 case NetPktRssIpV6Udp: 377 assert(pkt->isip6); 378 assert(pkt->isudp); 379 trace_net_rx_pkt_rss_ip6_udp(); 380 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 381 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length); 382 break; 383 case NetPktRssIpV6UdpEx: 384 assert(pkt->isip6); 385 assert(pkt->isudp); 386 trace_net_rx_pkt_rss_ip6_ex_udp(); 387 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 388 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length); 389 break; 390 default: 391 assert(false); 392 break; 393 } 394 395 net_toeplitz_key_init(&key_data, key); 396 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); 397 398 trace_net_rx_pkt_rss_hash(rss_length, rss_hash); 399 400 return rss_hash; 401 } 402 403 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) 404 { 405 assert(pkt); 406 407 if (pkt->isip4) { 408 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); 409 } 410 411 return 0; 412 } 413 414 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) 415 { 416 assert(pkt); 417 418 if (pkt->istcp) { 419 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; 420 } 421 422 return false; 423 } 424 425 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) 426 { 427 assert(pkt); 428 429 if (pkt->istcp) { 430 return pkt->l4hdr_info.has_tcp_data; 431 } 432 433 return false; 434 } 435 436 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) 437 { 438 assert(pkt); 439 440 return pkt->vec; 441 } 442 443 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) 444 { 445 assert(pkt); 446 447 return pkt->vec_len; 448 } 449 450 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, 451 struct virtio_net_hdr *vhdr) 452 { 453 assert(pkt); 454 455 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); 456 } 457 458 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, 459 const struct iovec *iov, int iovcnt) 460 { 461 assert(pkt); 462 463 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); 464 } 465 466 void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt) 467 { 468 assert(pkt); 469 470 memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); 471 } 472 473 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) 474 { 475 assert(pkt); 476 477 return pkt->ehdr_buf_len ? true : false; 478 } 479 480 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) 481 { 482 assert(pkt); 483 484 return pkt->tci; 485 } 486 487 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) 488 { 489 uint32_t cntr; 490 uint16_t csum; 491 uint32_t csl; 492 493 trace_net_rx_pkt_l3_csum_validate_entry(); 494 495 if (!pkt->isip4) { 496 trace_net_rx_pkt_l3_csum_validate_not_ip4(); 497 return false; 498 } 499 500 csl = pkt->l4hdr_off - pkt->l3hdr_off; 501 502 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, 503 pkt->l3hdr_off, 504 csl, 0); 505 506 csum = net_checksum_finish(cntr); 507 508 *csum_valid = (csum == 0); 509 510 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, 511 cntr, csum, *csum_valid); 512 513 return true; 514 } 515 516 static uint16_t 517 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) 518 { 519 uint32_t cntr; 520 uint16_t csum; 521 uint16_t csl; 522 uint32_t cso; 523 524 trace_net_rx_pkt_l4_csum_calc_entry(); 525 526 if (pkt->isip4) { 527 if (pkt->isudp) { 528 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 529 trace_net_rx_pkt_l4_csum_calc_ip4_udp(); 530 } else { 531 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - 532 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); 533 trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); 534 } 535 536 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, 537 csl, &cso); 538 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 539 } else { 540 if (pkt->isudp) { 541 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 542 trace_net_rx_pkt_l4_csum_calc_ip6_udp(); 543 } else { 544 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; 545 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; 546 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); 547 548 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - 549 ip6opts_len; 550 trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); 551 } 552 553 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, 554 pkt->ip6hdr_info.l4proto, &cso); 555 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 556 } 557 558 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, 559 pkt->l4hdr_off, csl, cso); 560 561 csum = net_checksum_finish_nozero(cntr); 562 563 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); 564 565 return csum; 566 } 567 568 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) 569 { 570 uint16_t csum; 571 572 trace_net_rx_pkt_l4_csum_validate_entry(); 573 574 if (!pkt->istcp && !pkt->isudp) { 575 trace_net_rx_pkt_l4_csum_validate_not_xxp(); 576 return false; 577 } 578 579 if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { 580 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); 581 return false; 582 } 583 584 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 585 trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); 586 return false; 587 } 588 589 csum = _net_rx_pkt_calc_l4_csum(pkt); 590 591 *csum_valid = ((csum == 0) || (csum == 0xFFFF)); 592 593 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); 594 595 return true; 596 } 597 598 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) 599 { 600 uint16_t csum = 0; 601 uint32_t l4_cso; 602 603 trace_net_rx_pkt_l4_csum_fix_entry(); 604 605 if (pkt->istcp) { 606 l4_cso = offsetof(struct tcp_header, th_sum); 607 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); 608 } else if (pkt->isudp) { 609 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { 610 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); 611 return false; 612 } 613 l4_cso = offsetof(struct udp_header, uh_sum); 614 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); 615 } else { 616 trace_net_rx_pkt_l4_csum_fix_not_xxp(); 617 return false; 618 } 619 620 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 621 trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); 622 return false; 623 } 624 625 /* Set zero to checksum word */ 626 iov_from_buf(pkt->vec, pkt->vec_len, 627 pkt->l4hdr_off + l4_cso, 628 &csum, sizeof(csum)); 629 630 /* Calculate L4 checksum */ 631 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); 632 633 /* Set calculated checksum to checksum word */ 634 iov_from_buf(pkt->vec, pkt->vec_len, 635 pkt->l4hdr_off + l4_cso, 636 &csum, sizeof(csum)); 637 638 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); 639 640 return true; 641 } 642