1 /* 2 * QEMU RX packets abstractions 3 * 4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) 5 * 6 * Developed by Daynix Computing LTD (http://www.daynix.com) 7 * 8 * Authors: 9 * Dmitry Fleytman <dmitry@daynix.com> 10 * Tamir Shomer <tamirs@daynix.com> 11 * Yan Vugenfirer <yan@daynix.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2 or later. 14 * See the COPYING file in the top-level directory. 15 * 16 */ 17 18 #include "qemu/osdep.h" 19 #include "trace.h" 20 #include "net_rx_pkt.h" 21 #include "net/checksum.h" 22 #include "net/tap.h" 23 24 struct NetRxPkt { 25 struct virtio_net_hdr virt_hdr; 26 uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)]; 27 struct iovec *vec; 28 uint16_t vec_len_total; 29 uint16_t vec_len; 30 uint32_t tot_len; 31 uint16_t tci; 32 size_t ehdr_buf_len; 33 bool has_virt_hdr; 34 eth_pkt_types_e packet_type; 35 36 /* Analysis results */ 37 bool isip4; 38 bool isip6; 39 bool isudp; 40 bool istcp; 41 42 size_t l3hdr_off; 43 size_t l4hdr_off; 44 size_t l5hdr_off; 45 46 eth_ip6_hdr_info ip6hdr_info; 47 eth_ip4_hdr_info ip4hdr_info; 48 eth_l4_hdr_info l4hdr_info; 49 }; 50 51 void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) 52 { 53 struct NetRxPkt *p = g_malloc0(sizeof *p); 54 p->has_virt_hdr = has_virt_hdr; 55 p->vec = NULL; 56 p->vec_len_total = 0; 57 *pkt = p; 58 } 59 60 void net_rx_pkt_uninit(struct NetRxPkt *pkt) 61 { 62 if (pkt->vec_len_total != 0) { 63 g_free(pkt->vec); 64 } 65 66 g_free(pkt); 67 } 68 69 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) 70 { 71 assert(pkt); 72 return &pkt->virt_hdr; 73 } 74 75 static inline void 76 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, 77 int new_iov_len) 78 { 79 if (pkt->vec_len_total < new_iov_len) { 80 g_free(pkt->vec); 81 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); 82 pkt->vec_len_total = new_iov_len; 83 } 84 } 85 86 static void 87 net_rx_pkt_pull_data(struct NetRxPkt *pkt, 88 const struct iovec *iov, int iovcnt, 89 size_t ploff) 90 { 91 if (pkt->ehdr_buf_len) { 92 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); 93 94 pkt->vec[0].iov_base = pkt->ehdr_buf; 95 pkt->vec[0].iov_len = pkt->ehdr_buf_len; 96 97 pkt->tot_len = iov_size(iov, iovcnt) - ploff + pkt->ehdr_buf_len; 98 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, 99 iov, iovcnt, ploff, pkt->tot_len); 100 } else { 101 net_rx_pkt_iovec_realloc(pkt, iovcnt); 102 103 pkt->tot_len = iov_size(iov, iovcnt) - ploff; 104 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, 105 iov, iovcnt, ploff, pkt->tot_len); 106 } 107 108 eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, 109 &pkt->isudp, &pkt->istcp, 110 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 111 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 112 113 trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, 114 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); 115 } 116 117 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, 118 const struct iovec *iov, int iovcnt, 119 size_t iovoff, bool strip_vlan) 120 { 121 uint16_t tci = 0; 122 uint16_t ploff = iovoff; 123 assert(pkt); 124 125 if (strip_vlan) { 126 pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, 127 &ploff, &tci); 128 } else { 129 pkt->ehdr_buf_len = 0; 130 } 131 132 pkt->tci = tci; 133 134 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 135 } 136 137 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, 138 const struct iovec *iov, int iovcnt, 139 size_t iovoff, bool strip_vlan, 140 uint16_t vet) 141 { 142 uint16_t tci = 0; 143 uint16_t ploff = iovoff; 144 assert(pkt); 145 146 if (strip_vlan) { 147 pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, 148 pkt->ehdr_buf, 149 &ploff, &tci); 150 } else { 151 pkt->ehdr_buf_len = 0; 152 } 153 154 pkt->tci = tci; 155 156 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); 157 } 158 159 void net_rx_pkt_dump(struct NetRxPkt *pkt) 160 { 161 #ifdef NET_RX_PKT_DEBUG 162 assert(pkt); 163 164 printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n", 165 pkt->tot_len, pkt->ehdr_buf_len, pkt->tci); 166 #endif 167 } 168 169 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, 170 eth_pkt_types_e packet_type) 171 { 172 assert(pkt); 173 174 pkt->packet_type = packet_type; 175 176 } 177 178 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt) 179 { 180 assert(pkt); 181 182 return pkt->packet_type; 183 } 184 185 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) 186 { 187 assert(pkt); 188 189 return pkt->tot_len; 190 } 191 192 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, 193 size_t len) 194 { 195 const struct iovec iov = { 196 .iov_base = (void *)data, 197 .iov_len = len 198 }; 199 200 assert(pkt); 201 202 eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, 203 &pkt->isudp, &pkt->istcp, 204 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, 205 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); 206 } 207 208 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, 209 bool *isip4, bool *isip6, 210 bool *isudp, bool *istcp) 211 { 212 assert(pkt); 213 214 *isip4 = pkt->isip4; 215 *isip6 = pkt->isip6; 216 *isudp = pkt->isudp; 217 *istcp = pkt->istcp; 218 } 219 220 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) 221 { 222 assert(pkt); 223 return pkt->l3hdr_off; 224 } 225 226 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) 227 { 228 assert(pkt); 229 return pkt->l4hdr_off; 230 } 231 232 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) 233 { 234 assert(pkt); 235 return pkt->l5hdr_off; 236 } 237 238 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) 239 { 240 return &pkt->ip6hdr_info; 241 } 242 243 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) 244 { 245 return &pkt->ip4hdr_info; 246 } 247 248 eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) 249 { 250 return &pkt->l4hdr_info; 251 } 252 253 static inline void 254 _net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, 255 void *ptr, size_t size) 256 { 257 memcpy(&rss_input[*bytes_written], ptr, size); 258 trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); 259 *bytes_written += size; 260 } 261 262 static inline void 263 _net_rx_rss_prepare_ip4(uint8_t *rss_input, 264 struct NetRxPkt *pkt, 265 size_t *bytes_written) 266 { 267 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; 268 269 _net_rx_rss_add_chunk(rss_input, bytes_written, 270 &ip4_hdr->ip_src, sizeof(uint32_t)); 271 272 _net_rx_rss_add_chunk(rss_input, bytes_written, 273 &ip4_hdr->ip_dst, sizeof(uint32_t)); 274 } 275 276 static inline void 277 _net_rx_rss_prepare_ip6(uint8_t *rss_input, 278 struct NetRxPkt *pkt, 279 bool ipv6ex, size_t *bytes_written) 280 { 281 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; 282 283 _net_rx_rss_add_chunk(rss_input, bytes_written, 284 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src 285 : &ip6info->ip6_hdr.ip6_src, 286 sizeof(struct in6_address)); 287 288 _net_rx_rss_add_chunk(rss_input, bytes_written, 289 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst 290 : &ip6info->ip6_hdr.ip6_dst, 291 sizeof(struct in6_address)); 292 } 293 294 static inline void 295 _net_rx_rss_prepare_tcp(uint8_t *rss_input, 296 struct NetRxPkt *pkt, 297 size_t *bytes_written) 298 { 299 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; 300 301 _net_rx_rss_add_chunk(rss_input, bytes_written, 302 &tcphdr->th_sport, sizeof(uint16_t)); 303 304 _net_rx_rss_add_chunk(rss_input, bytes_written, 305 &tcphdr->th_dport, sizeof(uint16_t)); 306 } 307 308 uint32_t 309 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, 310 NetRxPktRssType type, 311 uint8_t *key) 312 { 313 uint8_t rss_input[36]; 314 size_t rss_length = 0; 315 uint32_t rss_hash = 0; 316 net_toeplitz_key key_data; 317 318 switch (type) { 319 case NetPktRssIpV4: 320 assert(pkt->isip4); 321 trace_net_rx_pkt_rss_ip4(); 322 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 323 break; 324 case NetPktRssIpV4Tcp: 325 assert(pkt->isip4); 326 assert(pkt->istcp); 327 trace_net_rx_pkt_rss_ip4_tcp(); 328 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); 329 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 330 break; 331 case NetPktRssIpV6Tcp: 332 assert(pkt->isip6); 333 assert(pkt->istcp); 334 trace_net_rx_pkt_rss_ip6_tcp(); 335 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 336 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); 337 break; 338 case NetPktRssIpV6: 339 assert(pkt->isip6); 340 trace_net_rx_pkt_rss_ip6(); 341 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); 342 break; 343 case NetPktRssIpV6Ex: 344 assert(pkt->isip6); 345 trace_net_rx_pkt_rss_ip6_ex(); 346 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); 347 break; 348 default: 349 assert(false); 350 break; 351 } 352 353 net_toeplitz_key_init(&key_data, key); 354 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); 355 356 trace_net_rx_pkt_rss_hash(rss_length, rss_hash); 357 358 return rss_hash; 359 } 360 361 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) 362 { 363 assert(pkt); 364 365 if (pkt->isip4) { 366 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); 367 } 368 369 return 0; 370 } 371 372 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) 373 { 374 assert(pkt); 375 376 if (pkt->istcp) { 377 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; 378 } 379 380 return false; 381 } 382 383 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) 384 { 385 assert(pkt); 386 387 if (pkt->istcp) { 388 return pkt->l4hdr_info.has_tcp_data; 389 } 390 391 return false; 392 } 393 394 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) 395 { 396 assert(pkt); 397 398 return pkt->vec; 399 } 400 401 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) 402 { 403 assert(pkt); 404 405 return pkt->vec_len; 406 } 407 408 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, 409 struct virtio_net_hdr *vhdr) 410 { 411 assert(pkt); 412 413 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); 414 } 415 416 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, 417 const struct iovec *iov, int iovcnt) 418 { 419 assert(pkt); 420 421 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); 422 } 423 424 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) 425 { 426 assert(pkt); 427 428 return pkt->ehdr_buf_len ? true : false; 429 } 430 431 bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt) 432 { 433 assert(pkt); 434 435 return pkt->has_virt_hdr; 436 } 437 438 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) 439 { 440 assert(pkt); 441 442 return pkt->tci; 443 } 444 445 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) 446 { 447 uint32_t cntr; 448 uint16_t csum; 449 uint32_t csl; 450 451 trace_net_rx_pkt_l3_csum_validate_entry(); 452 453 if (!pkt->isip4) { 454 trace_net_rx_pkt_l3_csum_validate_not_ip4(); 455 return false; 456 } 457 458 csl = pkt->l4hdr_off - pkt->l3hdr_off; 459 460 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, 461 pkt->l3hdr_off, 462 csl, 0); 463 464 csum = net_checksum_finish(cntr); 465 466 *csum_valid = (csum == 0); 467 468 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, 469 cntr, csum, *csum_valid); 470 471 return true; 472 } 473 474 static uint16_t 475 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) 476 { 477 uint32_t cntr; 478 uint16_t csum; 479 uint16_t csl; 480 uint32_t cso; 481 482 trace_net_rx_pkt_l4_csum_calc_entry(); 483 484 if (pkt->isip4) { 485 if (pkt->isudp) { 486 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 487 trace_net_rx_pkt_l4_csum_calc_ip4_udp(); 488 } else { 489 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - 490 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); 491 trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); 492 } 493 494 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, 495 csl, &cso); 496 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 497 } else { 498 if (pkt->isudp) { 499 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); 500 trace_net_rx_pkt_l4_csum_calc_ip6_udp(); 501 } else { 502 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; 503 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; 504 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); 505 506 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - 507 ip6opts_len; 508 trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); 509 } 510 511 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, 512 pkt->ip6hdr_info.l4proto, &cso); 513 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); 514 } 515 516 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, 517 pkt->l4hdr_off, csl, cso); 518 519 csum = net_checksum_finish(cntr); 520 521 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); 522 523 return csum; 524 } 525 526 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) 527 { 528 uint16_t csum; 529 530 trace_net_rx_pkt_l4_csum_validate_entry(); 531 532 if (!pkt->istcp && !pkt->isudp) { 533 trace_net_rx_pkt_l4_csum_validate_not_xxp(); 534 return false; 535 } 536 537 if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { 538 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); 539 return false; 540 } 541 542 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 543 trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); 544 return false; 545 } 546 547 csum = _net_rx_pkt_calc_l4_csum(pkt); 548 549 *csum_valid = ((csum == 0) || (csum == 0xFFFF)); 550 551 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); 552 553 return true; 554 } 555 556 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) 557 { 558 uint16_t csum = 0; 559 uint32_t l4_cso; 560 561 trace_net_rx_pkt_l4_csum_fix_entry(); 562 563 if (pkt->istcp) { 564 l4_cso = offsetof(struct tcp_header, th_sum); 565 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); 566 } else if (pkt->isudp) { 567 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { 568 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); 569 return false; 570 } 571 l4_cso = offsetof(struct udp_header, uh_sum); 572 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); 573 } else { 574 trace_net_rx_pkt_l4_csum_fix_not_xxp(); 575 return false; 576 } 577 578 if (pkt->isip4 && pkt->ip4hdr_info.fragment) { 579 trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); 580 return false; 581 } 582 583 /* Set zero to checksum word */ 584 iov_from_buf(pkt->vec, pkt->vec_len, 585 pkt->l4hdr_off + l4_cso, 586 &csum, sizeof(csum)); 587 588 /* Calculate L4 checksum */ 589 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); 590 591 /* Set calculated checksum to checksum word */ 592 iov_from_buf(pkt->vec, pkt->vec_len, 593 pkt->l4hdr_off + l4_cso, 594 &csum, sizeof(csum)); 595 596 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); 597 598 return true; 599 } 600