1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * This testsuite provides conformance testing for GRO coalescing.
4 *
5 * Test cases:
6 *
7 * data_*:
8 * Data packets of the same size and same header setup with correct
9 * sequence numbers coalesce. The one exception being the last data
10 * packet coalesced: it can be smaller than the rest and coalesced
11 * as long as it is in the same flow.
12 * - data_same: same size packets coalesce
13 * - data_lrg_sml: large then small coalesces
14 * - data_sml_lrg: small then large doesn't coalesce
15 *
16 * ack:
17 * Pure ACK does not coalesce.
18 *
19 * flags_*:
20 * No packets with PSH, SYN, URG, RST, CWR set will be coalesced.
21 * - flags_psh, flags_syn, flags_rst, flags_urg, flags_cwr
22 *
23 * tcp_*:
24 * Packets with incorrect checksum, non-consecutive seqno and
25 * different TCP header options shouldn't coalesce. Nit: given that
26 * some extension headers have paddings, such as timestamp, headers
27 * that are padded differently would not be coalesced.
28 * - tcp_csum: incorrect checksum
29 * - tcp_seq: non-consecutive sequence numbers
30 * - tcp_ts: different timestamps
31 * - tcp_opt: different TCP options
32 *
33 * ip_*:
34 * Packets with different (ECN, TTL, TOS) header, IP options or
35 * IP fragments shouldn't coalesce.
36 * - ip_ecn, ip_tos: shared between IPv4/IPv6
37 * - ip_ttl, ip_opt, ip_frag4: IPv4 only
38 * - ip_id_df*: IPv4 IP ID field coalescing tests
39 * - ip_frag6, ip_v6ext_*: IPv6 only
40 *
41 * large_*:
 * Packets that would make the coalesced packet exceed GRO_MAX_SIZE
 * shouldn't coalesce.
43 * - large_max: exceeding max size
44 * - large_rem: remainder handling
45 *
46 * MSS is defined as 4096 - header because if it is too small
47 * (i.e. 1500 MTU - header), it will result in many packets,
 * increasing the "large" test case's flakiness: the coalescing
 * window is time sensitive, so the more packets are sent, the more
 * likely it is that the receiver does not coalesce all of them.
51 *
52 * Note the timing issue applies to all of the test cases, so some
53 * flakiness is to be expected.
54 *
55 */
56
57 #define _GNU_SOURCE
58
59 #include <arpa/inet.h>
60 #include <errno.h>
61 #include <error.h>
62 #include <getopt.h>
63 #include <linux/filter.h>
64 #include <linux/if_packet.h>
65 #include <linux/ipv6.h>
66 #include <net/ethernet.h>
67 #include <net/if.h>
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet/tcp.h>
72 #include <stdbool.h>
73 #include <stddef.h>
74 #include <stdio.h>
75 #include <stdarg.h>
76 #include <string.h>
77 #include <unistd.h>
78
79 #include "kselftest.h"
80 #include "../../net/lib/ksft.h"
81
82 #define DPORT 8000
83 #define SPORT 1500
84 #define PAYLOAD_LEN 100
85 #define NUM_PACKETS 4
86 #define START_SEQ 100
87 #define START_ACK 100
88 #define ETH_P_NONE 0
89 #define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
90 #define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
91 #define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
92 #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
93 #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
94 #define MIN_EXTHDR_SIZE 8
95 #define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
96 #define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
97
98 #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
99 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
100
101 enum flush_id_case {
102 FLUSH_ID_DF1_INC,
103 FLUSH_ID_DF1_FIXED,
104 FLUSH_ID_DF0_INC,
105 FLUSH_ID_DF0_FIXED,
106 FLUSH_ID_DF1_INC_FIXED,
107 FLUSH_ID_DF1_FIXED_INC,
108 };
109
110 static const char *addr6_src = "fdaa::2";
111 static const char *addr6_dst = "fdaa::1";
112 static const char *addr4_src = "192.168.1.200";
113 static const char *addr4_dst = "192.168.1.100";
114 static int proto = -1;
115 static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
116 static char *testname = "data";
117 static char *ifname = "eth0";
118 static char *smac = "aa:00:00:00:00:02";
119 static char *dmac = "aa:00:00:00:00:01";
120 static bool verbose;
121 static bool tx_socket = true;
122 static int tcp_offset = -1;
123 static int total_hdr_len = -1;
124 static int ethhdr_proto = -1;
125 static bool ipip;
126
vlog(const char * fmt,...)127 static void vlog(const char *fmt, ...)
128 {
129 va_list args;
130
131 if (verbose) {
132 va_start(args, fmt);
133 vfprintf(stderr, fmt, args);
134 va_end(args);
135 }
136 }
137
/* Attach a classic BPF filter to @fd so that only the frames belonging to
 * the running test are accepted: expected EtherType, TCP as the L4 protocol
 * and DPORT as the TCP destination port.
 *
 * For the tests that insert an IPv4 option or a single IPv6 extension
 * header, the TCP header is shifted by @optlen bytes, so both the regular
 * and the shifted locations are checked.
 */
static void setup_sock_filter(int fd)
{
	/* offset of the TCP destination port when nothing is inserted */
	const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
	const int ethproto_off = offsetof(struct ethhdr, h_proto);
	int optlen = 0;
	int ipproto_off, opt_ipproto_off;
	int next_off;

	/* Locate the "next protocol" byte of the IP header that precedes TCP;
	 * with ipip this is the second IPv4 header.
	 */
	if (ipip)
		next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol);
	else if (proto == PF_INET)
		next_off = offsetof(struct iphdr, protocol);
	else
		next_off = offsetof(struct ipv6hdr, nexthdr);
	ipproto_off = ETH_HLEN + next_off;

	/* Overridden later if exthdrs are used: */
	opt_ipproto_off = ipproto_off;

	if (strcmp(testname, "ip_opt") == 0) {
		optlen = sizeof(struct ip_timestamp);
	} else if (strcmp(testname, "ip_frag6") == 0 ||
		   strcmp(testname, "ip_v6ext_same") == 0 ||
		   strcmp(testname, "ip_v6ext_diff") == 0) {
		/* compile-time check: none of these headers exceeds 8 bytes */
		BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
		BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
		BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);

		/* same size for HBH and Fragment extension header types */
		optlen = MIN_EXTHDR_SIZE;
		/* next-header byte of the extension header following IPv6 */
		opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
			+ offsetof(struct ip6_ext, ip6e_nxt);
	}

	/* this filter validates the following:
	 *	- packet is IPv4/IPv6 according to the running test.
	 *	- packet is TCP. Also handles the case of one extension header and then TCP.
	 *	- checks the packet tcp dport equals to DPORT. Also handles the case of one
	 *	  extension header and then TCP.
	 *
	 * Jump offsets are relative to the next instruction, so the order of
	 * the instructions below must not change.
	 */
	struct sock_filter filter[] = {
			/* [0] load the EtherType */
			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
			/* [1] unexpected EtherType: jump to [11] (drop) */
			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
			/* [2] load the IP header's next-protocol byte */
			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
			/* [3] TCP: skip the extension header check, go to [6] */
			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
			/* [4] load the extension header's next-header byte */
			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, opt_ipproto_off),
			/* [5] still not TCP: jump to [11] (drop) */
			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
			/* [6] load the dport at its regular location */
			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
			/* [7] dport matches: jump to [10] (accept) */
			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
			/* [8] load the dport at its location shifted by optlen */
			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
			/* [9] no match there either: jump to [11] (drop) */
			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
			/* [10] accept the whole packet */
			BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
			/* [11] drop the packet */
			BPF_STMT(BPF_RET + BPF_K, 0),
	};

	struct sock_fprog bpf = {
		.len = ARRAY_SIZE(filter),
		.filter = filter,
	};

	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
		error(1, errno, "error setting filter");
}
201
/* Add the 16-bit words of @data (@len bytes) to the running sum @sum and
 * return the result without folding the carries back in.
 *
 * Words are summed as they are laid out in memory (host order loads of
 * network order data). When @len is odd, the last byte is added on its own
 * as an unsigned value; it must not be sign-extended, otherwise any byte
 * >= 0x80 would corrupt the upper bits of the sum.
 *
 * NOTE(review): adding the odd byte unshifted matches little-endian hosts;
 * a big-endian host would presumably need it in the high half - confirm if
 * such hosts matter for this test.
 */
static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
{
	const unsigned char *bytes = data;
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		uint16_t word;

		/* memcpy keeps the load valid for unaligned char buffers */
		memcpy(&word, bytes + i, sizeof(word));
		sum += word;
	}
	if (len & 1)
		sum += bytes[len - 1];
	return sum;
}
213
/* Sum @len bytes of @data on top of @sum, fold the carries into the low
 * 16 bits and return the one's complement of the result.
 */
static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
{
	uint32_t acc = checksum_nofold(data, len, sum);

	/* keep folding while anything is left above bit 15 */
	while (acc >> 16)
		acc = (acc >> 16) + (acc & 0xFFFF);

	return ~acc;
}
221
tcp_checksum(void * buf,int payload_len)222 static uint16_t tcp_checksum(void *buf, int payload_len)
223 {
224 struct pseudo_header6 {
225 struct in6_addr saddr;
226 struct in6_addr daddr;
227 uint16_t protocol;
228 uint16_t payload_len;
229 } ph6;
230 struct pseudo_header4 {
231 struct in_addr saddr;
232 struct in_addr daddr;
233 uint16_t protocol;
234 uint16_t payload_len;
235 } ph4;
236 uint32_t sum = 0;
237
238 if (proto == PF_INET6) {
239 if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
240 error(1, errno, "inet_pton6 source ip pseudo");
241 if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
242 error(1, errno, "inet_pton6 dest ip pseudo");
243 ph6.protocol = htons(IPPROTO_TCP);
244 ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
245
246 sum = checksum_nofold(&ph6, sizeof(ph6), 0);
247 } else if (proto == PF_INET) {
248 if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
249 error(1, errno, "inet_pton source ip pseudo");
250 if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
251 error(1, errno, "inet_pton dest ip pseudo");
252 ph4.protocol = htons(IPPROTO_TCP);
253 ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
254
255 sum = checksum_nofold(&ph4, sizeof(ph4), 0);
256 }
257
258 return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
259 }
260
/* Parse the colon separated hex string @mac ("aa:bb:cc:dd:ee:ff") into the
 * six bytes at @mac_addr. Exits the program if fewer than six fields parse.
 */
static void read_MAC(uint8_t *mac_addr, char *mac)
{
	int parsed;

	parsed = sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
			mac_addr, mac_addr + 1, mac_addr + 2,
			mac_addr + 3, mac_addr + 4, mac_addr + 5);
	if (parsed != 6)
		error(1, 0, "sscanf");
}
268
fill_datalinklayer(void * buf)269 static void fill_datalinklayer(void *buf)
270 {
271 struct ethhdr *eth = buf;
272
273 memcpy(eth->h_dest, dst_mac, ETH_ALEN);
274 memcpy(eth->h_source, src_mac, ETH_ALEN);
275 eth->h_proto = ethhdr_proto;
276 }
277
fill_networklayer(void * buf,int payload_len,int protocol)278 static void fill_networklayer(void *buf, int payload_len, int protocol)
279 {
280 struct ipv6hdr *ip6h = buf;
281 struct iphdr *iph = buf;
282
283 if (proto == PF_INET6) {
284 memset(ip6h, 0, sizeof(*ip6h));
285
286 ip6h->version = 6;
287 ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
288 ip6h->nexthdr = protocol;
289 ip6h->hop_limit = 8;
290 if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
291 error(1, errno, "inet_pton source ip6");
292 if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
293 error(1, errno, "inet_pton dest ip6");
294 } else if (proto == PF_INET) {
295 memset(iph, 0, sizeof(*iph));
296
297 iph->version = 4;
298 iph->ihl = 5;
299 iph->ttl = 8;
300 iph->protocol = protocol;
301 iph->tot_len = htons(sizeof(struct tcphdr) +
302 payload_len + sizeof(struct iphdr));
303 iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
304 if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
305 error(1, errno, "inet_pton source ip");
306 if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
307 error(1, errno, "inet_pton dest ip");
308 iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
309 }
310 }
311
fill_transportlayer(void * buf,int seq_offset,int ack_offset,int payload_len,int fin)312 static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
313 int payload_len, int fin)
314 {
315 struct tcphdr *tcph = buf;
316
317 memset(tcph, 0, sizeof(*tcph));
318
319 tcph->source = htons(SPORT);
320 tcph->dest = htons(DPORT);
321 tcph->seq = ntohl(START_SEQ + seq_offset);
322 tcph->ack_seq = ntohl(START_ACK + ack_offset);
323 tcph->ack = 1;
324 tcph->fin = fin;
325 tcph->doff = 5;
326 tcph->window = htons(TCP_MAXWIN);
327 tcph->urg_ptr = 0;
328 tcph->check = tcp_checksum(tcph, payload_len);
329 }
330
/* Send the @len byte frame at @buf to @daddr on socket @fd.
 * Exits the program if sendto() fails or sends fewer than @len bytes.
 */
static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
{
	ssize_t sent;

	sent = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
	if (sent == -1)
		error(1, errno, "sendto failure");
	/* errno is only set when sendto() fails; don't print a stale value */
	if (sent != len)
		error(1, 0, "sendto wrong length");
}
341
create_packet(void * buf,int seq_offset,int ack_offset,int payload_len,int fin)342 static void create_packet(void *buf, int seq_offset, int ack_offset,
343 int payload_len, int fin)
344 {
345 memset(buf, 0, total_hdr_len);
346 memset(buf + total_hdr_len, 'a', payload_len);
347
348 fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
349 payload_len, fin);
350
351 if (ipip) {
352 fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr),
353 IPPROTO_IPIP);
354 fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr),
355 payload_len, IPPROTO_TCP);
356 } else {
357 fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP);
358 }
359
360 fill_datalinklayer(buf);
361 }
362
363 #ifndef TH_CWR
364 #define TH_CWR 0x80
365 #endif
set_flags(struct tcphdr * tcph,int payload_len,int psh,int syn,int rst,int urg,int cwr)366 static void set_flags(struct tcphdr *tcph, int payload_len, int psh, int syn,
367 int rst, int urg, int cwr)
368 {
369 tcph->psh = psh;
370 tcph->syn = syn;
371 tcph->rst = rst;
372 tcph->urg = urg;
373 if (cwr)
374 tcph->th_flags |= TH_CWR;
375 else
376 tcph->th_flags &= ~TH_CWR;
377 tcph->check = 0;
378 tcph->check = tcp_checksum(tcph, payload_len);
379 }
380
381 /* send extra flags of the (NUM_PACKETS / 2) and (NUM_PACKETS / 2 - 1)
382 * pkts, not first and not last pkt
383 */
send_flags(int fd,struct sockaddr_ll * daddr,int psh,int syn,int rst,int urg,int cwr)384 static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
385 int rst, int urg, int cwr)
386 {
387 static char flag_buf[2][MAX_HDR_LEN + PAYLOAD_LEN];
388 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
389 int payload_len, pkt_size, i;
390 struct tcphdr *tcph;
391 int flag[2];
392
393 payload_len = PAYLOAD_LEN * (psh || cwr);
394 pkt_size = total_hdr_len + payload_len;
395 flag[0] = NUM_PACKETS / 2;
396 flag[1] = NUM_PACKETS / 2 - 1;
397
398 /* Create and configure packets with flags
399 */
400 for (i = 0; i < 2; i++) {
401 if (flag[i] > 0) {
402 create_packet(flag_buf[i], flag[i] * payload_len, 0,
403 payload_len, 0);
404 tcph = (struct tcphdr *)(flag_buf[i] + tcp_offset);
405 set_flags(tcph, payload_len, psh, syn, rst, urg, cwr);
406 }
407 }
408
409 for (i = 0; i < NUM_PACKETS + 1; i++) {
410 if (i == flag[0]) {
411 write_packet(fd, flag_buf[0], pkt_size, daddr);
412 continue;
413 } else if (i == flag[1] && cwr) {
414 write_packet(fd, flag_buf[1], pkt_size, daddr);
415 continue;
416 }
417 create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
418 write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
419 }
420 }
421
422 /* Test for data of same length, smaller than previous
423 * and of different lengths
424 */
send_data_pkts(int fd,struct sockaddr_ll * daddr,int payload_len1,int payload_len2)425 static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
426 int payload_len1, int payload_len2)
427 {
428 static char buf[ETH_HLEN + IP_MAXPACKET];
429
430 create_packet(buf, 0, 0, payload_len1, 0);
431 write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
432 create_packet(buf, payload_len1, 0, payload_len2, 0);
433 write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
434 }
435
436 /* If incoming segments make tracked segment length exceed
437 * legal IP datagram length, do not coalesce
438 */
send_large(int fd,struct sockaddr_ll * daddr,int remainder)439 static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
440 {
441 static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
442 static char last[TOTAL_HDR_LEN + MSS];
443 static char new_seg[TOTAL_HDR_LEN + MSS];
444 int i;
445
446 for (i = 0; i < NUM_LARGE_PKT; i++)
447 create_packet(pkts[i], i * MSS, 0, MSS, 0);
448 create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
449 create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
450
451 for (i = 0; i < NUM_LARGE_PKT; i++)
452 write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
453 write_packet(fd, last, total_hdr_len + remainder, daddr);
454 write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
455 }
456
457 /* Pure acks and dup acks don't coalesce */
send_ack(int fd,struct sockaddr_ll * daddr)458 static void send_ack(int fd, struct sockaddr_ll *daddr)
459 {
460 static char buf[MAX_HDR_LEN];
461
462 create_packet(buf, 0, 0, 0, 0);
463 write_packet(fd, buf, total_hdr_len, daddr);
464 write_packet(fd, buf, total_hdr_len, daddr);
465 create_packet(buf, 0, 1, 0, 0);
466 write_packet(fd, buf, total_hdr_len, daddr);
467 }
468
recompute_packet(char * buf,char * no_ext,int extlen)469 static void recompute_packet(char *buf, char *no_ext, int extlen)
470 {
471 struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
472 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
473 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
474
475 memmove(buf, no_ext, total_hdr_len);
476 memmove(buf + total_hdr_len + extlen,
477 no_ext + total_hdr_len, PAYLOAD_LEN);
478
479 tcphdr->doff = tcphdr->doff + (extlen / 4);
480 tcphdr->check = 0;
481 tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
482 if (proto == PF_INET) {
483 iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
484 iph->check = 0;
485 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
486
487 if (ipip) {
488 iph += 1;
489 iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
490 iph->check = 0;
491 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
492 }
493 } else {
494 ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
495 }
496 }
497
/* Write a single TCP option of type @kind at @buf.
 *
 * Supported kinds and the number of bytes written:
 *  - TCPOPT_NOP:       1 byte
 *  - TCPOPT_WINDOW:    TCPOLEN_WINDOW bytes (kind, length, shift = 0)
 *  - TCPOPT_TIMESTAMP: TCPOLEN_TIMESTAMP bytes (kind, length, tsval = @ts,
 *                      tsecr = 0)
 * Any other kind terminates the program.
 *
 * The option is laid out byte by byte: a C struct with two uint8_t followed
 * by uint32_t fields gets two bytes of padding, which would move the
 * timestamps away from where the option's length field says they are and
 * write past the end of the option. memcpy also keeps the stores valid at
 * the odd offsets some option orders use. @ts is stored in host byte
 * order.
 */
static void tcp_write_options(char *buf, int kind, int ts)
{
	uint32_t tsval = ts;
	uint32_t tsecr = 0;

	switch (kind) {
	case TCPOPT_NOP:
		buf[0] = TCPOPT_NOP;
		break;
	case TCPOPT_WINDOW:
		buf[0] = TCPOPT_WINDOW;
		buf[1] = TCPOLEN_WINDOW;
		buf[2] = 0;	/* shift count */
		break;
	case TCPOPT_TIMESTAMP:
		buf[0] = TCPOPT_TIMESTAMP;
		buf[1] = TCPOLEN_TIMESTAMP;
		memcpy(buf + 2, &tsval, sizeof(tsval));
		memcpy(buf + 2 + sizeof(tsval), &tsecr, sizeof(tsecr));
		break;
	default:
		error(1, 0, "unimplemented TCP option");
		break;
	}
}
534
535 /* TCP with options is always a permutation of {TS, NOP, NOP}.
536 * Implement different orders to verify coalescing stops.
537 */
add_standard_tcp_options(char * buf,char * no_ext,int ts,int order)538 static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
539 {
540 switch (order) {
541 case 0:
542 tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
543 tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
544 tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
545 TCPOPT_TIMESTAMP, ts);
546 break;
547 case 1:
548 tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
549 tcp_write_options(buf + total_hdr_len + 1,
550 TCPOPT_TIMESTAMP, ts);
551 tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
552 TCPOPT_NOP, 0);
553 break;
554 case 2:
555 tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
556 tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
557 TCPOPT_NOP, 0);
558 tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
559 TCPOPT_NOP, 0);
560 break;
561 default:
562 error(1, 0, "unknown order");
563 break;
564 }
565 recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
566 }
567
568 /* Packets with invalid checksum don't coalesce. */
send_changed_checksum(int fd,struct sockaddr_ll * daddr)569 static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
570 {
571 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
572 struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
573 int pkt_size = total_hdr_len + PAYLOAD_LEN;
574
575 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
576 write_packet(fd, buf, pkt_size, daddr);
577
578 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
579 tcph->check = tcph->check - 1;
580 write_packet(fd, buf, pkt_size, daddr);
581 }
582
583 /* Packets with non-consecutive sequence number don't coalesce.*/
send_changed_seq(int fd,struct sockaddr_ll * daddr)584 static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
585 {
586 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
587 struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
588 int pkt_size = total_hdr_len + PAYLOAD_LEN;
589
590 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
591 write_packet(fd, buf, pkt_size, daddr);
592
593 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
594 tcph->seq = ntohl(htonl(tcph->seq) + 1);
595 tcph->check = 0;
596 tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
597 write_packet(fd, buf, pkt_size, daddr);
598 }
599
600 /* Packet with different timestamp option or different timestamps
601 * don't coalesce.
602 */
send_changed_ts(int fd,struct sockaddr_ll * daddr)603 static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
604 {
605 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
606 static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
607 int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
608
609 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
610 add_standard_tcp_options(extpkt, buf, 0, 0);
611 write_packet(fd, extpkt, pkt_size, daddr);
612
613 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
614 add_standard_tcp_options(extpkt, buf, 0, 0);
615 write_packet(fd, extpkt, pkt_size, daddr);
616
617 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
618 add_standard_tcp_options(extpkt, buf, 100, 0);
619 write_packet(fd, extpkt, pkt_size, daddr);
620
621 create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
622 add_standard_tcp_options(extpkt, buf, 100, 1);
623 write_packet(fd, extpkt, pkt_size, daddr);
624
625 create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
626 add_standard_tcp_options(extpkt, buf, 100, 2);
627 write_packet(fd, extpkt, pkt_size, daddr);
628 }
629
630 /* Packet with different tcp options don't coalesce. */
send_diff_opt(int fd,struct sockaddr_ll * daddr)631 static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
632 {
633 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
634 static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
635 static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
636 int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
637 int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
638
639 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
640 add_standard_tcp_options(extpkt1, buf, 0, 0);
641 write_packet(fd, extpkt1, extpkt1_size, daddr);
642
643 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
644 add_standard_tcp_options(extpkt1, buf, 0, 0);
645 write_packet(fd, extpkt1, extpkt1_size, daddr);
646
647 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
648 tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
649 tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
650 recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
651 write_packet(fd, extpkt2, extpkt2_size, daddr);
652 }
653
add_ipv4_ts_option(void * buf,void * optpkt)654 static void add_ipv4_ts_option(void *buf, void *optpkt)
655 {
656 struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
657 int optlen = sizeof(struct ip_timestamp);
658 struct iphdr *iph;
659
660 if (optlen % 4)
661 error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
662
663 ts->ipt_code = IPOPT_TS;
664 ts->ipt_len = optlen;
665 ts->ipt_ptr = 5;
666 ts->ipt_flg = IPOPT_TS_TSONLY;
667
668 memcpy(optpkt, buf, tcp_offset);
669 memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
670 sizeof(struct tcphdr) + PAYLOAD_LEN);
671
672 iph = (struct iphdr *)(optpkt + ETH_HLEN);
673 iph->ihl = 5 + (optlen / 4);
674 iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
675 iph->check = 0;
676 iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
677 }
678
add_ipv6_exthdr(void * buf,void * optpkt,__u8 exthdr_type,char * ext_payload)679 static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
680 {
681 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
682 struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
683 char *exthdr_payload_start = (char *)(exthdr + 1);
684
685 exthdr->hdrlen = 0;
686 exthdr->nexthdr = IPPROTO_TCP;
687
688 memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
689
690 memcpy(optpkt, buf, tcp_offset);
691 memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
692 sizeof(struct tcphdr) + PAYLOAD_LEN);
693
694 iph->nexthdr = exthdr_type;
695 iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
696 }
697
/* Recompute the header checksum of the option-less IPv4 header @iph. */
static void fix_ip4_checksum(struct iphdr *iph)
{
	uint16_t csum;

	/* the field must read as zero while the sum is taken */
	iph->check = 0;
	csum = checksum_fold(iph, sizeof(struct iphdr), 0);
	iph->check = csum;
}
703
send_flush_id_case(int fd,struct sockaddr_ll * daddr,enum flush_id_case tcase)704 static void send_flush_id_case(int fd, struct sockaddr_ll *daddr,
705 enum flush_id_case tcase)
706 {
707 static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
708 static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
709 static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
710 bool send_three = false;
711 struct iphdr *iph1;
712 struct iphdr *iph2;
713 struct iphdr *iph3;
714
715 iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
716 iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
717 iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
718
719 create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
720 create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
721 create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
722
723 switch (tcase) {
724 case FLUSH_ID_DF1_INC: /* DF=1, Incrementing - should coalesce */
725 iph1->frag_off |= htons(IP_DF);
726 iph1->id = htons(8);
727
728 iph2->frag_off |= htons(IP_DF);
729 iph2->id = htons(9);
730 break;
731
732 case FLUSH_ID_DF1_FIXED: /* DF=1, Fixed - should coalesce */
733 iph1->frag_off |= htons(IP_DF);
734 iph1->id = htons(8);
735
736 iph2->frag_off |= htons(IP_DF);
737 iph2->id = htons(8);
738 break;
739
740 case FLUSH_ID_DF0_INC: /* DF=0, Incrementing - should coalesce */
741 iph1->frag_off &= ~htons(IP_DF);
742 iph1->id = htons(8);
743
744 iph2->frag_off &= ~htons(IP_DF);
745 iph2->id = htons(9);
746 break;
747
748 case FLUSH_ID_DF0_FIXED: /* DF=0, Fixed - should coalesce */
749 iph1->frag_off &= ~htons(IP_DF);
750 iph1->id = htons(8);
751
752 iph2->frag_off &= ~htons(IP_DF);
753 iph2->id = htons(8);
754 break;
755
756 case FLUSH_ID_DF1_INC_FIXED: /* DF=1, two packets incrementing, and
757 * one fixed - should coalesce only the
758 * first two packets
759 */
760 iph1->frag_off |= htons(IP_DF);
761 iph1->id = htons(8);
762
763 iph2->frag_off |= htons(IP_DF);
764 iph2->id = htons(9);
765
766 iph3->frag_off |= htons(IP_DF);
767 iph3->id = htons(9);
768 send_three = true;
769 break;
770
771 case FLUSH_ID_DF1_FIXED_INC: /* DF=1, two packets fixed, and one
772 * incrementing - should coalesce only
773 * the first two packets
774 */
775 iph1->frag_off |= htons(IP_DF);
776 iph1->id = htons(8);
777
778 iph2->frag_off |= htons(IP_DF);
779 iph2->id = htons(8);
780
781 iph3->frag_off |= htons(IP_DF);
782 iph3->id = htons(9);
783 send_three = true;
784 break;
785 }
786
787 fix_ip4_checksum(iph1);
788 fix_ip4_checksum(iph2);
789 write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
790 write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
791
792 if (send_three) {
793 fix_ip4_checksum(iph3);
794 write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
795 }
796 }
797
send_ipv6_exthdr(int fd,struct sockaddr_ll * daddr,char * ext_data1,char * ext_data2)798 static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
799 {
800 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
801 static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
802
803 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
804 add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
805 write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
806
807 create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
808 add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
809 write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
810 }
811
812 /* IPv4 options shouldn't coalesce */
send_ip_options(int fd,struct sockaddr_ll * daddr)813 static void send_ip_options(int fd, struct sockaddr_ll *daddr)
814 {
815 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
816 static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
817 int optlen = sizeof(struct ip_timestamp);
818 int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
819
820 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
821 write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
822
823 create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
824 add_ipv4_ts_option(buf, optpkt);
825 write_packet(fd, optpkt, pkt_size, daddr);
826
827 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
828 write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
829 }
830
831 /* IPv4 fragments shouldn't coalesce */
send_fragment4(int fd,struct sockaddr_ll * daddr)832 static void send_fragment4(int fd, struct sockaddr_ll *daddr)
833 {
834 static char buf[IP_MAXPACKET];
835 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
836 int pkt_size = total_hdr_len + PAYLOAD_LEN;
837
838 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
839 write_packet(fd, buf, pkt_size, daddr);
840
841 /* Once fragmented, packet would retain the total_len.
842 * Tcp header is prepared as if rest of data is in follow-up frags,
843 * but follow up frags aren't actually sent.
844 */
845 memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
846 fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
847 fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP);
848 fill_datalinklayer(buf);
849
850 iph->frag_off = htons(0x6000); // DF = 1, MF = 1
851 iph->check = 0;
852 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
853 write_packet(fd, buf, pkt_size, daddr);
854 }
855
856 /* IPv4 packets with different ttl don't coalesce.*/
send_changed_ttl(int fd,struct sockaddr_ll * daddr)857 static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
858 {
859 int pkt_size = total_hdr_len + PAYLOAD_LEN;
860 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
861 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
862
863 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
864 write_packet(fd, buf, pkt_size, daddr);
865
866 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
867 iph->ttl = 7;
868 iph->check = 0;
869 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
870 write_packet(fd, buf, pkt_size, daddr);
871 }
872
873 /* Packets with different tos don't coalesce.*/
send_changed_tos(int fd,struct sockaddr_ll * daddr)874 static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
875 {
876 int pkt_size = total_hdr_len + PAYLOAD_LEN;
877 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
878 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
879 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
880
881 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
882 write_packet(fd, buf, pkt_size, daddr);
883
884 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
885 if (proto == PF_INET) {
886 iph->tos = 1;
887 iph->check = 0;
888 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
889 } else if (proto == PF_INET6) {
890 ip6h->priority = 0xf;
891 }
892 write_packet(fd, buf, pkt_size, daddr);
893 }
894
895 /* Packets with different ECN don't coalesce.*/
send_changed_ECN(int fd,struct sockaddr_ll * daddr)896 static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
897 {
898 int pkt_size = total_hdr_len + PAYLOAD_LEN;
899 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
900 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
901
902 create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
903 write_packet(fd, buf, pkt_size, daddr);
904
905 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
906 if (proto == PF_INET) {
907 buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
908 iph->check = 0;
909 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
910 } else {
911 buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
912 }
913 write_packet(fd, buf, pkt_size, daddr);
914 }
915
916 /* IPv6 fragments and packets with extensions don't coalesce.*/
send_fragment6(int fd,struct sockaddr_ll * daddr)917 static void send_fragment6(int fd, struct sockaddr_ll *daddr)
918 {
919 static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
920 static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
921 sizeof(struct ip6_frag)];
922 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
923 struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
924 int extlen = sizeof(struct ip6_frag);
925 int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
926 int extpkt_len = bufpkt_len + extlen;
927 int i;
928
929 for (i = 0; i < 2; i++) {
930 create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
931 write_packet(fd, buf, bufpkt_len, daddr);
932 }
933 sleep(1);
934 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
935 memset(extpkt, 0, extpkt_len);
936
937 ip6h->nexthdr = IPPROTO_FRAGMENT;
938 ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
939 frag->ip6f_nxt = IPPROTO_TCP;
940
941 memcpy(extpkt, buf, tcp_offset);
942 memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
943 sizeof(struct tcphdr) + PAYLOAD_LEN);
944 write_packet(fd, extpkt, extpkt_len, daddr);
945
946 create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
947 write_packet(fd, buf, bufpkt_len, daddr);
948 }
949
bind_packetsocket(int fd)950 static void bind_packetsocket(int fd)
951 {
952 struct sockaddr_ll daddr = {};
953
954 daddr.sll_family = AF_PACKET;
955 daddr.sll_protocol = ethhdr_proto;
956 daddr.sll_ifindex = if_nametoindex(ifname);
957 if (daddr.sll_ifindex == 0)
958 error(1, errno, "if_nametoindex");
959
960 if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
961 error(1, errno, "could not bind socket");
962 }
963
/*
 * Put a three second receive timeout (SO_RCVTIMEO) on @fd.
 * Exits via error() if setsockopt() fails.
 */
static void set_timeout(int fd)
{
	const struct timeval rcv_timeo = {
		.tv_sec = 3,
		.tv_usec = 0,
	};
	int rc;

	rc = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &rcv_timeo,
			sizeof(rcv_timeo));
	if (rc < 0)
		error(1, errno, "cannot set timeout, setsockopt failed");
}
974
/*
 * Request a 1 MB receive buffer (SO_RCVBUF) for @fd.
 * Exits via error() if setsockopt() fails.
 */
static void set_rcvbuf(int fd)
{
	int rcvbuf_bytes = 1 << 20; /* 1 MB */
	int rc;

	rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_bytes,
			sizeof(rcvbuf_bytes));
	if (rc != 0)
		error(1, errno, "cannot set rcvbuf size, setsockopt failed");
}
982
/*
 * Report a failed receive on packet socket @fd and exit.
 *
 * Prints the socket's PACKET_STATISTICS counters to stderr first, then exits
 * via error() using @rcv_errno (the errno captured by the caller) as the
 * error number.
 */
static void recv_error(int fd, int rcv_errno)
{
	struct tpacket_stats pkt_stats;
	socklen_t optlen = sizeof(pkt_stats);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &pkt_stats,
		       &optlen) != 0)
		error(1, errno, "can't get stats");

	fprintf(stderr, "Socket stats: packets=%u, drops=%u\n",
		pkt_stats.tp_packets, pkt_stats.tp_drops);

	error(1, rcv_errno, "could not receive");
}
996
check_recv_pkts(int fd,int * correct_payload,int correct_num_pkts)997 static void check_recv_pkts(int fd, int *correct_payload,
998 int correct_num_pkts)
999 {
1000 static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
1001 struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
1002 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
1003 struct tcphdr *tcph;
1004 bool bad_packet = false;
1005 int tcp_ext_len = 0;
1006 int ip_ext_len = 0;
1007 int pkt_size = -1;
1008 int data_len = 0;
1009 int num_pkt = 0;
1010 int i;
1011
1012 vlog("Expected {");
1013 for (i = 0; i < correct_num_pkts; i++)
1014 vlog("%d ", correct_payload[i]);
1015 vlog("}, Total %d packets\nReceived {", correct_num_pkts);
1016
1017 while (1) {
1018 ip_ext_len = 0;
1019 pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
1020 if (pkt_size < 0)
1021 recv_error(fd, errno);
1022
1023 if (iph->version == 4)
1024 ip_ext_len = (iph->ihl - 5) * 4;
1025 else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
1026 ip_ext_len = MIN_EXTHDR_SIZE;
1027
1028 tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
1029
1030 if (tcph->fin)
1031 break;
1032
1033 tcp_ext_len = (tcph->doff - 5) * 4;
1034 data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
1035 /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
1036 * Ipv4/tcp packets without at least 6 bytes of data will be padded.
1037 * Packet sockets are protocol agnostic, and will not trim the padding.
1038 */
1039 if (pkt_size == ETH_ZLEN && iph->version == 4) {
1040 data_len = ntohs(iph->tot_len)
1041 - sizeof(struct tcphdr) - sizeof(struct iphdr);
1042 }
1043 vlog("%d ", data_len);
1044 if (data_len != correct_payload[num_pkt]) {
1045 vlog("[!=%d]", correct_payload[num_pkt]);
1046 bad_packet = true;
1047 }
1048 num_pkt++;
1049 }
1050 vlog("}, Total %d packets.\n", num_pkt);
1051 if (num_pkt != correct_num_pkts)
1052 error(1, 0, "incorrect number of packets");
1053 if (bad_packet)
1054 error(1, 0, "incorrect packet geometry");
1055
1056 printf("Test succeeded\n\n");
1057 }
1058
gro_sender(void)1059 static void gro_sender(void)
1060 {
1061 const int fin_delay_us = 100 * 1000;
1062 static char fin_pkt[MAX_HDR_LEN];
1063 struct sockaddr_ll daddr = {};
1064 int txfd = -1;
1065
1066 txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
1067 if (txfd < 0)
1068 error(1, errno, "socket creation");
1069
1070 memset(&daddr, 0, sizeof(daddr));
1071 daddr.sll_ifindex = if_nametoindex(ifname);
1072 if (daddr.sll_ifindex == 0)
1073 error(1, errno, "if_nametoindex");
1074 daddr.sll_family = AF_PACKET;
1075 memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
1076 daddr.sll_halen = ETH_ALEN;
1077 create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
1078
1079 /* data sub-tests */
1080 if (strcmp(testname, "data_same") == 0) {
1081 send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
1082 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1083 } else if (strcmp(testname, "data_lrg_sml") == 0) {
1084 send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
1085 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1086 } else if (strcmp(testname, "data_sml_lrg") == 0) {
1087 send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
1088 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1089
1090 /* ack test */
1091 } else if (strcmp(testname, "ack") == 0) {
1092 send_ack(txfd, &daddr);
1093 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1094
1095 /* flags sub-tests */
1096 } else if (strcmp(testname, "flags_psh") == 0) {
1097 send_flags(txfd, &daddr, 1, 0, 0, 0, 0);
1098 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1099 } else if (strcmp(testname, "flags_syn") == 0) {
1100 send_flags(txfd, &daddr, 0, 1, 0, 0, 0);
1101 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1102 } else if (strcmp(testname, "flags_rst") == 0) {
1103 send_flags(txfd, &daddr, 0, 0, 1, 0, 0);
1104 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1105 } else if (strcmp(testname, "flags_urg") == 0) {
1106 send_flags(txfd, &daddr, 0, 0, 0, 1, 0);
1107 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1108 } else if (strcmp(testname, "flags_cwr") == 0) {
1109 send_flags(txfd, &daddr, 0, 0, 0, 0, 1);
1110 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1111
1112 /* tcp sub-tests */
1113 } else if (strcmp(testname, "tcp_csum") == 0) {
1114 send_changed_checksum(txfd, &daddr);
1115 usleep(fin_delay_us);
1116 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1117 } else if (strcmp(testname, "tcp_seq") == 0) {
1118 send_changed_seq(txfd, &daddr);
1119 usleep(fin_delay_us);
1120 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1121 } else if (strcmp(testname, "tcp_ts") == 0) {
1122 send_changed_ts(txfd, &daddr);
1123 usleep(fin_delay_us);
1124 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1125 } else if (strcmp(testname, "tcp_opt") == 0) {
1126 send_diff_opt(txfd, &daddr);
1127 usleep(fin_delay_us);
1128 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1129
1130 /* ip sub-tests - shared between IPv4 and IPv6 */
1131 } else if (strcmp(testname, "ip_ecn") == 0) {
1132 send_changed_ECN(txfd, &daddr);
1133 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1134 } else if (strcmp(testname, "ip_tos") == 0) {
1135 send_changed_tos(txfd, &daddr);
1136 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1137
1138 /* ip sub-tests - IPv4 only */
1139 } else if (strcmp(testname, "ip_ttl") == 0) {
1140 send_changed_ttl(txfd, &daddr);
1141 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1142 } else if (strcmp(testname, "ip_opt") == 0) {
1143 send_ip_options(txfd, &daddr);
1144 usleep(fin_delay_us);
1145 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1146 } else if (strcmp(testname, "ip_frag4") == 0) {
1147 send_fragment4(txfd, &daddr);
1148 usleep(fin_delay_us);
1149 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1150 } else if (strcmp(testname, "ip_id_df1_inc") == 0) {
1151 send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC);
1152 usleep(fin_delay_us);
1153 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1154 } else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
1155 send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED);
1156 usleep(fin_delay_us);
1157 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1158 } else if (strcmp(testname, "ip_id_df0_inc") == 0) {
1159 send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_INC);
1160 usleep(fin_delay_us);
1161 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1162 } else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
1163 send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_FIXED);
1164 usleep(fin_delay_us);
1165 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1166 } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
1167 send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC_FIXED);
1168 usleep(fin_delay_us);
1169 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1170 } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
1171 send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED_INC);
1172 usleep(fin_delay_us);
1173 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1174
1175 /* ip sub-tests - IPv6 only */
1176 } else if (strcmp(testname, "ip_frag6") == 0) {
1177 send_fragment6(txfd, &daddr);
1178 usleep(fin_delay_us);
1179 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1180 } else if (strcmp(testname, "ip_v6ext_same") == 0) {
1181 send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
1182 usleep(fin_delay_us);
1183 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1184 } else if (strcmp(testname, "ip_v6ext_diff") == 0) {
1185 send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
1186 usleep(fin_delay_us);
1187 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1188
1189 /* large sub-tests */
1190 } else if (strcmp(testname, "large_max") == 0) {
1191 int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1192 int remainder = (MAX_PAYLOAD + offset) % MSS;
1193
1194 send_large(txfd, &daddr, remainder);
1195 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1196 } else if (strcmp(testname, "large_rem") == 0) {
1197 int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1198 int remainder = (MAX_PAYLOAD + offset) % MSS;
1199
1200 send_large(txfd, &daddr, remainder + 1);
1201 write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1202 } else {
1203 error(1, 0, "Unknown testcase: %s", testname);
1204 }
1205
1206 if (close(txfd))
1207 error(1, errno, "socket close");
1208 }
1209
gro_receiver(void)1210 static void gro_receiver(void)
1211 {
1212 static int correct_payload[NUM_PACKETS];
1213 int rxfd = -1;
1214
1215 rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
1216 if (rxfd < 0)
1217 error(1, 0, "socket creation");
1218 setup_sock_filter(rxfd);
1219 set_timeout(rxfd);
1220 set_rcvbuf(rxfd);
1221 bind_packetsocket(rxfd);
1222
1223 ksft_ready();
1224
1225 memset(correct_payload, 0, sizeof(correct_payload));
1226
1227 /* data sub-tests */
1228 if (strcmp(testname, "data_same") == 0) {
1229 printf("pure data packet of same size: ");
1230 correct_payload[0] = PAYLOAD_LEN * 2;
1231 check_recv_pkts(rxfd, correct_payload, 1);
1232 } else if (strcmp(testname, "data_lrg_sml") == 0) {
1233 printf("large data packets followed by a smaller one: ");
1234 correct_payload[0] = PAYLOAD_LEN * 1.5;
1235 check_recv_pkts(rxfd, correct_payload, 1);
1236 } else if (strcmp(testname, "data_sml_lrg") == 0) {
1237 printf("small data packets followed by a larger one: ");
1238 correct_payload[0] = PAYLOAD_LEN / 2;
1239 correct_payload[1] = PAYLOAD_LEN;
1240 check_recv_pkts(rxfd, correct_payload, 2);
1241
1242 /* ack test */
1243 } else if (strcmp(testname, "ack") == 0) {
1244 printf("duplicate ack and pure ack: ");
1245 check_recv_pkts(rxfd, correct_payload, 3);
1246
1247 /* flags sub-tests */
1248 } else if (strcmp(testname, "flags_psh") == 0) {
1249 correct_payload[0] = PAYLOAD_LEN * 3;
1250 correct_payload[1] = PAYLOAD_LEN * 2;
1251 printf("psh flag ends coalescing: ");
1252 check_recv_pkts(rxfd, correct_payload, 2);
1253 } else if (strcmp(testname, "flags_syn") == 0) {
1254 correct_payload[0] = PAYLOAD_LEN * 2;
1255 correct_payload[1] = 0;
1256 correct_payload[2] = PAYLOAD_LEN * 2;
1257 printf("syn flag ends coalescing: ");
1258 check_recv_pkts(rxfd, correct_payload, 3);
1259 } else if (strcmp(testname, "flags_rst") == 0) {
1260 correct_payload[0] = PAYLOAD_LEN * 2;
1261 correct_payload[1] = 0;
1262 correct_payload[2] = PAYLOAD_LEN * 2;
1263 printf("rst flag ends coalescing: ");
1264 check_recv_pkts(rxfd, correct_payload, 3);
1265 } else if (strcmp(testname, "flags_urg") == 0) {
1266 correct_payload[0] = PAYLOAD_LEN * 2;
1267 correct_payload[1] = 0;
1268 correct_payload[2] = PAYLOAD_LEN * 2;
1269 printf("urg flag ends coalescing: ");
1270 check_recv_pkts(rxfd, correct_payload, 3);
1271 } else if (strcmp(testname, "flags_cwr") == 0) {
1272 correct_payload[0] = PAYLOAD_LEN;
1273 correct_payload[1] = PAYLOAD_LEN * 2;
1274 correct_payload[2] = PAYLOAD_LEN * 2;
1275 printf("cwr flag ends coalescing: ");
1276 check_recv_pkts(rxfd, correct_payload, 3);
1277
1278 /* tcp sub-tests */
1279 } else if (strcmp(testname, "tcp_csum") == 0) {
1280 correct_payload[0] = PAYLOAD_LEN;
1281 correct_payload[1] = PAYLOAD_LEN;
1282 printf("changed checksum does not coalesce: ");
1283 check_recv_pkts(rxfd, correct_payload, 2);
1284 } else if (strcmp(testname, "tcp_seq") == 0) {
1285 correct_payload[0] = PAYLOAD_LEN;
1286 correct_payload[1] = PAYLOAD_LEN;
1287 printf("Wrong Seq number doesn't coalesce: ");
1288 check_recv_pkts(rxfd, correct_payload, 2);
1289 } else if (strcmp(testname, "tcp_ts") == 0) {
1290 correct_payload[0] = PAYLOAD_LEN * 2;
1291 correct_payload[1] = PAYLOAD_LEN;
1292 correct_payload[2] = PAYLOAD_LEN;
1293 correct_payload[3] = PAYLOAD_LEN;
1294 printf("Different timestamp doesn't coalesce: ");
1295 check_recv_pkts(rxfd, correct_payload, 4);
1296 } else if (strcmp(testname, "tcp_opt") == 0) {
1297 correct_payload[0] = PAYLOAD_LEN * 2;
1298 correct_payload[1] = PAYLOAD_LEN;
1299 printf("Different options doesn't coalesce: ");
1300 check_recv_pkts(rxfd, correct_payload, 2);
1301
1302 /* ip sub-tests - shared between IPv4 and IPv6 */
1303 } else if (strcmp(testname, "ip_ecn") == 0) {
1304 correct_payload[0] = PAYLOAD_LEN;
1305 correct_payload[1] = PAYLOAD_LEN;
1306 printf("different ECN doesn't coalesce: ");
1307 check_recv_pkts(rxfd, correct_payload, 2);
1308 } else if (strcmp(testname, "ip_tos") == 0) {
1309 correct_payload[0] = PAYLOAD_LEN;
1310 correct_payload[1] = PAYLOAD_LEN;
1311 printf("different tos doesn't coalesce: ");
1312 check_recv_pkts(rxfd, correct_payload, 2);
1313
1314 /* ip sub-tests - IPv4 only */
1315 } else if (strcmp(testname, "ip_ttl") == 0) {
1316 correct_payload[0] = PAYLOAD_LEN;
1317 correct_payload[1] = PAYLOAD_LEN;
1318 printf("different ttl doesn't coalesce: ");
1319 check_recv_pkts(rxfd, correct_payload, 2);
1320 } else if (strcmp(testname, "ip_opt") == 0) {
1321 correct_payload[0] = PAYLOAD_LEN;
1322 correct_payload[1] = PAYLOAD_LEN;
1323 correct_payload[2] = PAYLOAD_LEN;
1324 printf("ip options doesn't coalesce: ");
1325 check_recv_pkts(rxfd, correct_payload, 3);
1326 } else if (strcmp(testname, "ip_frag4") == 0) {
1327 correct_payload[0] = PAYLOAD_LEN;
1328 correct_payload[1] = PAYLOAD_LEN;
1329 printf("fragmented ip4 doesn't coalesce: ");
1330 check_recv_pkts(rxfd, correct_payload, 2);
1331 } else if (strcmp(testname, "ip_id_df1_inc") == 0) {
1332 printf("DF=1, Incrementing - should coalesce: ");
1333 correct_payload[0] = PAYLOAD_LEN * 2;
1334 check_recv_pkts(rxfd, correct_payload, 1);
1335 } else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
1336 printf("DF=1, Fixed - should coalesce: ");
1337 correct_payload[0] = PAYLOAD_LEN * 2;
1338 check_recv_pkts(rxfd, correct_payload, 1);
1339 } else if (strcmp(testname, "ip_id_df0_inc") == 0) {
1340 printf("DF=0, Incrementing - should coalesce: ");
1341 correct_payload[0] = PAYLOAD_LEN * 2;
1342 check_recv_pkts(rxfd, correct_payload, 1);
1343 } else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
1344 printf("DF=0, Fixed - should coalesce: ");
1345 correct_payload[0] = PAYLOAD_LEN * 2;
1346 check_recv_pkts(rxfd, correct_payload, 1);
1347 } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
1348 printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
1349 correct_payload[0] = PAYLOAD_LEN * 2;
1350 correct_payload[1] = PAYLOAD_LEN;
1351 check_recv_pkts(rxfd, correct_payload, 2);
1352 } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
1353 printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
1354 correct_payload[0] = PAYLOAD_LEN * 2;
1355 correct_payload[1] = PAYLOAD_LEN;
1356 check_recv_pkts(rxfd, correct_payload, 2);
1357
1358 /* ip sub-tests - IPv6 only */
1359 } else if (strcmp(testname, "ip_frag6") == 0) {
1360 /* GRO doesn't check for ipv6 hop limit when flushing.
1361 * Hence no corresponding test to the ipv4 case.
1362 */
1363 printf("fragmented ip6 doesn't coalesce: ");
1364 correct_payload[0] = PAYLOAD_LEN * 2;
1365 correct_payload[1] = PAYLOAD_LEN;
1366 correct_payload[2] = PAYLOAD_LEN;
1367 check_recv_pkts(rxfd, correct_payload, 3);
1368 } else if (strcmp(testname, "ip_v6ext_same") == 0) {
1369 printf("ipv6 with ext header does coalesce: ");
1370 correct_payload[0] = PAYLOAD_LEN * 2;
1371 check_recv_pkts(rxfd, correct_payload, 1);
1372 } else if (strcmp(testname, "ip_v6ext_diff") == 0) {
1373 printf("ipv6 with ext header with different payloads doesn't coalesce: ");
1374 correct_payload[0] = PAYLOAD_LEN;
1375 correct_payload[1] = PAYLOAD_LEN;
1376 check_recv_pkts(rxfd, correct_payload, 2);
1377
1378 /* large sub-tests */
1379 } else if (strcmp(testname, "large_max") == 0) {
1380 int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1381 int remainder = (MAX_PAYLOAD + offset) % MSS;
1382
1383 correct_payload[0] = (MAX_PAYLOAD + offset);
1384 correct_payload[1] = remainder;
1385 printf("Shouldn't coalesce if exceed IP max pkt size: ");
1386 check_recv_pkts(rxfd, correct_payload, 2);
1387 } else if (strcmp(testname, "large_rem") == 0) {
1388 int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1389 int remainder = (MAX_PAYLOAD + offset) % MSS;
1390
1391 /* last segment sent individually, doesn't start new segment */
1392 correct_payload[0] = (MAX_PAYLOAD + offset) - remainder;
1393 correct_payload[1] = remainder + 1;
1394 correct_payload[2] = remainder + 1;
1395 printf("last segment sent individually: ");
1396 check_recv_pkts(rxfd, correct_payload, 3);
1397 } else {
1398 error(1, 0, "Test case error: unknown testname %s", testname);
1399 }
1400
1401 if (close(rxfd))
1402 error(1, 0, "socket close");
1403 }
1404
parse_args(int argc,char ** argv)1405 static void parse_args(int argc, char **argv)
1406 {
1407 static const struct option opts[] = {
1408 { "daddr", required_argument, NULL, 'd' },
1409 { "dmac", required_argument, NULL, 'D' },
1410 { "iface", required_argument, NULL, 'i' },
1411 { "ipv4", no_argument, NULL, '4' },
1412 { "ipv6", no_argument, NULL, '6' },
1413 { "ipip", no_argument, NULL, 'e' },
1414 { "rx", no_argument, NULL, 'r' },
1415 { "saddr", required_argument, NULL, 's' },
1416 { "smac", required_argument, NULL, 'S' },
1417 { "test", required_argument, NULL, 't' },
1418 { "verbose", no_argument, NULL, 'v' },
1419 { 0, 0, 0, 0 }
1420 };
1421 int c;
1422
1423 while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) {
1424 switch (c) {
1425 case '4':
1426 proto = PF_INET;
1427 ethhdr_proto = htons(ETH_P_IP);
1428 break;
1429 case '6':
1430 proto = PF_INET6;
1431 ethhdr_proto = htons(ETH_P_IPV6);
1432 break;
1433 case 'e':
1434 ipip = true;
1435 proto = PF_INET;
1436 ethhdr_proto = htons(ETH_P_IP);
1437 break;
1438 case 'd':
1439 addr4_dst = addr6_dst = optarg;
1440 break;
1441 case 'D':
1442 dmac = optarg;
1443 break;
1444 case 'i':
1445 ifname = optarg;
1446 break;
1447 case 'r':
1448 tx_socket = false;
1449 break;
1450 case 's':
1451 addr4_src = addr6_src = optarg;
1452 break;
1453 case 'S':
1454 smac = optarg;
1455 break;
1456 case 't':
1457 testname = optarg;
1458 break;
1459 case 'v':
1460 verbose = true;
1461 break;
1462 default:
1463 error(1, 0, "%s invalid option %c\n", __func__, c);
1464 break;
1465 }
1466 }
1467 }
1468
main(int argc,char ** argv)1469 int main(int argc, char **argv)
1470 {
1471 parse_args(argc, argv);
1472
1473 if (ipip) {
1474 tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2;
1475 total_hdr_len = tcp_offset + sizeof(struct tcphdr);
1476 } else if (proto == PF_INET) {
1477 tcp_offset = ETH_HLEN + sizeof(struct iphdr);
1478 total_hdr_len = tcp_offset + sizeof(struct tcphdr);
1479 } else if (proto == PF_INET6) {
1480 tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
1481 total_hdr_len = MAX_HDR_LEN;
1482 } else {
1483 error(1, 0, "Protocol family is not ipv4 or ipv6");
1484 }
1485
1486 read_MAC(src_mac, smac);
1487 read_MAC(dst_mac, dmac);
1488
1489 if (tx_socket) {
1490 gro_sender();
1491 } else {
1492 /* Only the receiver exit status determines test success. */
1493 gro_receiver();
1494 fprintf(stderr, "Gro::%s test passed.\n", testname);
1495 }
1496
1497 return 0;
1498 }
1499