// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
3b0b04fc4SAlexei Starovoitov #include <stddef.h>
4b0b04fc4SAlexei Starovoitov #include <stdbool.h>
5b0b04fc4SAlexei Starovoitov #include <string.h>
6b0b04fc4SAlexei Starovoitov #include <linux/pkt_cls.h>
7b0b04fc4SAlexei Starovoitov #include <linux/bpf.h>
8b0b04fc4SAlexei Starovoitov #include <linux/in.h>
9b0b04fc4SAlexei Starovoitov #include <linux/if_ether.h>
10b0b04fc4SAlexei Starovoitov #include <linux/ip.h>
11b0b04fc4SAlexei Starovoitov #include <linux/ipv6.h>
12b0b04fc4SAlexei Starovoitov #include <linux/icmp.h>
13b0b04fc4SAlexei Starovoitov #include <linux/icmpv6.h>
14b0b04fc4SAlexei Starovoitov #include <linux/tcp.h>
15b0b04fc4SAlexei Starovoitov #include <linux/udp.h>
163e689141SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
173e689141SToke Høiland-Jørgensen #include <bpf/bpf_endian.h>
18*52dbd67dSJose E. Marchesi #include "bpf_compiler.h"
19b0b04fc4SAlexei Starovoitov
/* Rotate the 32-bit value @word left by @shift bit positions.
 *
 * The complementary right-shift count is computed as (-shift) & 31, so it
 * always stays in the range 0..31 (a @shift of 0 yields @word unchanged).
 */
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
	return (word << shift) | (word >> ((-shift) & 31));
}
24b0b04fc4SAlexei Starovoitov
/* copy paste of jhash from kernel sources to make sure llvm
 * can compile it into valid sequence of bpf instructions
 */

/* __jhash_mix - mix the three 32-bit state words a, b and c in place.
 * Each argument is both read and assigned several times, so callers must
 * pass plain lvalues without side effects.
 */
#define __jhash_mix(a, b, c)			\
{						\
	a -= c;  a ^= rol32(c, 4);  c += b;	\
	b -= a;  b ^= rol32(a, 6);  a += c;	\
	c -= b;  c ^= rol32(b, 8);  b += a;	\
	a -= c;  a ^= rol32(c, 16); c += b;	\
	b -= a;  b ^= rol32(a, 19); a += c;	\
	c -= b;  c ^= rol32(b, 4);  b += a;	\
}
37b0b04fc4SAlexei Starovoitov
/* __jhash_final - final mixing of the three 32-bit state words; the hash
 * functions in this file return c after this step.
 * Each argument is both read and assigned several times, so callers must
 * pass plain lvalues without side effects.
 */
#define __jhash_final(a, b, c)			\
{						\
	c ^= b; c -= rol32(b, 14);		\
	a ^= c; a -= rol32(c, 11);		\
	b ^= a; b -= rol32(a, 25);		\
	c ^= b; c -= rol32(b, 16);		\
	a ^= c; a -= rol32(c, 4);		\
	b ^= a; b -= rol32(a, 14);		\
	c ^= b; c -= rol32(b, 24);		\
}
48b0b04fc4SAlexei Starovoitov
/* Constant added into the initial hash state by jhash() and jhash_2words(). */
#define JHASH_INITVAL		0xdeadbeef

/* Local 32-bit unsigned alias used by the jhash helpers in this file. */
typedef unsigned int u32;
52b0b04fc4SAlexei Starovoitov
53baaf680eSAndrii Nakryiko static __noinline
jhash(const void * key,u32 length,u32 initval)54b0b04fc4SAlexei Starovoitov u32 jhash(const void *key, u32 length, u32 initval)
55b0b04fc4SAlexei Starovoitov {
56b0b04fc4SAlexei Starovoitov u32 a, b, c;
57b0b04fc4SAlexei Starovoitov const unsigned char *k = key;
58b0b04fc4SAlexei Starovoitov
59b0b04fc4SAlexei Starovoitov a = b = c = JHASH_INITVAL + length + initval;
60b0b04fc4SAlexei Starovoitov
61b0b04fc4SAlexei Starovoitov while (length > 12) {
62b0b04fc4SAlexei Starovoitov a += *(u32 *)(k);
63b0b04fc4SAlexei Starovoitov b += *(u32 *)(k + 4);
64b0b04fc4SAlexei Starovoitov c += *(u32 *)(k + 8);
65b0b04fc4SAlexei Starovoitov __jhash_mix(a, b, c);
66b0b04fc4SAlexei Starovoitov length -= 12;
67b0b04fc4SAlexei Starovoitov k += 12;
68b0b04fc4SAlexei Starovoitov }
69b0b04fc4SAlexei Starovoitov switch (length) {
70b0b04fc4SAlexei Starovoitov case 12: c += (u32)k[11]<<24;
71b0b04fc4SAlexei Starovoitov case 11: c += (u32)k[10]<<16;
72b0b04fc4SAlexei Starovoitov case 10: c += (u32)k[9]<<8;
73b0b04fc4SAlexei Starovoitov case 9: c += k[8];
74b0b04fc4SAlexei Starovoitov case 8: b += (u32)k[7]<<24;
75b0b04fc4SAlexei Starovoitov case 7: b += (u32)k[6]<<16;
76b0b04fc4SAlexei Starovoitov case 6: b += (u32)k[5]<<8;
77b0b04fc4SAlexei Starovoitov case 5: b += k[4];
78b0b04fc4SAlexei Starovoitov case 4: a += (u32)k[3]<<24;
79b0b04fc4SAlexei Starovoitov case 3: a += (u32)k[2]<<16;
80b0b04fc4SAlexei Starovoitov case 2: a += (u32)k[1]<<8;
81b0b04fc4SAlexei Starovoitov case 1: a += k[0];
82b0b04fc4SAlexei Starovoitov __jhash_final(a, b, c);
83b0b04fc4SAlexei Starovoitov case 0: /* Nothing left to add */
84b0b04fc4SAlexei Starovoitov break;
85b0b04fc4SAlexei Starovoitov }
86b0b04fc4SAlexei Starovoitov
87b0b04fc4SAlexei Starovoitov return c;
88b0b04fc4SAlexei Starovoitov }
89b0b04fc4SAlexei Starovoitov
90baaf680eSAndrii Nakryiko __noinline
__jhash_nwords(u32 a,u32 b,u32 c,u32 initval)91b0b04fc4SAlexei Starovoitov u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
92b0b04fc4SAlexei Starovoitov {
93b0b04fc4SAlexei Starovoitov a += initval;
94b0b04fc4SAlexei Starovoitov b += initval;
95b0b04fc4SAlexei Starovoitov c += initval;
96b0b04fc4SAlexei Starovoitov __jhash_final(a, b, c);
97b0b04fc4SAlexei Starovoitov return c;
98b0b04fc4SAlexei Starovoitov }
99b0b04fc4SAlexei Starovoitov
100baaf680eSAndrii Nakryiko __noinline
jhash_2words(u32 a,u32 b,u32 initval)101b0b04fc4SAlexei Starovoitov u32 jhash_2words(u32 a, u32 b, u32 initval)
102b0b04fc4SAlexei Starovoitov {
103b0b04fc4SAlexei Starovoitov return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
104b0b04fc4SAlexei Starovoitov }
105b0b04fc4SAlexei Starovoitov
/* Flow identifier; used as the key type of the lru_cache map. */
struct flow_key {
	/* source address: one 32-bit word or four, sharing storage */
	union {
		__be32 src;
		__be32 srcv6[4];
	};
	/* destination address: one 32-bit word or four, sharing storage */
	union {
		__be32 dst;
		__be32 dstv6[4];
	};
	/* both 16-bit ports, also addressable as a single 32-bit word */
	union {
		__u32 ports;
		__u16 port16[2];
	};
	/* protocol number taken from the IP header */
	__u8 proto;
};
121b0b04fc4SAlexei Starovoitov
122b0b04fc4SAlexei Starovoitov struct packet_description {
123b0b04fc4SAlexei Starovoitov struct flow_key flow;
124b0b04fc4SAlexei Starovoitov __u8 flags;
125b0b04fc4SAlexei Starovoitov };
126b0b04fc4SAlexei Starovoitov
/* Value type of the ctl_array map; the three members share storage.
 * encap_v4()/encap_v6() read @mac as the new Ethernet destination address.
 */
struct ctl_value {
	union {
		__u64 value;
		__u32 ifindex;
		__u8 mac[6];
	};
};
134b0b04fc4SAlexei Starovoitov
/* Key type of the vip_map map. */
struct vip_definition {
	/* address: one 32-bit word or four, sharing storage */
	union {
		__be32 vip;
		__be32 vipv6[4];
	};
	__u16 port;
	__u16 family;
	__u8 proto;
};
144b0b04fc4SAlexei Starovoitov
/* Value type of the vip_map map. */
struct vip_meta {
	/* bits 1, 2 and 3 are tested by get_packet_dst() */
	__u32 flags;
	/* used by get_packet_dst() to derive the ch_rings key */
	__u32 vip_num;
};
149b0b04fc4SAlexei Starovoitov
/* Value type of the lru_cache map. */
struct real_pos_lru {
	/* key into the reals map */
	__u32 pos;
	/* bpf_ktime_get_ns() timestamp; compared and refreshed for UDP flows
	 * in connection_table_lookup()
	 */
	__u64 atime;
};
154b0b04fc4SAlexei Starovoitov
/* Value type of the reals map; supplies the address used by
 * encap_v4()/encap_v6().
 */
struct real_definition {
	/* address: one 32-bit word or four, sharing storage */
	union {
		__be32 dst;
		__be32 dstv6[4];
	};
	__u8 flags;
};
162b0b04fc4SAlexei Starovoitov
/* Value type of the stats map.  get_packet_dst() stores a
 * bpf_ktime_get_ns() timestamp in @v2 and keeps a counter in @v1.
 */
struct lb_stats {
	__u64 v2;
	__u64 v1;
};
167b0b04fc4SAlexei Starovoitov
168df0b7792SAndrii Nakryiko struct {
169bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_HASH);
170bc7430ccSAndrii Nakryiko __uint(max_entries, 512);
171bc7430ccSAndrii Nakryiko __type(key, struct vip_definition);
172bc7430ccSAndrii Nakryiko __type(value, struct vip_meta);
173bc7430ccSAndrii Nakryiko } vip_map SEC(".maps");
174b0b04fc4SAlexei Starovoitov
175df0b7792SAndrii Nakryiko struct {
176bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_LRU_HASH);
177bc7430ccSAndrii Nakryiko __uint(max_entries, 300);
178bc7430ccSAndrii Nakryiko __uint(map_flags, 1U << 1);
179bc7430ccSAndrii Nakryiko __type(key, struct flow_key);
180bc7430ccSAndrii Nakryiko __type(value, struct real_pos_lru);
181bc7430ccSAndrii Nakryiko } lru_cache SEC(".maps");
182b0b04fc4SAlexei Starovoitov
183df0b7792SAndrii Nakryiko struct {
184bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_ARRAY);
185bc7430ccSAndrii Nakryiko __uint(max_entries, 12 * 655);
186bc7430ccSAndrii Nakryiko __type(key, __u32);
187bc7430ccSAndrii Nakryiko __type(value, __u32);
188bc7430ccSAndrii Nakryiko } ch_rings SEC(".maps");
189b0b04fc4SAlexei Starovoitov
190df0b7792SAndrii Nakryiko struct {
191bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_ARRAY);
192bc7430ccSAndrii Nakryiko __uint(max_entries, 40);
193bc7430ccSAndrii Nakryiko __type(key, __u32);
194bc7430ccSAndrii Nakryiko __type(value, struct real_definition);
195bc7430ccSAndrii Nakryiko } reals SEC(".maps");
196b0b04fc4SAlexei Starovoitov
197df0b7792SAndrii Nakryiko struct {
198bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
199bc7430ccSAndrii Nakryiko __uint(max_entries, 515);
200bc7430ccSAndrii Nakryiko __type(key, __u32);
201bc7430ccSAndrii Nakryiko __type(value, struct lb_stats);
202bc7430ccSAndrii Nakryiko } stats SEC(".maps");
203b0b04fc4SAlexei Starovoitov
204df0b7792SAndrii Nakryiko struct {
205bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_ARRAY);
206bc7430ccSAndrii Nakryiko __uint(max_entries, 16);
207bc7430ccSAndrii Nakryiko __type(key, __u32);
208bc7430ccSAndrii Nakryiko __type(value, struct ctl_value);
209bc7430ccSAndrii Nakryiko } ctl_array SEC(".maps");
210b0b04fc4SAlexei Starovoitov
/* Ethernet header layout used by this program: two 6-byte addresses
 * followed by a 16-bit protocol field.
 */
struct eth_hdr {
	unsigned char eth_dest[6];
	unsigned char eth_source[6];
	unsigned short eth_proto;
};
216b0b04fc4SAlexei Starovoitov
calc_offset(bool is_ipv6,bool is_icmp)217baaf680eSAndrii Nakryiko static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
218b0b04fc4SAlexei Starovoitov {
219b0b04fc4SAlexei Starovoitov __u64 off = sizeof(struct eth_hdr);
220b0b04fc4SAlexei Starovoitov if (is_ipv6) {
221b0b04fc4SAlexei Starovoitov off += sizeof(struct ipv6hdr);
222b0b04fc4SAlexei Starovoitov if (is_icmp)
223b0b04fc4SAlexei Starovoitov off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
224b0b04fc4SAlexei Starovoitov } else {
225b0b04fc4SAlexei Starovoitov off += sizeof(struct iphdr);
226b0b04fc4SAlexei Starovoitov if (is_icmp)
227b0b04fc4SAlexei Starovoitov off += sizeof(struct icmphdr) + sizeof(struct iphdr);
228b0b04fc4SAlexei Starovoitov }
229b0b04fc4SAlexei Starovoitov return off;
230b0b04fc4SAlexei Starovoitov }
231b0b04fc4SAlexei Starovoitov
232b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
parse_udp(void * data,void * data_end,bool is_ipv6,struct packet_description * pckt)233b0b04fc4SAlexei Starovoitov bool parse_udp(void *data, void *data_end,
234b0b04fc4SAlexei Starovoitov bool is_ipv6, struct packet_description *pckt)
235b0b04fc4SAlexei Starovoitov {
236b0b04fc4SAlexei Starovoitov
237b0b04fc4SAlexei Starovoitov bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
238b0b04fc4SAlexei Starovoitov __u64 off = calc_offset(is_ipv6, is_icmp);
239b0b04fc4SAlexei Starovoitov struct udphdr *udp;
240b0b04fc4SAlexei Starovoitov udp = data + off;
241b0b04fc4SAlexei Starovoitov
242b0b04fc4SAlexei Starovoitov if (udp + 1 > data_end)
24394bf6aadSLinkui Xiao return false;
244b0b04fc4SAlexei Starovoitov if (!is_icmp) {
245b0b04fc4SAlexei Starovoitov pckt->flow.port16[0] = udp->source;
246b0b04fc4SAlexei Starovoitov pckt->flow.port16[1] = udp->dest;
247b0b04fc4SAlexei Starovoitov } else {
248b0b04fc4SAlexei Starovoitov pckt->flow.port16[0] = udp->dest;
249b0b04fc4SAlexei Starovoitov pckt->flow.port16[1] = udp->source;
250b0b04fc4SAlexei Starovoitov }
25194bf6aadSLinkui Xiao return true;
252b0b04fc4SAlexei Starovoitov }
253b0b04fc4SAlexei Starovoitov
254b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
parse_tcp(void * data,void * data_end,bool is_ipv6,struct packet_description * pckt)255b0b04fc4SAlexei Starovoitov bool parse_tcp(void *data, void *data_end,
256b0b04fc4SAlexei Starovoitov bool is_ipv6, struct packet_description *pckt)
257b0b04fc4SAlexei Starovoitov {
258b0b04fc4SAlexei Starovoitov
259b0b04fc4SAlexei Starovoitov bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
260b0b04fc4SAlexei Starovoitov __u64 off = calc_offset(is_ipv6, is_icmp);
261b0b04fc4SAlexei Starovoitov struct tcphdr *tcp;
262b0b04fc4SAlexei Starovoitov
263b0b04fc4SAlexei Starovoitov tcp = data + off;
264b0b04fc4SAlexei Starovoitov if (tcp + 1 > data_end)
26594bf6aadSLinkui Xiao return false;
266b0b04fc4SAlexei Starovoitov if (tcp->syn)
267b0b04fc4SAlexei Starovoitov pckt->flags |= (1 << 1);
268b0b04fc4SAlexei Starovoitov if (!is_icmp) {
269b0b04fc4SAlexei Starovoitov pckt->flow.port16[0] = tcp->source;
270b0b04fc4SAlexei Starovoitov pckt->flow.port16[1] = tcp->dest;
271b0b04fc4SAlexei Starovoitov } else {
272b0b04fc4SAlexei Starovoitov pckt->flow.port16[0] = tcp->dest;
273b0b04fc4SAlexei Starovoitov pckt->flow.port16[1] = tcp->source;
274b0b04fc4SAlexei Starovoitov }
27594bf6aadSLinkui Xiao return true;
276b0b04fc4SAlexei Starovoitov }
277b0b04fc4SAlexei Starovoitov
278b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
encap_v6(struct xdp_md * xdp,struct ctl_value * cval,struct packet_description * pckt,struct real_definition * dst,__u32 pkt_bytes)279b0b04fc4SAlexei Starovoitov bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
280b0b04fc4SAlexei Starovoitov struct packet_description *pckt,
281b0b04fc4SAlexei Starovoitov struct real_definition *dst, __u32 pkt_bytes)
282b0b04fc4SAlexei Starovoitov {
283b0b04fc4SAlexei Starovoitov struct eth_hdr *new_eth;
284b0b04fc4SAlexei Starovoitov struct eth_hdr *old_eth;
285b0b04fc4SAlexei Starovoitov struct ipv6hdr *ip6h;
286b0b04fc4SAlexei Starovoitov __u32 ip_suffix;
287b0b04fc4SAlexei Starovoitov void *data_end;
288b0b04fc4SAlexei Starovoitov void *data;
289b0b04fc4SAlexei Starovoitov
290b0b04fc4SAlexei Starovoitov if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
29194bf6aadSLinkui Xiao return false;
292b0b04fc4SAlexei Starovoitov data = (void *)(long)xdp->data;
293b0b04fc4SAlexei Starovoitov data_end = (void *)(long)xdp->data_end;
294b0b04fc4SAlexei Starovoitov new_eth = data;
295b0b04fc4SAlexei Starovoitov ip6h = data + sizeof(struct eth_hdr);
296b0b04fc4SAlexei Starovoitov old_eth = data + sizeof(struct ipv6hdr);
297b0b04fc4SAlexei Starovoitov if (new_eth + 1 > data_end ||
298b0b04fc4SAlexei Starovoitov old_eth + 1 > data_end || ip6h + 1 > data_end)
29994bf6aadSLinkui Xiao return false;
300b0b04fc4SAlexei Starovoitov memcpy(new_eth->eth_dest, cval->mac, 6);
301b0b04fc4SAlexei Starovoitov memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
302b0b04fc4SAlexei Starovoitov new_eth->eth_proto = 56710;
303b0b04fc4SAlexei Starovoitov ip6h->version = 6;
304b0b04fc4SAlexei Starovoitov ip6h->priority = 0;
305b0b04fc4SAlexei Starovoitov memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
306b0b04fc4SAlexei Starovoitov
307b0b04fc4SAlexei Starovoitov ip6h->nexthdr = IPPROTO_IPV6;
308b0b04fc4SAlexei Starovoitov ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
309b0b04fc4SAlexei Starovoitov ip6h->payload_len =
31059fd3486SIlya Leoshkevich bpf_htons(pkt_bytes + sizeof(struct ipv6hdr));
311b0b04fc4SAlexei Starovoitov ip6h->hop_limit = 4;
312b0b04fc4SAlexei Starovoitov
313b0b04fc4SAlexei Starovoitov ip6h->saddr.in6_u.u6_addr32[0] = 1;
314b0b04fc4SAlexei Starovoitov ip6h->saddr.in6_u.u6_addr32[1] = 2;
315b0b04fc4SAlexei Starovoitov ip6h->saddr.in6_u.u6_addr32[2] = 3;
316b0b04fc4SAlexei Starovoitov ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
317b0b04fc4SAlexei Starovoitov memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
31894bf6aadSLinkui Xiao return true;
319b0b04fc4SAlexei Starovoitov }
320b0b04fc4SAlexei Starovoitov
321b0b04fc4SAlexei Starovoitov #ifndef __clang__
322b0b04fc4SAlexei Starovoitov #pragma GCC push_options
323b0b04fc4SAlexei Starovoitov /* GCC optimization collapses functions and increases the number of arguments
324b0b04fc4SAlexei Starovoitov * beyond the compatible amount supported by BPF.
325b0b04fc4SAlexei Starovoitov */
326b0b04fc4SAlexei Starovoitov #pragma GCC optimize("-fno-ipa-sra")
32759fd3486SIlya Leoshkevich #endif
328b0b04fc4SAlexei Starovoitov
329b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
encap_v4(struct xdp_md * xdp,struct ctl_value * cval,struct packet_description * pckt,struct real_definition * dst,__u32 pkt_bytes)330b0b04fc4SAlexei Starovoitov bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
331b0b04fc4SAlexei Starovoitov struct packet_description *pckt,
332b0b04fc4SAlexei Starovoitov struct real_definition *dst, __u32 pkt_bytes)
333b0b04fc4SAlexei Starovoitov {
334b0b04fc4SAlexei Starovoitov
335b0b04fc4SAlexei Starovoitov __u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]);
336b0b04fc4SAlexei Starovoitov struct eth_hdr *new_eth;
337b0b04fc4SAlexei Starovoitov struct eth_hdr *old_eth;
338b0b04fc4SAlexei Starovoitov __u16 *next_iph_u16;
33994bf6aadSLinkui Xiao struct iphdr *iph;
340b0b04fc4SAlexei Starovoitov __u32 csum = 0;
341b0b04fc4SAlexei Starovoitov void *data_end;
342b0b04fc4SAlexei Starovoitov void *data;
343b0b04fc4SAlexei Starovoitov
344b0b04fc4SAlexei Starovoitov ip_suffix <<= 15;
345b0b04fc4SAlexei Starovoitov ip_suffix ^= pckt->flow.src;
346b0b04fc4SAlexei Starovoitov if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
34794bf6aadSLinkui Xiao return false;
348b0b04fc4SAlexei Starovoitov data = (void *)(long)xdp->data;
349b0b04fc4SAlexei Starovoitov data_end = (void *)(long)xdp->data_end;
350b0b04fc4SAlexei Starovoitov new_eth = data;
351b0b04fc4SAlexei Starovoitov iph = data + sizeof(struct eth_hdr);
352b0b04fc4SAlexei Starovoitov old_eth = data + sizeof(struct iphdr);
353b0b04fc4SAlexei Starovoitov if (new_eth + 1 > data_end ||
354b0b04fc4SAlexei Starovoitov old_eth + 1 > data_end || iph + 1 > data_end)
355b0b04fc4SAlexei Starovoitov return false;
356b0b04fc4SAlexei Starovoitov memcpy(new_eth->eth_dest, cval->mac, 6);
35759fd3486SIlya Leoshkevich memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
358b0b04fc4SAlexei Starovoitov new_eth->eth_proto = 8;
359b0b04fc4SAlexei Starovoitov iph->version = 4;
360b0b04fc4SAlexei Starovoitov iph->ihl = 5;
361b0b04fc4SAlexei Starovoitov iph->frag_off = 0;
362b0b04fc4SAlexei Starovoitov iph->protocol = IPPROTO_IPIP;
363b0b04fc4SAlexei Starovoitov iph->check = 0;
364b0b04fc4SAlexei Starovoitov iph->tos = 1;
365b0b04fc4SAlexei Starovoitov iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr));
366*52dbd67dSJose E. Marchesi /* don't update iph->daddr, since it will overwrite old eth_proto
367b0b04fc4SAlexei Starovoitov * and multiple iterations of bpf_prog_run() will fail
368b0b04fc4SAlexei Starovoitov */
369b0b04fc4SAlexei Starovoitov
370b0b04fc4SAlexei Starovoitov iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
37194bf6aadSLinkui Xiao iph->ttl = 4;
37294bf6aadSLinkui Xiao
373b0b04fc4SAlexei Starovoitov next_iph_u16 = (__u16 *) iph;
374b0b04fc4SAlexei Starovoitov __pragma_loop_unroll_full
375b0b04fc4SAlexei Starovoitov for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
376b0b04fc4SAlexei Starovoitov csum += *next_iph_u16++;
377b0b04fc4SAlexei Starovoitov iph->check = ~((csum & 0xffff) + (csum >> 16));
378b0b04fc4SAlexei Starovoitov if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
379b0b04fc4SAlexei Starovoitov return false;
380b0b04fc4SAlexei Starovoitov return true;
381b0b04fc4SAlexei Starovoitov }
382b0b04fc4SAlexei Starovoitov
383b0b04fc4SAlexei Starovoitov #ifndef __clang__
384b0b04fc4SAlexei Starovoitov #pragma GCC pop_options
385b0b04fc4SAlexei Starovoitov #endif
386b0b04fc4SAlexei Starovoitov
387b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
swap_mac_and_send(void * data,void * data_end)388b0b04fc4SAlexei Starovoitov int swap_mac_and_send(void *data, void *data_end)
389b0b04fc4SAlexei Starovoitov {
390b0b04fc4SAlexei Starovoitov unsigned char tmp_mac[6];
391b0b04fc4SAlexei Starovoitov struct eth_hdr *eth;
392b0b04fc4SAlexei Starovoitov
393b0b04fc4SAlexei Starovoitov eth = data;
394b0b04fc4SAlexei Starovoitov memcpy(tmp_mac, eth->eth_source, 6);
395b0b04fc4SAlexei Starovoitov memcpy(eth->eth_source, eth->eth_dest, 6);
396b0b04fc4SAlexei Starovoitov memcpy(eth->eth_dest, tmp_mac, 6);
397b0b04fc4SAlexei Starovoitov return XDP_TX;
398b0b04fc4SAlexei Starovoitov }
399b0b04fc4SAlexei Starovoitov
400b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
send_icmp_reply(void * data,void * data_end)401b0b04fc4SAlexei Starovoitov int send_icmp_reply(void *data, void *data_end)
402b0b04fc4SAlexei Starovoitov {
403b0b04fc4SAlexei Starovoitov struct icmphdr *icmp_hdr;
404b0b04fc4SAlexei Starovoitov __u16 *next_iph_u16;
405b0b04fc4SAlexei Starovoitov __u32 tmp_addr = 0;
406b0b04fc4SAlexei Starovoitov struct iphdr *iph;
407b0b04fc4SAlexei Starovoitov __u32 csum = 0;
408b0b04fc4SAlexei Starovoitov __u64 off = 0;
409b0b04fc4SAlexei Starovoitov
410b0b04fc4SAlexei Starovoitov if (data + sizeof(struct eth_hdr)
411b0b04fc4SAlexei Starovoitov + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
412b0b04fc4SAlexei Starovoitov return XDP_DROP;
413*52dbd67dSJose E. Marchesi off += sizeof(struct eth_hdr);
414b0b04fc4SAlexei Starovoitov iph = data + off;
415b0b04fc4SAlexei Starovoitov off += sizeof(struct iphdr);
416b0b04fc4SAlexei Starovoitov icmp_hdr = data + off;
417b0b04fc4SAlexei Starovoitov icmp_hdr->type = 0;
418b0b04fc4SAlexei Starovoitov icmp_hdr->checksum += 0x0007;
419b0b04fc4SAlexei Starovoitov iph->ttl = 4;
420b0b04fc4SAlexei Starovoitov tmp_addr = iph->daddr;
421b0b04fc4SAlexei Starovoitov iph->daddr = iph->saddr;
422b0b04fc4SAlexei Starovoitov iph->saddr = tmp_addr;
423b0b04fc4SAlexei Starovoitov iph->check = 0;
424b0b04fc4SAlexei Starovoitov next_iph_u16 = (__u16 *) iph;
425b0b04fc4SAlexei Starovoitov __pragma_loop_unroll_full
426b0b04fc4SAlexei Starovoitov for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
427b0b04fc4SAlexei Starovoitov csum += *next_iph_u16++;
428b0b04fc4SAlexei Starovoitov iph->check = ~((csum & 0xffff) + (csum >> 16));
429b0b04fc4SAlexei Starovoitov return swap_mac_and_send(data, data_end);
430b0b04fc4SAlexei Starovoitov }
431b0b04fc4SAlexei Starovoitov
432b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
send_icmp6_reply(void * data,void * data_end)433b0b04fc4SAlexei Starovoitov int send_icmp6_reply(void *data, void *data_end)
434b0b04fc4SAlexei Starovoitov {
435b0b04fc4SAlexei Starovoitov struct icmp6hdr *icmp_hdr;
436b0b04fc4SAlexei Starovoitov struct ipv6hdr *ip6h;
437b0b04fc4SAlexei Starovoitov __be32 tmp_addr[4];
438b0b04fc4SAlexei Starovoitov __u64 off = 0;
439b0b04fc4SAlexei Starovoitov
440b0b04fc4SAlexei Starovoitov if (data + sizeof(struct eth_hdr)
441b0b04fc4SAlexei Starovoitov + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
442b0b04fc4SAlexei Starovoitov return XDP_DROP;
443b0b04fc4SAlexei Starovoitov off += sizeof(struct eth_hdr);
444b0b04fc4SAlexei Starovoitov ip6h = data + off;
445b0b04fc4SAlexei Starovoitov off += sizeof(struct ipv6hdr);
446b0b04fc4SAlexei Starovoitov icmp_hdr = data + off;
447b0b04fc4SAlexei Starovoitov icmp_hdr->icmp6_type = 129;
448b0b04fc4SAlexei Starovoitov icmp_hdr->icmp6_cksum -= 0x0001;
449b0b04fc4SAlexei Starovoitov ip6h->hop_limit = 4;
450b0b04fc4SAlexei Starovoitov memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
451b0b04fc4SAlexei Starovoitov memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
452b0b04fc4SAlexei Starovoitov memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
453b0b04fc4SAlexei Starovoitov return swap_mac_and_send(data, data_end);
454b0b04fc4SAlexei Starovoitov }
455b0b04fc4SAlexei Starovoitov
456b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
parse_icmpv6(void * data,void * data_end,__u64 off,struct packet_description * pckt)457b0b04fc4SAlexei Starovoitov int parse_icmpv6(void *data, void *data_end, __u64 off,
458b0b04fc4SAlexei Starovoitov struct packet_description *pckt)
459b0b04fc4SAlexei Starovoitov {
460b0b04fc4SAlexei Starovoitov struct icmp6hdr *icmp_hdr;
461b0b04fc4SAlexei Starovoitov struct ipv6hdr *ip6h;
462b0b04fc4SAlexei Starovoitov
463b0b04fc4SAlexei Starovoitov icmp_hdr = data + off;
464b0b04fc4SAlexei Starovoitov if (icmp_hdr + 1 > data_end)
465b0b04fc4SAlexei Starovoitov return XDP_DROP;
466b0b04fc4SAlexei Starovoitov if (icmp_hdr->icmp6_type == 128)
467b0b04fc4SAlexei Starovoitov return send_icmp6_reply(data, data_end);
468b0b04fc4SAlexei Starovoitov if (icmp_hdr->icmp6_type != 3)
469b0b04fc4SAlexei Starovoitov return XDP_PASS;
470b0b04fc4SAlexei Starovoitov off += sizeof(struct icmp6hdr);
471b0b04fc4SAlexei Starovoitov ip6h = data + off;
472b0b04fc4SAlexei Starovoitov if (ip6h + 1 > data_end)
473b0b04fc4SAlexei Starovoitov return XDP_DROP;
474b0b04fc4SAlexei Starovoitov pckt->flow.proto = ip6h->nexthdr;
475b0b04fc4SAlexei Starovoitov pckt->flags |= (1 << 0);
476b0b04fc4SAlexei Starovoitov memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
477b0b04fc4SAlexei Starovoitov memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
478b0b04fc4SAlexei Starovoitov return -1;
479b0b04fc4SAlexei Starovoitov }
480b0b04fc4SAlexei Starovoitov
481b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
parse_icmp(void * data,void * data_end,__u64 off,struct packet_description * pckt)482b0b04fc4SAlexei Starovoitov int parse_icmp(void *data, void *data_end, __u64 off,
483b0b04fc4SAlexei Starovoitov struct packet_description *pckt)
484b0b04fc4SAlexei Starovoitov {
485b0b04fc4SAlexei Starovoitov struct icmphdr *icmp_hdr;
486b0b04fc4SAlexei Starovoitov struct iphdr *iph;
487b0b04fc4SAlexei Starovoitov
488b0b04fc4SAlexei Starovoitov icmp_hdr = data + off;
489b0b04fc4SAlexei Starovoitov if (icmp_hdr + 1 > data_end)
490b0b04fc4SAlexei Starovoitov return XDP_DROP;
491b0b04fc4SAlexei Starovoitov if (icmp_hdr->type == 8)
492b0b04fc4SAlexei Starovoitov return send_icmp_reply(data, data_end);
493b0b04fc4SAlexei Starovoitov if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
494b0b04fc4SAlexei Starovoitov return XDP_PASS;
495b0b04fc4SAlexei Starovoitov off += sizeof(struct icmphdr);
496b0b04fc4SAlexei Starovoitov iph = data + off;
497b0b04fc4SAlexei Starovoitov if (iph + 1 > data_end)
498b0b04fc4SAlexei Starovoitov return XDP_DROP;
499b0b04fc4SAlexei Starovoitov if (iph->ihl != 5)
500b0b04fc4SAlexei Starovoitov return XDP_DROP;
501b0b04fc4SAlexei Starovoitov pckt->flow.proto = iph->protocol;
502b0b04fc4SAlexei Starovoitov pckt->flags |= (1 << 0);
503b0b04fc4SAlexei Starovoitov pckt->flow.src = iph->daddr;
504b0b04fc4SAlexei Starovoitov pckt->flow.dst = iph->saddr;
505b0b04fc4SAlexei Starovoitov return -1;
506b0b04fc4SAlexei Starovoitov }
507b0b04fc4SAlexei Starovoitov
508b0b04fc4SAlexei Starovoitov static __attribute__ ((noinline))
get_packet_hash(struct packet_description * pckt,bool hash_16bytes)509b0b04fc4SAlexei Starovoitov __u32 get_packet_hash(struct packet_description *pckt,
510b0b04fc4SAlexei Starovoitov bool hash_16bytes)
511b0b04fc4SAlexei Starovoitov {
512b0b04fc4SAlexei Starovoitov if (hash_16bytes)
513b0b04fc4SAlexei Starovoitov return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
514b0b04fc4SAlexei Starovoitov pckt->flow.ports, 24);
515b0b04fc4SAlexei Starovoitov else
516b0b04fc4SAlexei Starovoitov return jhash_2words(pckt->flow.src, pckt->flow.ports,
517b0b04fc4SAlexei Starovoitov 24);
518b0b04fc4SAlexei Starovoitov }
519b0b04fc4SAlexei Starovoitov
520b0b04fc4SAlexei Starovoitov __attribute__ ((noinline))
get_packet_dst(struct real_definition ** real,struct packet_description * pckt,struct vip_meta * vip_info,bool is_ipv6,void * lru_map)521b0b04fc4SAlexei Starovoitov static bool get_packet_dst(struct real_definition **real,
522b0b04fc4SAlexei Starovoitov struct packet_description *pckt,
523b0b04fc4SAlexei Starovoitov struct vip_meta *vip_info,
524b0b04fc4SAlexei Starovoitov bool is_ipv6, void *lru_map)
525b0b04fc4SAlexei Starovoitov {
526b0b04fc4SAlexei Starovoitov struct real_pos_lru new_dst_lru = { };
527b0b04fc4SAlexei Starovoitov bool hash_16bytes = is_ipv6;
528f6d60facSHaowen Bai __u32 *real_pos, hash, key;
529b0b04fc4SAlexei Starovoitov __u64 cur_time;
530b0b04fc4SAlexei Starovoitov
531b0b04fc4SAlexei Starovoitov if (vip_info->flags & (1 << 2))
532f6d60facSHaowen Bai hash_16bytes = 1;
533b0b04fc4SAlexei Starovoitov if (vip_info->flags & (1 << 3)) {
534b0b04fc4SAlexei Starovoitov pckt->flow.port16[0] = pckt->flow.port16[1];
535b0b04fc4SAlexei Starovoitov memset(pckt->flow.srcv6, 0, 16);
536f6d60facSHaowen Bai }
537b0b04fc4SAlexei Starovoitov hash = get_packet_hash(pckt, hash_16bytes);
538b0b04fc4SAlexei Starovoitov if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
539b0b04fc4SAlexei Starovoitov hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
540b0b04fc4SAlexei Starovoitov return false;
541b0b04fc4SAlexei Starovoitov key = 2 * vip_info->vip_num + hash % 2;
542b0b04fc4SAlexei Starovoitov real_pos = bpf_map_lookup_elem(&ch_rings, &key);
543f6d60facSHaowen Bai if (!real_pos)
544b0b04fc4SAlexei Starovoitov return false;
545b0b04fc4SAlexei Starovoitov key = *real_pos;
546b0b04fc4SAlexei Starovoitov *real = bpf_map_lookup_elem(&reals, &key);
547b0b04fc4SAlexei Starovoitov if (!(*real))
548b0b04fc4SAlexei Starovoitov return false;
549b0b04fc4SAlexei Starovoitov if (!(vip_info->flags & (1 << 1))) {
550b0b04fc4SAlexei Starovoitov __u32 conn_rate_key = 512 + 2;
551f6d60facSHaowen Bai struct lb_stats *conn_rate_stats =
552b0b04fc4SAlexei Starovoitov bpf_map_lookup_elem(&stats, &conn_rate_key);
553b0b04fc4SAlexei Starovoitov
554b0b04fc4SAlexei Starovoitov if (!conn_rate_stats)
555b0b04fc4SAlexei Starovoitov return true;
556b0b04fc4SAlexei Starovoitov cur_time = bpf_ktime_get_ns();
557b0b04fc4SAlexei Starovoitov if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
558f6d60facSHaowen Bai conn_rate_stats->v1 = 1;
559b0b04fc4SAlexei Starovoitov conn_rate_stats->v2 = cur_time;
560b0b04fc4SAlexei Starovoitov } else {
561b0b04fc4SAlexei Starovoitov conn_rate_stats->v1 += 1;
562b0b04fc4SAlexei Starovoitov if (conn_rate_stats->v1 >= 1)
563b0b04fc4SAlexei Starovoitov return true;
564b0b04fc4SAlexei Starovoitov }
565b0b04fc4SAlexei Starovoitov if (pckt->flow.proto == IPPROTO_UDP)
566b0b04fc4SAlexei Starovoitov new_dst_lru.atime = cur_time;
567b0b04fc4SAlexei Starovoitov new_dst_lru.pos = key;
568b0b04fc4SAlexei Starovoitov bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
569b0b04fc4SAlexei Starovoitov }
570b0b04fc4SAlexei Starovoitov return true;
571b0b04fc4SAlexei Starovoitov }
572b0b04fc4SAlexei Starovoitov
573b0b04fc4SAlexei Starovoitov __attribute__ ((noinline))
connection_table_lookup(struct real_definition ** real,struct packet_description * pckt,void * lru_map)574b0b04fc4SAlexei Starovoitov static void connection_table_lookup(struct real_definition **real,
575b0b04fc4SAlexei Starovoitov struct packet_description *pckt,
576b0b04fc4SAlexei Starovoitov void *lru_map)
577b0b04fc4SAlexei Starovoitov {
578b0b04fc4SAlexei Starovoitov
579b0b04fc4SAlexei Starovoitov struct real_pos_lru *dst_lru;
580b0b04fc4SAlexei Starovoitov __u64 cur_time;
581b0b04fc4SAlexei Starovoitov __u32 key;
582b0b04fc4SAlexei Starovoitov
583b0b04fc4SAlexei Starovoitov dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
584b0b04fc4SAlexei Starovoitov if (!dst_lru)
585b0b04fc4SAlexei Starovoitov return;
586b0b04fc4SAlexei Starovoitov if (pckt->flow.proto == IPPROTO_UDP) {
587b0b04fc4SAlexei Starovoitov cur_time = bpf_ktime_get_ns();
588b0b04fc4SAlexei Starovoitov if (cur_time - dst_lru->atime > 300000)
589b0b04fc4SAlexei Starovoitov return;
590b0b04fc4SAlexei Starovoitov dst_lru->atime = cur_time;
591b0b04fc4SAlexei Starovoitov }
592b0b04fc4SAlexei Starovoitov key = dst_lru->pos;
593b0b04fc4SAlexei Starovoitov *real = bpf_map_lookup_elem(&reals, &key);
594b0b04fc4SAlexei Starovoitov }
595b0b04fc4SAlexei Starovoitov
/* bpf and llvm allow a maximum of 5 arguments per function.
 * The function below needs six values, so data and data_end are
 * passed together through the two-element extra_args array.
 */
600b0b04fc4SAlexei Starovoitov __attribute__ ((noinline))
process_l3_headers_v6(struct packet_description * pckt,__u8 * protocol,__u64 off,__u16 * pkt_bytes,void * extra_args[2])601b0b04fc4SAlexei Starovoitov static int process_l3_headers_v6(struct packet_description *pckt,
602b0b04fc4SAlexei Starovoitov __u8 *protocol, __u64 off,
603b0b04fc4SAlexei Starovoitov __u16 *pkt_bytes, void *extra_args[2])
60459fd3486SIlya Leoshkevich {
605b0b04fc4SAlexei Starovoitov struct ipv6hdr *ip6h;
606b0b04fc4SAlexei Starovoitov __u64 iph_len;
607b0b04fc4SAlexei Starovoitov int action;
608b0b04fc4SAlexei Starovoitov void *data = extra_args[0];
609b0b04fc4SAlexei Starovoitov void *data_end = extra_args[1];
610b0b04fc4SAlexei Starovoitov
611b0b04fc4SAlexei Starovoitov ip6h = data + off;
612b0b04fc4SAlexei Starovoitov if (ip6h + 1 > data_end)
613b0b04fc4SAlexei Starovoitov return XDP_DROP;
614b0b04fc4SAlexei Starovoitov iph_len = sizeof(struct ipv6hdr);
615b0b04fc4SAlexei Starovoitov *protocol = ip6h->nexthdr;
616b0b04fc4SAlexei Starovoitov pckt->flow.proto = *protocol;
617b0b04fc4SAlexei Starovoitov *pkt_bytes = bpf_ntohs(ip6h->payload_len);
618b0b04fc4SAlexei Starovoitov off += iph_len;
619b0b04fc4SAlexei Starovoitov if (*protocol == 45) {
620b0b04fc4SAlexei Starovoitov return XDP_DROP;
621b0b04fc4SAlexei Starovoitov } else if (*protocol == 59) {
622b0b04fc4SAlexei Starovoitov action = parse_icmpv6(data, data_end, off, pckt);
623b0b04fc4SAlexei Starovoitov if (action >= 0)
624b0b04fc4SAlexei Starovoitov return action;
625b0b04fc4SAlexei Starovoitov } else {
626b0b04fc4SAlexei Starovoitov memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
627b0b04fc4SAlexei Starovoitov memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
628b0b04fc4SAlexei Starovoitov }
629b0b04fc4SAlexei Starovoitov return -1;
630b0b04fc4SAlexei Starovoitov }
631b0b04fc4SAlexei Starovoitov
632b0b04fc4SAlexei Starovoitov __attribute__ ((noinline))
process_l3_headers_v4(struct packet_description * pckt,__u8 * protocol,__u64 off,__u16 * pkt_bytes,void * extra_args[2])633b0b04fc4SAlexei Starovoitov static int process_l3_headers_v4(struct packet_description *pckt,
634b0b04fc4SAlexei Starovoitov __u8 *protocol, __u64 off,
63559fd3486SIlya Leoshkevich __u16 *pkt_bytes, void *extra_args[2])
636b0b04fc4SAlexei Starovoitov {
637b0b04fc4SAlexei Starovoitov struct iphdr *iph;
638b0b04fc4SAlexei Starovoitov int action;
639b0b04fc4SAlexei Starovoitov void *data = extra_args[0];
640b0b04fc4SAlexei Starovoitov void *data_end = extra_args[1];
641b0b04fc4SAlexei Starovoitov
642b0b04fc4SAlexei Starovoitov iph = data + off;
643b0b04fc4SAlexei Starovoitov if (iph + 1 > data_end)
644b0b04fc4SAlexei Starovoitov return XDP_DROP;
645b0b04fc4SAlexei Starovoitov if (iph->ihl != 5)
646b0b04fc4SAlexei Starovoitov return XDP_DROP;
647b0b04fc4SAlexei Starovoitov *protocol = iph->protocol;
648b0b04fc4SAlexei Starovoitov pckt->flow.proto = *protocol;
649b0b04fc4SAlexei Starovoitov *pkt_bytes = bpf_ntohs(iph->tot_len);
650b0b04fc4SAlexei Starovoitov off += 20;
651b0b04fc4SAlexei Starovoitov if (iph->frag_off & 65343)
652b0b04fc4SAlexei Starovoitov return XDP_DROP;
653b0b04fc4SAlexei Starovoitov if (*protocol == IPPROTO_ICMP) {
654b0b04fc4SAlexei Starovoitov action = parse_icmp(data, data_end, off, pckt);
655b0b04fc4SAlexei Starovoitov if (action >= 0)
656b0b04fc4SAlexei Starovoitov return action;
657b0b04fc4SAlexei Starovoitov } else {
658b0b04fc4SAlexei Starovoitov pckt->flow.src = iph->saddr;
659b0b04fc4SAlexei Starovoitov pckt->flow.dst = iph->daddr;
660b0b04fc4SAlexei Starovoitov }
661b0b04fc4SAlexei Starovoitov return -1;
662b0b04fc4SAlexei Starovoitov }
663b0b04fc4SAlexei Starovoitov
664b0b04fc4SAlexei Starovoitov __attribute__ ((noinline))
process_packet(void * data,__u64 off,void * data_end,bool is_ipv6,struct xdp_md * xdp)665b0b04fc4SAlexei Starovoitov static int process_packet(void *data, __u64 off, void *data_end,
666b0b04fc4SAlexei Starovoitov bool is_ipv6, struct xdp_md *xdp)
667b0b04fc4SAlexei Starovoitov {
668b0b04fc4SAlexei Starovoitov
669b0b04fc4SAlexei Starovoitov struct real_definition *dst = NULL;
670b0b04fc4SAlexei Starovoitov struct packet_description pckt = { };
671b0b04fc4SAlexei Starovoitov struct vip_definition vip = { };
672b0b04fc4SAlexei Starovoitov struct lb_stats *data_stats;
673b0b04fc4SAlexei Starovoitov void *lru_map = &lru_cache;
674b0b04fc4SAlexei Starovoitov struct vip_meta *vip_info;
675b0b04fc4SAlexei Starovoitov __u32 lru_stats_key = 513;
676b0b04fc4SAlexei Starovoitov __u32 mac_addr_pos = 0;
677b0b04fc4SAlexei Starovoitov __u32 stats_key = 512;
678b0b04fc4SAlexei Starovoitov struct ctl_value *cval;
679b0b04fc4SAlexei Starovoitov __u16 pkt_bytes;
680b0b04fc4SAlexei Starovoitov __u8 protocol;
681b0b04fc4SAlexei Starovoitov __u32 vip_num;
682b0b04fc4SAlexei Starovoitov int action;
683b0b04fc4SAlexei Starovoitov void *extra_args[2] = { data, data_end };
684b0b04fc4SAlexei Starovoitov
685b0b04fc4SAlexei Starovoitov if (is_ipv6)
686b0b04fc4SAlexei Starovoitov action = process_l3_headers_v6(&pckt, &protocol, off,
687b0b04fc4SAlexei Starovoitov &pkt_bytes, extra_args);
688b0b04fc4SAlexei Starovoitov else
689b0b04fc4SAlexei Starovoitov action = process_l3_headers_v4(&pckt, &protocol, off,
690b0b04fc4SAlexei Starovoitov &pkt_bytes, extra_args);
691b0b04fc4SAlexei Starovoitov if (action >= 0)
692b0b04fc4SAlexei Starovoitov return action;
693b0b04fc4SAlexei Starovoitov protocol = pckt.flow.proto;
694b0b04fc4SAlexei Starovoitov if (protocol == IPPROTO_TCP) {
695b0b04fc4SAlexei Starovoitov if (!parse_tcp(data, data_end, is_ipv6, &pckt))
696b0b04fc4SAlexei Starovoitov return XDP_DROP;
697b0b04fc4SAlexei Starovoitov } else if (protocol == IPPROTO_UDP) {
698b0b04fc4SAlexei Starovoitov if (!parse_udp(data, data_end, is_ipv6, &pckt))
699b0b04fc4SAlexei Starovoitov return XDP_DROP;
700b0b04fc4SAlexei Starovoitov } else {
701b0b04fc4SAlexei Starovoitov return XDP_TX;
702b0b04fc4SAlexei Starovoitov }
703b0b04fc4SAlexei Starovoitov
704b0b04fc4SAlexei Starovoitov if (is_ipv6)
705b0b04fc4SAlexei Starovoitov memcpy(vip.vipv6, pckt.flow.dstv6, 16);
706b0b04fc4SAlexei Starovoitov else
707b0b04fc4SAlexei Starovoitov vip.vip = pckt.flow.dst;
708b0b04fc4SAlexei Starovoitov vip.port = pckt.flow.port16[1];
709b0b04fc4SAlexei Starovoitov vip.proto = pckt.flow.proto;
710b0b04fc4SAlexei Starovoitov vip_info = bpf_map_lookup_elem(&vip_map, &vip);
711b0b04fc4SAlexei Starovoitov if (!vip_info) {
712b0b04fc4SAlexei Starovoitov vip.port = 0;
713b0b04fc4SAlexei Starovoitov vip_info = bpf_map_lookup_elem(&vip_map, &vip);
714b0b04fc4SAlexei Starovoitov if (!vip_info)
715b0b04fc4SAlexei Starovoitov return XDP_PASS;
716b0b04fc4SAlexei Starovoitov if (!(vip_info->flags & (1 << 4)))
717b0b04fc4SAlexei Starovoitov pckt.flow.port16[1] = 0;
718b0b04fc4SAlexei Starovoitov }
719b0b04fc4SAlexei Starovoitov if (data_end - data > 1400)
720b0b04fc4SAlexei Starovoitov return XDP_DROP;
721b0b04fc4SAlexei Starovoitov data_stats = bpf_map_lookup_elem(&stats, &stats_key);
722b0b04fc4SAlexei Starovoitov if (!data_stats)
723b0b04fc4SAlexei Starovoitov return XDP_DROP;
724b0b04fc4SAlexei Starovoitov data_stats->v1 += 1;
725b0b04fc4SAlexei Starovoitov if (!dst) {
726b0b04fc4SAlexei Starovoitov if (vip_info->flags & (1 << 0))
727b0b04fc4SAlexei Starovoitov pckt.flow.port16[0] = 0;
728b0b04fc4SAlexei Starovoitov if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
729b0b04fc4SAlexei Starovoitov connection_table_lookup(&dst, &pckt, lru_map);
730b0b04fc4SAlexei Starovoitov if (dst)
731b0b04fc4SAlexei Starovoitov goto out;
732b0b04fc4SAlexei Starovoitov if (pckt.flow.proto == IPPROTO_TCP) {
733b0b04fc4SAlexei Starovoitov struct lb_stats *lru_stats =
734b0b04fc4SAlexei Starovoitov bpf_map_lookup_elem(&stats, &lru_stats_key);
735b0b04fc4SAlexei Starovoitov
736b0b04fc4SAlexei Starovoitov if (!lru_stats)
737b0b04fc4SAlexei Starovoitov return XDP_DROP;
738b0b04fc4SAlexei Starovoitov if (pckt.flags & (1 << 1))
739b0b04fc4SAlexei Starovoitov lru_stats->v1 += 1;
740b0b04fc4SAlexei Starovoitov else
741b0b04fc4SAlexei Starovoitov lru_stats->v2 += 1;
742b0b04fc4SAlexei Starovoitov }
743b0b04fc4SAlexei Starovoitov if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
744b0b04fc4SAlexei Starovoitov return XDP_DROP;
745b0b04fc4SAlexei Starovoitov data_stats->v2 += 1;
746b0b04fc4SAlexei Starovoitov }
747b0b04fc4SAlexei Starovoitov out:
748b0b04fc4SAlexei Starovoitov cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
749b0b04fc4SAlexei Starovoitov if (!cval)
750b0b04fc4SAlexei Starovoitov return XDP_DROP;
751b0b04fc4SAlexei Starovoitov if (dst->flags & (1 << 0)) {
752b0b04fc4SAlexei Starovoitov if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
753b0b04fc4SAlexei Starovoitov return XDP_DROP;
754b0b04fc4SAlexei Starovoitov } else {
755b0b04fc4SAlexei Starovoitov if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
756b0b04fc4SAlexei Starovoitov return XDP_DROP;
757b0b04fc4SAlexei Starovoitov }
7588fffa0e3SAndrii Nakryiko vip_num = vip_info->vip_num;
759baaf680eSAndrii Nakryiko data_stats = bpf_map_lookup_elem(&stats, &vip_num);
760b0b04fc4SAlexei Starovoitov if (!data_stats)
761b0b04fc4SAlexei Starovoitov return XDP_DROP;
762b0b04fc4SAlexei Starovoitov data_stats->v1 += 1;
763b0b04fc4SAlexei Starovoitov data_stats->v2 += pkt_bytes;
764b0b04fc4SAlexei Starovoitov
765b0b04fc4SAlexei Starovoitov data = (void *)(long)xdp->data;
766b0b04fc4SAlexei Starovoitov data_end = (void *)(long)xdp->data_end;
767b0b04fc4SAlexei Starovoitov if (data + 4 > data_end)
768b0b04fc4SAlexei Starovoitov return XDP_DROP;
769b0b04fc4SAlexei Starovoitov *(u32 *)data = dst->dst;
77059fd3486SIlya Leoshkevich return XDP_DROP;
77159fd3486SIlya Leoshkevich }
772b0b04fc4SAlexei Starovoitov
773baaf680eSAndrii Nakryiko SEC("xdp")
balancer_ingress_v4(struct xdp_md * ctx)774baaf680eSAndrii Nakryiko int balancer_ingress_v4(struct xdp_md *ctx)
775baaf680eSAndrii Nakryiko {
776baaf680eSAndrii Nakryiko void *data = (void *)(long)ctx->data;
7778fffa0e3SAndrii Nakryiko void *data_end = (void *)(long)ctx->data_end;
778baaf680eSAndrii Nakryiko struct eth_hdr *eth = data;
779baaf680eSAndrii Nakryiko __u32 eth_proto;
780baaf680eSAndrii Nakryiko __u32 nh_off;
781baaf680eSAndrii Nakryiko
782baaf680eSAndrii Nakryiko nh_off = sizeof(struct eth_hdr);
783baaf680eSAndrii Nakryiko if (data + nh_off > data_end)
784baaf680eSAndrii Nakryiko return XDP_DROP;
785baaf680eSAndrii Nakryiko eth_proto = bpf_ntohs(eth->eth_proto);
786baaf680eSAndrii Nakryiko if (eth_proto == ETH_P_IP)
787baaf680eSAndrii Nakryiko return process_packet(data, nh_off, data_end, 0, ctx);
788baaf680eSAndrii Nakryiko else
789baaf680eSAndrii Nakryiko return XDP_DROP;
790baaf680eSAndrii Nakryiko }
791b0b04fc4SAlexei Starovoitov
792b0b04fc4SAlexei Starovoitov SEC("xdp")
balancer_ingress_v6(struct xdp_md * ctx)793b0b04fc4SAlexei Starovoitov int balancer_ingress_v6(struct xdp_md *ctx)
794b0b04fc4SAlexei Starovoitov {
795b0b04fc4SAlexei Starovoitov void *data = (void *)(long)ctx->data;
796baaf680eSAndrii Nakryiko void *data_end = (void *)(long)ctx->data_end;
797 struct eth_hdr *eth = data;
798 __u32 eth_proto;
799 __u32 nh_off;
800
801 nh_off = sizeof(struct eth_hdr);
802 if (data + nh_off > data_end)
803 return XDP_DROP;
804 eth_proto = bpf_ntohs(eth->eth_proto);
805 if (eth_proto == ETH_P_IPV6)
806 return process_packet(data, nh_off, data_end, 1, ctx);
807 else
808 return XDP_DROP;
809 }
810
811 char _license[] SEC("license") = "GPL";
812