/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_bridge.h>

#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>

#include "../br_private.h"

/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
 * has been linearized or cloned.
 */
static int nf_br_ip_fragment(struct net *net, struct sock *sk,
			     struct sk_buff *skb,
			     struct nf_bridge_frag_data *data,
			     int (*output)(struct net *, struct sock *sk,
					   const struct nf_bridge_frag_data *data,
					   struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	unsigned int hlen, ll_rs, mtu;
	ktime_t tstamp = skb->tstamp;
	struct ip_frag_state state;
	struct iphdr *iph;
	int err;

	/* for offloaded checksums cleanup checksum before fragmentation */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	iph = ip_hdr(skb);

	/*
	 *	Setup starting values
	 */

	hlen = iph->ihl * 4;
	frag_max_size -= hlen;	/* from here on, payload budget per fragment */
	ll_rs = LL_RESERVED_SPACE(skb->dev);
	mtu = skb->dev->mtu;

	/* Fast path: the skb still carries its original fragment geometry in
	 * its frag list; re-emit each piece as-is when it fits.
	 */
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip_fraglist_iter iter;
		struct sk_buff *frag;

		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < ll_rs)
			goto blackhole;

		/* A cloned head would make in-place header rewrites unsafe. */
		if (skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			if (frag->len > mtu ||
			    skb_headroom(frag) < hlen + ll_rs)
				goto blackhole;

			/* Shared fragments cannot be modified in place. */
			if (skb_shared(frag))
				goto slow_path;
		}

		ip_fraglist_init(skb, iph, hlen, &iter);

		for (;;) {
			if (iter.frag)
				ip_fraglist_prepare(skb, &iter);

			/* ip_fraglist_prepare() does not carry the original
			 * timestamp over; restore it on every fragment.
			 */
			skb->tstamp = tstamp;
			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			skb = ip_fraglist_next(&iter);
		}
		/* output() consumed (or freed on error) every fragment. */
		return err;
	}
slow_path:
	/* This is a linearized skbuff, the original geometry is lost for us.
	 * This may also be a clone skbuff, we could preserve the geometry for
	 * the copies but probably not worth the effort.
	 */
	ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);

	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		skb2->tstamp = tstamp;
		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	/* All fragments were emitted as copies; release the original. */
	consume_skb(skb);
	return err;

blackhole:
	/* Silently drop; report success so the caller treats it as handled. */
	kfree_skb(skb);
	return 0;
}

/* ip_defrag() expects IPCB() in place. */
/* Stash the bridge control block aside and hand the IP stack a zeroed
 * IPCB()/IP6CB() area of @inet_skb_parm_size bytes, as the defrag code
 * expects.
 */
static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
			   size_t inet_skb_parm_size)
{
	memcpy(cb, skb->cb, sizeof(struct br_input_skb_cb));
	memset(skb->cb, 0, inet_skb_parm_size);
}

/* Put the previously saved bridge control block back and record the
 * largest fragment size observed during reassembly.
 */
static void br_skb_cb_restore(struct sk_buff *skb,
			      const struct br_input_skb_cb *cb,
			      u16 fragsz)
{
	memcpy(skb->cb, cb, sizeof(struct br_input_skb_cb));
	BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
}

nf_ct_br_defrag4(struct sk_buff * skb,const struct nf_hook_state * state)138 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
139 const struct nf_hook_state *state)
140 {
141 u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
142 enum ip_conntrack_info ctinfo;
143 struct br_input_skb_cb cb;
144 const struct nf_conn *ct;
145 int err;
146
147 if (!ip_is_fragment(ip_hdr(skb)))
148 return NF_ACCEPT;
149
150 ct = nf_ct_get(skb, &ctinfo);
151 if (ct)
152 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
153
154 br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
155 local_bh_disable();
156 err = ip_defrag(state->net, skb,
157 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
158 local_bh_enable();
159 if (!err) {
160 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
161 skb->ignore_df = 1;
162 return NF_ACCEPT;
163 }
164
165 return NF_STOLEN;
166 }
167
nf_ct_br_defrag6(struct sk_buff * skb,const struct nf_hook_state * state)168 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
169 const struct nf_hook_state *state)
170 {
171 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
172 u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
173 enum ip_conntrack_info ctinfo;
174 struct br_input_skb_cb cb;
175 const struct nf_conn *ct;
176 int err;
177
178 ct = nf_ct_get(skb, &ctinfo);
179 if (ct)
180 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
181
182 br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
183
184 err = nf_ct_frag6_gather(state->net, skb,
185 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
186 /* queued */
187 if (err == -EINPROGRESS)
188 return NF_STOLEN;
189
190 br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
191 return err == 0 ? NF_ACCEPT : NF_DROP;
192 #else
193 return NF_ACCEPT;
194 #endif
195 }
196
nf_ct_br_ip_check(const struct sk_buff * skb)197 static int nf_ct_br_ip_check(const struct sk_buff *skb)
198 {
199 const struct iphdr *iph;
200 int nhoff, len;
201
202 nhoff = skb_network_offset(skb);
203 iph = ip_hdr(skb);
204 if (iph->ihl < 5 ||
205 iph->version != 4)
206 return -1;
207
208 len = ntohs(iph->tot_len);
209 if (skb->len < nhoff + len ||
210 len < (iph->ihl * 4))
211 return -1;
212
213 return 0;
214 }
215
nf_ct_br_ipv6_check(const struct sk_buff * skb)216 static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
217 {
218 const struct ipv6hdr *hdr;
219 int nhoff, len;
220
221 nhoff = skb_network_offset(skb);
222 hdr = ipv6_hdr(skb);
223 if (hdr->version != 6)
224 return -1;
225
226 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
227 if (skb->len < len)
228 return -1;
229
230 return 0;
231 }
232
/* PRE_ROUTING bridge hook: validate, trim, defragment, then run the packet
 * through conntrack with the hook state's pf switched to the real L3 family.
 */
static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_hook_state bridge_state = *state;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u32 len;
	int ret;

	ct = nf_ct_get(skb, &ctinfo);
	/* Already tracked (a real conntrack entry, not just a template) or
	 * explicitly marked untracked: nothing more to do here.
	 */
	if ((ct && !nf_ct_is_template(ct)) ||
	    ctinfo == IP_CT_UNTRACKED)
		return NF_ACCEPT;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NF_ACCEPT;

		/* Trim trailing padding (e.g. Ethernet pad bytes) down to the
		 * length the IP header claims.
		 */
		len = ntohs(ip_hdr(skb)->tot_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ip_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV4;
		ret = nf_ct_br_defrag4(skb, &bridge_state);
		break;
	case htons(ETH_P_IPV6):
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NF_ACCEPT;

		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ipv6_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV6;
		ret = nf_ct_br_defrag6(skb, &bridge_state);
		break;
	default:
		/* Non-IP traffic is never tracked on the bridge. */
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	/* NF_STOLEN (fragment queued) or NF_DROP from the defrag step. */
	if (ret != NF_ACCEPT)
		return ret;

	return nf_conntrack_in(skb, &bridge_state);
}

nf_ct_bridge_frag_save(struct sk_buff * skb,struct nf_bridge_frag_data * data)287 static void nf_ct_bridge_frag_save(struct sk_buff *skb,
288 struct nf_bridge_frag_data *data)
289 {
290 if (skb_vlan_tag_present(skb)) {
291 data->vlan_present = true;
292 data->vlan_tci = skb->vlan_tci;
293 data->vlan_proto = skb->vlan_proto;
294 } else {
295 data->vlan_present = false;
296 }
297 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
298 }
299
300 static unsigned int
nf_ct_bridge_refrag(struct sk_buff * skb,const struct nf_hook_state * state,int (* output)(struct net *,struct sock * sk,const struct nf_bridge_frag_data * data,struct sk_buff *))301 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
302 int (*output)(struct net *, struct sock *sk,
303 const struct nf_bridge_frag_data *data,
304 struct sk_buff *))
305 {
306 struct nf_bridge_frag_data data;
307
308 if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
309 return NF_ACCEPT;
310
311 nf_ct_bridge_frag_save(skb, &data);
312 switch (skb->protocol) {
313 case htons(ETH_P_IP):
314 nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
315 break;
316 case htons(ETH_P_IPV6):
317 nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
318 break;
319 default:
320 WARN_ON_ONCE(1);
321 return NF_DROP;
322 }
323
324 return NF_STOLEN;
325 }
326
/* Actually only slow path refragmentation needs this. */
nf_ct_bridge_frag_restore(struct sk_buff * skb,const struct nf_bridge_frag_data * data)328 static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
329 const struct nf_bridge_frag_data *data)
330 {
331 int err;
332
333 err = skb_cow_head(skb, ETH_HLEN);
334 if (err) {
335 kfree_skb(skb);
336 return -ENOMEM;
337 }
338 if (data->vlan_present)
339 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
340 else if (skb_vlan_tag_present(skb))
341 __vlan_hwaccel_clear_tag(skb);
342
343 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
344 skb_reset_mac_header(skb);
345
346 return 0;
347 }
348
/* Output callback for refragmentation: restore the Ethernet header, then
 * push the fragment out through the bridge transmit path.
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int ret;

	ret = nf_ct_bridge_frag_restore(skb, data);
	if (ret < 0)
		return ret;

	return br_dev_queue_push_xmit(net, sk, skb);
}

/* Confirm the conntrack entry on the way out of the bridge, locating the
 * transport header so nf_confirm() can do protocol-aware work (e.g. NAT
 * sequence adjustment).
 */
static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int protoff;

	/* No conntrack attached, or a related reply: plain confirm only. */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		return nf_conntrack_confirm(skb);

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
		break;
	case htons(ETH_P_IPV6): {
		unsigned char pnum = ipv6_hdr(skb)->nexthdr;
		__be16 frag_off;

		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
					   &frag_off);
		/* Malformed extension chain, or a non-first fragment (the
		 * masked fragment offset is non-zero): no transport header
		 * to inspect, fall back to a plain confirm.
		 */
		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
			return nf_conntrack_confirm(skb);
	}
		break;
	default:
		return NF_ACCEPT;
	}
	return nf_confirm(skb, protoff, ct, ctinfo);
}

nf_ct_bridge_post(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)392 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
393 const struct nf_hook_state *state)
394 {
395 int ret;
396
397 ret = nf_ct_bridge_confirm(skb);
398 if (ret != NF_ACCEPT)
399 return ret;
400
401 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
402 }
403
/* Bridge conntrack hooks: defragment and track on PRE_ROUTING; confirm and
 * refragment on POST_ROUTING, at the usual conntrack priorities.
 */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		.hook		= nf_ct_bridge_pre,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};

/* Registration descriptor handed to the conntrack core; it attaches the
 * hooks above on behalf of this module.
 */
static struct nf_ct_bridge_info bridge_info = {
	.ops		= nf_ct_bridge_hook_ops,
	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me		= THIS_MODULE,
};

/* Module init: register the bridge conntrack hooks with the core. */
static int __init nf_conntrack_l3proto_bridge_init(void)
{
	nf_ct_bridge_register(&bridge_info);

	return 0;
}

/* Module exit: unregister the bridge conntrack hooks. */
static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
	nf_ct_bridge_unregister(&bridge_info);
}

module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);

/* Auto-load this module when bridge-family conntrack support is requested. */
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");
