1 // SPDX-License-Identifier: GPL-2.0-only
2 /* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
4 */
5
6 #include <linux/types.h>
7 #include <linux/export.h>
8 #include <linux/init.h>
9 #include <linux/udp.h>
10 #include <linux/tcp.h>
11 #include <linux/icmp.h>
12 #include <linux/icmpv6.h>
13
14 #include <linux/dccp.h>
15 #include <linux/sctp.h>
16 #include <net/sctp/checksum.h>
17
18 #include <linux/netfilter.h>
19 #include <net/netfilter/nf_nat.h>
20
21 #include <linux/ipv6.h>
22 #include <linux/netfilter_ipv6.h>
23 #include <net/checksum.h>
24 #include <net/ip6_checksum.h>
25 #include <net/ip6_route.h>
26 #include <net/xfrm.h>
27 #include <net/ipv6.h>
28
29 #include <net/netfilter/nf_conntrack_core.h>
30 #include <net/netfilter/nf_conntrack.h>
31 #include <linux/netfilter/nfnetlink_conntrack.h>
32
33 static void nf_csum_update(struct sk_buff *skb,
34 unsigned int iphdroff, __sum16 *check,
35 const struct nf_conntrack_tuple *t,
36 enum nf_nat_manip_type maniptype);
37
38 static void
__udp_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,struct udphdr * hdr,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype,bool do_csum)39 __udp_manip_pkt(struct sk_buff *skb,
40 unsigned int iphdroff, struct udphdr *hdr,
41 const struct nf_conntrack_tuple *tuple,
42 enum nf_nat_manip_type maniptype, bool do_csum)
43 {
44 __be16 *portptr, newport;
45
46 if (maniptype == NF_NAT_MANIP_SRC) {
47 /* Get rid of src port */
48 newport = tuple->src.u.udp.port;
49 portptr = &hdr->source;
50 } else {
51 /* Get rid of dst port */
52 newport = tuple->dst.u.udp.port;
53 portptr = &hdr->dest;
54 }
55 if (do_csum) {
56 nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
57 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
58 false);
59 if (!hdr->check)
60 hdr->check = CSUM_MANGLED_0;
61 }
62 *portptr = newport;
63 }
64
udp_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)65 static bool udp_manip_pkt(struct sk_buff *skb,
66 unsigned int iphdroff, unsigned int hdroff,
67 const struct nf_conntrack_tuple *tuple,
68 enum nf_nat_manip_type maniptype)
69 {
70 struct udphdr *hdr;
71
72 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
73 return false;
74
75 hdr = (struct udphdr *)(skb->data + hdroff);
76 __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
77
78 return true;
79 }
80
udplite_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)81 static bool udplite_manip_pkt(struct sk_buff *skb,
82 unsigned int iphdroff, unsigned int hdroff,
83 const struct nf_conntrack_tuple *tuple,
84 enum nf_nat_manip_type maniptype)
85 {
86 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
87 struct udphdr *hdr;
88
89 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
90 return false;
91
92 hdr = (struct udphdr *)(skb->data + hdroff);
93 __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
94 #endif
95 return true;
96 }
97
98 static bool
sctp_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)99 sctp_manip_pkt(struct sk_buff *skb,
100 unsigned int iphdroff, unsigned int hdroff,
101 const struct nf_conntrack_tuple *tuple,
102 enum nf_nat_manip_type maniptype)
103 {
104 #ifdef CONFIG_NF_CT_PROTO_SCTP
105 struct sctphdr *hdr;
106 int hdrsize = 8;
107
108 /* This could be an inner header returned in imcp packet; in such
109 * cases we cannot update the checksum field since it is outside
110 * of the 8 bytes of transport layer headers we are guaranteed.
111 */
112 if (skb->len >= hdroff + sizeof(*hdr))
113 hdrsize = sizeof(*hdr);
114
115 if (skb_ensure_writable(skb, hdroff + hdrsize))
116 return false;
117
118 hdr = (struct sctphdr *)(skb->data + hdroff);
119
120 if (maniptype == NF_NAT_MANIP_SRC) {
121 /* Get rid of src port */
122 hdr->source = tuple->src.u.sctp.port;
123 } else {
124 /* Get rid of dst port */
125 hdr->dest = tuple->dst.u.sctp.port;
126 }
127
128 if (hdrsize < sizeof(*hdr))
129 return true;
130
131 if (skb->ip_summed != CHECKSUM_PARTIAL) {
132 hdr->checksum = sctp_compute_cksum(skb, hdroff);
133 skb->ip_summed = CHECKSUM_NONE;
134 }
135
136 #endif
137 return true;
138 }
139
140 static bool
tcp_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)141 tcp_manip_pkt(struct sk_buff *skb,
142 unsigned int iphdroff, unsigned int hdroff,
143 const struct nf_conntrack_tuple *tuple,
144 enum nf_nat_manip_type maniptype)
145 {
146 struct tcphdr *hdr;
147 __be16 *portptr, newport, oldport;
148 int hdrsize = 8; /* TCP connection tracking guarantees this much */
149
150 /* this could be a inner header returned in icmp packet; in such
151 cases we cannot update the checksum field since it is outside of
152 the 8 bytes of transport layer headers we are guaranteed */
153 if (skb->len >= hdroff + sizeof(struct tcphdr))
154 hdrsize = sizeof(struct tcphdr);
155
156 if (skb_ensure_writable(skb, hdroff + hdrsize))
157 return false;
158
159 hdr = (struct tcphdr *)(skb->data + hdroff);
160
161 if (maniptype == NF_NAT_MANIP_SRC) {
162 /* Get rid of src port */
163 newport = tuple->src.u.tcp.port;
164 portptr = &hdr->source;
165 } else {
166 /* Get rid of dst port */
167 newport = tuple->dst.u.tcp.port;
168 portptr = &hdr->dest;
169 }
170
171 oldport = *portptr;
172 *portptr = newport;
173
174 if (hdrsize < sizeof(*hdr))
175 return true;
176
177 nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
178 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
179 return true;
180 }
181
182 static bool
icmp_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)183 icmp_manip_pkt(struct sk_buff *skb,
184 unsigned int iphdroff, unsigned int hdroff,
185 const struct nf_conntrack_tuple *tuple,
186 enum nf_nat_manip_type maniptype)
187 {
188 struct icmphdr *hdr;
189
190 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
191 return false;
192
193 hdr = (struct icmphdr *)(skb->data + hdroff);
194 switch (hdr->type) {
195 case ICMP_ECHO:
196 case ICMP_ECHOREPLY:
197 case ICMP_TIMESTAMP:
198 case ICMP_TIMESTAMPREPLY:
199 case ICMP_INFO_REQUEST:
200 case ICMP_INFO_REPLY:
201 case ICMP_ADDRESS:
202 case ICMP_ADDRESSREPLY:
203 break;
204 default:
205 return true;
206 }
207 inet_proto_csum_replace2(&hdr->checksum, skb,
208 hdr->un.echo.id, tuple->src.u.icmp.id, false);
209 hdr->un.echo.id = tuple->src.u.icmp.id;
210 return true;
211 }
212
213 static bool
icmpv6_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)214 icmpv6_manip_pkt(struct sk_buff *skb,
215 unsigned int iphdroff, unsigned int hdroff,
216 const struct nf_conntrack_tuple *tuple,
217 enum nf_nat_manip_type maniptype)
218 {
219 struct icmp6hdr *hdr;
220
221 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
222 return false;
223
224 hdr = (struct icmp6hdr *)(skb->data + hdroff);
225 nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
226 if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
227 hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
228 inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
229 hdr->icmp6_identifier,
230 tuple->src.u.icmp.id, false);
231 hdr->icmp6_identifier = tuple->src.u.icmp.id;
232 }
233 return true;
234 }
235
236 /* manipulate a GRE packet according to maniptype */
237 static bool
gre_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)238 gre_manip_pkt(struct sk_buff *skb,
239 unsigned int iphdroff, unsigned int hdroff,
240 const struct nf_conntrack_tuple *tuple,
241 enum nf_nat_manip_type maniptype)
242 {
243 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
244 const struct gre_base_hdr *greh;
245 struct pptp_gre_header *pgreh;
246
247 /* pgreh includes two optional 32bit fields which are not required
248 * to be there. That's where the magic '8' comes from */
249 if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
250 return false;
251
252 greh = (void *)skb->data + hdroff;
253 pgreh = (struct pptp_gre_header *)greh;
254
255 /* we only have destination manip of a packet, since 'source key'
256 * is not present in the packet itself */
257 if (maniptype != NF_NAT_MANIP_DST)
258 return true;
259
260 switch (greh->flags & GRE_VERSION) {
261 case GRE_VERSION_0:
262 /* We do not currently NAT any GREv0 packets.
263 * Try to behave like "nf_nat_proto_unknown" */
264 break;
265 case GRE_VERSION_1:
266 pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
267 pgreh->call_id = tuple->dst.u.gre.key;
268 break;
269 default:
270 pr_debug("can't nat unknown GRE version\n");
271 return false;
272 }
273 #endif
274 return true;
275 }
276
l4proto_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,unsigned int hdroff,const struct nf_conntrack_tuple * tuple,enum nf_nat_manip_type maniptype)277 static bool l4proto_manip_pkt(struct sk_buff *skb,
278 unsigned int iphdroff, unsigned int hdroff,
279 const struct nf_conntrack_tuple *tuple,
280 enum nf_nat_manip_type maniptype)
281 {
282 switch (tuple->dst.protonum) {
283 case IPPROTO_TCP:
284 return tcp_manip_pkt(skb, iphdroff, hdroff,
285 tuple, maniptype);
286 case IPPROTO_UDP:
287 return udp_manip_pkt(skb, iphdroff, hdroff,
288 tuple, maniptype);
289 case IPPROTO_UDPLITE:
290 return udplite_manip_pkt(skb, iphdroff, hdroff,
291 tuple, maniptype);
292 case IPPROTO_SCTP:
293 return sctp_manip_pkt(skb, iphdroff, hdroff,
294 tuple, maniptype);
295 case IPPROTO_ICMP:
296 return icmp_manip_pkt(skb, iphdroff, hdroff,
297 tuple, maniptype);
298 case IPPROTO_ICMPV6:
299 return icmpv6_manip_pkt(skb, iphdroff, hdroff,
300 tuple, maniptype);
301 case IPPROTO_GRE:
302 return gre_manip_pkt(skb, iphdroff, hdroff,
303 tuple, maniptype);
304 }
305
306 /* If we don't know protocol -- no error, pass it unmodified. */
307 return true;
308 }
309
nf_nat_ipv4_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,const struct nf_conntrack_tuple * target,enum nf_nat_manip_type maniptype)310 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
311 unsigned int iphdroff,
312 const struct nf_conntrack_tuple *target,
313 enum nf_nat_manip_type maniptype)
314 {
315 struct iphdr *iph;
316 unsigned int hdroff;
317
318 if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
319 return false;
320
321 iph = (void *)skb->data + iphdroff;
322 hdroff = iphdroff + iph->ihl * 4;
323
324 if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
325 return false;
326 iph = (void *)skb->data + iphdroff;
327
328 if (maniptype == NF_NAT_MANIP_SRC) {
329 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
330 iph->saddr = target->src.u3.ip;
331 } else {
332 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
333 iph->daddr = target->dst.u3.ip;
334 }
335 return true;
336 }
337
nf_nat_ipv6_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,const struct nf_conntrack_tuple * target,enum nf_nat_manip_type maniptype)338 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
339 unsigned int iphdroff,
340 const struct nf_conntrack_tuple *target,
341 enum nf_nat_manip_type maniptype)
342 {
343 #if IS_ENABLED(CONFIG_IPV6)
344 struct ipv6hdr *ipv6h;
345 __be16 frag_off;
346 int hdroff;
347 u8 nexthdr;
348
349 if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
350 return false;
351
352 ipv6h = (void *)skb->data + iphdroff;
353 nexthdr = ipv6h->nexthdr;
354 hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
355 &nexthdr, &frag_off);
356 if (hdroff < 0)
357 goto manip_addr;
358
359 if ((frag_off & htons(~0x7)) == 0 &&
360 !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
361 return false;
362
363 /* must reload, offset might have changed */
364 ipv6h = (void *)skb->data + iphdroff;
365
366 manip_addr:
367 if (maniptype == NF_NAT_MANIP_SRC)
368 ipv6h->saddr = target->src.u3.in6;
369 else
370 ipv6h->daddr = target->dst.u3.in6;
371
372 #endif
373 return true;
374 }
375
nf_nat_manip_pkt(struct sk_buff * skb,struct nf_conn * ct,enum nf_nat_manip_type mtype,enum ip_conntrack_dir dir)376 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
377 enum nf_nat_manip_type mtype,
378 enum ip_conntrack_dir dir)
379 {
380 struct nf_conntrack_tuple target;
381
382 /* We are aiming to look like inverse of other direction. */
383 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
384
385 switch (target.src.l3num) {
386 case NFPROTO_IPV6:
387 if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
388 return NF_ACCEPT;
389 break;
390 case NFPROTO_IPV4:
391 if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
392 return NF_ACCEPT;
393 break;
394 default:
395 WARN_ON_ONCE(1);
396 break;
397 }
398
399 return NF_DROP;
400 }
401
/* Adjust *@check for the IPv4 address change the manipulation is about
 * to make: the address currently in the IP header at @iphdroff (source
 * or destination, per @maniptype) is replaced by the one from @t, with
 * the pseudo-header flag of inet_proto_csum_replace4() set.
 */
static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
				    unsigned int iphdroff, __sum16 *check,
				    const struct nf_conntrack_tuple *t,
				    enum nf_nat_manip_type maniptype)
{
	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
	const bool is_src = maniptype == NF_NAT_MANIP_SRC;
	const __be32 from = is_src ? iph->saddr : iph->daddr;
	const __be32 to = is_src ? t->src.u3.ip : t->dst.u3.ip;

	inet_proto_csum_replace4(check, skb, from, to, true);
}
419
nf_nat_ipv6_csum_update(struct sk_buff * skb,unsigned int iphdroff,__sum16 * check,const struct nf_conntrack_tuple * t,enum nf_nat_manip_type maniptype)420 static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
421 unsigned int iphdroff, __sum16 *check,
422 const struct nf_conntrack_tuple *t,
423 enum nf_nat_manip_type maniptype)
424 {
425 #if IS_ENABLED(CONFIG_IPV6)
426 const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
427 const struct in6_addr *oldip, *newip;
428
429 if (maniptype == NF_NAT_MANIP_SRC) {
430 oldip = &ipv6h->saddr;
431 newip = &t->src.u3.in6;
432 } else {
433 oldip = &ipv6h->daddr;
434 newip = &t->dst.u3.in6;
435 }
436 inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
437 newip->s6_addr32, true);
438 #endif
439 }
440
/* Adjust a transport checksum for the pending address rewrite, using the
 * IPv4 or IPv6 helper chosen by t->src.l3num.  Any other l3num leaves
 * the checksum untouched.
 */
static void nf_csum_update(struct sk_buff *skb,
			   unsigned int iphdroff, __sum16 *check,
			   const struct nf_conntrack_tuple *t,
			   enum nf_nat_manip_type maniptype)
{
	if (t->src.l3num == NFPROTO_IPV4)
		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
	else if (t->src.l3num == NFPROTO_IPV6)
		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
}
455
/* Bring the transport checksum at @check up to date after the payload
 * length changed from @oldlen to @datalen (IPv4).
 *
 * If the skb is already CHECKSUM_PARTIAL only the length change is
 * folded into *@check.  Otherwise the skb is switched to
 * CHECKSUM_PARTIAL: csum_start/csum_offset are set up and *@check is set
 * to the complement of csum_tcpudp_magic() over the IP addresses,
 * @datalen and @proto.
 */
static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
				    u8 proto, void *data, __sum16 *check,
				    int datalen, int oldlen)
{
	const struct iphdr *iph;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(check, skb,
					 htons(oldlen), htons(datalen), true);
		return;
	}

	iph = ip_hdr(skb);

	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
			  ip_hdrlen(skb);
	skb->csum_offset = (void *)check - data;
	*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
				    proto, 0);
}
474
475 #if IS_ENABLED(CONFIG_IPV6)
/* Bring the transport checksum at @check up to date after the payload
 * length changed from @oldlen to @datalen (IPv6).
 *
 * If the skb is already CHECKSUM_PARTIAL only the length change is
 * folded into *@check.  Otherwise the skb is switched to
 * CHECKSUM_PARTIAL: csum_start is derived from the position of @data
 * within the skb, csum_offset from @check, and *@check is set to the
 * complement of csum_ipv6_magic() over the addresses, @datalen and
 * @proto.
 */
static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
				    u8 proto, void *data, __sum16 *check,
				    int datalen, int oldlen)
{
	const struct ipv6hdr *ip6;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(check, skb,
					 htons(oldlen), htons(datalen), true);
		return;
	}

	ip6 = ipv6_hdr(skb);

	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
			  (data - (void *)skb->data);
	skb->csum_offset = (void *)check - data;
	*check = ~csum_ipv6_magic(&ip6->saddr, &ip6->daddr,
				  datalen, proto, 0);
}
494 #endif
495
nf_nat_csum_recalc(struct sk_buff * skb,u8 nfproto,u8 proto,void * data,__sum16 * check,int datalen,int oldlen)496 void nf_nat_csum_recalc(struct sk_buff *skb,
497 u8 nfproto, u8 proto, void *data, __sum16 *check,
498 int datalen, int oldlen)
499 {
500 switch (nfproto) {
501 case NFPROTO_IPV4:
502 nf_nat_ipv4_csum_recalc(skb, proto, data, check,
503 datalen, oldlen);
504 return;
505 #if IS_ENABLED(CONFIG_IPV6)
506 case NFPROTO_IPV6:
507 nf_nat_ipv6_csum_recalc(skb, proto, data, check,
508 datalen, oldlen);
509 return;
510 #endif
511 }
512
513 WARN_ON_ONCE(1);
514 }
515
nf_nat_icmp_reply_translation(struct sk_buff * skb,struct nf_conn * ct,enum ip_conntrack_info ctinfo,unsigned int hooknum)516 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
517 struct nf_conn *ct,
518 enum ip_conntrack_info ctinfo,
519 unsigned int hooknum)
520 {
521 struct {
522 struct icmphdr icmp;
523 struct iphdr ip;
524 } *inside;
525 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
526 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
527 unsigned int hdrlen = ip_hdrlen(skb);
528 struct nf_conntrack_tuple target;
529 unsigned long statusbit;
530
531 WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
532
533 if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
534 return 0;
535 if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
536 return 0;
537
538 inside = (void *)skb->data + hdrlen;
539 if (inside->icmp.type == ICMP_REDIRECT) {
540 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
541 return 0;
542 if (ct->status & IPS_NAT_MASK)
543 return 0;
544 }
545
546 if (manip == NF_NAT_MANIP_SRC)
547 statusbit = IPS_SRC_NAT;
548 else
549 statusbit = IPS_DST_NAT;
550
551 /* Invert if this is reply direction */
552 if (dir == IP_CT_DIR_REPLY)
553 statusbit ^= IPS_NAT_MASK;
554
555 if (!(ct->status & statusbit))
556 return 1;
557
558 if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
559 &ct->tuplehash[!dir].tuple, !manip))
560 return 0;
561
562 if (skb->ip_summed != CHECKSUM_PARTIAL) {
563 /* Reloading "inside" here since manip_pkt may reallocate */
564 inside = (void *)skb->data + hdrlen;
565 inside->icmp.checksum = 0;
566 inside->icmp.checksum =
567 csum_fold(skb_checksum(skb, hdrlen,
568 skb->len - hdrlen, 0));
569 }
570
571 /* Change outer to look like the reply to an incoming packet */
572 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
573 target.dst.protonum = IPPROTO_ICMP;
574 if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
575 return 0;
576
577 return 1;
578 }
579 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
580
581 static unsigned int
nf_nat_ipv4_fn(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)582 nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
583 const struct nf_hook_state *state)
584 {
585 struct nf_conn *ct;
586 enum ip_conntrack_info ctinfo;
587
588 ct = nf_ct_get(skb, &ctinfo);
589 if (!ct)
590 return NF_ACCEPT;
591
592 if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
593 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
594 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
595 state->hook))
596 return NF_DROP;
597 else
598 return NF_ACCEPT;
599 }
600 }
601
602 return nf_nat_inet_fn(priv, skb, state);
603 }
604
605 static unsigned int
nf_nat_ipv4_pre_routing(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)606 nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
607 const struct nf_hook_state *state)
608 {
609 unsigned int ret;
610 __be32 daddr = ip_hdr(skb)->daddr;
611
612 ret = nf_nat_ipv4_fn(priv, skb, state);
613 if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
614 skb_dst_drop(skb);
615
616 return ret;
617 }
618
619 #ifdef CONFIG_XFRM
nf_xfrm_me_harder(struct net * net,struct sk_buff * skb,unsigned int family)620 static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
621 {
622 struct sock *sk = skb->sk;
623 struct dst_entry *dst;
624 unsigned int hh_len;
625 struct flowi fl;
626 int err;
627
628 err = xfrm_decode_session(net, skb, &fl, family);
629 if (err < 0)
630 return err;
631
632 dst = skb_dst(skb);
633 if (dst->xfrm)
634 dst = ((struct xfrm_dst *)dst)->route;
635 if (!dst_hold_safe(dst))
636 return -EHOSTUNREACH;
637
638 if (sk && !net_eq(net, sock_net(sk)))
639 sk = NULL;
640
641 dst = xfrm_lookup(net, dst, &fl, sk, 0);
642 if (IS_ERR(dst))
643 return PTR_ERR(dst);
644
645 skb_dst_drop(skb);
646 skb_dst_set(skb, dst);
647
648 /* Change in oif may mean change in hh_len. */
649 hh_len = skb_dst(skb)->dev->hard_header_len;
650 if (skb_headroom(skb) < hh_len &&
651 pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
652 return -ENOMEM;
653 return 0;
654 }
655 #endif
656
nf_nat_inet_port_was_mangled(const struct sk_buff * skb,__be16 sport)657 static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport)
658 {
659 enum ip_conntrack_info ctinfo;
660 enum ip_conntrack_dir dir;
661 const struct nf_conn *ct;
662
663 ct = nf_ct_get(skb, &ctinfo);
664 if (!ct)
665 return false;
666
667 switch (nf_ct_protonum(ct)) {
668 case IPPROTO_TCP:
669 case IPPROTO_UDP:
670 break;
671 default:
672 return false;
673 }
674
675 dir = CTINFO2DIR(ctinfo);
676 if (dir != IP_CT_DIR_ORIGINAL)
677 return false;
678
679 return ct->tuplehash[!dir].tuple.dst.u.all != sport;
680 }
681
682 static unsigned int
nf_nat_ipv4_local_in(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)683 nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
684 const struct nf_hook_state *state)
685 {
686 __be32 saddr = ip_hdr(skb)->saddr;
687 struct sock *sk = skb->sk;
688 unsigned int ret;
689
690 ret = nf_nat_ipv4_fn(priv, skb, state);
691
692 if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
693 return ret;
694
695 /* skb has a socket assigned via tcp edemux. We need to check
696 * if nf_nat_ipv4_fn() has mangled the packet in a way that
697 * edemux would not have found this socket.
698 *
699 * This includes both changes to the source address and changes
700 * to the source port, which are both handled by the
701 * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
702 * might have found a socket for the old (pre-snat) address.
703 */
704 if (saddr != ip_hdr(skb)->saddr ||
705 nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
706 skb_orphan(skb); /* TCP edemux obtained wrong socket */
707
708 return ret;
709 }
710
/* POST_ROUTING hook: run NAT and, with CONFIG_XFRM, redo the xfrm lookup
 * when the connection's tuples show a changed source address or (for
 * non-ICMP) a changed source port.
 *
 * The xfrm step is skipped when the verdict is not NF_ACCEPT, when the
 * skb is flagged IPSKB_XFRM_TRANSFORMED, or when there is no conntrack
 * entry.  A failing lookup turns the verdict into NF_DROP_ERR(err).
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *here, *there;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	const struct nf_conn *ct;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	here = &ct->tuplehash[dir].tuple;
	there = &ct->tuplehash[!dir].tuple;

	if (here->src.u3.ip != there->dst.u3.ip ||
	    (here->dst.protonum != IPPROTO_ICMP &&
	     here->src.u.all != there->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif
	return ret;
}
747
748 static unsigned int
nf_nat_ipv4_local_fn(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)749 nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
750 const struct nf_hook_state *state)
751 {
752 const struct nf_conn *ct;
753 enum ip_conntrack_info ctinfo;
754 unsigned int ret;
755 int err;
756
757 ret = nf_nat_ipv4_fn(priv, skb, state);
758 if (ret != NF_ACCEPT)
759 return ret;
760
761 ct = nf_ct_get(skb, &ctinfo);
762 if (ct) {
763 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
764
765 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
766 ct->tuplehash[!dir].tuple.src.u3.ip) {
767 err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
768 if (err < 0)
769 ret = NF_DROP_ERR(err);
770 }
771 #ifdef CONFIG_XFRM
772 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
773 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
774 ct->tuplehash[dir].tuple.dst.u.all !=
775 ct->tuplehash[!dir].tuple.src.u.all) {
776 err = nf_xfrm_me_harder(state->net, skb, AF_INET);
777 if (err < 0)
778 ret = NF_DROP_ERR(err);
779 }
780 #endif
781 }
782 return ret;
783 }
784
785 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
786 /* Before packet filtering, change destination */
787 {
788 .hook = nf_nat_ipv4_pre_routing,
789 .pf = NFPROTO_IPV4,
790 .hooknum = NF_INET_PRE_ROUTING,
791 .priority = NF_IP_PRI_NAT_DST,
792 },
793 /* After packet filtering, change source */
794 {
795 .hook = nf_nat_ipv4_out,
796 .pf = NFPROTO_IPV4,
797 .hooknum = NF_INET_POST_ROUTING,
798 .priority = NF_IP_PRI_NAT_SRC,
799 },
800 /* Before packet filtering, change destination */
801 {
802 .hook = nf_nat_ipv4_local_fn,
803 .pf = NFPROTO_IPV4,
804 .hooknum = NF_INET_LOCAL_OUT,
805 .priority = NF_IP_PRI_NAT_DST,
806 },
807 /* After packet filtering, change source */
808 {
809 .hook = nf_nat_ipv4_local_in,
810 .pf = NFPROTO_IPV4,
811 .hooknum = NF_INET_LOCAL_IN,
812 .priority = NF_IP_PRI_NAT_SRC,
813 },
814 };
815
nf_nat_ipv4_register_fn(struct net * net,const struct nf_hook_ops * ops)816 int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
817 {
818 return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
819 ARRAY_SIZE(nf_nat_ipv4_ops));
820 }
821 EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
822
nf_nat_ipv4_unregister_fn(struct net * net,const struct nf_hook_ops * ops)823 void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
824 {
825 nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
826 }
827 EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
828
829 #if IS_ENABLED(CONFIG_IPV6)
nf_nat_icmpv6_reply_translation(struct sk_buff * skb,struct nf_conn * ct,enum ip_conntrack_info ctinfo,unsigned int hooknum,unsigned int hdrlen)830 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
831 struct nf_conn *ct,
832 enum ip_conntrack_info ctinfo,
833 unsigned int hooknum,
834 unsigned int hdrlen)
835 {
836 struct {
837 struct icmp6hdr icmp6;
838 struct ipv6hdr ip6;
839 } *inside;
840 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
841 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
842 struct nf_conntrack_tuple target;
843 unsigned long statusbit;
844
845 WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
846
847 if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
848 return 0;
849 if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
850 return 0;
851
852 inside = (void *)skb->data + hdrlen;
853 if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
854 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
855 return 0;
856 if (ct->status & IPS_NAT_MASK)
857 return 0;
858 }
859
860 if (manip == NF_NAT_MANIP_SRC)
861 statusbit = IPS_SRC_NAT;
862 else
863 statusbit = IPS_DST_NAT;
864
865 /* Invert if this is reply direction */
866 if (dir == IP_CT_DIR_REPLY)
867 statusbit ^= IPS_NAT_MASK;
868
869 if (!(ct->status & statusbit))
870 return 1;
871
872 if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
873 &ct->tuplehash[!dir].tuple, !manip))
874 return 0;
875
876 if (skb->ip_summed != CHECKSUM_PARTIAL) {
877 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
878
879 inside = (void *)skb->data + hdrlen;
880 inside->icmp6.icmp6_cksum = 0;
881 inside->icmp6.icmp6_cksum =
882 csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
883 skb->len - hdrlen, IPPROTO_ICMPV6,
884 skb_checksum(skb, hdrlen,
885 skb->len - hdrlen, 0));
886 }
887
888 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
889 target.dst.protonum = IPPROTO_ICMPV6;
890 if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
891 return 0;
892
893 return 1;
894 }
895 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
896
897 static unsigned int
nf_nat_ipv6_fn(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)898 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
899 const struct nf_hook_state *state)
900 {
901 struct nf_conn *ct;
902 enum ip_conntrack_info ctinfo;
903 __be16 frag_off;
904 int hdrlen;
905 u8 nexthdr;
906
907 ct = nf_ct_get(skb, &ctinfo);
908 /* Can't track? It's not due to stress, or conntrack would
909 * have dropped it. Hence it's the user's responsibilty to
910 * packet filter it out, or implement conntrack/NAT for that
911 * protocol. 8) --RR
912 */
913 if (!ct)
914 return NF_ACCEPT;
915
916 if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
917 nexthdr = ipv6_hdr(skb)->nexthdr;
918 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
919 &nexthdr, &frag_off);
920
921 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
922 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
923 state->hook,
924 hdrlen))
925 return NF_DROP;
926 else
927 return NF_ACCEPT;
928 }
929 }
930
931 return nf_nat_inet_fn(priv, skb, state);
932 }
933
934 static unsigned int
nf_nat_ipv6_local_in(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)935 nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb,
936 const struct nf_hook_state *state)
937 {
938 struct in6_addr saddr = ipv6_hdr(skb)->saddr;
939 struct sock *sk = skb->sk;
940 unsigned int ret;
941
942 ret = nf_nat_ipv6_fn(priv, skb, state);
943
944 if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
945 return ret;
946
947 /* see nf_nat_ipv4_local_in */
948 if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) ||
949 nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
950 skb_orphan(skb);
951
952 return ret;
953 }
954
955 static unsigned int
nf_nat_ipv6_in(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)956 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
957 const struct nf_hook_state *state)
958 {
959 unsigned int ret, verdict;
960 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
961
962 ret = nf_nat_ipv6_fn(priv, skb, state);
963 verdict = ret & NF_VERDICT_MASK;
964 if (verdict != NF_DROP && verdict != NF_STOLEN &&
965 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
966 skb_dst_drop(skb);
967
968 return ret;
969 }
970
/* POST_ROUTING hook for IPv6: run NAT and, with CONFIG_XFRM, redo the
 * xfrm lookup when the connection's tuples show a changed source address
 * or (for non-ICMPv6) a changed source port.
 *
 * The xfrm step is skipped when the verdict is not NF_ACCEPT, when the
 * skb is flagged IP6SKB_XFRM_TRANSFORMED, or when there is no conntrack
 * entry.  A failing lookup turns the verdict into NF_DROP_ERR(err).
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *here, *there;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	const struct nf_conn *ct;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	here = &ct->tuplehash[dir].tuple;
	there = &ct->tuplehash[!dir].tuple;

	if (!nf_inet_addr_cmp(&here->src.u3, &there->dst.u3) ||
	    (here->dst.protonum != IPPROTO_ICMPV6 &&
	     here->src.u.all != there->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif

	return ret;
}
1007
1008 static unsigned int
nf_nat_ipv6_local_fn(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)1009 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
1010 const struct nf_hook_state *state)
1011 {
1012 const struct nf_conn *ct;
1013 enum ip_conntrack_info ctinfo;
1014 unsigned int ret;
1015 int err;
1016
1017 ret = nf_nat_ipv6_fn(priv, skb, state);
1018 if (ret != NF_ACCEPT)
1019 return ret;
1020
1021 ct = nf_ct_get(skb, &ctinfo);
1022 if (ct) {
1023 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
1024
1025 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
1026 &ct->tuplehash[!dir].tuple.src.u3)) {
1027 err = nf_ip6_route_me_harder(state->net, state->sk, skb);
1028 if (err < 0)
1029 ret = NF_DROP_ERR(err);
1030 }
1031 #ifdef CONFIG_XFRM
1032 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
1033 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
1034 ct->tuplehash[dir].tuple.dst.u.all !=
1035 ct->tuplehash[!dir].tuple.src.u.all) {
1036 err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
1037 if (err < 0)
1038 ret = NF_DROP_ERR(err);
1039 }
1040 #endif
1041 }
1042
1043 return ret;
1044 }
1045
1046 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
1047 /* Before packet filtering, change destination */
1048 {
1049 .hook = nf_nat_ipv6_in,
1050 .pf = NFPROTO_IPV6,
1051 .hooknum = NF_INET_PRE_ROUTING,
1052 .priority = NF_IP6_PRI_NAT_DST,
1053 },
1054 /* After packet filtering, change source */
1055 {
1056 .hook = nf_nat_ipv6_out,
1057 .pf = NFPROTO_IPV6,
1058 .hooknum = NF_INET_POST_ROUTING,
1059 .priority = NF_IP6_PRI_NAT_SRC,
1060 },
1061 /* Before packet filtering, change destination */
1062 {
1063 .hook = nf_nat_ipv6_local_fn,
1064 .pf = NFPROTO_IPV6,
1065 .hooknum = NF_INET_LOCAL_OUT,
1066 .priority = NF_IP6_PRI_NAT_DST,
1067 },
1068 /* After packet filtering, change source */
1069 {
1070 .hook = nf_nat_ipv6_local_in,
1071 .pf = NFPROTO_IPV6,
1072 .hooknum = NF_INET_LOCAL_IN,
1073 .priority = NF_IP6_PRI_NAT_SRC,
1074 },
1075 };
1076
nf_nat_ipv6_register_fn(struct net * net,const struct nf_hook_ops * ops)1077 int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
1078 {
1079 return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
1080 ARRAY_SIZE(nf_nat_ipv6_ops));
1081 }
1082 EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
1083
nf_nat_ipv6_unregister_fn(struct net * net,const struct nf_hook_ops * ops)1084 void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1085 {
1086 nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1087 }
1088 EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
1089 #endif /* CONFIG_IPV6 */
1090
1091 #if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
nf_nat_inet_register_fn(struct net * net,const struct nf_hook_ops * ops)1092 int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
1093 {
1094 int ret;
1095
1096 if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
1097 return -EINVAL;
1098
1099 ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
1100 ARRAY_SIZE(nf_nat_ipv6_ops));
1101 if (ret)
1102 return ret;
1103
1104 ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
1105 ARRAY_SIZE(nf_nat_ipv4_ops));
1106 if (ret)
1107 nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
1108 ARRAY_SIZE(nf_nat_ipv6_ops));
1109 return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
1112
nf_nat_inet_unregister_fn(struct net * net,const struct nf_hook_ops * ops)1113 void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1114 {
1115 nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
1116 nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1117 }
1118 EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
1119 #endif /* NFT INET NAT */
1120