1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org> 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/if_vlan.h> 8 #include <linux/init.h> 9 #include <linux/module.h> 10 #include <linux/netlink.h> 11 #include <linux/netfilter.h> 12 #include <linux/netfilter/nf_tables.h> 13 #include <net/netfilter/nf_tables_core.h> 14 #include <net/netfilter/nf_tables.h> 15 #include <net/netfilter/nft_meta.h> 16 #include <net/netfilter/nf_tables_offload.h> 17 #include <linux/tcp.h> 18 #include <linux/udp.h> 19 #include <net/gre.h> 20 #include <net/geneve.h> 21 #include <net/ip.h> 22 #include <linux/icmpv6.h> 23 #include <linux/ip.h> 24 #include <linux/ipv6.h> 25 26 struct nft_inner_tun_ctx_locked { 27 struct nft_inner_tun_ctx ctx; 28 local_lock_t bh_lock; 29 }; 30 31 static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = { 32 .bh_lock = INIT_LOCAL_LOCK(bh_lock), 33 }; 34 35 /* Same layout as nft_expr but it embeds the private expression data area. */ 36 struct __nft_expr { 37 const struct nft_expr_ops *ops; 38 union { 39 struct nft_payload payload; 40 struct nft_meta meta; 41 } __attribute__((aligned(__alignof__(u64)))); 42 }; 43 44 enum { 45 NFT_INNER_EXPR_PAYLOAD, 46 NFT_INNER_EXPR_META, 47 }; 48 49 struct nft_inner { 50 u8 flags; 51 u8 hdrsize; 52 u8 type; 53 u8 expr_type; 54 55 struct __nft_expr expr; 56 }; 57 58 static int nft_inner_parse_l2l3(const struct nft_inner *priv, 59 const struct nft_pktinfo *pkt, 60 struct nft_inner_tun_ctx *ctx, u32 off) 61 { 62 __be16 llproto, outer_llproto; 63 u32 nhoff, thoff; 64 65 if (priv->flags & NFT_INNER_LL) { 66 struct vlan_ethhdr *veth, _veth; 67 struct ethhdr *eth, _eth; 68 u32 hdrsize; 69 70 eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth); 71 if (!eth) 72 return -1; 73 74 switch (eth->h_proto) { 75 case htons(ETH_P_IP): 76 case htons(ETH_P_IPV6): 77 llproto = eth->h_proto; 78 hdrsize = sizeof(_eth); 79 break; 80 case htons(ETH_P_8021Q): 81 veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth); 82 if (!veth) 83 return -1; 84 85 outer_llproto = veth->h_vlan_encapsulated_proto; 86 llproto = veth->h_vlan_proto; 87 hdrsize = sizeof(_veth); 88 break; 89 default: 90 return -1; 91 } 92 93 ctx->inner_lloff = off; 94 ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL; 95 off += hdrsize; 96 } else { 97 struct iphdr *iph; 98 u32 _version; 99 100 iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version); 101 if (!iph) 102 return -1; 103 104 switch (iph->version) { 105 case 4: 106 llproto = htons(ETH_P_IP); 107 break; 108 case 6: 109 llproto = htons(ETH_P_IPV6); 110 break; 111 default: 112 return -1; 113 } 114 } 115 116 ctx->llproto = llproto; 117 if (llproto == htons(ETH_P_8021Q)) 118 llproto = outer_llproto; 119 120 nhoff = off; 121 122 switch (llproto) { 123 case htons(ETH_P_IP): { 124 struct iphdr *iph, _iph; 125 126 iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph); 127 if (!iph) 128 return -1; 129 130 if (iph->ihl < 5 || iph->version != 4) 131 return -1; 132 133 ctx->inner_nhoff = nhoff; 134 ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; 135 136 thoff = nhoff + (iph->ihl * 4); 137 if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) { 138 ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; 139 ctx->inner_thoff = thoff; 140 ctx->l4proto = iph->protocol; 141 } 142 } 143 break; 144 case htons(ETH_P_IPV6): { 145 struct ipv6hdr *ip6h, _ip6h; 146 int fh_flags = IP6_FH_F_AUTH; 147 unsigned short fragoff; 148 int l4proto; 149 150 ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h); 151 if (!ip6h) 152 return -1; 153 154 if (ip6h->version != 6) 155 return -1; 156 157 ctx->inner_nhoff = nhoff; 158 ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; 159 160 thoff = nhoff; 161 l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags); 162 if (l4proto < 0 || thoff > U16_MAX) 163 return -1; 164 165 if (fragoff == 0) { 166 thoff = nhoff + sizeof(_ip6h); 167 ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; 168 ctx->inner_thoff = thoff; 169 ctx->l4proto = l4proto; 170 } 171 } 172 break; 173 default: 174 return -1; 175 } 176 177 return 0; 178 } 179 180 static int nft_inner_parse_tunhdr(const struct nft_inner *priv, 181 const struct nft_pktinfo *pkt, 182 struct nft_inner_tun_ctx *ctx, u32 *off) 183 { 184 if (pkt->tprot == IPPROTO_GRE) { 185 ctx->inner_tunoff = pkt->thoff; 186 ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; 187 return 0; 188 } 189 190 if (pkt->tprot != IPPROTO_UDP) 191 return -1; 192 193 ctx->inner_tunoff = *off; 194 ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; 195 *off += priv->hdrsize; 196 197 switch (priv->type) { 198 case NFT_INNER_GENEVE: { 199 struct genevehdr *gnvh, _gnvh; 200 201 gnvh = skb_header_pointer(pkt->skb, pkt->inneroff, 202 sizeof(_gnvh), &_gnvh); 203 if (!gnvh) 204 return -1; 205 206 *off += gnvh->opt_len * 4; 207 } 208 break; 209 default: 210 break; 211 } 212 213 return 0; 214 } 215 216 static int nft_inner_parse(const struct nft_inner *priv, 217 struct nft_pktinfo *pkt, 218 struct nft_inner_tun_ctx *tun_ctx) 219 { 220 u32 off = pkt->inneroff; 221 222 if (priv->flags & NFT_INNER_HDRSIZE && 223 nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0) 224 return -1; 225 226 if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) { 227 if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0) 228 return -1; 229 } else if (priv->flags & NFT_INNER_TH) { 230 tun_ctx->inner_thoff = off; 231 tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; 232 } 233 234 tun_ctx->type = priv->type; 235 tun_ctx->cookie = (unsigned long)pkt->skb; 236 pkt->flags |= NFT_PKTINFO_INNER_FULL; 237 238 return 0; 239 } 240 241 static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, 242 struct nft_inner_tun_ctx *tun_ctx) 243 { 244 struct nft_inner_tun_ctx *this_cpu_tun_ctx; 245 246 local_bh_disable(); 247 local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 248 this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); 249 if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { 250 local_bh_enable(); 251 local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 252 return false; 253 } 254 *tun_ctx = *this_cpu_tun_ctx; 255 local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 256 local_bh_enable(); 257 258 return true; 259 } 260 261 static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt, 262 const struct nft_inner_tun_ctx *tun_ctx) 263 { 264 struct nft_inner_tun_ctx *this_cpu_tun_ctx; 265 266 local_bh_disable(); 267 local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 268 this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); 269 if (this_cpu_tun_ctx->cookie != tun_ctx->cookie) 270 *this_cpu_tun_ctx = *tun_ctx; 271 local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 272 local_bh_enable(); 273 } 274 275 static bool nft_inner_parse_needed(const struct nft_inner *priv, 276 const struct nft_pktinfo *pkt, 277 struct nft_inner_tun_ctx *tun_ctx) 278 { 279 if (!(pkt->flags & NFT_PKTINFO_INNER_FULL)) 280 return true; 281 282 if (!nft_inner_restore_tun_ctx(pkt, tun_ctx)) 283 return true; 284 285 if (priv->type != tun_ctx->type) 286 return true; 287 288 return false; 289 } 290 291 static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, 292 const struct nft_pktinfo *pkt) 293 { 294 const struct nft_inner *priv = nft_expr_priv(expr); 295 struct nft_inner_tun_ctx tun_ctx = {}; 296 297 if (nft_payload_inner_offset(pkt) < 0) 298 goto err; 299 300 if (nft_inner_parse_needed(priv, pkt, &tun_ctx) && 301 nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0) 302 goto err; 303 304 switch (priv->expr_type) { 305 case NFT_INNER_EXPR_PAYLOAD: 306 nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); 307 break; 308 case NFT_INNER_EXPR_META: 309 nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); 310 break; 311 default: 312 WARN_ON_ONCE(1); 313 goto err; 314 } 315 nft_inner_save_tun_ctx(pkt, &tun_ctx); 316 317 return; 318 err: 319 regs->verdict.code = NFT_BREAK; 320 } 321 322 static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = { 323 [NFTA_INNER_NUM] = { .type = NLA_U32 }, 324 [NFTA_INNER_FLAGS] = { .type = NLA_U32 }, 325 [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 }, 326 [NFTA_INNER_TYPE] = { .type = NLA_U32 }, 327 [NFTA_INNER_EXPR] = { .type = NLA_NESTED }, 328 }; 329 330 struct nft_expr_info { 331 const struct nft_expr_ops *ops; 332 const struct nlattr *attr; 333 struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; 334 }; 335 336 static int nft_inner_init(const struct nft_ctx *ctx, 337 const struct nft_expr *expr, 338 const struct nlattr * const tb[]) 339 { 340 struct nft_inner *priv = nft_expr_priv(expr); 341 u32 flags, hdrsize, type, num; 342 struct nft_expr_info expr_info; 343 int err; 344 345 if (!tb[NFTA_INNER_FLAGS] || 346 !tb[NFTA_INNER_NUM] || 347 !tb[NFTA_INNER_HDRSIZE] || 348 !tb[NFTA_INNER_TYPE] || 349 !tb[NFTA_INNER_EXPR]) 350 return -EINVAL; 351 352 flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS])); 353 if (flags & ~NFT_INNER_MASK) 354 return -EOPNOTSUPP; 355 356 num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM])); 357 if (num != 0) 358 return -EOPNOTSUPP; 359 360 hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE])); 361 type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE])); 362 363 if (type > U8_MAX) 364 return -EINVAL; 365 366 if (flags & NFT_INNER_HDRSIZE) { 367 if (hdrsize == 0 || hdrsize > 64) 368 return -EOPNOTSUPP; 369 } 370 371 priv->flags = flags; 372 priv->hdrsize = hdrsize; 373 priv->type = type; 374 375 err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info); 376 if (err < 0) 377 return err; 378 379 priv->expr.ops = expr_info.ops; 380 381 if (!strcmp(expr_info.ops->type->name, "payload")) 382 priv->expr_type = NFT_INNER_EXPR_PAYLOAD; 383 else if (!strcmp(expr_info.ops->type->name, "meta")) 384 priv->expr_type = NFT_INNER_EXPR_META; 385 else 386 return -EINVAL; 387 388 err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr, 389 (const struct nlattr * const*)expr_info.tb); 390 if (err < 0) 391 return err; 392 393 return 0; 394 } 395 396 static int nft_inner_dump(struct sk_buff *skb, 397 const struct nft_expr *expr, bool reset) 398 { 399 const struct nft_inner *priv = nft_expr_priv(expr); 400 401 if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)) || 402 nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)) || 403 nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)) || 404 nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize))) 405 goto nla_put_failure; 406 407 if (nft_expr_dump(skb, NFTA_INNER_EXPR, 408 (struct nft_expr *)&priv->expr, reset) < 0) 409 goto nla_put_failure; 410 411 return 0; 412 413 nla_put_failure: 414 return -1; 415 } 416 417 static const struct nft_expr_ops nft_inner_ops = { 418 .type = &nft_inner_type, 419 .size = NFT_EXPR_SIZE(sizeof(struct nft_inner)), 420 .eval = nft_inner_eval, 421 .init = nft_inner_init, 422 .dump = nft_inner_dump, 423 }; 424 425 struct nft_expr_type nft_inner_type __read_mostly = { 426 .name = "inner", 427 .ops = &nft_inner_ops, 428 .policy = nft_inner_policy, 429 .maxattr = NFTA_INNER_MAX, 430 .owner = THIS_MODULE, 431 }; 432