// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/gso.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_cls.h>

#include "datapath.h"
#include "drop.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};

/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
 */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
	       genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations
 * on vports, etc.) and writes to other state (flow table modifications,
 * setting miscellaneous datapath parameters, etc.) are protected by
 * ovs_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't
 * interact with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
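/* A minimal usage sketch of the rules above (illustrative only, not part
 * of the datapath code): writers serialize on ovs_lock(), readers rely
 * on RCU:
 *
 *	ovs_lock();
 *	vport = ovs_vport_ovsl(dp, port_no);	(writer side)
 *	...modify state...
 *	ovs_unlock();
 *
 *	rcu_read_lock();
 *	vport = ovs_vport_rcu(dp, port_no);	(reader side)
 *	...
 *	rcu_read_unlock();
 */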
static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
	if (debug_locks)
		return lockdep_is_held(&ovs_mutex);
	else
		return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

	return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = local->dev->ifindex;
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	ovs_meters_exit(dp);
	kfree(rcu_dereference_raw(dp->upcall_portids));
	kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
				 lockdep_ovsl_is_held()) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}

static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
					  const struct dp_upcall_info *upcall_info,
					  bool upcall_result)
{
	struct vport *p = OVS_CB(skb)->input_vport;
	struct vport_upcall_stats_percpu *stats;

	if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
	    upcall_info->cmd != OVS_PACKET_CMD_ACTION)
		return;

	stats = this_cpu_ptr(p->upcall_stats);
	u64_stats_update_begin(&stats->syncp);
	if (upcall_result)
		u64_stats_inc(&stats->n_success);
	else
		u64_stats_inc(&stats->n_fail);
	u64_stats_update_end(&stats->syncp);
}

void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}
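/* Note on the ordering above: hlist_del_rcu() only unlinks the vport from
 * the datapath's hash table; concurrent RCU readers may still find it
 * until a grace period elapses, which is why the final freeing of the
 * vport is deferred (see ovs_vport_del()).
 */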
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	bool ovs_pcpu_locked = false;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit, &n_cache_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;

		if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
			upcall.portid =
				ovs_dp_get_upcall_portid(dp, smp_processor_id());
		else
			upcall.portid = ovs_vport_find_upcall_portid(p, skb);

		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		switch (error) {
		case 0:
		case -EAGAIN:
		case -ERESTARTSYS:
		case -EINTR:
			consume_skb(skb);
			break;
		default:
			kfree_skb(skb);
			break;
		}
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	/* This path can be invoked recursively: Use the current task to
	 * identify recursive invocation - the lock must be acquired only once.
	 * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
	 * Limit the locking to RT to avoid assigning `owner' if it can be
	 * avoided.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
		local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
		ovs_pcpu->owner = current;
		ovs_pcpu_locked = true;
	}

	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);
	if (ovs_pcpu_locked) {
		ovs_pcpu->owner = NULL;
		local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
	}

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	stats->n_cache_hit += n_cache_hit;
	u64_stats_update_end(&stats->syncp);
}
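/* The u64_stats_update_begin()/end() pair above is the writer side of a
 * seqcount: on 32-bit SMP kernels it protects the 64-bit counters from
 * torn reads, while on 64-bit kernels it compiles away entirely. The
 * matching reader-side retry loop lives in get_dp_stats() below.
 */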
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	struct dp_stats_percpu *stats;
	int err;

	if (trace_ovs_dp_upcall_enabled())
		trace_ovs_dp_upcall(dp, skb, key, upcall_info);

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);

	ovs_vport_update_upcall_stats(skb, upcall_info, !err);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;
	}

	/* Free all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

	/* OVS_PACKET_ATTR_USERDATA */
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());

	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
		size += nla_total_size(actions_attrlen);

	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}

static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
		size_t plen = NLA_ALIGN(skb->len) - skb->len;

		if (plen > 0)
			skb_put_zero(skb, plen);
	}
}

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;
	u64 hash;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
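	/* Illustrative example: Netlink attributes align to NLA_ALIGNTO (4)
	 * bytes, so a packet attribute that ends at byte 1517 of the message
	 * is padded by pad_packet() with 3 zero bytes up to 1520. Zerocopy
	 * is only attempted when user space negotiated OVS_DP_F_UNALIGNED
	 * and no such padding is needed.
	 */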
492 */ 493 if (dp->user_features & OVS_DP_F_UNALIGNED) 494 hlen = skb_zerocopy_headlen(skb); 495 else 496 hlen = skb->len; 497 498 len = upcall_msg_size(upcall_info, hlen - cutlen, 499 OVS_CB(skb)->acts_origlen); 500 user_skb = genlmsg_new(len, GFP_ATOMIC); 501 if (!user_skb) { 502 err = -ENOMEM; 503 goto out; 504 } 505 506 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 507 0, upcall_info->cmd); 508 if (!upcall) { 509 err = -EINVAL; 510 goto out; 511 } 512 upcall->dp_ifindex = dp_ifindex; 513 514 err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); 515 if (err) 516 goto out; 517 518 if (upcall_info->userdata) 519 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, 520 nla_len(upcall_info->userdata), 521 nla_data(upcall_info->userdata)); 522 523 if (upcall_info->egress_tun_info) { 524 nla = nla_nest_start_noflag(user_skb, 525 OVS_PACKET_ATTR_EGRESS_TUN_KEY); 526 if (!nla) { 527 err = -EMSGSIZE; 528 goto out; 529 } 530 err = ovs_nla_put_tunnel_info(user_skb, 531 upcall_info->egress_tun_info); 532 if (err) 533 goto out; 534 535 nla_nest_end(user_skb, nla); 536 } 537 538 if (upcall_info->actions_len) { 539 nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS); 540 if (!nla) { 541 err = -EMSGSIZE; 542 goto out; 543 } 544 err = ovs_nla_put_actions(upcall_info->actions, 545 upcall_info->actions_len, 546 user_skb); 547 if (!err) 548 nla_nest_end(user_skb, nla); 549 else 550 nla_nest_cancel(user_skb, nla); 551 } 552 553 /* Add OVS_PACKET_ATTR_MRU */ 554 if (upcall_info->mru && 555 nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) { 556 err = -ENOBUFS; 557 goto out; 558 } 559 560 /* Add OVS_PACKET_ATTR_LEN when packet is truncated */ 561 if (cutlen > 0 && 562 nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) { 563 err = -ENOBUFS; 564 goto out; 565 } 566 567 /* Add OVS_PACKET_ATTR_HASH */ 568 hash = skb_get_hash_raw(skb); 569 if (skb->sw_hash) 570 hash |= OVS_PACKET_HASH_SW_BIT; 571 572 if (skb->l4_hash) 573 hash |= OVS_PACKET_HASH_L4_BIT; 574 575 if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { 576 err = -ENOBUFS; 577 goto out; 578 } 579 580 /* Only reserve room for attribute header, packet data is added 581 * in skb_zerocopy() */ 582 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 583 err = -ENOBUFS; 584 goto out; 585 } 586 nla->nla_len = nla_attr_size(skb->len - cutlen); 587 588 err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen); 589 if (err) 590 goto out; 591 592 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ 593 pad_packet(dp, user_skb); 594 595 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; 596 597 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); 598 user_skb = NULL; 599 out: 600 if (err) 601 skb_tx_error(skb); 602 consume_skb(user_skb); 603 consume_skb(nskb); 604 605 return err; 606 } 607 608 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) 609 { 610 struct ovs_header *ovs_header = genl_info_userhdr(info); 611 struct net *net = sock_net(skb->sk); 612 struct nlattr **a = info->attrs; 613 struct sw_flow_actions *acts; 614 struct sk_buff *packet; 615 struct sw_flow *flow; 616 struct sw_flow_actions *sf_acts; 617 struct datapath *dp; 618 struct vport *input_vport; 619 u16 mru = 0; 620 u64 hash; 621 int len; 622 int err; 623 bool log = !a[OVS_PACKET_ATTR_PROBE]; 624 625 err = -EINVAL; 626 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 627 !a[OVS_PACKET_ATTR_ACTIONS]) 628 goto err; 629 630 len = 
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

	if (a[OVS_PACKET_ATTR_HASH]) {
		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
			       !!(hash & OVS_PACKET_HASH_SW_BIT),
			       !!(hash & OVS_PACKET_HASH_L4_BIT));
	}

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	packet->dev = input_vport->dev;
	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	local_bh_disable();
	local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		this_cpu_write(ovs_pcpu_storage->owner, current);
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		this_cpu_write(ovs_pcpu_storage->owner, NULL);
	local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};

static const struct genl_small_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_packet_cmd_execute
	}
};
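/* GENL_UNS_ADMIN_PERM requires CAP_NET_ADMIN in the user namespace that
 * owns the socket's network namespace, so an unprivileged container with
 * its own netns can still manage its own datapaths.
 */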
static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.policy = packet_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_packet_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
	.module = THIS_MODULE,
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			start = u64_stats_fetch_begin(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
		mega_stats->n_cache_hit += local_stats.n_cache_hit;
	}
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
	return ovs_identifier_is_ufid(sfid) &&
	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
				    const struct sw_flow_id *sfid,
				    uint32_t ufid_flags)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback;
	 * see ovs_nla_put_identifier().
	 */
	if (sfid && ovs_identifier_is_ufid(sfid))
		len += nla_total_size(sfid->ufid_len);
	else
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_KEY */
	if (!sfid || should_fill_key(sfid, ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_MASK */
	if (should_fill_mask(ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_ACTIONS */
	if (should_fill_actions(ufid_flags))
		len += nla_total_size(acts->orig_len);

	return len
		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}
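/* The lengths used above are worst-case estimates: ovs_key_attr_size()
 * returns the size of a fully populated key, so a message allocated from
 * ovs_flow_cmd_msg_size() can never be too small for the attributes that
 * are actually emitted by the fill helpers below.
 */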
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	if (used &&
	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
			      OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if (stats.n_packets &&
	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
			  sizeof(struct ovs_flow_stats), &stats,
			  OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if ((u8)ntohs(tcp_flags) &&
	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		return -EMSGSIZE;

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'. This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them. (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
					       const struct sw_flow_id *sfid,
					       struct genl_info *info,
					       bool always,
					       uint32_t ufid_flags)
{
	struct sk_buff *skb;
	size_t len;

	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
		return NULL;

	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
	skb = genlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	if (WARN_ON_ONCE(retval < 0)) {
		kfree_skb(skb);
		skb = ERR_PTR(retval);
	}
	return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sw_flow *flow = NULL, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_key *key;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attr not present in new flow.");
		goto error;
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
		goto error;
	}

	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
	key = kzalloc(sizeof(*key), GFP_KERNEL);
	if (!key) {
		error = -ENOMEM;
		goto err_kfree_flow;
	}

	ovs_match_init(&match, key, false, &mask);
	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto err_kfree_key;

	ovs_flow_mask_key(&new_flow->key, key, true, &mask);

	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
				       key, log);
	if (error)
		goto err_kfree_key;
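	/* Note: a flow is identified either by a user-supplied UFID
	 * (OVS_FLOW_ATTR_UFID, up to 128 bits) or, for older user space,
	 * by its unmasked key; ovs_identifier_is_ufid() distinguishes the
	 * two cases in the duplicate check below.
	 */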
	/* Validate actions. */
	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
				     &new_flow->key, &acts, log);
	if (error) {
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
		goto err_kfree_key;
	}

	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}

	/* Check if this is a duplicate flow */
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
		flow = ovs_flow_tbl_lookup(&dp->table, key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request. We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		ovs_nla_free_flow_actions_rcu(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);

	kfree(key);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
err_kfree_key:
	kfree(key);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}
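/* A note on the reply handling above: the reply skb is allocated with
 * GFP_KERNEL before ovs_lock() is taken, keeping the allocation out of
 * the critical section. ovs_flow_cmd_alloc_info() returns NULL when no
 * reply is needed (no NLM_F_ECHO and no multicast listeners), which the
 * callers treat as "skip the notification".
 */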
/* Factor out action copy to avoid a "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
					 const struct nlattr *a,
					 const struct sw_flow_key *key,
					 const struct sw_flow_mask *mask,
					 bool log)
{
	struct sw_flow_actions *acts;
	struct sw_flow_key masked_key;
	int error;

	ovs_flow_mask_key(&masked_key, key, true, mask);
	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
	if (error) {
		OVS_NLERR(log,
			  "Actions may not be safe on all matching packets");
		return ERR_PTR(error);
	}

	return acts;
}

/* Factor out match-init and action-copy to avoid a
 * "Wframe-larger-than=1024" warning. Because the mask is only used to
 * obtain the actions, a separate function saves some stack space.
 *
 * If neither the key nor the action attribute is present, we return 0
 * directly; in that case the caller will not use the match either. If an
 * action attribute is present, we try to copy the actions and store them
 * in *acts. Before returning, we reset match->mask, because we must not
 * return a match object with a dangling reference to the on-stack mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
			      struct sw_flow_match *match,
			      struct sw_flow_key *key,
			      struct nlattr **a,
			      struct sw_flow_actions **acts,
			      bool log)
{
	struct sw_flow_mask mask;
	int error = 0;

	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(match, key, true, &mask);
		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
					  a[OVS_FLOW_ATTR_MASK], log);
		if (error)
			goto error;
	}

	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		if (!a[OVS_FLOW_ATTR_KEY]) {
			OVS_NLERR(log,
				  "Flow key attribute not present in set flow.");
			error = -EINVAL;
			goto error;
		}

		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
					 &mask, log);
		if (IS_ERR(*acts)) {
			error = PTR_ERR(*acts);
			goto error;
		}
	}

	/* On success, error is 0. */
error:
	match->mask = NULL;
	return error;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
		OVS_NLERR(log,
			  "Flow set message rejected, Key attribute missing.");
		return -EINVAL;
	}

	error = ovs_nla_init_match_and_action(net, &match, &key, a,
					      &acts, log);
	if (error)
		goto error;

	if (acts) {
		/* Can allocate before locking if have acts. */
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_SET,
						       ufid_flags);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_SET, false,
						ufid_flags);

		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	if (old_acts)
		ovs_nla_free_flow_actions_rcu(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
	} else if (!ufid_present) {
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
		err = -EINVAL;
	}
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_GET, true, ufid_flags);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow = NULL;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
					NULL, log);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *)flow->sf_acts,
					&flow->id, info, false, ufid_flags);
	if (likely(reply)) {
		if (!IS_ERR(reply)) {
			rcu_read_lock();	/* To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
						     OVS_FLOW_CMD_DEL,
						     ufid_flags);
			rcu_read_unlock();
			if (WARN_ON_ONCE(err < 0)) {
				kfree_skb(reply);
				goto out_free;
			}

			ovs_notify(&dp_flow_genl_family, reply, info);
		} else {
			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
					PTR_ERR(reply));
		}
	}

out_free:
	ovs_flow_free(flow, true);
	return 0;
unlock:
	ovs_unlock();
	return err;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct table_instance *ti;
	struct datapath *dp;
	u32 ufid_flags;
	int err;

	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
	if (err)
		return err;
	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}

	ti = rcu_dereference(dp->table.ti);
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
		if (!flow)
			break;

		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	rcu_read_unlock();
	return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static const struct genl_small_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_new
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_set,
	},
};

static struct genl_family dp_flow_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	.policy = flow_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_flow_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.resv_start_op = OVS_FLOW_CMD_SET + 1,
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};

static size_t ovs_dp_cmd_msg_size(void)
{
	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

	msgsize += nla_total_size(IFNAMSIZ);
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
	msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */

	return msgsize;
}
/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
	int err, pids_len;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
			  &dp_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
			  sizeof(struct ovs_dp_megaflow_stats),
			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
			ovs_flow_tbl_masks_cache_size(&dp->table)))
		goto nla_put_failure;

	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
		pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
		if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
			goto nla_put_failure;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
					const struct ovs_header *ovs_header,
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(net, ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}
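/* Note: a datapath can thus be looked up either by the dp_ifindex carried
 * in the ovs_header or by name, where the name is that of the datapath's
 * OVSP_LOCAL internal port; both paths resolve to the same struct
 * datapath.
 */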
static void ovs_dp_reset_user_features(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct datapath *dp;

	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	if (IS_ERR(dp))
		return;

	pr_warn("%s: Dropping previously announced user features\n",
		ovs_dp_name(dp));
	dp->user_features = 0;
}

static int ovs_dp_set_upcall_portids(struct datapath *dp,
				     const struct nlattr *ids)
{
	struct dp_nlsk_pids *old, *dp_nlsk_pids;

	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
		return -EINVAL;

	old = ovsl_dereference(dp->upcall_portids);

	dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
			       GFP_KERNEL);
	if (!dp_nlsk_pids)
		return -ENOMEM;

	dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
	nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));

	rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);

	kfree_rcu(old, rcu);

	return 0;
}

u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
{
	struct dp_nlsk_pids *dp_nlsk_pids;

	dp_nlsk_pids = rcu_dereference(dp->upcall_portids);

	if (dp_nlsk_pids) {
		if (cpu_id < dp_nlsk_pids->n_pids) {
			return dp_nlsk_pids->pids[cpu_id];
		} else if (dp_nlsk_pids->n_pids > 0 &&
			   cpu_id >= dp_nlsk_pids->n_pids) {
			/* If the number of netlink PIDs is mismatched with
			 * the number of CPUs as seen by the kernel, log this
			 * and send the upcall to a socket chosen by wrapping
			 * cpu_id around the available PIDs, in order to not
			 * drop packets.
			 */
			pr_info_ratelimited("cpu_id mismatch with handler threads");
			return dp_nlsk_pids->pids[cpu_id %
						  dp_nlsk_pids->n_pids];
		} else {
			return 0;
		}
	} else {
		return 0;
	}
}

static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
	u32 user_features = 0, old_features = dp->user_features;
	int err;

	if (a[OVS_DP_ATTR_USER_FEATURES]) {
		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);

		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
				      OVS_DP_F_UNALIGNED |
				      OVS_DP_F_TC_RECIRC_SHARING |
				      OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
			return -EOPNOTSUPP;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
			return -EOPNOTSUPP;
#endif
	}

	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
		int err;
		u32 cache_size;

		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
		if (err)
			return err;
	}

	dp->user_features = user_features;

	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
	    a[OVS_DP_ATTR_PER_CPU_PIDS]) {
		/* Upcall Netlink Port IDs have been updated */
		err = ovs_dp_set_upcall_portids(dp,
						a[OVS_DP_ATTR_PER_CPU_PIDS]);
		if (err)
			return err;
	}

	if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
	    !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
		tc_skb_ext_tc_enable();
	else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
		 (old_features & OVS_DP_F_TC_RECIRC_SHARING))
		tc_skb_ext_tc_disable();

	return 0;
}

static int ovs_dp_stats_init(struct datapath *dp)
{
	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
	if (!dp->stats_percpu)
		return -ENOMEM;

	return 0;
}
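/* netdev_alloc_pcpu_stats() above allocates one struct dp_stats_percpu
 * per possible CPU and initializes each embedded u64_stats_sync, so the
 * packet fast path can bump its counters without any further setup.
 */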
static int ovs_dp_vport_init(struct datapath *dp)
{
	int i;

	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
				  sizeof(struct hlist_head),
				  GFP_KERNEL);
	if (!dp->ports)
		return -ENOMEM;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

	return 0;
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_destroy_reply;

	ovs_dp_set_net(dp, sock_net(skb->sk));

	/* Allocate table. */
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
		goto err_destroy_dp;

	err = ovs_dp_stats_init(dp);
	if (err)
		goto err_destroy_table;

	err = ovs_dp_vport_init(dp);
	if (err)
		goto err_destroy_stats;

	err = ovs_meters_init(dp);
	if (err)
		goto err_destroy_ports;

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
	parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);

	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

	err = ovs_dp_change(dp, a);
	if (err)
		goto err_unlock_and_destroy_meters;

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

		goto err_destroy_portids;
	}

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);

	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);
	return 0;

err_destroy_portids:
	kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
	ovs_unlock();
	ovs_meters_exit(dp);
err_destroy_ports:
	kfree(dp->ports);
err_destroy_stats:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(&dp->table);
err_destroy_dp:
	kfree(dp);
err_destroy_reply:
	kfree_skb(reply);
err:
	return err;
}
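/* The error labels above unwind in exact reverse order of initialization
 * (meters, ports, stats, flow table, then the datapath itself), so each
 * failure point releases precisely what has been set up so far.
 */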
/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
	struct flow_table *table = &dp->table;
	int i;

	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
		tc_skb_ext_tc_disable();

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is datapath internal port. We need to make sure that
	 * all ports in datapath are destroyed first before freeing datapath.
	 */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	/* Flush sw_flow in the tables. RCU cb only releases resource
	 * such as dp, ports and tables. That may avoid some issues
	 * such as RCU usage warning.
	 */
	table_instance_flow_flush(table, ovsl_dereference(table->ti),
				  ovsl_dereference(table->ufid_ti));

	/* RCU destroy the ports, meters and flow tables. */
	call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_DEL);
	BUG_ON(err < 0);

	__dp_destroy(dp);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_change(dp, info->attrs);
	if (err)
		goto err_unlock_free;

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_SET);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto err_unlock_free;
	}
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_GET);
	BUG_ON(err < 0);
	ovs_unlock();

	return genlmsg_reply(reply, info);

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_GET) < 0)
			break;
		i++;
	}
	ovs_unlock();

	cb->args[0] = i;

	return skb->len;
}

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
	[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
	[OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
};

static const struct genl_small_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_set,
	},
};

static struct genl_family dp_datapath_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.policy = datapath_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_datapath_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.resv_start_op = OVS_DP_CMD_SET + 1,
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
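
/* Messages in this family start with a struct ovs_header (hence
 * .hdrsize above), followed by OVS_DP_ATTR_* netlink attributes.  As a
 * minimal, purely illustrative userspace sketch (libnl-3, error
 * handling omitted; not part of this module), dumping all datapaths
 * could look roughly like:
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	genl_connect(sk);
 *	int fam = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
 *	struct nl_msg *msg = nlmsg_alloc();
 *	struct ovs_header *hdr =
 *		genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, fam,
 *			    sizeof(*hdr), NLM_F_DUMP, OVS_DP_CMD_GET,
 *			    OVS_DATAPATH_VERSION);
 *	hdr->dp_ifindex = 0;		(ignored for a full dump)
 *	nl_send_auto(sk, msg);
 *	nl_recvmsgs_default(sk);	(one OVS_DP_CMD_GET reply per dp)
 */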
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd, gfp_t gfp)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	struct net *net_vport;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
		goto nla_put_failure;

	rcu_read_lock();
	net_vport = dev_net_rcu(vport->dev);
	if (!net_eq(net, net_vport)) {
		int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure_unlock;
	}
	rcu_read_unlock();

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_stats(vport, skb))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure_unlock:
	rcu_read_unlock();
nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}

/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
					 u32 portid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
					 GFP_KERNEL);
	BUG_ON(retval < 0);

	return skb;
}
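
/* A vport can be addressed either by name (OVS_VPORT_ATTR_NAME, unique
 * within the network namespace) or by datapath plus port number
 * (OVS_VPORT_ATTR_PORT_NO).  When both a name and a non-zero
 * dp_ifindex are supplied they must agree: naming a port while passing
 * the ifindex of a different datapath yields -ENODEV rather than a
 * port from the wrong datapath.
 */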
/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
				  const struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_IFINDEX])
		return ERR_PTR(-EOPNOTSUPP);
	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else {
		return ERR_PTR(-EINVAL);
	}
}

static unsigned int ovs_get_max_headroom(struct datapath *dp)
{
	unsigned int dev_headroom, max_headroom = 0;
	struct net_device *dev;
	struct vport *vport;
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
					 lockdep_ovsl_is_held()) {
			dev = vport->dev;
			dev_headroom = netdev_get_fwd_headroom(dev);
			if (dev_headroom > max_headroom)
				max_headroom = dev_headroom;
		}
	}

	return max_headroom;
}

/* Called with ovs_mutex */
static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
{
	struct vport *vport;
	int i;

	dp->max_headroom = new_headroom;
	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
					 lockdep_ovsl_is_held())
			netdev_set_rx_headroom(vport->dev, new_headroom);
	}
}
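
/* dp->max_headroom caches the largest forwarding headroom needed by
 * any device attached to the datapath, and every vport's RX headroom
 * is kept at that maximum so forwarded skbs rarely need reallocation.
 * Worked example: with ports requesting 0, 64 and 128 bytes, all of
 * them are set to 128; deleting the 128-byte port makes
 * ovs_get_max_headroom() recompute the maximum, dropping everyone to
 * 64.
 */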
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	unsigned int new_headroom;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;

	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);

	if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
		return -EOPNOTSUPP;

	port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
restart:
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
	parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
						    0);

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport)) {
		if (err == -EAGAIN)
			goto restart;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);

	new_headroom = netdev_get_fwd_headroom(vport->dev);

	if (new_headroom > dp->max_headroom)
		ovs_update_headroom(dp, new_headroom);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_SET, GFP_KERNEL);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
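
/* OVSP_LOCAL backs the datapath's own network device, so it can only
 * go away together with the datapath (OVS_DP_CMD_DEL); an explicit
 * OVS_VPORT_CMD_DEL on the local port is rejected with -EINVAL below.
 */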
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	bool update_headroom = false;
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	unsigned int new_headroom;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
	BUG_ON(err < 0);

	/* The vport deletion may trigger a datapath headroom update. */
	dp = vport->dp;
	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
		update_headroom = true;

	netdev_reset_rx_headroom(vport->dev);
	ovs_dp_detach_port(vport);

	if (update_headroom) {
		new_headroom = ovs_get_max_headroom(dp);

		if (new_headroom < dp->max_headroom)
			ovs_update_headroom(dp, new_headroom);
	}
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    sock_net(skb->sk),
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_GET,
						    GFP_ATOMIC) < 0)
				goto out;

			j++;
		}
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}

static void ovs_dp_masks_rebalance(struct work_struct *work)
{
	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
					       masks_rebalance.work);
	struct datapath *dp;

	ovs_lock();

	list_for_each_entry(dp, &ovs_net->dps, list_node)
		ovs_flow_masks_rebalance(&dp->table);

	ovs_unlock();

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
}

static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
	[OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
	[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
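
/* OVS_VPORT_ATTR_UPCALL_PID is left as NLA_UNSPEC on purpose:
 * userspace may supply a single u32 netlink PID or an array of PIDs
 * (the kernel then picks one per packet hash to spread upcall load
 * across sockets), so the policy cannot pin down a fixed length.
 */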
static const struct genl_small_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_set,
	},
};

struct genl_family dp_vport_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.resv_start_op = OVS_VPORT_CMD_SET + 1,
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};

static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};

static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
		genl_unregister_family(dp_genl_families[i]);
}

static int __init dp_register_genl(void)
{
	int err;
	int i;

	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
		err = genl_register_family(dp_genl_families[i]);
		if (err)
			goto error;
	}

	return 0;

error:
	dp_unregister_genl(i);
	return err;
}
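
/* ovs_net_ops below gives every network namespace its own struct
 * ovs_net (the per-netns datapath list plus deferred work).  .init
 * runs when a namespace is created and .exit when it dies; besides
 * destroying the namespace's own datapaths, ovs_exit_net() must also
 * detach internal-device vports that live in the dying namespace but
 * belong to datapaths elsewhere, which is what list_vports_from_net()
 * collects.
 */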
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	int err;

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);

	err = ovs_ct_init(net);
	if (err)
		return err;

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
	return 0;
}

static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int i;

		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
			struct vport *vport;

			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
					continue;

				if (dev_net(vport->dev) == dnet)
					list_add(&vport->detach_list, head);
			}
		}
	}
}

static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();

	ovs_ct_exit(dnet);

	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	down_read(&net_rwsem);
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	up_read(&net_rwsem);

	/* Detach all vports from the given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}

static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};

static const char * const ovs_drop_reasons[] = {
#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
	OVS_DROP_REASONS(S)
#undef S
};

static struct drop_reason_list drop_reason_list_ovs = {
	.reasons = ovs_drop_reasons,
	.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};

static int __init ovs_alloc_percpu_storage(void)
{
	unsigned int cpu;

	ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
	if (!ovs_pcpu_storage)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct ovs_pcpu_storage *ovs_pcpu;

		ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
		local_lock_init(&ovs_pcpu->bh_lock);
	}
	return 0;
}

static void ovs_free_percpu_storage(void)
{
	free_percpu(ovs_pcpu_storage);
}

static int __init dp_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = ovs_alloc_percpu_storage();
	if (err)
		goto error;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = ovs_netdev_init();
	if (err)
		goto error_unreg_notifier;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_netdev;

	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
				     &drop_reason_list_ovs);

	return 0;

error_unreg_netdev:
	ovs_netdev_exit();
error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error:
	ovs_free_percpu_storage();
	return err;
}
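
/* Module tear-down below mirrors dp_init() in reverse.  The
 * rcu_barrier() waits for any pending destroy_dp_rcu() callbacks to
 * finish before ovs_vport_exit() and ovs_flow_exit() destroy the
 * caches those callbacks free objects into.
 */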
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	ovs_netdev_exit();
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	ovs_free_percpu_storage();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);