1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * Generic nexthop implementation
4 *
5 * Copyright (c) 2017-19 Cumulus Networks
6 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
7 */
8
9 #ifndef __LINUX_NEXTHOP_H
10 #define __LINUX_NEXTHOP_H
11
12 #include <linux/netdevice.h>
13 #include <linux/notifier.h>
14 #include <linux/route.h>
15 #include <linux/types.h>
16 #include <net/ip_fib.h>
17 #include <net/ip6_fib.h>
18 #include <net/netlink.h>
19
20 #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK
21
22 struct nexthop;
23
/*
 * Parameter bundle describing one nexthop request.
 *
 * NOTE(review): the nlattr pointers, nlflags and nl_info members suggest this
 * is filled in from a netlink message before a nexthop is created or
 * replaced - confirm against the code that populates it.
 */
struct nh_config {
	u32 nh_id;

	u8 nh_family;
	u8 nh_protocol;
	u8 nh_blackhole;
	u8 nh_fdb;
	u32 nh_flags;

	int nh_ifindex;
	struct net_device *dev;

	/* Gateway address; presumably interpreted according to nh_family. */
	union {
		__be32 ipv4;
		struct in6_addr ipv6;
	} gw;

	/* Group-related settings. The nh_grp_res_* values each have a matching
	 * nh_grp_res_has_* flag; presumably the flag records whether the value
	 * was supplied - TODO confirm.
	 */
	struct nlattr *nh_grp;
	u16 nh_grp_type;
	u16 nh_grp_res_num_buckets;
	unsigned long nh_grp_res_idle_timer;
	unsigned long nh_grp_res_unbalanced_timer;
	bool nh_grp_res_has_num_buckets;
	bool nh_grp_res_has_idle_timer;
	bool nh_grp_res_has_unbalanced_timer;

	bool nh_hw_stats;

	struct nlattr *nh_encap;
	u16 nh_encap_type;

	u32 nlflags;
	struct nl_info nlinfo;
};
58
/*
 * Per-nexthop data for a non-group nexthop; reached through
 * nexthop::nh_info by the inline accessors in this header.
 */
struct nh_info {
	struct hlist_node dev_hash; /* entry on netns devhash */
	struct nexthop *nh_parent;

	u8 family;	/* compared against AF_INET6 by nexthop_fib6_nh() */
	bool reject_nh;	/* value reported by nexthop_is_blackhole() */
	bool fdb_nh;	/* value reported by nexthop_is_fdb() for non-groups */

	/* nexthop_fib_nhc() asserts at build time that nh_common sits at
	 * offset 0 of both struct fib_nh and struct fib6_nh, so fib_nhc
	 * overlays the common part of whichever variant is stored here.
	 */
	union {
		struct fib_nh_common fib_nhc;
		struct fib_nh fib_nh;
		struct fib6_nh fib6_nh;
	};
};
73
/*
 * One bucket; this is the element type of nh_res_table::nh_buckets.
 * NOTE(review): "res" presumably stands for "resilient" (struct nh_group has
 * a matching 'resilient' flag) - confirm.
 */
struct nh_res_bucket {
	struct nh_grp_entry __rcu *nh_entry;	/* RCU-annotated group entry pointer */
	atomic_long_t used_time;
	unsigned long migrated_time;
	bool occupied;
	u8 nh_flags;
};
81
/*
 * Bucket table referenced from nh_group::res_table. The trailing flexible
 * array holds num_nh_buckets buckets.
 */
struct nh_res_table {
	struct net *net;
	u32 nhg_id;
	struct delayed_work upkeep_dw;

	/* List of NHGEs that have too few buckets ("uw" for underweight).
	 * Reclaimed buckets will be given to entries in this list.
	 * Entries are linked through nh_grp_entry::res.uw_nh_entry.
	 */
	struct list_head uw_nh_entries;
	unsigned long unbalanced_since;

	u32 idle_timer;
	u32 unbalanced_timer;

	u16 num_nh_buckets;	/* element count of nh_buckets[] */
	struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
};
99
/*
 * Packet counter paired with its u64_stats sync object; nh_grp_entry::stats
 * points at a per-CPU instance of this structure.
 */
struct nh_grp_entry_stats {
	u64_stats_t packets;
	struct u64_stats_sync syncp;
};
104
/*
 * One member of a nexthop group; element type of nh_group::nh_entries.
 */
struct nh_grp_entry {
	struct nexthop *nh;	/* the member nexthop itself */
	struct nh_grp_entry_stats __percpu *stats;
	u16 weight;	/* passed to fib_add_nexthop() by nexthop_mpath_fill_node() */

	/* Per-group-type state. NOTE(review): presumably 'hthr' is used when
	 * nh_group::hash_threshold is set and 'res' when nh_group::resilient
	 * is set - confirm.
	 */
	union {
		struct {
			atomic_t upper_bound;
		} hthr;
		struct {
			/* Member on uw_nh_entries. */
			struct list_head uw_nh_entry;

			u16 count_buckets;
			u16 wants_buckets;
		} res;
	};

	struct list_head nh_list;
	struct nexthop *nh_parent; /* nexthop of group with this entry */
	u64 packets_hw;
};
127
/*
 * Nexthop group; reached through nexthop::nh_grp when nexthop::is_group is
 * set. The trailing flexible array holds num_nh entries.
 */
struct nh_group {
	struct nh_group *spare; /* spare group for removals */
	u16 num_nh;		/* element count of nh_entries[] */
	bool is_multipath;	/* read by nexthop_is_multipath() and nexthop_num_path() */
	bool hash_threshold;
	bool resilient;
	bool fdb_nh;		/* value reported by nexthop_is_fdb() for groups */
	bool has_v4;		/* value reported by nexthop_has_v4() */
	bool hw_stats;

	struct nh_res_table __rcu *res_table;
	struct nh_grp_entry nh_entries[] __counted_by(num_nh);
};
141
/*
 * A nexthop object. It is either a group or a single nexthop; is_group
 * tells the accessors in this header which member of the trailing union
 * to dereference (nh_grp for groups, nh_info otherwise).
 */
struct nexthop {
	struct rb_node rb_node; /* entry on netns rbtree */
	struct list_head fi_list; /* v4 entries using nh */
	struct list_head f6i_list; /* v6 entries using nh */
	struct list_head fdb_list; /* fdb entries using this nh */
	struct list_head grp_list; /* nh group entries using this nh */
	struct net *net;

	u32 id;

	u8 protocol; /* app managing this nh */
	u8 nh_flags;
	bool is_group;
	bool dead;
	spinlock_t lock; /* protect dead and f6i_list */

	refcount_t refcnt;	/* taken by nexthop_get(), dropped by nexthop_put() */
	struct rcu_head rcu;	/* handed to call_rcu_hurry() by nexthop_put() */

	/* Both members are RCU-annotated pointers; pick by is_group. */
	union {
		struct nh_info __rcu *nh_info;
		struct nh_group __rcu *nh_grp;
	};
};
166
/*
 * Nexthop event identifiers.
 * NOTE(review): presumably these are the event codes delivered to notifier
 * blocks registered with register_nexthop_notifier() - confirm.
 */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
	NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
174
/*
 * Type tag stored in nh_notifier_info::type.
 * NOTE(review): each value presumably selects the like-named pointer in the
 * nh_notifier_info union - confirm.
 */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
	NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
182
/*
 * Notifier description of one non-group nexthop. Also embedded in the
 * group-entry, bucket and resilient-table notifier structures.
 */
struct nh_notifier_single_info {
	struct net_device *dev;
	u8 gw_family;
	/* Gateway address; presumably interpreted according to gw_family. */
	union {
		__be32 ipv4;
		struct in6_addr ipv6;
	};
	u32 id;
	/* One-bit flags packed into a single u8. */
	u8 is_reject:1,
	   is_fdb:1,
	   has_encap:1;
};
195
/*
 * Notifier description of one group member: its weight plus the embedded
 * single-nexthop description. Element type of
 * nh_notifier_grp_info::nh_entries.
 */
struct nh_notifier_grp_entry_info {
	u16 weight;
	struct nh_notifier_single_info nh;
};
200
/*
 * Notifier description of a nexthop group. The trailing flexible array
 * holds num_nh entries.
 */
struct nh_notifier_grp_info {
	u16 num_nh;	/* element count of nh_entries[] */
	bool is_fdb;
	bool hw_stats;
	struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};
207
/*
 * Notifier description of a single bucket, carrying both an "old" and a
 * "new" nexthop description.
 * NOTE(review): presumably used with NEXTHOP_EVENT_BUCKET_REPLACE - confirm.
 */
struct nh_notifier_res_bucket_info {
	u16 bucket_index;
	unsigned int idle_timer_ms;
	bool force;
	struct nh_notifier_single_info old_nh;
	struct nh_notifier_single_info new_nh;
};
215
/*
 * Notifier description of a whole bucket table. The trailing flexible array
 * holds num_nh_buckets single-nexthop descriptions.
 */
struct nh_notifier_res_table_info {
	u16 num_nh_buckets;	/* element count of nhs[] */
	bool hw_stats;
	struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
};
221
/*
 * One id/packet-count pair; element type of
 * nh_notifier_grp_hw_stats_info::stats.
 */
struct nh_notifier_grp_hw_stats_entry_info {
	u32 id;
	u64 packets;
};
226
/*
 * Hardware statistics report for a group. The trailing flexible array holds
 * num_nh entries. This is the type taken by nh_grp_hw_stats_report_delta().
 */
struct nh_notifier_grp_hw_stats_info {
	u16 num_nh;	/* element count of stats[] */
	bool hw_stats_used;
	struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
};
232
/*
 * Top-level notifier payload: common fields plus a tagged union of pointers
 * to the type-specific descriptions.
 */
struct nh_notifier_info {
	struct net *net;
	struct netlink_ext_ack *extack;
	u32 id;
	enum nh_notifier_info_type type;
	/* NOTE(review): presumably 'type' says which pointer is valid - confirm. */
	union {
		struct nh_notifier_single_info *nh;
		struct nh_notifier_grp_info *nh_grp;
		struct nh_notifier_res_table_info *nh_res_table;
		struct nh_notifier_res_bucket_info *nh_res_bucket;
		struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
	};
};
246
247 int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
248 struct netlink_ext_ack *extack);
249 int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
250 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
251 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
252 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
253 bool offload, bool trap);
254 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
255 unsigned long *activity);
256 void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
257 unsigned int nh_idx,
258 u64 delta_packets);
259
260 /* caller is holding rcu or rtnl; no reference taken to nexthop */
261 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
262 void nexthop_free_rcu(struct rcu_head *head);
263
nexthop_get(struct nexthop * nh)264 static inline bool nexthop_get(struct nexthop *nh)
265 {
266 return refcount_inc_not_zero(&nh->refcnt);
267 }
268
nexthop_put(struct nexthop * nh)269 static inline void nexthop_put(struct nexthop *nh)
270 {
271 if (refcount_dec_and_test(&nh->refcnt))
272 call_rcu_hurry(&nh->rcu, nexthop_free_rcu);
273 }
274
nexthop_cmp(const struct nexthop * nh1,const struct nexthop * nh2)275 static inline bool nexthop_cmp(const struct nexthop *nh1,
276 const struct nexthop *nh2)
277 {
278 return nh1 == nh2;
279 }
280
nexthop_is_fdb(const struct nexthop * nh)281 static inline bool nexthop_is_fdb(const struct nexthop *nh)
282 {
283 if (nh->is_group) {
284 const struct nh_group *nh_grp;
285
286 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
287 return nh_grp->fdb_nh;
288 } else {
289 const struct nh_info *nhi;
290
291 nhi = rcu_dereference_rtnl(nh->nh_info);
292 return nhi->fdb_nh;
293 }
294 }
295
nexthop_has_v4(const struct nexthop * nh)296 static inline bool nexthop_has_v4(const struct nexthop *nh)
297 {
298 if (nh->is_group) {
299 struct nh_group *nh_grp;
300
301 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
302 return nh_grp->has_v4;
303 }
304 return false;
305 }
306
nexthop_is_multipath(const struct nexthop * nh)307 static inline bool nexthop_is_multipath(const struct nexthop *nh)
308 {
309 if (nh->is_group) {
310 struct nh_group *nh_grp;
311
312 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
313 return nh_grp->is_multipath;
314 }
315 return false;
316 }
317
318 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
319
nexthop_num_path(const struct nexthop * nh)320 static inline unsigned int nexthop_num_path(const struct nexthop *nh)
321 {
322 unsigned int rc = 1;
323
324 if (nh->is_group) {
325 struct nh_group *nh_grp;
326
327 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
328 if (nh_grp->is_multipath)
329 rc = nh_grp->num_nh;
330 }
331
332 return rc;
333 }
334
335 static inline
nexthop_mpath_select(const struct nh_group * nhg,int nhsel)336 struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
337 {
338 /* for_nexthops macros in fib_semantics.c grabs a pointer to
339 * the nexthop before checking nhsel
340 */
341 if (nhsel >= nhg->num_nh)
342 return NULL;
343
344 return nhg->nh_entries[nhsel].nh;
345 }
346
347 static inline
nexthop_mpath_fill_node(struct sk_buff * skb,struct nexthop * nh,u8 rt_family)348 int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
349 u8 rt_family)
350 {
351 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
352 int i;
353
354 for (i = 0; i < nhg->num_nh; i++) {
355 struct nexthop *nhe = nhg->nh_entries[i].nh;
356 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
357 struct fib_nh_common *nhc = &nhi->fib_nhc;
358 int weight = nhg->nh_entries[i].weight;
359
360 if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
361 return -EMSGSIZE;
362 }
363
364 return 0;
365 }
366
367 /* called with rcu lock */
nexthop_is_blackhole(const struct nexthop * nh)368 static inline bool nexthop_is_blackhole(const struct nexthop *nh)
369 {
370 const struct nh_info *nhi;
371
372 if (nh->is_group) {
373 struct nh_group *nh_grp;
374
375 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
376 if (nh_grp->num_nh > 1)
377 return false;
378
379 nh = nh_grp->nh_entries[0].nh;
380 }
381
382 nhi = rcu_dereference_rtnl(nh->nh_info);
383 return nhi->reject_nh;
384 }
385
nexthop_path_fib_result(struct fib_result * res,int hash)386 static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
387 {
388 struct nh_info *nhi;
389 struct nexthop *nh;
390
391 nh = nexthop_select_path(res->fi->nh, hash);
392 nhi = rcu_dereference(nh->nh_info);
393 res->nhc = &nhi->fib_nhc;
394 }
395
396 /* called with rcu read lock or rtnl held */
397 static inline
nexthop_fib_nhc(struct nexthop * nh,int nhsel)398 struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
399 {
400 struct nh_info *nhi;
401
402 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
403 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
404
405 if (nh->is_group) {
406 struct nh_group *nh_grp;
407
408 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
409 if (nh_grp->is_multipath) {
410 nh = nexthop_mpath_select(nh_grp, nhsel);
411 if (!nh)
412 return NULL;
413 }
414 }
415
416 nhi = rcu_dereference_rtnl(nh->nh_info);
417 return &nhi->fib_nhc;
418 }
419
420 /* called from fib_table_lookup with rcu_lock */
421 static inline
nexthop_get_nhc_lookup(const struct nexthop * nh,int fib_flags,const struct flowi4 * flp,int * nhsel)422 struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
423 int fib_flags,
424 const struct flowi4 *flp,
425 int *nhsel)
426 {
427 struct nh_info *nhi;
428
429 if (nh->is_group) {
430 struct nh_group *nhg = rcu_dereference(nh->nh_grp);
431 int i;
432
433 for (i = 0; i < nhg->num_nh; i++) {
434 struct nexthop *nhe = nhg->nh_entries[i].nh;
435
436 nhi = rcu_dereference(nhe->nh_info);
437 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
438 *nhsel = i;
439 return &nhi->fib_nhc;
440 }
441 }
442 } else {
443 nhi = rcu_dereference(nh->nh_info);
444 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
445 *nhsel = 0;
446 return &nhi->fib_nhc;
447 }
448 }
449
450 return NULL;
451 }
452
nexthop_uses_dev(const struct nexthop * nh,const struct net_device * dev)453 static inline bool nexthop_uses_dev(const struct nexthop *nh,
454 const struct net_device *dev)
455 {
456 struct nh_info *nhi;
457
458 if (nh->is_group) {
459 struct nh_group *nhg = rcu_dereference(nh->nh_grp);
460 int i;
461
462 for (i = 0; i < nhg->num_nh; i++) {
463 struct nexthop *nhe = nhg->nh_entries[i].nh;
464
465 nhi = rcu_dereference(nhe->nh_info);
466 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
467 return true;
468 }
469 } else {
470 nhi = rcu_dereference(nh->nh_info);
471 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
472 return true;
473 }
474
475 return false;
476 }
477
fib_info_num_path(const struct fib_info * fi)478 static inline unsigned int fib_info_num_path(const struct fib_info *fi)
479 {
480 if (unlikely(fi->nh))
481 return nexthop_num_path(fi->nh);
482
483 return fi->fib_nhs;
484 }
485
486 int fib_check_nexthop(struct nexthop *nh, u8 scope,
487 struct netlink_ext_ack *extack);
488
fib_info_nhc(struct fib_info * fi,int nhsel)489 static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
490 {
491 if (unlikely(fi->nh))
492 return nexthop_fib_nhc(fi->nh, nhsel);
493
494 return &fi->fib_nh[nhsel].nh_common;
495 }
496
497 /* only used when fib_nh is built into fib_info */
fib_info_nh(struct fib_info * fi,int nhsel)498 static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
499 {
500 WARN_ON(fi->nh);
501
502 return &fi->fib_nh[nhsel];
503 }
504
505 /*
506 * IPv6 variants
507 */
508 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
509 struct netlink_ext_ack *extack);
510
511 /* Caller should either hold rcu_read_lock(), or RTNL. */
nexthop_fib6_nh(struct nexthop * nh)512 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
513 {
514 struct nh_info *nhi;
515
516 if (nh->is_group) {
517 struct nh_group *nh_grp;
518
519 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
520 nh = nexthop_mpath_select(nh_grp, 0);
521 if (!nh)
522 return NULL;
523 }
524
525 nhi = rcu_dereference_rtnl(nh->nh_info);
526 if (nhi->family == AF_INET6)
527 return &nhi->fib6_nh;
528
529 return NULL;
530 }
531
fib6_info_nh_dev(struct fib6_info * f6i)532 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
533 {
534 struct fib6_nh *fib6_nh;
535
536 fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
537 return fib6_nh->fib_nh_dev;
538 }
539
nexthop_path_fib6_result(struct fib6_result * res,int hash)540 static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
541 {
542 struct nexthop *nh = res->f6i->nh;
543 struct nh_info *nhi;
544
545 nh = nexthop_select_path(nh, hash);
546
547 nhi = rcu_dereference_rtnl(nh->nh_info);
548 if (nhi->reject_nh) {
549 res->fib6_type = RTN_BLACKHOLE;
550 res->fib6_flags |= RTF_REJECT;
551 res->nh = nexthop_fib6_nh(nh);
552 } else {
553 res->nh = &nhi->fib6_nh;
554 }
555 }
556
557 int nexthop_for_each_fib6_nh(struct nexthop *nh,
558 int (*cb)(struct fib6_nh *nh, void *arg),
559 void *arg);
560
nexthop_get_family(struct nexthop * nh)561 static inline int nexthop_get_family(struct nexthop *nh)
562 {
563 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
564
565 return nhi->family;
566 }
567
568 static inline
nexthop_fdb_nhc(struct nexthop * nh)569 struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
570 {
571 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
572
573 return &nhi->fib_nhc;
574 }
575
nexthop_path_fdb_result(struct nexthop * nh,int hash)576 static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
577 int hash)
578 {
579 struct nh_info *nhi;
580 struct nexthop *nhp;
581
582 nhp = nexthop_select_path(nh, hash);
583 if (unlikely(!nhp))
584 return NULL;
585 nhi = rcu_dereference(nhp->nh_info);
586 return &nhi->fib_nhc;
587 }
588 #endif
589