// SPDX-License-Identifier: GPL-2.0-only

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/busy_poll.h>
#include <net/net_namespace.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
#include <net/page_pool/memory_provider.h>

#include "dev.h"
#include "devmem.h"
#include "netdev-genl-gen.h"

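/* Per-dump state kept in cb->ctx. The indices record how far a dump got,
 * so a dump that fills up one skb can resume from the same position.
 */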
struct netdev_nl_dump_ctx {
	unsigned long	ifindex;
	unsigned int	rxq_idx;
	unsigned int	txq_idx;
	unsigned int	napi_id;
};

static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
{
	NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);

	return (struct netdev_nl_dump_ctx *)cb->ctx;
}

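/* Fill one DEV message: ifindex plus the XDP/XSK feature masks. Requires
 * the per-netdev instance lock, which is all netdev_assert_locked() checks.
 */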
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
		   const struct genl_info *info)
{
	u64 xsk_features = 0;
	u64 xdp_rx_meta = 0;
	void *hdr;

	netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;

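/* Expands once per metadata kfunc listed in XDP_METADATA_KFUNC_xxx,
 * e.g. for the RX timestamp kfunc:
 *
 *	if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo_rx_timestamp)
 *		xdp_rx_meta |= NETDEV_XDP_RX_METADATA_TIMESTAMP;
 */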
#define XDP_METADATA_KFUNC(_, flag, __, xmo) \
	if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
		xdp_rx_meta |= flag;
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC

	if (netdev->xsk_tx_metadata_ops) {
		if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
			xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
		if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
			xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
		if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time)
			xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO;
	}

	if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
			      netdev->xdp_features, NETDEV_A_DEV_PAD) ||
	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
			      xdp_rx_meta, NETDEV_A_DEV_PAD) ||
	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
			      xsk_features, NETDEV_A_DEV_PAD))
		goto err_cancel_msg;

	if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
		if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
				netdev->xdp_zc_max_segs))
			goto err_cancel_msg;
	}

	genlmsg_end(rsp, hdr);

	return 0;

err_cancel_msg:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}

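/* Multicast a DEV notification. Skips allocating the skb entirely when
 * nobody listens on the MGMT group.
 */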
static void
netdev_genl_dev_notify(struct net_device *netdev, int cmd)
{
	struct genl_info info;
	struct sk_buff *ntf;

	if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
				NETDEV_NLGRP_MGMT))
		return;

	genl_info_init_ntf(&info, &netdev_nl_family, cmd);

	ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!ntf)
		return;

	if (netdev_nl_dev_fill(netdev, ntf, &info)) {
		nlmsg_free(ntf);
		return;
	}

	genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf,
				0, NETDEV_NLGRP_MGMT, GFP_KERNEL);
}

int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net_device *netdev;
	struct sk_buff *rsp;
	u32 ifindex;
	int err;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX))
		return -EINVAL;

	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
	if (!netdev) {
		err = -ENODEV;
		goto err_free_msg;
	}

	err = netdev_nl_dev_fill(netdev, rsp, info);
	netdev_unlock(netdev);

	if (err)
		goto err_free_msg;

	return genlmsg_reply(rsp, info);

err_free_msg:
	nlmsg_free(rsp);
	return err;
}

int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
	struct net *net = sock_net(skb->sk);
	int err;

	for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
		err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
		if (err < 0)
			return err;
	}

	return 0;
}

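/* Fill one NAPI message. Returns 0 without emitting anything when the
 * device is down, so callers treat an empty reply as -ENOENT.
 */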
static int
netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
			const struct genl_info *info)
{
	unsigned long irq_suspend_timeout;
	unsigned long gro_flush_timeout;
	u32 napi_defer_hard_irqs;
	void *hdr;
	pid_t pid;

	if (!napi->dev->up)
		return 0;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
		goto nla_put_failure;

	if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
		goto nla_put_failure;

	if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
		goto nla_put_failure;

	if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED,
			 napi_get_threaded(napi)))
		goto nla_put_failure;

	if (napi->thread) {
		pid = task_pid_nr(napi->thread);
		if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
			goto nla_put_failure;
	}

	napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi);
	if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS,
			napi_defer_hard_irqs))
		goto nla_put_failure;

	irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
	if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
			 irq_suspend_timeout))
		goto nla_put_failure;

	gro_flush_timeout = napi_get_gro_flush_timeout(napi);
	if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
			 gro_flush_timeout))
		goto nla_put_failure;

	genlmsg_end(rsp, hdr);

	return 0;

nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}

int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct napi_struct *napi;
	struct sk_buff *rsp;
	u32 napi_id;
	int err;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
		return -EINVAL;

	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
	if (napi) {
		err = netdev_nl_napi_fill_one(rsp, napi, info);
		netdev_unlock(napi->dev);
	} else {
		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
		err = -ENOENT;
	}

	if (err) {
		goto err_free_msg;
	} else if (!rsp->len) {
		err = -ENOENT;
		goto err_free_msg;
	}

	return genlmsg_reply(rsp, info);

err_free_msg:
	nlmsg_free(rsp);
	return err;
}

static int
netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
			const struct genl_info *info,
			struct netdev_nl_dump_ctx *ctx)
{
	struct napi_struct *napi;
	unsigned int prev_id;
	int err = 0;

	if (!netdev->up)
		return err;

	prev_id = UINT_MAX;
	list_for_each_entry(napi, &netdev->napi_list, dev_list) {
		if (!napi_id_valid(napi->napi_id))
			continue;

		/* Dump continuation below depends on the list being sorted
		 * by decreasing napi_id; IDs >= ctx->napi_id were already
		 * dumped on a previous pass.
		 */
		WARN_ON_ONCE(napi->napi_id >= prev_id);
		prev_id = napi->napi_id;

		if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
			continue;

		err = netdev_nl_napi_fill_one(rsp, napi, info);
		if (err)
			return err;
		ctx->napi_id = napi->napi_id;
	}
	return err;
}

int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
	const struct genl_info *info = genl_info_dump(cb);
	struct net *net = sock_net(skb->sk);
	struct net_device *netdev;
	u32 ifindex = 0;
	int err = 0;

	if (info->attrs[NETDEV_A_NAPI_IFINDEX])
		ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);

	if (ifindex) {
		netdev = netdev_get_by_index_lock(net, ifindex);
		if (netdev) {
			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
			netdev_unlock(netdev);
		} else {
			err = -ENODEV;
		}
	} else {
		for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
			if (err < 0)
				break;
			ctx->napi_id = 0;
		}
	}

	return err;
}

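/* Apply per-NAPI settings from the request. Threaded mode is applied
 * first since napi_set_threaded() is the only setter here that can fail.
 */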
static int
netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
{
	u64 irq_suspend_timeout = 0;
	u64 gro_flush_timeout = 0;
	u8 threaded = 0;
	u32 defer = 0;

	if (info->attrs[NETDEV_A_NAPI_THREADED]) {
		int ret;

		threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]);
		ret = napi_set_threaded(napi, threaded);
		if (ret)
			return ret;
	}

	if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
		defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
		napi_set_defer_hard_irqs(napi, defer);
	}

	if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
		irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
		napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
	}

	if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
		gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
		napi_set_gro_flush_timeout(napi, gro_flush_timeout);
	}

	return 0;
}

int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct napi_struct *napi;
	unsigned int napi_id;
	int err;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
		return -EINVAL;

	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);

	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
	if (napi) {
		err = netdev_nl_napi_set_config(napi, info);
		netdev_unlock(napi->dev);
	} else {
		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
		err = -ENOENT;
	}

	return err;
}

static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
{
	if (napi && napi_id_valid(napi->napi_id))
		return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id);
	return 0;
}

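/* Emit a NETDEV_A_QUEUE_LEASE nest describing the peer queue backing
 * this one. Nothing is emitted when the queue is not leased, i.e. when
 * the lookup resolves back to the same device.
 */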
static int
netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev,
			   u32 q_idx, u32 q_type)
{
	struct net_device *orig_netdev = netdev;
	struct nlattr *nest_lease, *nest_queue;
	struct netdev_rx_queue *rxq;
	struct net *net, *peer_net;

	rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, NETIF_PHYS_TO_VIRT);
	if (!rxq || orig_netdev == netdev)
		return 0;

	nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE);
	if (!nest_lease)
		goto nla_put_failure;

	nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE);
	if (!nest_queue)
		goto nla_put_failure;
	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx))
		goto nla_put_failure;
	if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type))
		goto nla_put_failure;
	nla_nest_end(rsp, nest_queue);

	if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX,
			READ_ONCE(netdev->ifindex)))
		goto nla_put_failure;

	rcu_read_lock();
	peer_net = dev_net_rcu(netdev);
	net = dev_net_rcu(orig_netdev);
	if (!net_eq(net, peer_net)) {
		s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC);

		if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id))
			goto nla_put_failure_unlock;
	}
	rcu_read_unlock();
	nla_nest_end(rsp, nest_lease);
	return 0;

nla_put_failure_unlock:
	rcu_read_unlock();
nla_put_failure:
	return -ENOMEM;
}

static int
__netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct netdev_rx_queue *rxq)
{
	struct pp_memory_provider_params *params = &rxq->mp_params;

	if (params->mp_ops &&
	    params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
		return -EMSGSIZE;

#ifdef CONFIG_XDP_SOCKETS
	if (rxq->pool)
		if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
			return -EMSGSIZE;
#endif
	return 0;
}

static int
netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct net_device *netdev,
			struct netdev_rx_queue *rxq)
{
	struct netdev_rx_queue *hw_rxq;
	int ret;

	hw_rxq = rxq->lease;
	if (!hw_rxq || !netif_is_queue_leasee(netdev))
		return __netdev_nl_queue_fill_mp(rsp, rxq);

	netdev_lock(hw_rxq->dev);
	ret = __netdev_nl_queue_fill_mp(rsp, hw_rxq);
	netdev_unlock(hw_rxq->dev);
	return ret;
}

static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
			 u32 q_idx, u32 q_type, const struct genl_info *info)
{
	struct netdev_rx_queue *rxq;
	struct netdev_queue *txq;
	void *hdr;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
	    nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
	    nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	switch (q_type) {
	case NETDEV_QUEUE_TYPE_RX:
		rxq = __netif_get_rx_queue(netdev, q_idx);
		if (nla_put_napi_id(rsp, rxq->napi))
			goto nla_put_failure;
		if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type))
			goto nla_put_failure;
		if (netdev_nl_queue_fill_mp(rsp, netdev, rxq))
			goto nla_put_failure;
		break;
	case NETDEV_QUEUE_TYPE_TX:
		txq = netdev_get_tx_queue(netdev, q_idx);
		if (nla_put_napi_id(rsp, txq->napi))
			goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
		if (txq->pool)
			if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
				goto nla_put_failure;
#endif
		break;
	}

	genlmsg_end(rsp, hdr);

	return 0;

nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}

static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
				    u32 q_type)
{
	switch (q_type) {
	case NETDEV_QUEUE_TYPE_RX:
		if (q_id >= netdev->real_num_rx_queues)
			return -EINVAL;
		return 0;
	case NETDEV_QUEUE_TYPE_TX:
		if (q_id >= netdev->real_num_tx_queues)
			return -EINVAL;
	}
	return 0;
}

static int
netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
		     u32 q_type, const struct genl_info *info)
{
	int err;

	if (!netdev->up)
		return -ENOENT;

	err = netdev_nl_queue_validate(netdev, q_idx, q_type);
	if (err)
		return err;

	return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
}

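/* Handle a single queue-get request; e.g., assuming the in-tree YNL CLI:
 *
 *	./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
 *		--do queue-get --json '{"ifindex": 2, "id": 0, "type": "rx"}'
 */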
int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
{
	u32 q_id, q_type, ifindex;
	struct net_device *netdev;
	struct sk_buff *rsp;
	int err;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
		return -EINVAL;

	q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
	q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info),
						     ifindex);
	if (netdev) {
		err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
		netdev_unlock_ops_compat(netdev);
	} else {
		err = -ENODEV;
	}

	if (err)
		goto err_free_msg;

	return genlmsg_reply(rsp, info);

err_free_msg:
	nlmsg_free(rsp);
	return err;
}

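/* Dump all RX then all TX queues of one device. ctx->rxq_idx/txq_idx are
 * left pointing at the first queue that did not fit, so the next dump
 * pass picks up exactly where this one stopped.
 */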
static int
netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
			 const struct genl_info *info,
			 struct netdev_nl_dump_ctx *ctx)
{
	int err = 0;

	if (!netdev->up)
		return err;

	for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx,
					       NETDEV_QUEUE_TYPE_RX, info);
		if (err)
			return err;
	}
	for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) {
		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx,
					       NETDEV_QUEUE_TYPE_TX, info);
		if (err)
			return err;
	}

	return err;
}

int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
	const struct genl_info *info = genl_info_dump(cb);
	struct net *net = sock_net(skb->sk);
	struct net_device *netdev;
	u32 ifindex = 0;
	int err = 0;

	if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
		ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);

	if (ifindex) {
		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
		if (netdev) {
			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
			netdev_unlock_ops_compat(netdev);
		} else {
			err = -ENODEV;
		}
	} else {
		for_each_netdev_lock_ops_compat_scoped(net, netdev,
						       ctx->ifindex) {
			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
			if (err < 0)
				break;
			ctx->rxq_idx = 0;
			ctx->txq_idx = 0;
		}
	}

	return err;
}

#define NETDEV_STAT_NOT_SET		(~0ULL)

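/* Both queue stats structs are laid out as arrays of u64 counters, so sum
 * them 8 bytes at a time. ~0 (NETDEV_STAT_NOT_SET) means "not reported";
 * a counter unset on either side is left alone.
 */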
static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size)
{
	const u64 *add = _add;
	u64 *sum = _sum;

	while (size) {
		if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET)
			*sum += *add;
		sum++;
		add++;
		size -= 8;
	}
}

static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value)
{
	if (value == NETDEV_STAT_NOT_SET)
		return 0;
	return nla_put_uint(rsp, attr_id, value);
}

static int
netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
{
	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
		return -EMSGSIZE;
	return 0;
}

static int
netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
{
	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
		return -EMSGSIZE;
	return 0;
}

static int
netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp,
		      u32 q_type, int i, const struct genl_info *info)
{
	const struct netdev_stat_ops *ops = netdev->stat_ops;
	struct netdev_queue_stats_rx rx;
	struct netdev_queue_stats_tx tx;
	void *hdr;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;
	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) ||
	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) ||
	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i))
		goto nla_put_failure;

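	/* Pre-fill with NETDEV_STAT_NOT_SET (all 0xff). If the driver did
	 * not write back a single counter, cancel the message instead of
	 * reporting an all-unset queue.
	 */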
	switch (q_type) {
	case NETDEV_QUEUE_TYPE_RX:
		memset(&rx, 0xff, sizeof(rx));
		ops->get_queue_stats_rx(netdev, i, &rx);
		if (!memchr_inv(&rx, 0xff, sizeof(rx)))
			goto nla_cancel;
		if (netdev_nl_stats_write_rx(rsp, &rx))
			goto nla_put_failure;
		break;
	case NETDEV_QUEUE_TYPE_TX:
		memset(&tx, 0xff, sizeof(tx));
		ops->get_queue_stats_tx(netdev, i, &tx);
		if (!memchr_inv(&tx, 0xff, sizeof(tx)))
			goto nla_cancel;
		if (netdev_nl_stats_write_tx(rsp, &tx))
			goto nla_put_failure;
		break;
	}

	genlmsg_end(rsp, hdr);
	return 0;

nla_cancel:
	genlmsg_cancel(rsp, hdr);
	return 0;
nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}

static int
netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
			 const struct genl_info *info,
			 struct netdev_nl_dump_ctx *ctx)
{
	const struct netdev_stat_ops *ops = netdev->stat_ops;
	int i, err;

	if (!(netdev->flags & IFF_UP))
		return 0;

	i = ctx->rxq_idx;
	while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) {
		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX,
					    i, info);
		if (err)
			return err;
		ctx->rxq_idx = ++i;
	}
	i = ctx->txq_idx;
	while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX,
					    i, info);
		if (err)
			return err;
		ctx->txq_idx = ++i;
	}

	ctx->rxq_idx = 0;
	ctx->txq_idx = 0;
	return 0;
}

/**
 * netdev_stat_queue_sum() - add up queue stats from range of queues
 * @netdev:	net_device
 * @rx_start:	index of the first Rx queue to query
 * @rx_end:	index after the last Rx queue (first *not* to query)
 * @rx_sum:	output Rx stats, should be already initialized
 * @tx_start:	index of the first Tx queue to query
 * @tx_end:	index after the last Tx queue (first *not* to query)
 * @tx_sum:	output Tx stats, should be already initialized
 *
 * Add stats from [start, end) range of queue IDs to *x_sum structs.
 * The sum structs must be already initialized. Usually this
 * helper is invoked from the .get_base_stats callbacks of drivers
 * to account for stats of disabled queues. In that case the ranges
 * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues).
 */
void netdev_stat_queue_sum(struct net_device *netdev,
			   int rx_start, int rx_end,
			   struct netdev_queue_stats_rx *rx_sum,
			   int tx_start, int tx_end,
			   struct netdev_queue_stats_tx *tx_sum)
{
	const struct netdev_stat_ops *ops;
	struct netdev_queue_stats_rx rx;
	struct netdev_queue_stats_tx tx;
	int i;

	ops = netdev->stat_ops;

	for (i = rx_start; i < rx_end; i++) {
		memset(&rx, 0xff, sizeof(rx));
		if (ops->get_queue_stats_rx)
			ops->get_queue_stats_rx(netdev, i, &rx);
		netdev_nl_stats_add(rx_sum, &rx, sizeof(rx));
	}
	for (i = tx_start; i < tx_end; i++) {
		memset(&tx, 0xff, sizeof(tx));
		if (ops->get_queue_stats_tx)
			ops->get_queue_stats_tx(netdev, i, &tx);
		netdev_nl_stats_add(tx_sum, &tx, sizeof(tx));
	}
}
EXPORT_SYMBOL(netdev_stat_queue_sum);

static int
netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
			  const struct genl_info *info)
{
	struct netdev_queue_stats_rx rx_sum;
	struct netdev_queue_stats_tx tx_sum;
	void *hdr;

	/* Netdev can't guarantee any complete counters */
	if (!netdev->stat_ops->get_base_stats)
		return 0;

	memset(&rx_sum, 0xff, sizeof(rx_sum));
	memset(&tx_sum, 0xff, sizeof(tx_sum));

	netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum);

	/* The op was there, but nothing reported, don't bother */
	if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
	    !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum)))
		return 0;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;
	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum,
			      0, netdev->real_num_tx_queues, &tx_sum);

	if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
	    netdev_nl_stats_write_tx(rsp, &tx_sum))
		goto nla_put_failure;

	genlmsg_end(rsp, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}

static int
netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
			      struct sk_buff *skb, const struct genl_info *info,
			      struct netdev_nl_dump_ctx *ctx)
{
	if (!netdev->stat_ops)
		return 0;

	switch (scope) {
	case 0:
		return netdev_nl_stats_by_netdev(netdev, skb, info);
	case NETDEV_QSTATS_SCOPE_QUEUE:
		return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
	}

	return -EINVAL;	/* Should not happen, per netlink policy */
}

int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
	const struct genl_info *info = genl_info_dump(cb);
	struct net *net = sock_net(skb->sk);
	struct net_device *netdev;
	unsigned int ifindex;
	unsigned int scope;
	int err = 0;

	scope = 0;
	if (info->attrs[NETDEV_A_QSTATS_SCOPE])
		scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);

	ifindex = 0;
	if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
		ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);

	if (ifindex) {
		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
		if (!netdev) {
			NL_SET_BAD_ATTR(info->extack,
					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
			return -ENODEV;
		}
		if (netdev->stat_ops) {
			err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
							    info, ctx);
		} else {
			NL_SET_BAD_ATTR(info->extack,
					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
			err = -EOPNOTSUPP;
		}
		netdev_unlock_ops_compat(netdev);
		return err;
	}

	for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) {
		err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
						    info, ctx);
		if (err < 0)
			break;
	}

	return err;
}

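/* Walk the NETDEV_A_DMABUF_QUEUES nests in the request and mark each
 * requested RX queue in @rxq_bitmap. Only RX queues are accepted, and
 * ids must be below @rxq_bitmap_len (real_num_rx_queues).
 */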
static int netdev_nl_read_rxq_bitmap(struct genl_info *info,
				     u32 rxq_bitmap_len,
				     unsigned long *rxq_bitmap)
{
	const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
	struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
	struct nlattr *attr;
	int rem, err = 0;
	u32 rxq_idx;

	nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
			       genlmsg_data(info->genlhdr),
			       genlmsg_len(info->genlhdr), rem) {
		err = nla_parse_nested(tb, maxtype, attr,
				       netdev_queue_id_nl_policy, info->extack);
		if (err < 0)
			return err;

		if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
		    NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE))
			return -EINVAL;

		if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
			return -EINVAL;
		}

		rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
		if (rxq_idx >= rxq_bitmap_len) {
			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]);
			return -EINVAL;
		}

		bitmap_set(rxq_bitmap, rxq_idx, 1);
	}

	return 0;
}

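/* All queues in one binding must map to the same DMA device, since the
 * dmabuf is mapped once for the whole binding. Queues of multi-PF NICs
 * may disagree here, which is rejected as unsupported.
 */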
static struct device *
netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap,
		      struct netlink_ext_ack *extack)
{
	struct device *dma_dev = NULL;
	u32 rxq_idx, prev_rxq_idx;

	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
		struct device *rxq_dma_dev;

		rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx,
						       NETDEV_QUEUE_TYPE_RX);
		if (dma_dev && rxq_dma_dev != dma_dev) {
			NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)",
					   rxq_idx, prev_rxq_idx);
			return ERR_PTR(-EOPNOTSUPP);
		}

		dma_dev = rxq_dma_dev;
		prev_rxq_idx = rxq_idx;
	}

	return dma_dev;
}

int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net_devmem_dmabuf_binding *binding;
	u32 ifindex, dmabuf_fd, rxq_idx;
	struct netdev_nl_sock *priv;
	struct net_device *netdev;
	unsigned long *rxq_bitmap;
	struct device *dma_dev;
	struct sk_buff *rsp;
	int err = 0;
	void *hdr;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
		return -EINVAL;

	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);

	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
	if (IS_ERR(priv))
		return PTR_ERR(priv);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_genlmsg_free;
	}

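	/* Take the socket's binding lock before the netdev instance lock;
	 * netdev_nl_sock_priv_destroy() relies on the same ordering.
	 */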
	mutex_lock(&priv->lock);

	err = 0;
	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
	if (!netdev) {
		err = -ENODEV;
		goto err_unlock_sock;
	}
	if (!netif_device_present(netdev))
		err = -ENODEV;
	else if (!netdev_need_ops_lock(netdev))
		err = -EOPNOTSUPP;
	if (err) {
		NL_SET_BAD_ATTR(info->extack,
				info->attrs[NETDEV_A_DEV_IFINDEX]);
		goto err_unlock;
	}

	rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL);
	if (!rxq_bitmap) {
		err = -ENOMEM;
		goto err_unlock;
	}

	err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues,
					rxq_bitmap);
	if (err)
		goto err_rxq_bitmap;

	dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack);
	if (IS_ERR(dma_dev)) {
		err = PTR_ERR(dma_dev);
		goto err_rxq_bitmap;
	}

	binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_FROM_DEVICE,
					 dmabuf_fd, priv, info->extack);
	if (IS_ERR(binding)) {
		err = PTR_ERR(binding);
		goto err_rxq_bitmap;
	}

	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
		err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
						      info->extack);
		if (err)
			goto err_unbind;
	}

	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
	genlmsg_end(rsp, hdr);

	err = genlmsg_reply(rsp, info);
	if (err)
		goto err_unbind;

	bitmap_free(rxq_bitmap);

	netdev_unlock(netdev);

	mutex_unlock(&priv->lock);

	return 0;

err_unbind:
	net_devmem_unbind_dmabuf(binding);
err_rxq_bitmap:
	bitmap_free(rxq_bitmap);
err_unlock:
	netdev_unlock(netdev);
err_unlock_sock:
	mutex_unlock(&priv->lock);
err_genlmsg_free:
	nlmsg_free(rsp);
	return err;
}

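/* TX bindings are not tied to specific queues, so the dmabuf is mapped
 * for the DMA device of TX queue 0.
 */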
int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net_devmem_dmabuf_binding *binding;
	struct netdev_nl_sock *priv;
	struct net_device *netdev;
	struct device *dma_dev;
	u32 ifindex, dmabuf_fd;
	struct sk_buff *rsp;
	int err = 0;
	void *hdr;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD))
		return -EINVAL;

	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);

	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
	if (IS_ERR(priv))
		return PTR_ERR(priv);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_genlmsg_free;
	}

	mutex_lock(&priv->lock);

	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
	if (!netdev) {
		err = -ENODEV;
		goto err_unlock_sock;
	}

	if (!netif_device_present(netdev)) {
		err = -ENODEV;
		goto err_unlock_netdev;
	}

	if (!netdev->netmem_tx) {
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(info->extack,
			       "Driver does not support netmem TX");
		goto err_unlock_netdev;
	}

	dma_dev = netdev_queue_get_dma_dev(netdev, 0, NETDEV_QUEUE_TYPE_TX);
	binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE,
					 dmabuf_fd, priv, info->extack);
	if (IS_ERR(binding)) {
		err = PTR_ERR(binding);
		goto err_unlock_netdev;
	}

	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
	genlmsg_end(rsp, hdr);

	netdev_unlock(netdev);
	mutex_unlock(&priv->lock);

	return genlmsg_reply(rsp, info);

err_unlock_netdev:
	netdev_unlock(netdev);
err_unlock_sock:
	mutex_unlock(&priv->lock);
err_genlmsg_free:
	nlmsg_free(rsp);
	return err;
}

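/* Create an RX queue on a virtual device, backed by a leased queue of a
 * physical device. The lease target may live in another netns, in which
 * case NETDEV_A_LEASE_NETNS_ID must be given and CAP_NET_ADMIN is
 * required.
 */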
int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
{
	const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
	const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1;
	int err, ifindex, ifindex_lease, queue_id, queue_id_lease;
	struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
	struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)];
	struct netdev_rx_queue *rxq, *rxq_lease;
	struct net_device *dev, *dev_lease;
	netdevice_tracker dev_tracker;
	s32 netns_lease = -1;
	struct nlattr *nest;
	struct sk_buff *rsp;
	struct net *net;
	void *hdr;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE))
		return -EINVAL;
	if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) !=
	    NETDEV_QUEUE_TYPE_RX) {
		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]);
		return -EINVAL;
	}

	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);

	nest = info->attrs[NETDEV_A_QUEUE_LEASE];
	err = nla_parse_nested(ltb, lmaxtype, nest,
			       netdev_lease_nl_policy, info->extack);
	if (err < 0)
		return err;
	if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) ||
	    NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE))
		return -EINVAL;
	if (ltb[NETDEV_A_LEASE_NETNS_ID]) {
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]);
	}

	ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]);

	nest = ltb[NETDEV_A_LEASE_QUEUE];
	err = nla_parse_nested(qtb, qmaxtype, nest,
			       netdev_queue_id_nl_policy, info->extack);
	if (err < 0)
		return err;
	if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) ||
	    NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE))
		return -EINVAL;
	if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]);
		return -EINVAL;
	}

	queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_genlmsg_free;
	}

	/* Locking order is always from the virtual to the physical device
	 * since this is also the same order when applications open the
	 * memory provider later on.
	 */
	dev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
	if (!dev) {
		err = -ENODEV;
		goto err_genlmsg_free;
	}
	if (!netdev_can_create_queue(dev, info->extack)) {
		err = -EINVAL;
		goto err_unlock_dev;
	}

	net = genl_info_net(info);
	if (netns_lease >= 0) {
		net = get_net_ns_by_id(net, netns_lease);
		if (!net) {
			err = -ENONET;
			goto err_unlock_dev;
		}
	}

	dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker,
					GFP_KERNEL);
	if (!dev_lease) {
		err = -ENODEV;
		goto err_put_netns;
	}
	if (!netdev_can_lease_queue(dev_lease, info->extack)) {
		netdev_put(dev_lease, &dev_tracker);
		err = -EINVAL;
		goto err_put_netns;
	}

	dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker);
	if (!dev_lease) {
		err = -ENODEV;
		goto err_put_netns;
	}
	if (queue_id_lease >= dev_lease->real_num_rx_queues) {
		err = -ERANGE;
		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]);
		goto err_unlock_dev_lease;
	}
	if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX,
			      info->extack)) {
		err = -EBUSY;
		goto err_unlock_dev_lease;
	}

	rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease);
	rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1);

	/* Leasing queues from different physical devices is currently
	 * not supported. Capabilities such as XDP features and DMA
	 * device may differ between physical devices, and computing
	 * a correct intersection for the virtual device is not yet
	 * implemented.
	 */
	if (rxq->lease && rxq->lease->dev != dev_lease) {
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(info->extack,
			       "Leasing queues from different devices not supported");
		goto err_unlock_dev_lease;
	}

	queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack);
	if (queue_id < 0) {
		err = queue_id;
		goto err_unlock_dev_lease;
	}
	rxq = __netif_get_rx_queue(dev, queue_id);

	netdev_rx_queue_lease(rxq, rxq_lease);

	nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id);
	genlmsg_end(rsp, hdr);

	netdev_unlock(dev_lease);
	netdev_unlock(dev);
	if (netns_lease >= 0)
		put_net(net);

	return genlmsg_reply(rsp, info);

err_unlock_dev_lease:
	netdev_unlock(dev_lease);
err_put_netns:
	if (netns_lease >= 0)
		put_net(net);
err_unlock_dev:
	netdev_unlock(dev);
err_genlmsg_free:
	nlmsg_free(rsp);
	return err;
}

void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
{
	INIT_LIST_HEAD(&priv->bindings);
	mutex_init(&priv->lock);
}

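/* Called when the netlink socket closes: tear down any dmabuf bindings
 * the socket still owns. binding->dev can be cleared concurrently, so
 * take a reference under binding->lock before locking the device.
 */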
void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
{
	struct net_devmem_dmabuf_binding *binding;
	struct net_devmem_dmabuf_binding *temp;
	netdevice_tracker dev_tracker;
	struct net_device *dev;

	mutex_lock(&priv->lock);
	list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
		mutex_lock(&binding->lock);
		dev = binding->dev;
		if (!dev) {
			mutex_unlock(&binding->lock);
			net_devmem_unbind_dmabuf(binding);
			continue;
		}
		netdev_hold(dev, &dev_tracker, GFP_KERNEL);
		mutex_unlock(&binding->lock);

		netdev_lock(dev);
		net_devmem_unbind_dmabuf(binding);
		netdev_unlock(dev);
		netdev_put(dev, &dev_tracker);
	}
	mutex_unlock(&priv->lock);
}

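/* Translate netdevice notifier events into netlink notifications.
 * netdev_nl_dev_fill() needs the per-netdev instance lock, hence the
 * explicit locking around the register/unregister events; the XDP
 * feature notifier is expected to fire with the device already locked.
 */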
static int netdev_genl_netdevice_event(struct notifier_block *nb,
				       unsigned long event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_REGISTER:
		netdev_lock_ops_to_full(netdev);
		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF);
		netdev_unlock_full_to_ops(netdev);
		break;
	case NETDEV_UNREGISTER:
		netdev_lock(netdev);
		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF);
		netdev_unlock(netdev);
		break;
	case NETDEV_XDP_FEAT_CHANGE:
		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block netdev_genl_nb = {
	.notifier_call	= netdev_genl_netdevice_event,
};

static int __init netdev_genl_init(void)
{
	int err;

	err = register_netdevice_notifier(&netdev_genl_nb);
	if (err)
		return err;

	err = genl_register_family(&netdev_nl_family);
	if (err)
		goto err_unreg_ntf;

	return 0;

err_unreg_ntf:
	unregister_netdevice_notifier(&netdev_genl_nb);
	return err;
}

subsys_initcall(netdev_genl_init);