1 /*
2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. Neither the names of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * Alternatively, this software may be distributed under the terms of the
17 * GNU General Public License ("GPL") version 2 as published by the Free
18 * Software Foundation.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 #include "uverbs.h"
45
46 /*
47 * Controls whether a non-privileged user is allowed to specify a
48 * controlled QKEY. When true, non-privileged users may do so; otherwise
49 * CAP_NET_RAW is required.
50 */
51 static bool privileged_qkey;
52
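/*
 * Callback used to fill a single restrack resource entry into a netlink
 * message: arguments are the message, whether the caller has CAP_NET_ADMIN,
 * the restrack entry and the port filter (0 means no port filtering).
 */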
53 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
54 struct rdma_restrack_entry*, uint32_t);
55
56 /*
57 * Keep the array elements sorted by netlink attribute name.
58 */
59 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
60 [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
61 [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
62 [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
63 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
64 [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
65 .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
66 [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
67 [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
68 [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
69 .len = IB_DEVICE_NAME_MAX },
70 [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
71 [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
72 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
73 [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
74 [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
75 [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
76 [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
77 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
78 [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
79 [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
80 [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
81 [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
82 [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
83 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
84 [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
85 [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
86 .len = IFNAMSIZ },
87 [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
88 [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
89 [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
90 .len = IFNAMSIZ },
91 [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
92 [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
93 [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
94 [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
95 [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
96 [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
97 [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
98 [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
99 [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
100 [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
101 [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
102 [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
103 [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
104 [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
105 [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
106 .len = sizeof(struct __kernel_sockaddr_storage) },
107 [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
108 [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
109 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
110 [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
111 [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
112 [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
113 [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
114 [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
115 [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
116 [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
117 [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
118 [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
119 [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
120 [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
121 [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
122 [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
123 [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
124 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
125 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
126 [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
127 [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
128 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
129 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
130 [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
131 [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
132 .len = sizeof(struct __kernel_sockaddr_storage) },
133 [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
134 [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
135 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
136 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
137 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
138 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
139 [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
140 [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING,
141 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
142 [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
143 [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
144 [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
145 [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
146 [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
147 [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
148 [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
149 [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
150 [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
151 [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
152 [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
153 [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
154 [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
155 [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
156 [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
157 [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
158 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
159 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
160 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
161 [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
162 [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
163 [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
164 [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
165 [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
166 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
167 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
168 [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
169 [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 },
170 [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
171 [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
172 [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
173 [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
174 [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
175 };
176
177 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
178 enum rdma_nldev_print_type print_type)
179 {
180 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
181 return -EMSGSIZE;
182 if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
183 nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
184 return -EMSGSIZE;
185
186 return 0;
187 }
188
189 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
190 enum rdma_nldev_print_type print_type,
191 u32 value)
192 {
193 if (put_driver_name_print_type(msg, name, print_type))
194 return -EMSGSIZE;
195 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
196 return -EMSGSIZE;
197
198 return 0;
199 }
200
201 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
202 enum rdma_nldev_print_type print_type,
203 u64 value)
204 {
205 if (put_driver_name_print_type(msg, name, print_type))
206 return -EMSGSIZE;
207 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
208 RDMA_NLDEV_ATTR_PAD))
209 return -EMSGSIZE;
210
211 return 0;
212 }
213
214 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
215 const char *str)
216 {
217 if (put_driver_name_print_type(msg, name,
218 RDMA_NLDEV_PRINT_TYPE_UNSPEC))
219 return -EMSGSIZE;
220 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
221 return -EMSGSIZE;
222
223 return 0;
224 }
225 EXPORT_SYMBOL(rdma_nl_put_driver_string);
226
227 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
228 {
229 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
230 value);
231 }
232 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
233
234 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
235 u32 value)
236 {
237 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
238 value);
239 }
240 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
241
242 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
243 {
244 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
245 value);
246 }
247 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
248
249 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
250 {
251 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
252 value);
253 }
254 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
255
256 bool rdma_nl_get_privileged_qkey(void)
257 {
258 return privileged_qkey || capable(CAP_NET_RAW);
259 }
260 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
261
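/*
 * Emit the device handle (index and name) that prefixes every nldev reply.
 */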
262 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
263 {
264 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
265 return -EMSGSIZE;
266 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
267 dev_name(&device->dev)))
268 return -EMSGSIZE;
269
270 return 0;
271 }
272
273 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
274 {
275 char fw[IB_FW_VERSION_NAME_MAX];
276 int ret = 0;
277 u32 port;
278
279 if (fill_nldev_handle(msg, device))
280 return -EMSGSIZE;
281
282 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
283 return -EMSGSIZE;
284
285 BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
286 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
287 device->attrs.device_cap_flags,
288 RDMA_NLDEV_ATTR_PAD))
289 return -EMSGSIZE;
290
291 ib_get_device_fw_str(device, fw);
292 /* Device without FW has strlen(fw) = 0 */
293 if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
294 return -EMSGSIZE;
295
296 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
297 be64_to_cpu(device->node_guid),
298 RDMA_NLDEV_ATTR_PAD))
299 return -EMSGSIZE;
300 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
301 be64_to_cpu(device->attrs.sys_image_guid),
302 RDMA_NLDEV_ATTR_PAD))
303 return -EMSGSIZE;
304 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
305 return -EMSGSIZE;
306 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
307 return -EMSGSIZE;
308
309 if (device->type &&
310 nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
311 return -EMSGSIZE;
312
313 if (device->parent &&
314 nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
315 dev_name(&device->parent->dev)))
316 return -EMSGSIZE;
317
318 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
319 device->name_assign_type))
320 return -EMSGSIZE;
321
322 /*
323 * The link type is determined from the first port. An mlx4 device,
324 * which can potentially have two different link types on the same
325 * IB device, is considered something better avoided in the future.
326 */
327 port = rdma_start_port(device);
328 if (rdma_cap_opa_mad(device, port))
329 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
330 else if (rdma_protocol_ib(device, port))
331 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
332 else if (rdma_protocol_iwarp(device, port))
333 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
334 else if (rdma_protocol_roce(device, port))
335 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
336 else if (rdma_protocol_usnic(device, port))
337 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
338 "usnic");
339 return ret;
340 }
341
342 static int fill_port_info(struct sk_buff *msg,
343 struct ib_device *device, u32 port,
344 const struct net *net)
345 {
346 struct net_device *netdev = NULL;
347 struct ib_port_attr attr;
348 int ret;
349 u64 cap_flags = 0;
350
351 if (fill_nldev_handle(msg, device))
352 return -EMSGSIZE;
353
354 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
355 return -EMSGSIZE;
356
357 ret = ib_query_port(device, port, &attr);
358 if (ret)
359 return ret;
360
361 if (rdma_protocol_ib(device, port)) {
362 BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
363 sizeof(attr.port_cap_flags2)) > sizeof(u64));
364 cap_flags = attr.port_cap_flags |
365 ((u64)attr.port_cap_flags2 << 32);
366 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
367 cap_flags, RDMA_NLDEV_ATTR_PAD))
368 return -EMSGSIZE;
369 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
370 attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
371 return -EMSGSIZE;
372 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
373 return -EMSGSIZE;
374 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
375 return -EMSGSIZE;
376 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
377 return -EMSGSIZE;
378 }
379 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
380 return -EMSGSIZE;
381 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
382 return -EMSGSIZE;
383
384 netdev = ib_device_get_netdev(device, port);
385 if (netdev && net_eq(dev_net(netdev), net)) {
386 ret = nla_put_u32(msg,
387 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
388 if (ret)
389 goto out;
390 ret = nla_put_string(msg,
391 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
392 }
393
394 out:
395 dev_put(netdev);
396 return ret;
397 }
398
399 static int fill_res_info_entry(struct sk_buff *msg,
400 const char *name, u64 curr)
401 {
402 struct nlattr *entry_attr;
403
404 entry_attr = nla_nest_start_noflag(msg,
405 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
406 if (!entry_attr)
407 return -EMSGSIZE;
408
409 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
410 goto err;
411 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
412 RDMA_NLDEV_ATTR_PAD))
413 goto err;
414
415 nla_nest_end(msg, entry_attr);
416 return 0;
417
418 err:
419 nla_nest_cancel(msg, entry_attr);
420 return -EMSGSIZE;
421 }
422
423 static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
424 bool show_details)
425 {
426 static const char * const names[RDMA_RESTRACK_MAX] = {
427 [RDMA_RESTRACK_PD] = "pd",
428 [RDMA_RESTRACK_CQ] = "cq",
429 [RDMA_RESTRACK_QP] = "qp",
430 [RDMA_RESTRACK_CM_ID] = "cm_id",
431 [RDMA_RESTRACK_MR] = "mr",
432 [RDMA_RESTRACK_CTX] = "ctx",
433 [RDMA_RESTRACK_SRQ] = "srq",
434 };
435
436 struct nlattr *table_attr;
437 int ret, i, curr;
438
439 if (fill_nldev_handle(msg, device))
440 return -EMSGSIZE;
441
442 table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
443 if (!table_attr)
444 return -EMSGSIZE;
445
446 for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
447 if (!names[i])
448 continue;
449 curr = rdma_restrack_count(device, i, show_details);
450 ret = fill_res_info_entry(msg, names[i], curr);
451 if (ret)
452 goto err;
453 }
454
455 nla_nest_end(msg, table_attr);
456 return 0;
457
458 err:
459 nla_nest_cancel(msg, table_attr);
460 return ret;
461 }
462
463 static int fill_res_name_pid(struct sk_buff *msg,
464 struct rdma_restrack_entry *res)
465 {
466 int err = 0;
467
468 /*
469 * For user resources, userspace should read /proc/PID/comm to get
470 * the name of the task.
471 */
472 if (rdma_is_kernel_res(res)) {
473 err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
474 res->kern_name);
475 } else {
476 pid_t pid;
477
478 pid = task_pid_vnr(res->task);
479 /*
480 * A zero PID means the task is dead and in zombie state;
481 * there is no need to print the PID anymore.
482 */
483 if (pid)
484 /*
485 * This part is racy, task can be killed and PID will
486 * be zero right here but it is ok, next query won't
487 * return PID. We don't promise real-time reflection
488 * of SW objects.
489 */
490 err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
491 }
492
493 return err ? -EMSGSIZE : 0;
494 }
495
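/*
 * Query the QP and emit its state related attributes (remote QPN, PSNs,
 * path migration state, type and state), then let the driver append its
 * own attributes via the fill_res_qp_entry op when one is provided.
 */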
496 static int fill_res_qp_entry_query(struct sk_buff *msg,
497 struct rdma_restrack_entry *res,
498 struct ib_device *dev,
499 struct ib_qp *qp)
500 {
501 struct ib_qp_init_attr qp_init_attr;
502 struct ib_qp_attr qp_attr;
503 int ret;
504
505 ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
506 if (ret)
507 return ret;
508
509 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
510 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
511 qp_attr.dest_qp_num))
512 goto err;
513 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
514 qp_attr.rq_psn))
515 goto err;
516 }
517
518 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
519 goto err;
520
521 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
522 qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
523 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
524 qp_attr.path_mig_state))
525 goto err;
526 }
527 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
528 goto err;
529 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
530 goto err;
531
532 if (dev->ops.fill_res_qp_entry)
533 return dev->ops.fill_res_qp_entry(msg, qp);
534 return 0;
535
536 err: return -EMSGSIZE;
537 }
538
539 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
540 struct rdma_restrack_entry *res, uint32_t port)
541 {
542 struct ib_qp *qp = container_of(res, struct ib_qp, res);
543 struct ib_device *dev = qp->device;
544 int ret;
545
546 if (port && port != qp->port)
547 return -EAGAIN;
548
549 /* In create_qp() port is not set yet */
550 if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
551 return -EMSGSIZE;
552
553 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
554 if (ret)
555 return -EMSGSIZE;
556
557 if (!rdma_is_kernel_res(res) &&
558 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
559 return -EMSGSIZE;
560
561 ret = fill_res_name_pid(msg, res);
562 if (ret)
563 return -EMSGSIZE;
564
565 return fill_res_qp_entry_query(msg, res, dev, qp);
566 }
567
568 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
569 struct rdma_restrack_entry *res, uint32_t port)
570 {
571 struct ib_qp *qp = container_of(res, struct ib_qp, res);
572 struct ib_device *dev = qp->device;
573
574 if (port && port != qp->port)
575 return -EAGAIN;
576 if (!dev->ops.fill_res_qp_entry_raw)
577 return -EINVAL;
578 return dev->ops.fill_res_qp_entry_raw(msg, qp);
579 }
580
581 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
582 struct rdma_restrack_entry *res, uint32_t port)
583 {
584 struct rdma_id_private *id_priv =
585 container_of(res, struct rdma_id_private, res);
586 struct ib_device *dev = id_priv->id.device;
587 struct rdma_cm_id *cm_id = &id_priv->id;
588
589 if (port && port != cm_id->port_num)
590 return -EAGAIN;
591
592 if (cm_id->port_num &&
593 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
594 goto err;
595
596 if (id_priv->qp_num) {
597 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
598 goto err;
599 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
600 goto err;
601 }
602
603 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
604 goto err;
605
606 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
607 goto err;
608
609 if (cm_id->route.addr.src_addr.ss_family &&
610 nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
611 sizeof(cm_id->route.addr.src_addr),
612 &cm_id->route.addr.src_addr))
613 goto err;
614 if (cm_id->route.addr.dst_addr.ss_family &&
615 nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
616 sizeof(cm_id->route.addr.dst_addr),
617 &cm_id->route.addr.dst_addr))
618 goto err;
619
620 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
621 goto err;
622
623 if (fill_res_name_pid(msg, res))
624 goto err;
625
626 if (dev->ops.fill_res_cm_id_entry)
627 return dev->ops.fill_res_cm_id_entry(msg, cm_id);
628 return 0;
629
630 err: return -EMSGSIZE;
631 }
632
633 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
634 struct rdma_restrack_entry *res, uint32_t port)
635 {
636 struct ib_cq *cq = container_of(res, struct ib_cq, res);
637 struct ib_device *dev = cq->device;
638
639 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
640 return -EMSGSIZE;
641 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
642 atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
643 return -EMSGSIZE;
644
645 /* Poll context is only valid for kernel CQs */
646 if (rdma_is_kernel_res(res) &&
647 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
648 return -EMSGSIZE;
649
650 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
651 return -EMSGSIZE;
652
653 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
654 return -EMSGSIZE;
655 if (!rdma_is_kernel_res(res) &&
656 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
657 cq->uobject->uevent.uobject.context->res.id))
658 return -EMSGSIZE;
659
660 if (fill_res_name_pid(msg, res))
661 return -EMSGSIZE;
662
663 return (dev->ops.fill_res_cq_entry) ?
664 dev->ops.fill_res_cq_entry(msg, cq) : 0;
665 }
666
667 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
668 struct rdma_restrack_entry *res, uint32_t port)
669 {
670 struct ib_cq *cq = container_of(res, struct ib_cq, res);
671 struct ib_device *dev = cq->device;
672
673 if (!dev->ops.fill_res_cq_entry_raw)
674 return -EINVAL;
675 return dev->ops.fill_res_cq_entry_raw(msg, cq);
676 }
677
678 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
679 struct rdma_restrack_entry *res, uint32_t port)
680 {
681 struct ib_mr *mr = container_of(res, struct ib_mr, res);
682 struct ib_device *dev = mr->pd->device;
683
684 if (has_cap_net_admin) {
685 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
686 return -EMSGSIZE;
687 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
688 return -EMSGSIZE;
689 }
690
691 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
692 RDMA_NLDEV_ATTR_PAD))
693 return -EMSGSIZE;
694
695 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
696 return -EMSGSIZE;
697
698 if (!rdma_is_kernel_res(res) &&
699 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
700 return -EMSGSIZE;
701
702 if (fill_res_name_pid(msg, res))
703 return -EMSGSIZE;
704
705 return (dev->ops.fill_res_mr_entry) ?
706 dev->ops.fill_res_mr_entry(msg, mr) :
707 0;
708 }
709
710 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
711 struct rdma_restrack_entry *res, uint32_t port)
712 {
713 struct ib_mr *mr = container_of(res, struct ib_mr, res);
714 struct ib_device *dev = mr->pd->device;
715
716 if (!dev->ops.fill_res_mr_entry_raw)
717 return -EINVAL;
718 return dev->ops.fill_res_mr_entry_raw(msg, mr);
719 }
720
721 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
722 struct rdma_restrack_entry *res, uint32_t port)
723 {
724 struct ib_pd *pd = container_of(res, struct ib_pd, res);
725
726 if (has_cap_net_admin) {
727 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
728 pd->local_dma_lkey))
729 goto err;
730 if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
731 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
732 pd->unsafe_global_rkey))
733 goto err;
734 }
735 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
736 atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
737 goto err;
738
739 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
740 goto err;
741
742 if (!rdma_is_kernel_res(res) &&
743 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
744 pd->uobject->context->res.id))
745 goto err;
746
747 return fill_res_name_pid(msg, res);
748
749 err: return -EMSGSIZE;
750 }
751
752 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
753 struct rdma_restrack_entry *res, uint32_t port)
754 {
755 struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
756
757 if (rdma_is_kernel_res(res))
758 return 0;
759
760 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
761 return -EMSGSIZE;
762
763 return fill_res_name_pid(msg, res);
764 }
765
766 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
767 uint32_t max_range)
768 {
769 struct nlattr *entry_attr;
770
771 if (!min_range)
772 return 0;
773
774 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
775 if (!entry_attr)
776 return -EMSGSIZE;
777
778 if (min_range == max_range) {
779 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
780 goto err;
781 } else {
782 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
783 goto err;
784 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
785 goto err;
786 }
787 nla_nest_end(msg, entry_attr);
788 return 0;
789
790 err:
791 nla_nest_cancel(msg, entry_attr);
792 return -EMSGSIZE;
793 }
794
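/*
 * Walk the device's QP restrack table and report the QPs attached to this
 * SRQ as compressed [min, max] ranges of QP numbers.
 */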
795 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
796 {
797 uint32_t min_range = 0, prev = 0;
798 struct rdma_restrack_entry *res;
799 struct rdma_restrack_root *rt;
800 struct nlattr *table_attr;
801 struct ib_qp *qp = NULL;
802 unsigned long id = 0;
803
804 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
805 if (!table_attr)
806 return -EMSGSIZE;
807
808 rt = &srq->device->res[RDMA_RESTRACK_QP];
809 xa_lock(&rt->xa);
810 xa_for_each(&rt->xa, id, res) {
811 if (!rdma_restrack_get(res))
812 continue;
813
814 qp = container_of(res, struct ib_qp, res);
815 if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
816 rdma_restrack_put(res);
817 continue;
818 }
819
820 if (qp->qp_num < prev)
821 /* qp_num should be ascending */
822 goto err_loop;
823
824 if (min_range == 0) {
825 min_range = qp->qp_num;
826 } else if (qp->qp_num > (prev + 1)) {
827 if (fill_res_range_qp_entry(msg, min_range, prev))
828 goto err_loop;
829
830 min_range = qp->qp_num;
831 }
832 prev = qp->qp_num;
833 rdma_restrack_put(res);
834 }
835
836 xa_unlock(&rt->xa);
837
838 if (fill_res_range_qp_entry(msg, min_range, prev))
839 goto err;
840
841 nla_nest_end(msg, table_attr);
842 return 0;
843
844 err_loop:
845 rdma_restrack_put(res);
846 xa_unlock(&rt->xa);
847 err:
848 nla_nest_cancel(msg, table_attr);
849 return -EMSGSIZE;
850 }
851
852 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
853 struct rdma_restrack_entry *res, uint32_t port)
854 {
855 struct ib_srq *srq = container_of(res, struct ib_srq, res);
856 struct ib_device *dev = srq->device;
857
858 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
859 goto err;
860
861 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
862 goto err;
863
864 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
865 goto err;
866
867 if (ib_srq_has_cq(srq->srq_type)) {
868 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
869 srq->ext.cq->res.id))
870 goto err;
871 }
872
873 if (fill_res_srq_qps(msg, srq))
874 goto err;
875
876 if (fill_res_name_pid(msg, res))
877 goto err;
878
879 if (dev->ops.fill_res_srq_entry)
880 return dev->ops.fill_res_srq_entry(msg, srq);
881
882 return 0;
883
884 err:
885 return -EMSGSIZE;
886 }
887
888 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
889 struct rdma_restrack_entry *res, uint32_t port)
890 {
891 struct ib_srq *srq = container_of(res, struct ib_srq, res);
892 struct ib_device *dev = srq->device;
893
894 if (!dev->ops.fill_res_srq_entry_raw)
895 return -EINVAL;
896 return dev->ops.fill_res_srq_entry_raw(msg, srq);
897 }
898
899 static int fill_stat_counter_mode(struct sk_buff *msg,
900 struct rdma_counter *counter)
901 {
902 struct rdma_counter_mode *m = &counter->mode;
903
904 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
905 return -EMSGSIZE;
906
907 if (m->mode == RDMA_COUNTER_MODE_AUTO) {
908 if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
909 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
910 return -EMSGSIZE;
911
912 if ((m->mask & RDMA_COUNTER_MASK_PID) &&
913 fill_res_name_pid(msg, &counter->res))
914 return -EMSGSIZE;
915 }
916
917 return 0;
918 }
919
920 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
921 {
922 struct nlattr *entry_attr;
923
924 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
925 if (!entry_attr)
926 return -EMSGSIZE;
927
928 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
929 goto err;
930
931 nla_nest_end(msg, entry_attr);
932 return 0;
933
934 err:
935 nla_nest_cancel(msg, entry_attr);
936 return -EMSGSIZE;
937 }
938
939 static int fill_stat_counter_qps(struct sk_buff *msg,
940 struct rdma_counter *counter)
941 {
942 struct rdma_restrack_entry *res;
943 struct rdma_restrack_root *rt;
944 struct nlattr *table_attr;
945 struct ib_qp *qp = NULL;
946 unsigned long id = 0;
947 int ret = 0;
948
949 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
950 if (!table_attr)
951 return -EMSGSIZE;
952
953 rt = &counter->device->res[RDMA_RESTRACK_QP];
954 xa_lock(&rt->xa);
955 xa_for_each(&rt->xa, id, res) {
956 qp = container_of(res, struct ib_qp, res);
957 if (!qp->counter || (qp->counter->id != counter->id))
958 continue;
959
960 ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
961 if (ret)
962 goto err;
963 }
964
965 xa_unlock(&rt->xa);
966 nla_nest_end(msg, table_attr);
967 return 0;
968
969 err:
970 xa_unlock(&rt->xa);
971 nla_nest_cancel(msg, table_attr);
972 return ret;
973 }
974
975 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
976 u64 value)
977 {
978 struct nlattr *entry_attr;
979
980 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
981 if (!entry_attr)
982 return -EMSGSIZE;
983
984 if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
985 name))
986 goto err;
987 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
988 value, RDMA_NLDEV_ATTR_PAD))
989 goto err;
990
991 nla_nest_end(msg, entry_attr);
992 return 0;
993
994 err:
995 nla_nest_cancel(msg, entry_attr);
996 return -EMSGSIZE;
997 }
998 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
999
1000 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
1001 struct rdma_restrack_entry *res, uint32_t port)
1002 {
1003 struct ib_mr *mr = container_of(res, struct ib_mr, res);
1004 struct ib_device *dev = mr->pd->device;
1005
1006 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
1007 goto err;
1008
1009 if (dev->ops.fill_stat_mr_entry)
1010 return dev->ops.fill_stat_mr_entry(msg, mr);
1011 return 0;
1012
1013 err:
1014 return -EMSGSIZE;
1015 }
1016
1017 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
1018 struct rdma_counter *counter)
1019 {
1020 struct rdma_hw_stats *st = counter->stats;
1021 struct nlattr *table_attr;
1022 int i;
1023
1024 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1025 if (!table_attr)
1026 return -EMSGSIZE;
1027
1028 mutex_lock(&st->lock);
1029 for (i = 0; i < st->num_counters; i++) {
1030 if (test_bit(i, st->is_disabled))
1031 continue;
1032 if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
1033 st->value[i]))
1034 goto err;
1035 }
1036 mutex_unlock(&st->lock);
1037
1038 nla_nest_end(msg, table_attr);
1039 return 0;
1040
1041 err:
1042 mutex_unlock(&st->lock);
1043 nla_nest_cancel(msg, table_attr);
1044 return -EMSGSIZE;
1045 }
1046
1047 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
1048 struct rdma_restrack_entry *res,
1049 uint32_t port)
1050 {
1051 struct rdma_counter *counter =
1052 container_of(res, struct rdma_counter, res);
1053
1054 if (port && port != counter->port)
1055 return -EAGAIN;
1056
1057 /* Dump it even if the query failed */
1058 rdma_counter_query_stats(counter);
1059
1060 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
1061 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
1062 fill_stat_counter_mode(msg, counter) ||
1063 fill_stat_counter_qps(msg, counter) ||
1064 fill_stat_counter_hwcounters(msg, counter))
1065 return -EMSGSIZE;
1066
1067 return 0;
1068 }
1069
1070 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1071 struct netlink_ext_ack *extack)
1072 {
1073 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1074 struct ib_device *device;
1075 struct sk_buff *msg;
1076 u32 index;
1077 int err;
1078
1079 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1080 nldev_policy, NL_VALIDATE_LIBERAL, extack);
1081 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1082 return -EINVAL;
1083
1084 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1085
1086 device = ib_device_get_by_index(sock_net(skb->sk), index);
1087 if (!device)
1088 return -EINVAL;
1089
1090 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1091 if (!msg) {
1092 err = -ENOMEM;
1093 goto err;
1094 }
1095
1096 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1097 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1098 0, 0);
1099 if (!nlh) {
1100 err = -EMSGSIZE;
1101 goto err_free;
1102 }
1103
1104 err = fill_dev_info(msg, device);
1105 if (err)
1106 goto err_free;
1107
1108 nlmsg_end(msg, nlh);
1109
1110 ib_device_put(device);
1111 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1112
1113 err_free:
1114 nlmsg_free(msg);
1115 err:
1116 ib_device_put(device);
1117 return err;
1118 }
1119
1120 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1121 struct netlink_ext_ack *extack)
1122 {
1123 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1124 struct ib_device *device;
1125 u32 index;
1126 int err;
1127
1128 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1129 nldev_policy, extack);
1130 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1131 return -EINVAL;
1132
1133 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1134 device = ib_device_get_by_index(sock_net(skb->sk), index);
1135 if (!device)
1136 return -EINVAL;
1137
1138 if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
1139 char name[IB_DEVICE_NAME_MAX] = {};
1140
1141 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1142 IB_DEVICE_NAME_MAX);
1143 if (strlen(name) == 0) {
1144 err = -EINVAL;
1145 goto done;
1146 }
1147 err = ib_device_rename(device, name);
1148 goto done;
1149 }
1150
1151 if (tb[RDMA_NLDEV_NET_NS_FD]) {
1152 u32 ns_fd;
1153
1154 ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
1155 err = ib_device_set_netns_put(skb, device, ns_fd);
1156 goto put_done;
1157 }
1158
1159 if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
1160 u8 use_dim;
1161
1162 use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
1163 err = ib_device_set_dim(device, use_dim);
1164 goto done;
1165 }
1166
1167 done:
1168 ib_device_put(device);
1169 put_done:
1170 return err;
1171 }
1172
1173 static int _nldev_get_dumpit(struct ib_device *device,
1174 struct sk_buff *skb,
1175 struct netlink_callback *cb,
1176 unsigned int idx)
1177 {
1178 int start = cb->args[0];
1179 struct nlmsghdr *nlh;
1180
1181 if (idx < start)
1182 return 0;
1183
1184 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1185 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1186 0, NLM_F_MULTI);
1187
1188 if (!nlh || fill_dev_info(skb, device)) {
1189 nlmsg_cancel(skb, nlh);
1190 goto out;
1191 }
1192
1193 nlmsg_end(skb, nlh);
1194
1195 idx++;
1196
1197 out: cb->args[0] = idx;
1198 return skb->len;
1199 }
1200
1201 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
1202 {
1203 /*
1204 * There is no need to take a lock here because
1205 * we rely on ib_core's locking.
1206 */
1207 return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
1208 }
1209
1210 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1211 struct netlink_ext_ack *extack)
1212 {
1213 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1214 struct ib_device *device;
1215 struct sk_buff *msg;
1216 u32 index;
1217 u32 port;
1218 int err;
1219
1220 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1221 nldev_policy, NL_VALIDATE_LIBERAL, extack);
1222 if (err ||
1223 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1224 !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1225 return -EINVAL;
1226
1227 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1228 device = ib_device_get_by_index(sock_net(skb->sk), index);
1229 if (!device)
1230 return -EINVAL;
1231
1232 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1233 if (!rdma_is_port_valid(device, port)) {
1234 err = -EINVAL;
1235 goto err;
1236 }
1237
1238 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1239 if (!msg) {
1240 err = -ENOMEM;
1241 goto err;
1242 }
1243
1244 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1245 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1246 0, 0);
1247 if (!nlh) {
1248 err = -EMSGSIZE;
1249 goto err_free;
1250 }
1251
1252 err = fill_port_info(msg, device, port, sock_net(skb->sk));
1253 if (err)
1254 goto err_free;
1255
1256 nlmsg_end(msg, nlh);
1257 ib_device_put(device);
1258
1259 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1260
1261 err_free:
1262 nlmsg_free(msg);
1263 err:
1264 ib_device_put(device);
1265 return err;
1266 }
1267
1268 static int nldev_port_get_dumpit(struct sk_buff *skb,
1269 struct netlink_callback *cb)
1270 {
1271 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1272 struct ib_device *device;
1273 int start = cb->args[0];
1274 struct nlmsghdr *nlh;
1275 u32 idx = 0;
1276 u32 ifindex;
1277 int err;
1278 unsigned int p;
1279
1280 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1281 nldev_policy, NL_VALIDATE_LIBERAL, NULL);
1282 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1283 return -EINVAL;
1284
1285 ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1286 device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1287 if (!device)
1288 return -EINVAL;
1289
1290 rdma_for_each_port (device, p) {
1291 /*
1292 * The dumpit function returns all information starting from a
1293 * specific index. That index is taken from the netlink request
1294 * sent by the user and is available in cb->args[0].
1295 *
1296 * Usually the user doesn't fill this field, which causes
1297 * everything to be returned.
1298 */
1301 if (idx < start) {
1302 idx++;
1303 continue;
1304 }
1305
1306 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1307 cb->nlh->nlmsg_seq,
1308 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1309 RDMA_NLDEV_CMD_PORT_GET),
1310 0, NLM_F_MULTI);
1311
1312 if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
1313 nlmsg_cancel(skb, nlh);
1314 goto out;
1315 }
1316 idx++;
1317 nlmsg_end(skb, nlh);
1318 }
1319
1320 out:
1321 ib_device_put(device);
1322 cb->args[0] = idx;
1323 return skb->len;
1324 }
1325
1326 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1327 struct netlink_ext_ack *extack)
1328 {
1329 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1330 bool show_details = false;
1331 struct ib_device *device;
1332 struct sk_buff *msg;
1333 u32 index;
1334 int ret;
1335
1336 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1337 nldev_policy, NL_VALIDATE_LIBERAL, extack);
1338 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1339 return -EINVAL;
1340
1341 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1342 device = ib_device_get_by_index(sock_net(skb->sk), index);
1343 if (!device)
1344 return -EINVAL;
1345
1346 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1347 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1348
1349 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1350 if (!msg) {
1351 ret = -ENOMEM;
1352 goto err;
1353 }
1354
1355 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1356 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1357 0, 0);
1358 if (!nlh) {
1359 ret = -EMSGSIZE;
1360 goto err_free;
1361 }
1362
1363 ret = fill_res_info(msg, device, show_details);
1364 if (ret)
1365 goto err_free;
1366
1367 nlmsg_end(msg, nlh);
1368 ib_device_put(device);
1369 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1370
1371 err_free:
1372 nlmsg_free(msg);
1373 err:
1374 ib_device_put(device);
1375 return ret;
1376 }
1377
1378 static int _nldev_res_get_dumpit(struct ib_device *device,
1379 struct sk_buff *skb,
1380 struct netlink_callback *cb,
1381 unsigned int idx)
1382 {
1383 int start = cb->args[0];
1384 struct nlmsghdr *nlh;
1385
1386 if (idx < start)
1387 return 0;
1388
1389 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1390 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1391 0, NLM_F_MULTI);
1392
1393 if (!nlh || fill_res_info(skb, device, false)) {
1394 nlmsg_cancel(skb, nlh);
1395 goto out;
1396 }
1397 nlmsg_end(skb, nlh);
1398
1399 idx++;
1400
1401 out:
1402 cb->args[0] = idx;
1403 return skb->len;
1404 }
1405
1406 static int nldev_res_get_dumpit(struct sk_buff *skb,
1407 struct netlink_callback *cb)
1408 {
1409 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1410 }
1411
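/*
 * Per resource-type description of the netlink attributes used when dumping
 * restrack objects: the table attribute, the per-entry attribute and the
 * attribute carrying the object ID.
 */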
1412 struct nldev_fill_res_entry {
1413 enum rdma_nldev_attr nldev_attr;
1414 u8 flags;
1415 u32 entry;
1416 u32 id;
1417 };
1418
1419 enum nldev_res_flags {
1420 NLDEV_PER_DEV = 1 << 0,
1421 };
1422
1423 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
1424 [RDMA_RESTRACK_QP] = {
1425 .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
1426 .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
1427 .id = RDMA_NLDEV_ATTR_RES_LQPN,
1428 },
1429 [RDMA_RESTRACK_CM_ID] = {
1430 .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
1431 .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
1432 .id = RDMA_NLDEV_ATTR_RES_CM_IDN,
1433 },
1434 [RDMA_RESTRACK_CQ] = {
1435 .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
1436 .flags = NLDEV_PER_DEV,
1437 .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
1438 .id = RDMA_NLDEV_ATTR_RES_CQN,
1439 },
1440 [RDMA_RESTRACK_MR] = {
1441 .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
1442 .flags = NLDEV_PER_DEV,
1443 .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
1444 .id = RDMA_NLDEV_ATTR_RES_MRN,
1445 },
1446 [RDMA_RESTRACK_PD] = {
1447 .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
1448 .flags = NLDEV_PER_DEV,
1449 .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
1450 .id = RDMA_NLDEV_ATTR_RES_PDN,
1451 },
1452 [RDMA_RESTRACK_COUNTER] = {
1453 .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
1454 .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
1455 .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
1456 },
1457 [RDMA_RESTRACK_CTX] = {
1458 .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
1459 .flags = NLDEV_PER_DEV,
1460 .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
1461 .id = RDMA_NLDEV_ATTR_RES_CTXN,
1462 },
1463 [RDMA_RESTRACK_SRQ] = {
1464 .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
1465 .flags = NLDEV_PER_DEV,
1466 .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
1467 .id = RDMA_NLDEV_ATTR_RES_SRQN,
1468 },
1469
1470 };
1471
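/*
 * Common handler for "get one object" requests: look up the device and the
 * restrack object by ID, fill a single entry with the type specific
 * fill_func and unicast the reply back to the requester.
 */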
1472 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1473 struct netlink_ext_ack *extack,
1474 enum rdma_restrack_type res_type,
1475 res_fill_func_t fill_func)
1476 {
1477 const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1478 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1479 struct rdma_restrack_entry *res;
1480 struct ib_device *device;
1481 u32 index, id, port = 0;
1482 bool has_cap_net_admin;
1483 struct sk_buff *msg;
1484 int ret;
1485
1486 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1487 nldev_policy, NL_VALIDATE_LIBERAL, extack);
1488 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1489 return -EINVAL;
1490
1491 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1492 device = ib_device_get_by_index(sock_net(skb->sk), index);
1493 if (!device)
1494 return -EINVAL;
1495
1496 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1497 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1498 if (!rdma_is_port_valid(device, port)) {
1499 ret = -EINVAL;
1500 goto err;
1501 }
1502 }
1503
1504 if ((port && fe->flags & NLDEV_PER_DEV) ||
1505 (!port && ~fe->flags & NLDEV_PER_DEV)) {
1506 ret = -EINVAL;
1507 goto err;
1508 }
1509
1510 id = nla_get_u32(tb[fe->id]);
1511 res = rdma_restrack_get_byid(device, res_type, id);
1512 if (IS_ERR(res)) {
1513 ret = PTR_ERR(res);
1514 goto err;
1515 }
1516
1517 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1518 if (!msg) {
1519 ret = -ENOMEM;
1520 goto err_get;
1521 }
1522
1523 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1524 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1525 RDMA_NL_GET_OP(nlh->nlmsg_type)),
1526 0, 0);
1527
1528 if (!nlh || fill_nldev_handle(msg, device)) {
1529 ret = -EMSGSIZE;
1530 goto err_free;
1531 }
1532
1533 has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1534
1535 ret = fill_func(msg, has_cap_net_admin, res, port);
1536 if (ret)
1537 goto err_free;
1538
1539 rdma_restrack_put(res);
1540 nlmsg_end(msg, nlh);
1541 ib_device_put(device);
1542 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1543
1544 err_free:
1545 nlmsg_free(msg);
1546 err_get:
1547 rdma_restrack_put(res);
1548 err:
1549 ib_device_put(device);
1550 return ret;
1551 }
1552
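/*
 * Common handler for dump requests: iterate the device's restrack table for
 * the given resource type and fill one nested entry per object with the
 * type specific fill_func until the message is full or the table ends.
 */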
1553 static int res_get_common_dumpit(struct sk_buff *skb,
1554 struct netlink_callback *cb,
1555 enum rdma_restrack_type res_type,
1556 res_fill_func_t fill_func)
1557 {
1558 const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1559 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1560 struct rdma_restrack_entry *res;
1561 struct rdma_restrack_root *rt;
1562 int err, ret = 0, idx = 0;
1563 bool show_details = false;
1564 struct nlattr *table_attr;
1565 struct nlattr *entry_attr;
1566 struct ib_device *device;
1567 int start = cb->args[0];
1568 bool has_cap_net_admin;
1569 struct nlmsghdr *nlh;
1570 unsigned long id;
1571 u32 index, port = 0;
1572 bool filled = false;
1573
1574 err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1575 nldev_policy, NL_VALIDATE_LIBERAL, NULL);
1576 /*
1577 * Right now we expect the device index in order to get resource
1578 * information, but it is possible to extend this code to return all
1579 * devices in one shot by checking for RDMA_NLDEV_ATTR_DEV_INDEX:
1580 * if it doesn't exist, iterate over all devices.
1581 *
1582 * But that is not needed for now.
1583 */
1584 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1585 return -EINVAL;
1586
1587 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1588 device = ib_device_get_by_index(sock_net(skb->sk), index);
1589 if (!device)
1590 return -EINVAL;
1591
1592 if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1593 show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1594
1595 /*
1596 * If no PORT_INDEX is supplied, return all entries of that device without port filtering.
1597 */
1598 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1599 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1600 if (!rdma_is_port_valid(device, port)) {
1601 ret = -EINVAL;
1602 goto err_index;
1603 }
1604 }
1605
1606 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1607 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1608 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
1609 0, NLM_F_MULTI);
1610
1611 if (!nlh || fill_nldev_handle(skb, device)) {
1612 ret = -EMSGSIZE;
1613 goto err;
1614 }
1615
1616 table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1617 if (!table_attr) {
1618 ret = -EMSGSIZE;
1619 goto err;
1620 }
1621
1622 has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1623
1624 rt = &device->res[res_type];
1625 xa_lock(&rt->xa);
1626 /*
1627 * FIXME: if skipping ahead turns out to be common, this loop should
1628 * use xas_for_each & xas_pause to optimize, since we can have a lot
1629 * of objects.
1630 */
1631 xa_for_each(&rt->xa, id, res) {
1632 if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
1633 goto next;
1634
1635 if (idx < start || !rdma_restrack_get(res))
1636 goto next;
1637
1638 xa_unlock(&rt->xa);
1639
1640 filled = true;
1641
1642 entry_attr = nla_nest_start_noflag(skb, fe->entry);
1643 if (!entry_attr) {
1644 ret = -EMSGSIZE;
1645 rdma_restrack_put(res);
1646 goto msg_full;
1647 }
1648
1649 ret = fill_func(skb, has_cap_net_admin, res, port);
1650
1651 rdma_restrack_put(res);
1652
1653 if (ret) {
1654 nla_nest_cancel(skb, entry_attr);
1655 if (ret == -EMSGSIZE)
1656 goto msg_full;
1657 if (ret == -EAGAIN)
1658 goto again;
1659 goto res_err;
1660 }
1661 nla_nest_end(skb, entry_attr);
1662 again: xa_lock(&rt->xa);
1663 next: idx++;
1664 }
1665 xa_unlock(&rt->xa);
1666
1667 msg_full:
1668 nla_nest_end(skb, table_attr);
1669 nlmsg_end(skb, nlh);
1670 cb->args[0] = idx;
1671
1672 /*
1673 * No entries were filled: cancel the message and return 0
1674 * to mark the end of the dumpit.
1675 */
1676 if (!filled)
1677 goto err;
1678
1679 ib_device_put(device);
1680 return skb->len;
1681
1682 res_err:
1683 nla_nest_cancel(skb, table_attr);
1684
1685 err:
1686 nlmsg_cancel(skb, nlh);
1687
1688 err_index:
1689 ib_device_put(device);
1690 return ret;
1691 }
1692
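/*
 * Generate the doit/dumpit handler pair for each resource type on top of
 * the common helpers above.
 */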
1693 #define RES_GET_FUNCS(name, type) \
1694 static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
1695 struct netlink_callback *cb) \
1696 { \
1697 return res_get_common_dumpit(skb, cb, type, \
1698 fill_res_##name##_entry); \
1699 } \
1700 static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
1701 struct nlmsghdr *nlh, \
1702 struct netlink_ext_ack *extack) \
1703 { \
1704 return res_get_common_doit(skb, nlh, extack, type, \
1705 fill_res_##name##_entry); \
1706 }
1707
1708 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1709 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
1710 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1711 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1712 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
1713 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1714 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1715 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
1716 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1717 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
1718 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
1719 RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
1720
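/*
 * Registry of link operations used by RDMA_NLDEV_CMD_NEWLINK to create
 * devices by link type, protected by link_ops_rwsem.
 */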
1721 static LIST_HEAD(link_ops);
1722 static DECLARE_RWSEM(link_ops_rwsem);
1723
1724 static const struct rdma_link_ops *link_ops_get(const char *type)
1725 {
1726 const struct rdma_link_ops *ops;
1727
1728 list_for_each_entry(ops, &link_ops, list) {
1729 if (!strcmp(ops->type, type))
1730 goto out;
1731 }
1732 ops = NULL;
1733 out:
1734 return ops;
1735 }
1736
1737 void rdma_link_register(struct rdma_link_ops *ops)
1738 {
1739 down_write(&link_ops_rwsem);
1740 if (WARN_ON_ONCE(link_ops_get(ops->type)))
1741 goto out;
1742 list_add(&ops->list, &link_ops);
1743 out:
1744 up_write(&link_ops_rwsem);
1745 }
1746 EXPORT_SYMBOL(rdma_link_register);
1747
1748 void rdma_link_unregister(struct rdma_link_ops *ops)
1749 {
1750 down_write(&link_ops_rwsem);
1751 list_del(&ops->list);
1752 up_write(&link_ops_rwsem);
1753 }
1754 EXPORT_SYMBOL(rdma_link_unregister);
1755
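/*
 * RDMA_NLDEV_CMD_NEWLINK: create a soft RDMA device (e.g. rxe/siw) on top
 * of a netdev, as driven by "rdma link add NAME type TYPE netdev NETDEV"
 * from iproute2. The link type's module is loaded on demand through the
 * "rdma-link-%s" alias before calling the provider's ->newlink().
 */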
1756 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1757 struct netlink_ext_ack *extack)
1758 {
1759 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1760 char ibdev_name[IB_DEVICE_NAME_MAX];
1761 const struct rdma_link_ops *ops;
1762 char ndev_name[IFNAMSIZ];
1763 struct net_device *ndev;
1764 char type[IFNAMSIZ];
1765 int err;
1766
1767 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1768 nldev_policy, extack);
1769 if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1770 !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1771 return -EINVAL;
1772
1773 nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1774 sizeof(ibdev_name));
1775 if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1776 return -EINVAL;
1777
1778 nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1779 nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1780 sizeof(ndev_name));
1781
1782 ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1783 if (!ndev)
1784 return -ENODEV;
1785
1786 down_read(&link_ops_rwsem);
1787 ops = link_ops_get(type);
1788 #ifdef CONFIG_MODULES
1789 if (!ops) {
1790 up_read(&link_ops_rwsem);
1791 request_module("rdma-link-%s", type);
1792 down_read(&link_ops_rwsem);
1793 ops = link_ops_get(type);
1794 }
1795 #endif
1796 err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1797 up_read(&link_ops_rwsem);
1798 dev_put(ndev);
1799
1800 return err;
1801 }
1802
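/*
 * RDMA_NLDEV_CMD_DELLINK: unregister a device by index ("rdma link delete
 * NAME" in iproute2), allowed only for devices that opt in with
 * IBK_ALLOW_USER_UNREG (typically software/link-created devices).
 */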
1803 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1804 struct netlink_ext_ack *extack)
1805 {
1806 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1807 struct ib_device *device;
1808 u32 index;
1809 int err;
1810
1811 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1812 nldev_policy, extack);
1813 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1814 return -EINVAL;
1815
1816 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1817 device = ib_device_get_by_index(sock_net(skb->sk), index);
1818 if (!device)
1819 return -EINVAL;
1820
1821 if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1822 ib_device_put(device);
1823 return -EINVAL;
1824 }
1825
1826 ib_unregister_device_and_put(device);
1827 return 0;
1828 }
1829
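/*
 * RDMA_NLDEV_CMD_GET_CHARDEV: report the char device (dev_t, ABI version
 * and name) that a client such as "uverbs" exposes for the given device
 * and optional port, resolved through ib_get_client_nl_info().
 */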
1830 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1831 struct netlink_ext_ack *extack)
1832 {
1833 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1834 char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1835 struct ib_client_nl_info data = {};
1836 struct ib_device *ibdev = NULL;
1837 struct sk_buff *msg;
1838 u32 index;
1839 int err;
1840
1841 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1842 NL_VALIDATE_LIBERAL, extack);
1843 if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1844 return -EINVAL;
1845
1846 nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1847 sizeof(client_name));
1848
1849 if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1850 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1851 ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1852 if (!ibdev)
1853 return -EINVAL;
1854
1855 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1856 data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1857 if (!rdma_is_port_valid(ibdev, data.port)) {
1858 err = -EINVAL;
1859 goto out_put;
1860 }
1861 } else {
1862 data.port = -1;
1863 }
1864 } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1865 return -EINVAL;
1866 }
1867
1868 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1869 if (!msg) {
1870 err = -ENOMEM;
1871 goto out_put;
1872 }
1873 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1874 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1875 RDMA_NLDEV_CMD_GET_CHARDEV),
1876 0, 0);
1877 if (!nlh) {
1878 err = -EMSGSIZE;
1879 goto out_nlmsg;
1880 }
1881
1882 data.nl_msg = msg;
1883 err = ib_get_client_nl_info(ibdev, client_name, &data);
1884 if (err)
1885 goto out_nlmsg;
1886
1887 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1888 huge_encode_dev(data.cdev->devt),
1889 RDMA_NLDEV_ATTR_PAD);
1890 if (err)
1891 goto out_data;
1892 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1893 RDMA_NLDEV_ATTR_PAD);
1894 if (err)
1895 goto out_data;
1896 if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1897 dev_name(data.cdev))) {
1898 err = -EMSGSIZE;
1899 goto out_data;
1900 }
1901
1902 nlmsg_end(msg, nlh);
1903 put_device(data.cdev);
1904 if (ibdev)
1905 ib_device_put(ibdev);
1906 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1907
1908 out_data:
1909 put_device(data.cdev);
1910 out_nlmsg:
1911 nlmsg_free(msg);
1912 out_put:
1913 if (ibdev)
1914 ib_device_put(ibdev);
1915 return err;
1916 }
1917
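/*
 * RDMA_NLDEV_CMD_SYS_GET: report global parameters - the netns mode
 * (shared vs. exclusive), whether unprivileged users may specify a
 * controlled QKEY, monitor support and copy-on-fork support.
 */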
1918 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1919 struct netlink_ext_ack *extack)
1920 {
1921 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1922 struct sk_buff *msg;
1923 int err;
1924
1925 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1926 nldev_policy, NL_VALIDATE_LIBERAL, extack);
1927 if (err)
1928 return err;
1929
1930 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1931 if (!msg)
1932 return -ENOMEM;
1933
1934 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1935 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1936 RDMA_NLDEV_CMD_SYS_GET),
1937 0, 0);
1938 if (!nlh) {
1939 nlmsg_free(msg);
1940 return -EMSGSIZE;
1941 }
1942
1943 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1944 (u8)ib_devices_shared_netns);
1945 if (err) {
1946 nlmsg_free(msg);
1947 return err;
1948 }
1949
1950 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
1951 (u8)privileged_qkey);
1952 if (err) {
1953 nlmsg_free(msg);
1954 return err;
1955 }
1956
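/* Advertise that RDMA monitor netlink notifications are supported. */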
1957 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
1958 if (err) {
1959 nlmsg_free(msg);
1960 return err;
1961 }
1962 /*
1963 * Copy-on-fork is supported.
1964 * See commits:
1965 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1966 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1967 * for more details. Don't backport this without them.
1968 *
1969 * Return value ignored on purpose, assume copy-on-fork is not
1970 * supported in case of failure.
1971 */
1972 nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
1973
1974 nlmsg_end(msg, nlh);
1975 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1976 }
1977
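/*
 * RDMA_NLDEV_CMD_SYS_SET helpers: toggle whether RDMA devices are shared
 * across net namespaces and whether unprivileged users may specify a
 * controlled QKEY. Both accept only the values 0 and 1.
 */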
1978 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
1979 {
1980 u8 enable;
1981 int err;
1982
1983 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1984 /* Only 0 and 1 are supported */
1985 if (enable > 1)
1986 return -EINVAL;
1987
1988 err = rdma_compatdev_set(enable);
1989 return err;
1990 }
1991
1992 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
1993 {
1994 u8 enable;
1995
1996 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
1997 /* Only 0 and 1 are supported */
1998 if (enable > 1)
1999 return -EINVAL;
2000
2001 privileged_qkey = enable;
2002 return 0;
2003 }
2004
2005 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2006 struct netlink_ext_ack *extack)
2007 {
2008 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2009 int err;
2010
2011 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2012 nldev_policy, extack);
2013 if (err)
2014 return -EINVAL;
2015
2016 if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
2017 return nldev_set_sys_set_netns_doit(tb);
2018
2019 if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
2020 return nldev_set_sys_set_pqkey_doit(tb);
2021
2022 return -EINVAL;
2023 }
2024
2025
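/*
 * Switch the per-port counter mode. In auto mode QPs are bound to counters
 * automatically according to the given criteria mask (handled by
 * rdma_counter_set_auto_mode()); in manual mode the given LQPN is bound
 * either to an existing counter ID or to a newly allocated one, and the
 * resulting binding is echoed back in the reply message.
 */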
2026 static int nldev_stat_set_mode_doit(struct sk_buff *msg,
2027 struct netlink_ext_ack *extack,
2028 struct nlattr *tb[],
2029 struct ib_device *device, u32 port)
2030 {
2031 u32 mode, mask = 0, qpn, cntn = 0;
2032 bool opcnt = false;
2033 int ret;
2034
2035 /* Currently counters are supported only for QPs */
2036 if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2037 nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2038 return -EINVAL;
2039
2040 if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
2041 opcnt = !!nla_get_u8(
2042 tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
2043
2044 mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
2045 if (mode == RDMA_COUNTER_MODE_AUTO) {
2046 if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
2047 mask = nla_get_u32(
2048 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
2049 return rdma_counter_set_auto_mode(device, port, mask, opcnt,
2050 extack);
2051 }
2052
2053 if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
2054 return -EINVAL;
2055
2056 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2057 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
2058 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2059 ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
2060 if (ret)
2061 return ret;
2062 } else {
2063 ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
2064 if (ret)
2065 return ret;
2066 }
2067
2068 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2069 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2070 ret = -EMSGSIZE;
2071 goto err_fill;
2072 }
2073
2074 return 0;
2075
2076 err_fill:
2077 rdma_counter_unbind_qpn(device, port, qpn, cntn);
2078 return ret;
2079 }
2080
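/*
 * Enable/disable optional (dynamic) hardware counters: build a bitmap of
 * the requested counter indices from the nested STAT_HWCOUNTERS attribute,
 * then bring every optional counter in line with that bitmap via
 * rdma_counter_modify().
 */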
2081 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
2082 struct ib_device *device,
2083 u32 port)
2084 {
2085 struct rdma_hw_stats *stats;
2086 struct nlattr *entry_attr;
2087 unsigned long *target;
2088 int rem, i, ret = 0;
2089 u32 index;
2090
2091 stats = ib_get_hw_stats_port(device, port);
2092 if (!stats)
2093 return -EINVAL;
2094
2095 target = kcalloc(BITS_TO_LONGS(stats->num_counters),
2096 sizeof(*stats->is_disabled), GFP_KERNEL);
2097 if (!target)
2098 return -ENOMEM;
2099
2100 nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
2101 rem) {
2102 index = nla_get_u32(entry_attr);
2103 if ((index >= stats->num_counters) ||
2104 !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
2105 ret = -EINVAL;
2106 goto out;
2107 }
2108
2109 set_bit(index, target);
2110 }
2111
2112 for (i = 0; i < stats->num_counters; i++) {
2113 if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
2114 continue;
2115
2116 ret = rdma_counter_modify(device, port, i, test_bit(i, target));
2117 if (ret)
2118 goto out;
2119 }
2120
2121 out:
2122 kfree(target);
2123 return ret;
2124 }
2125
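/*
 * RDMA_NLDEV_CMD_STAT_SET: top-level handler that validates the device and
 * port, then applies a counter-mode change and/or a dynamic-counter
 * selection. At least one of STAT_MODE or STAT_HWCOUNTERS must be present.
 */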
2126 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2127 struct netlink_ext_ack *extack)
2128 {
2129 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2130 struct ib_device *device;
2131 struct sk_buff *msg;
2132 u32 index, port;
2133 int ret;
2134
2135 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2136 extack);
2137 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2138 !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2139 return -EINVAL;
2140
2141 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2142 device = ib_device_get_by_index(sock_net(skb->sk), index);
2143 if (!device)
2144 return -EINVAL;
2145
2146 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2147 if (!rdma_is_port_valid(device, port)) {
2148 ret = -EINVAL;
2149 goto err_put_device;
2150 }
2151
2152 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
2153 !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2154 ret = -EINVAL;
2155 goto err_put_device;
2156 }
2157
2158 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2159 if (!msg) {
2160 ret = -ENOMEM;
2161 goto err_put_device;
2162 }
2163 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2164 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2165 RDMA_NLDEV_CMD_STAT_SET),
2166 0, 0);
2167 if (!nlh || fill_nldev_handle(msg, device) ||
2168 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2169 ret = -EMSGSIZE;
2170 goto err_free_msg;
2171 }
2172
2173 if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
2174 ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
2175 if (ret)
2176 goto err_free_msg;
2177 }
2178
2179 if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2180 ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
2181 if (ret)
2182 goto err_free_msg;
2183 }
2184
2185 nlmsg_end(msg, nlh);
2186 ib_device_put(device);
2187 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2188
2189 err_free_msg:
2190 nlmsg_free(msg);
2191 err_put_device:
2192 ib_device_put(device);
2193 return ret;
2194 }
2195
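/*
 * RDMA_NLDEV_CMD_STAT_DEL: manually unbind a QP (by LQPN) from the given
 * counter ID and confirm the unbinding in the reply.
 */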
2196 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2197 struct netlink_ext_ack *extack)
2198 {
2199 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2200 struct ib_device *device;
2201 struct sk_buff *msg;
2202 u32 index, port, qpn, cntn;
2203 int ret;
2204
2205 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2206 nldev_policy, extack);
2207 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2208 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
2209 !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
2210 !tb[RDMA_NLDEV_ATTR_RES_LQPN])
2211 return -EINVAL;
2212
2213 if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2214 return -EINVAL;
2215
2216 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2217 device = ib_device_get_by_index(sock_net(skb->sk), index);
2218 if (!device)
2219 return -EINVAL;
2220
2221 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2222 if (!rdma_is_port_valid(device, port)) {
2223 ret = -EINVAL;
2224 goto err;
2225 }
2226
2227 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2228 if (!msg) {
2229 ret = -ENOMEM;
2230 goto err;
2231 }
2232 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2233 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2234 RDMA_NLDEV_CMD_STAT_SET),
2235 0, 0);
2236 if (!nlh) {
2237 ret = -EMSGSIZE;
2238 goto err_fill;
2239 }
2240
2241 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2242 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2243 if (fill_nldev_handle(msg, device) ||
2244 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2245 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2246 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2247 ret = -EMSGSIZE;
2248 goto err_fill;
2249 }
2250
2251 ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2252 if (ret)
2253 goto err_fill;
2254
2255 nlmsg_end(msg, nlh);
2256 ib_device_put(device);
2257 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2258
2259 err_fill:
2260 nlmsg_free(msg);
2261 err:
2262 ib_device_put(device);
2263 return ret;
2264 }
2265
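/*
 * Dump the port's default hardware counters: refresh them through
 * ->get_hw_stats() and emit one name/value entry per counter that is not
 * disabled, including any per-QP-counter contribution from
 * rdma_counter_get_hwstat_value().
 */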
2266 static int stat_get_doit_default_counter(struct sk_buff *skb,
2267 struct nlmsghdr *nlh,
2268 struct netlink_ext_ack *extack,
2269 struct nlattr *tb[])
2270 {
2271 struct rdma_hw_stats *stats;
2272 struct nlattr *table_attr;
2273 struct ib_device *device;
2274 int ret, num_cnts, i;
2275 struct sk_buff *msg;
2276 u32 index, port;
2277 u64 v;
2278
2279 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2280 return -EINVAL;
2281
2282 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2283 device = ib_device_get_by_index(sock_net(skb->sk), index);
2284 if (!device)
2285 return -EINVAL;
2286
2287 if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
2288 ret = -EINVAL;
2289 goto err;
2290 }
2291
2292 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2293 stats = ib_get_hw_stats_port(device, port);
2294 if (!stats) {
2295 ret = -EINVAL;
2296 goto err;
2297 }
2298
2299 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2300 if (!msg) {
2301 ret = -ENOMEM;
2302 goto err;
2303 }
2304
2305 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2306 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2307 RDMA_NLDEV_CMD_STAT_GET),
2308 0, 0);
2309
2310 if (!nlh || fill_nldev_handle(msg, device) ||
2311 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2312 ret = -EMSGSIZE;
2313 goto err_msg;
2314 }
2315
2316 mutex_lock(&stats->lock);
2317
2318 num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
2319 if (num_cnts < 0) {
2320 ret = -EINVAL;
2321 goto err_stats;
2322 }
2323
2324 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2325 if (!table_attr) {
2326 ret = -EMSGSIZE;
2327 goto err_stats;
2328 }
2329 for (i = 0; i < num_cnts; i++) {
2330 if (test_bit(i, stats->is_disabled))
2331 continue;
2332
2333 v = stats->value[i] +
2334 rdma_counter_get_hwstat_value(device, port, i);
2335 if (rdma_nl_stat_hwcounter_entry(msg,
2336 stats->descs[i].name, v)) {
2337 ret = -EMSGSIZE;
2338 goto err_table;
2339 }
2340 }
2341 nla_nest_end(msg, table_attr);
2342
2343 mutex_unlock(&stats->lock);
2344 nlmsg_end(msg, nlh);
2345 ib_device_put(device);
2346 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2347
2348 err_table:
2349 nla_nest_cancel(msg, table_attr);
2350 err_stats:
2351 mutex_unlock(&stats->lock);
2352 err_msg:
2353 nlmsg_free(msg);
2354 err:
2355 ib_device_put(device);
2356 return ret;
2357 }
2358
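/*
 * Report the QP counter configuration of a port: the current counter mode
 * and, for auto mode, the criteria mask and whether per-op counters are
 * enabled. If a specific counter ID is given, fall back to dumping that
 * counter's resources instead.
 */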
2359 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2360 struct netlink_ext_ack *extack, struct nlattr *tb[])
2361
2362 {
2363 static enum rdma_nl_counter_mode mode;
2364 static enum rdma_nl_counter_mask mask;
2365 struct ib_device *device;
2366 struct sk_buff *msg;
2367 u32 index, port;
2368 bool opcnt;
2369 int ret;
2370
2371 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2372 return nldev_res_get_counter_doit(skb, nlh, extack);
2373
2374 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2375 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2376 return -EINVAL;
2377
2378 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2379 device = ib_device_get_by_index(sock_net(skb->sk), index);
2380 if (!device)
2381 return -EINVAL;
2382
2383 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2384 if (!rdma_is_port_valid(device, port)) {
2385 ret = -EINVAL;
2386 goto err;
2387 }
2388
2389 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2390 if (!msg) {
2391 ret = -ENOMEM;
2392 goto err;
2393 }
2394
2395 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2396 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2397 RDMA_NLDEV_CMD_STAT_GET),
2398 0, 0);
2399 if (!nlh) {
2400 ret = -EMSGSIZE;
2401 goto err_msg;
2402 }
2403
2404 ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
2405 if (ret)
2406 goto err_msg;
2407
2408 if (fill_nldev_handle(msg, device) ||
2409 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2410 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2411 ret = -EMSGSIZE;
2412 goto err_msg;
2413 }
2414
2415 if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2416 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2417 ret = -EMSGSIZE;
2418 goto err_msg;
2419 }
2420
2421 if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2422 nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
2423 ret = -EMSGSIZE;
2424 goto err_msg;
2425 }
2426
2427 nlmsg_end(msg, nlh);
2428 ib_device_put(device);
2429 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2430
2431 err_msg:
2432 nlmsg_free(msg);
2433 err:
2434 ib_device_put(device);
2435 return ret;
2436 }
2437
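/*
 * RDMA_NLDEV_CMD_STAT_GET: dispatch on the requested resource type - port
 * default counters when no STAT_RES is given, otherwise QP or MR
 * statistics.
 */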
2438 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2439 struct netlink_ext_ack *extack)
2440 {
2441 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2442 int ret;
2443
2444 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2445 nldev_policy, NL_VALIDATE_LIBERAL, extack);
2446 if (ret)
2447 return -EINVAL;
2448
2449 if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2450 return stat_get_doit_default_counter(skb, nlh, extack, tb);
2451
2452 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2453 case RDMA_NLDEV_ATTR_RES_QP:
2454 ret = stat_get_doit_qp(skb, nlh, extack, tb);
2455 break;
2456 case RDMA_NLDEV_ATTR_RES_MR:
2457 ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2458 fill_stat_mr_entry);
2459 break;
2460 default:
2461 ret = -EINVAL;
2462 break;
2463 }
2464
2465 return ret;
2466 }
2467
2468 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2469 struct netlink_callback *cb)
2470 {
2471 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2472 int ret;
2473
2474 ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2475 nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2476 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2477 return -EINVAL;
2478
2479 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2480 case RDMA_NLDEV_ATTR_RES_QP:
2481 ret = nldev_res_get_counter_dumpit(skb, cb);
2482 break;
2483 case RDMA_NLDEV_ATTR_RES_MR:
2484 ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2485 fill_stat_mr_entry);
2486 break;
2487 default:
2488 ret = -EINVAL;
2489 break;
2490 }
2491
2492 return ret;
2493 }
2494
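/*
 * RDMA_NLDEV_CMD_STAT_GET_STATUS: list every hardware counter of a port
 * with its name and index; optional counters additionally report whether
 * they are currently enabled.
 */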
2495 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
2496 struct nlmsghdr *nlh,
2497 struct netlink_ext_ack *extack)
2498 {
2499 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
2500 struct rdma_hw_stats *stats;
2501 struct ib_device *device;
2502 struct sk_buff *msg;
2503 u32 devid, port;
2504 int ret, i;
2505
2506 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2507 nldev_policy, NL_VALIDATE_LIBERAL, extack);
2508 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2509 !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2510 return -EINVAL;
2511
2512 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2513 device = ib_device_get_by_index(sock_net(skb->sk), devid);
2514 if (!device)
2515 return -EINVAL;
2516
2517 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2518 if (!rdma_is_port_valid(device, port)) {
2519 ret = -EINVAL;
2520 goto err;
2521 }
2522
2523 stats = ib_get_hw_stats_port(device, port);
2524 if (!stats) {
2525 ret = -EINVAL;
2526 goto err;
2527 }
2528
2529 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2530 if (!msg) {
2531 ret = -ENOMEM;
2532 goto err;
2533 }
2534
2535 nlh = nlmsg_put(
2536 msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2537 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
2538 0, 0);
2539
2540 ret = -EMSGSIZE;
2541 if (!nlh || fill_nldev_handle(msg, device) ||
2542 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
2543 goto err_msg;
2544
2545 table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2546 if (!table)
2547 goto err_msg;
2548
2549 mutex_lock(&stats->lock);
2550 for (i = 0; i < stats->num_counters; i++) {
2551 entry = nla_nest_start(msg,
2552 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
2553 if (!entry)
2554 goto err_msg_table;
2555
2556 if (nla_put_string(msg,
2557 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
2558 stats->descs[i].name) ||
2559 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
2560 goto err_msg_entry;
2561
2562 if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
2563 (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
2564 !test_bit(i, stats->is_disabled))))
2565 goto err_msg_entry;
2566
2567 nla_nest_end(msg, entry);
2568 }
2569 mutex_unlock(&stats->lock);
2570
2571 nla_nest_end(msg, table);
2572 nlmsg_end(msg, nlh);
2573 ib_device_put(device);
2574 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2575
2576 err_msg_entry:
2577 nla_nest_cancel(msg, entry);
2578 err_msg_table:
2579 mutex_unlock(&stats->lock);
2580 nla_nest_cancel(msg, table);
2581 err_msg:
2582 nlmsg_free(msg);
2583 err:
2584 ib_device_put(device);
2585 return ret;
2586 }
2587
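/*
 * RDMA_NLDEV_CMD_NEWDEV / RDMA_NLDEV_CMD_DELDEV: create or remove a
 * sub-device (for example an SMI device) under an existing parent device
 * via ib_add_sub_device() / ib_del_sub_device_and_put().
 */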
2588 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
2589 struct netlink_ext_ack *extack)
2590 {
2591 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2592 enum rdma_nl_dev_type type;
2593 struct ib_device *parent;
2594 char name[IFNAMSIZ] = {};
2595 u32 parentid;
2596 int ret;
2597
2598 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2599 nldev_policy, extack);
2600 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2601 !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
2602 return -EINVAL;
2603
2604 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
2605 type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
2606 parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2607 parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
2608 if (!parent)
2609 return -EINVAL;
2610
2611 ret = ib_add_sub_device(parent, type, name);
2612 ib_device_put(parent);
2613
2614 return ret;
2615 }
2616
2617 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
2618 struct netlink_ext_ack *extack)
2619 {
2620 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2621 struct ib_device *device;
2622 u32 devid;
2623 int ret;
2624
2625 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2626 nldev_policy, extack);
2627 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2628 return -EINVAL;
2629
2630 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2631 device = ib_device_get_by_index(sock_net(skb->sk), devid);
2632 if (!device)
2633 return -EINVAL;
2634
2635 return ib_del_sub_device_and_put(device);
2636 }
2637
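/*
 * Dispatch table mapping RDMA_NLDEV_CMD_* operations to their doit/dumpit
 * handlers; state-changing and raw-dump operations are restricted to
 * CAP_NET_ADMIN via RDMA_NL_ADMIN_PERM.
 */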
2638 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2639 [RDMA_NLDEV_CMD_GET] = {
2640 .doit = nldev_get_doit,
2641 .dump = nldev_get_dumpit,
2642 },
2643 [RDMA_NLDEV_CMD_GET_CHARDEV] = {
2644 .doit = nldev_get_chardev,
2645 },
2646 [RDMA_NLDEV_CMD_SET] = {
2647 .doit = nldev_set_doit,
2648 .flags = RDMA_NL_ADMIN_PERM,
2649 },
2650 [RDMA_NLDEV_CMD_NEWLINK] = {
2651 .doit = nldev_newlink,
2652 .flags = RDMA_NL_ADMIN_PERM,
2653 },
2654 [RDMA_NLDEV_CMD_DELLINK] = {
2655 .doit = nldev_dellink,
2656 .flags = RDMA_NL_ADMIN_PERM,
2657 },
2658 [RDMA_NLDEV_CMD_PORT_GET] = {
2659 .doit = nldev_port_get_doit,
2660 .dump = nldev_port_get_dumpit,
2661 },
2662 [RDMA_NLDEV_CMD_RES_GET] = {
2663 .doit = nldev_res_get_doit,
2664 .dump = nldev_res_get_dumpit,
2665 },
2666 [RDMA_NLDEV_CMD_RES_QP_GET] = {
2667 .doit = nldev_res_get_qp_doit,
2668 .dump = nldev_res_get_qp_dumpit,
2669 },
2670 [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2671 .doit = nldev_res_get_cm_id_doit,
2672 .dump = nldev_res_get_cm_id_dumpit,
2673 },
2674 [RDMA_NLDEV_CMD_RES_CQ_GET] = {
2675 .doit = nldev_res_get_cq_doit,
2676 .dump = nldev_res_get_cq_dumpit,
2677 },
2678 [RDMA_NLDEV_CMD_RES_MR_GET] = {
2679 .doit = nldev_res_get_mr_doit,
2680 .dump = nldev_res_get_mr_dumpit,
2681 },
2682 [RDMA_NLDEV_CMD_RES_PD_GET] = {
2683 .doit = nldev_res_get_pd_doit,
2684 .dump = nldev_res_get_pd_dumpit,
2685 },
2686 [RDMA_NLDEV_CMD_RES_CTX_GET] = {
2687 .doit = nldev_res_get_ctx_doit,
2688 .dump = nldev_res_get_ctx_dumpit,
2689 },
2690 [RDMA_NLDEV_CMD_RES_SRQ_GET] = {
2691 .doit = nldev_res_get_srq_doit,
2692 .dump = nldev_res_get_srq_dumpit,
2693 },
2694 [RDMA_NLDEV_CMD_SYS_GET] = {
2695 .doit = nldev_sys_get_doit,
2696 },
2697 [RDMA_NLDEV_CMD_SYS_SET] = {
2698 .doit = nldev_set_sys_set_doit,
2699 .flags = RDMA_NL_ADMIN_PERM,
2700 },
2701 [RDMA_NLDEV_CMD_STAT_SET] = {
2702 .doit = nldev_stat_set_doit,
2703 .flags = RDMA_NL_ADMIN_PERM,
2704 },
2705 [RDMA_NLDEV_CMD_STAT_GET] = {
2706 .doit = nldev_stat_get_doit,
2707 .dump = nldev_stat_get_dumpit,
2708 },
2709 [RDMA_NLDEV_CMD_STAT_DEL] = {
2710 .doit = nldev_stat_del_doit,
2711 .flags = RDMA_NL_ADMIN_PERM,
2712 },
2713 [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
2714 .doit = nldev_res_get_qp_raw_doit,
2715 .dump = nldev_res_get_qp_raw_dumpit,
2716 .flags = RDMA_NL_ADMIN_PERM,
2717 },
2718 [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
2719 .doit = nldev_res_get_cq_raw_doit,
2720 .dump = nldev_res_get_cq_raw_dumpit,
2721 .flags = RDMA_NL_ADMIN_PERM,
2722 },
2723 [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
2724 .doit = nldev_res_get_mr_raw_doit,
2725 .dump = nldev_res_get_mr_raw_dumpit,
2726 .flags = RDMA_NL_ADMIN_PERM,
2727 },
2728 [RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
2729 .doit = nldev_res_get_srq_raw_doit,
2730 .dump = nldev_res_get_srq_raw_dumpit,
2731 .flags = RDMA_NL_ADMIN_PERM,
2732 },
2733 [RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
2734 .doit = nldev_stat_get_counter_status_doit,
2735 },
2736 [RDMA_NLDEV_CMD_NEWDEV] = {
2737 .doit = nldev_newdev,
2738 .flags = RDMA_NL_ADMIN_PERM,
2739 },
2740 [RDMA_NLDEV_CMD_DELDEV] = {
2741 .doit = nldev_deldev,
2742 .flags = RDMA_NL_ADMIN_PERM,
2743 },
2744 };
2745
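/*
 * Helpers for RDMA monitor notifications: fill in netdev identification
 * for rename events, or device/port plus (optional) netdev identification
 * for attach/detach events, skipping netdevs outside the target netns.
 */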
2746 static int fill_mon_netdev_rename(struct sk_buff *msg,
2747 struct ib_device *device, u32 port,
2748 const struct net *net)
2749 {
2750 struct net_device *netdev = ib_device_get_netdev(device, port);
2751 int ret = 0;
2752
2753 if (!netdev || !net_eq(dev_net(netdev), net))
2754 goto out;
2755
2756 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
2757 if (ret)
2758 goto out;
2759 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
2760 out:
2761 dev_put(netdev);
2762 return ret;
2763 }
2764
2765 static int fill_mon_netdev_association(struct sk_buff *msg,
2766 struct ib_device *device, u32 port,
2767 const struct net *net)
2768 {
2769 struct net_device *netdev = ib_device_get_netdev(device, port);
2770 int ret = 0;
2771
2772 if (netdev && !net_eq(dev_net(netdev), net))
2773 goto out;
2774
2775 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
2776 if (ret)
2777 goto out;
2778
2779 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
2780 dev_name(&device->dev));
2781 if (ret)
2782 goto out;
2783
2784 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
2785 if (ret)
2786 goto out;
2787
2788 if (netdev) {
2789 ret = nla_put_u32(msg,
2790 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
2791 if (ret)
2792 goto out;
2793
2794 ret = nla_put_string(msg,
2795 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
2796 }
2797
2798 out:
2799 dev_put(netdev);
2800 return ret;
2801 }
2802
2803 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
2804 enum rdma_nl_notify_event_type type)
2805 {
2806 struct net_device *netdev;
2807
2808 switch (type) {
2809 case RDMA_REGISTER_EVENT:
2810 dev_warn_ratelimited(&device->dev,
2811 "Failed to send RDMA monitor register device event\n");
2812 break;
2813 case RDMA_UNREGISTER_EVENT:
2814 dev_warn_ratelimited(&device->dev,
2815 "Failed to send RDMA monitor unregister device event\n");
2816 break;
2817 case RDMA_NETDEV_ATTACH_EVENT:
2818 netdev = ib_device_get_netdev(device, port_num);
2819 dev_warn_ratelimited(&device->dev,
2820 "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
2821 port_num, netdev->ifindex);
2822 dev_put(netdev);
2823 break;
2824 case RDMA_NETDEV_DETACH_EVENT:
2825 dev_warn_ratelimited(&device->dev,
2826 "Failed to send RDMA monitor netdev detach event: port %d\n",
2827 port_num);
2828 break;
2829 case RDMA_RENAME_EVENT:
2830 dev_warn_ratelimited(&device->dev,
2831 "Failed to send RDMA monitor rename device event\n");
2832 break;
2833
2834 case RDMA_NETDEV_RENAME_EVENT:
2835 netdev = ib_device_get_netdev(device, port_num);
2836 dev_warn_ratelimited(&device->dev,
2837 "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
2838 port_num, netdev->ifindex);
2839 dev_put(netdev);
2840 break;
2841 default:
2842 break;
2843 }
2844 }
2845
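/*
 * Build an RDMA_NLDEV_CMD_MONITOR message for the given event and
 * multicast it to the RDMA_NL_GROUP_NOTIFY group; -ESRCH (no listeners)
 * is not treated as an error.
 */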
2846 int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
2847 enum rdma_nl_notify_event_type type)
2848 {
2849 struct sk_buff *skb;
2850 int ret = -EMSGSIZE;
2851 struct net *net;
2852 void *nlh;
2853
2854 net = read_pnet(&device->coredev.rdma_net);
2855 if (!net)
2856 return -EINVAL;
2857
2858 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2859 if (!skb)
2860 return -ENOMEM;
2861 nlh = nlmsg_put(skb, 0, 0,
2862 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
2863 0, 0);
2864 if (!nlh)
2865 goto err_free;
2866
2867 switch (type) {
2868 case RDMA_REGISTER_EVENT:
2869 case RDMA_UNREGISTER_EVENT:
2870 case RDMA_RENAME_EVENT:
2871 ret = fill_nldev_handle(skb, device);
2872 if (ret)
2873 goto err_free;
2874 break;
2875 case RDMA_NETDEV_ATTACH_EVENT:
2876 case RDMA_NETDEV_DETACH_EVENT:
2877 ret = fill_mon_netdev_association(skb, device, port_num, net);
2878 if (ret)
2879 goto err_free;
2880 break;
2881 case RDMA_NETDEV_RENAME_EVENT:
2882 ret = fill_mon_netdev_rename(skb, device, port_num, net);
2883 if (ret)
2884 goto err_free;
2885 break;
2886 default:
2887 break;
2888 }
2889
2890 ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
2891 if (ret)
2892 goto err_free;
2893
2894 nlmsg_end(skb, nlh);
2895 ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
2896 if (ret && ret != -ESRCH) {
2897 skb = NULL; /* skb is freed in the netlink send-op handling */
2898 goto err_free;
2899 }
2900 return 0;
2901
2902 err_free:
2903 rdma_nl_notify_err_msg(device, port_num, type);
2904 nlmsg_free(skb);
2905 return ret;
2906 }
2907
2908 void __init nldev_init(void)
2909 {
2910 rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2911 }
2912
2913 void nldev_exit(void)
2914 {
2915 rdma_nl_unregister(RDMA_NL_NLDEV);
2916 }
2917
2918 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2919