1 /*
2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. Neither the names of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * Alternatively, this software may be distributed under the terms of the
17 * GNU General Public License ("GPL") version 2 as published by the Free
18 * Software Foundation.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 #include "uverbs.h"
45
46 /*
47 * This determines whether a non-privileged user is allowed to specify a
48 * controlled QKEY or not, when true non-privileged user is allowed to specify
49 * a controlled QKEY.
50 */
51 static bool privileged_qkey;
52
53 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
54 struct rdma_restrack_entry*, uint32_t);
55
56 /*
57 * Sort array elements by the netlink attribute name
58 */
59 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
60 [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
61 [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
62 [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
63 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
64 [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
65 .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
66 [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
67 [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
68 [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
69 .len = IB_DEVICE_NAME_MAX },
70 [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
71 [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
72 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
73 [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
74 [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
75 [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
76 [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
77 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
78 [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
79 [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
80 [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
81 [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
82 [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
83 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
84 [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
85 [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
86 .len = IFNAMSIZ },
87 [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
88 [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
89 [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
90 .len = IFNAMSIZ },
91 [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
92 [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
93 [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
94 [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
95 [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
96 [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
97 [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
98 [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
99 [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
100 [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
101 [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
102 [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
103 [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
104 [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
105 [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
106 .len = sizeof(struct __kernel_sockaddr_storage) },
107 [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
108 [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
109 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
110 [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
111 [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
112 [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
113 [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
114 [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
115 [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
116 [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
117 [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
118 [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
119 [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
120 [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
121 [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
122 [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
123 [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
124 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
125 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
126 [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
127 [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
128 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
129 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
130 [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
131 [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
132 .len = sizeof(struct __kernel_sockaddr_storage) },
133 [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
134 [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
135 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
136 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
137 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
138 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
139 [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
140 [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING,
141 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
142 [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
143 [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
144 [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
145 [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
146 [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
147 [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
148 [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
149 [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
150 [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
151 [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
152 [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
153 [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
154 [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
155 [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
156 [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
157 [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
158 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
159 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
160 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
161 [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
162 [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
163 [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
164 [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
165 [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
166 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
167 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
168 [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
169 [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 },
170 [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
171 [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
172 [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
173 [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
174 [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
175 };
176
put_driver_name_print_type(struct sk_buff * msg,const char * name,enum rdma_nldev_print_type print_type)177 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
178 enum rdma_nldev_print_type print_type)
179 {
180 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
181 return -EMSGSIZE;
182 if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
183 nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
184 return -EMSGSIZE;
185
186 return 0;
187 }
188
/* Emit a named u32 driver attribute with the given print-type hint. */
static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u32 value)
{
	int ret;

	ret = put_driver_name_print_type(msg, name, print_type);
	if (!ret)
		ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value);

	return ret ? -EMSGSIZE : 0;
}
200
/* Emit a named u64 driver attribute with the given print-type hint. */
static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u64 value)
{
	int ret;

	ret = put_driver_name_print_type(msg, name, print_type);
	if (!ret)
		ret = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64,
					value, RDMA_NLDEV_ATTR_PAD);

	return ret ? -EMSGSIZE : 0;
}
213
rdma_nl_put_driver_string(struct sk_buff * msg,const char * name,const char * str)214 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
215 const char *str)
216 {
217 if (put_driver_name_print_type(msg, name,
218 RDMA_NLDEV_PRINT_TYPE_UNSPEC))
219 return -EMSGSIZE;
220 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
221 return -EMSGSIZE;
222
223 return 0;
224 }
225 EXPORT_SYMBOL(rdma_nl_put_driver_string);
226
/* Emit a named u32 driver attribute with default (decimal) rendering. */
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);
233
/* Emit a named u32 driver attribute hinted for hexadecimal rendering. */
int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
241
/* Emit a named u64 driver attribute with default (decimal) rendering. */
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);
248
/* Emit a named u64 driver attribute hinted for hexadecimal rendering. */
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
255
/* Report the current privileged-QKEY policy (see privileged_qkey above). */
bool rdma_nl_get_privileged_qkey(void)
{
	return privileged_qkey;
}
EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
261
fill_nldev_handle(struct sk_buff * msg,struct ib_device * device)262 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
263 {
264 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
265 return -EMSGSIZE;
266 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
267 dev_name(&device->dev)))
268 return -EMSGSIZE;
269
270 return 0;
271 }
272
/*
 * Fill @msg with the device-wide attributes of @device: handle, port count,
 * capability flags, FW version, GUIDs, node/device type and protocol.
 *
 * Returns 0 on success, -EMSGSIZE when @msg ran out of room.
 */
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];
	int ret = 0;
	u32 port;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	/* NOTE(review): rdma_end_port() is used here as the port count — confirm. */
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	/* Cap flags are sent as a single u64; guard against a size change. */
	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	/* GUIDs are wire big-endian; convert for the u64 attributes. */
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
		return -EMSGSIZE;

	/* Type and parent are optional; only emitted when set. */
	if (device->type &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
		return -EMSGSIZE;

	if (device->parent &&
	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
			   dev_name(&device->parent->dev)))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
		       device->name_assign_type))
		return -EMSGSIZE;

	/*
	 * The protocol is determined from the first port only.  An mlx4
	 * device can in principle have two different link types on one IB
	 * device; that situation is considered better avoided in the future.
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	return ret;
}
341
/*
 * Fill @msg with the attributes of one port of @device: handle, port index,
 * queried port attributes, and — if the port's netdev lives in @net — the
 * associated netdev index and name.
 *
 * Returns 0 on success, an ib_query_port() error, or -EMSGSIZE-like netlink
 * put errors.
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;
	u64 cap_flags = 0;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	/* LID/SM-LID/LMC and subnet prefix only make sense for IB ports. */
	if (rdma_protocol_ib(device, port)) {
		/* Both cap-flag words are packed into one u64 attribute. */
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
				sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	/*
	 * Report the netdev only when it belongs to the requester's network
	 * namespace.  When the condition is false we fall straight through
	 * to "out" with ret == 0 from the successful ib_query_port() above.
	 */
	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	/* dev_put() is assumed to tolerate a NULL netdev here — confirm. */
	dev_put(netdev);
	return ret;
}
398
/*
 * Append one resource-summary entry (name, current count) as a nested
 * attribute; the nest is cancelled on any failure.
 */
static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *entry;

	entry = nla_nest_start_noflag(msg,
				      RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!entry)
		return -EMSGSIZE;

	if (!nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME,
			    name) &&
	    !nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR,
			       curr, RDMA_NLDEV_ATTR_PAD)) {
		nla_nest_end(msg, entry);
		return 0;
	}

	nla_nest_cancel(msg, entry);
	return -EMSGSIZE;
}
422
/*
 * Fill @msg with a per-type summary of @device's tracked resources
 * (one nested entry per restrack type with its current count).
 */
static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
			 bool show_details)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
		[RDMA_RESTRACK_CTX] = "ctx",
		[RDMA_RESTRACK_SRQ] = "srq",
	};
	struct nlattr *summary;
	int i;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	summary = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!summary)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		int ret;

		/* Unnamed slots are types we do not summarize. */
		if (!names[i])
			continue;

		ret = fill_res_info_entry(msg, names[i],
					  rdma_restrack_count(device, i,
							      show_details));
		if (ret) {
			nla_nest_cancel(msg, summary);
			return ret;
		}
	}

	nla_nest_end(msg, summary);
	return 0;
}
462
fill_res_name_pid(struct sk_buff * msg,struct rdma_restrack_entry * res)463 static int fill_res_name_pid(struct sk_buff *msg,
464 struct rdma_restrack_entry *res)
465 {
466 int err = 0;
467
468 /*
469 * For user resources, user is should read /proc/PID/comm to get the
470 * name of the task file.
471 */
472 if (rdma_is_kernel_res(res)) {
473 err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
474 res->kern_name);
475 } else {
476 pid_t pid;
477
478 pid = task_pid_vnr(res->task);
479 /*
480 * Task is dead and in zombie state.
481 * There is no need to print PID anymore.
482 */
483 if (pid)
484 /*
485 * This part is racy, task can be killed and PID will
486 * be zero right here but it is ok, next query won't
487 * return PID. We don't promise real-time reflection
488 * of SW objects.
489 */
490 err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
491 }
492
493 return err ? -EMSGSIZE : 0;
494 }
495
/*
 * Query @qp and fill @msg with its state-dependent attributes.  Which
 * attributes are emitted depends on the QP transport type.
 *
 * Returns 0 on success, the ib_query_qp() error, or -EMSGSIZE.
 */
static int fill_res_qp_entry_query(struct sk_buff *msg,
				   struct rdma_restrack_entry *res,
				   struct ib_device *dev,
				   struct ib_qp *qp)
{
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	/* Remote QPN and RQ PSN are reported for RC and UC only. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	/* Path migration state exists for RC, UC and the XRC types. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	/* Let the driver append its own QP details, if it provides any. */
	if (dev->ops.fill_res_qp_entry)
		return dev->ops.fill_res_qp_entry(msg, qp);
	return 0;

err:	return -EMSGSIZE;
}
538
/*
 * Fill @msg with one QP restrack entry.  Returns -EAGAIN when the entry
 * does not belong to the requested @port so the caller can skip it.
 */
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;

	/* In create_qp() the port is not set yet; report it only once known. */
	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return fill_res_qp_entry_query(msg, res, dev, qp);
}
567
/* Delegate the raw QP dump entirely to the driver, if it supports one. */
static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;

	return dev->ops.fill_res_qp_entry_raw ?
		       dev->ops.fill_res_qp_entry_raw(msg, qp) : -EINVAL;
}
580
/*
 * Fill @msg with one CM ID restrack entry: port, QPN/QP-type (when bound),
 * port space, state, source/destination addresses and owner.  Returns
 * -EAGAIN when the entry does not belong to the requested @port.
 */
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
		container_of(res, struct rdma_id_private, res);
	struct ib_device *dev = id_priv->id.device;
	struct rdma_cm_id *cm_id = &id_priv->id;

	if (port && port != cm_id->port_num)
		return -EAGAIN;

	/* port_num is zero until the CM ID is bound to a device port. */
	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	/* QPN/QP-type only once a QP has been associated with the CM ID. */
	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	/* Addresses are emitted only when an address family has been set. */
	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	/* Let the driver append its own CM ID details, if it provides any. */
	if (dev->ops.fill_res_cm_id_entry)
		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
	return 0;

err:	return -EMSGSIZE;
}
632
/*
 * Fill @msg with one CQ restrack entry: CQE count, use count, poll context
 * (kernel CQs only), DIM state, CQN, owning context and owner.
 */
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, cq->dim != NULL))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		goto err;

	/* Only user CQs have an owning user context to report. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->uevent.uobject.context->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (dev->ops.fill_res_cq_entry)
		return dev->ops.fill_res_cq_entry(msg, cq);
	return 0;

err:
	return -EMSGSIZE;
}
666
/* Delegate the raw CQ dump entirely to the driver, if it supports one. */
static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	return dev->ops.fill_res_cq_entry_raw ?
		       dev->ops.fill_res_cq_entry_raw(msg, cq) : -EINVAL;
}
677
/*
 * Fill @msg with one MR restrack entry.  The lkey/rkey pair is included
 * only for privileged (CAP_NET_ADMIN) dumps.
 */
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			return -EMSGSIZE;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return dev->ops.fill_res_mr_entry ?
		       dev->ops.fill_res_mr_entry(msg, mr) : 0;
}
709
/* Delegate the raw MR dump entirely to the driver, if it supports one. */
static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	return dev->ops.fill_res_mr_entry_raw ?
		       dev->ops.fill_res_mr_entry_raw(msg, mr) : -EINVAL;
}
720
/*
 * Fill @msg with one PD restrack entry: keys (privileged dumps only),
 * use count, PDN, owning context (user PDs) and owner.
 */
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);

	/* Keys are only exposed to CAP_NET_ADMIN callers. */
	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		/* The global rkey exists only when the PD was created with it. */
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		goto err;

	/* Only user PDs have an owning user context to report. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			pd->uobject->context->res.id))
		goto err;

	return fill_res_name_pid(msg, res);

err:	return -EMSGSIZE;
}
751
/*
 * Fill @msg with one user-context restrack entry.  Nothing is reported
 * for kernel-owned contexts.
 */
static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);

	if (rdma_is_kernel_res(res))
		return 0;

	return nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id) ?
		       -EMSGSIZE : fill_res_name_pid(msg, res);
}
765
/*
 * Append one QP-number range as a nested entry.  A degenerate range
 * (min == max) is emitted as a single LQPN; min_range == 0 means there is
 * nothing to emit.
 */
static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
				   uint32_t max_range)
{
	struct nlattr *entry;
	int err;

	if (!min_range)
		return 0;

	entry = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry)
		return -EMSGSIZE;

	if (min_range == max_range) {
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range);
	} else {
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range);
		if (!err)
			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE,
					  max_range);
	}

	if (err) {
		nla_nest_cancel(msg, entry);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, entry);
	return 0;
}
794
/*
 * Dump, as a nested table, the QP numbers of all QPs attached to @srq,
 * compressed into [min, max] ranges of consecutive numbers.  Iteration
 * relies on the restrack xarray yielding QPs in ascending qp_num order.
 */
static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
{
	uint32_t min_range = 0, prev = 0;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct nlattr *table_attr;
	struct ib_qp *qp = NULL;
	unsigned long id = 0;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
	if (!table_attr)
		return -EMSGSIZE;

	rt = &srq->device->res[RDMA_RESTRACK_QP];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		/* Skip entries whose reference can no longer be taken. */
		if (!rdma_restrack_get(res))
			continue;

		qp = container_of(res, struct ib_qp, res);
		/* Only QPs attached to this SRQ are of interest. */
		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
			rdma_restrack_put(res);
			continue;
		}

		if (qp->qp_num < prev)
			/* qp_num should be ascending */
			goto err_loop;

		if (min_range == 0) {
			/* First matching QP opens the current range. */
			min_range = qp->qp_num;
		} else if (qp->qp_num > (prev + 1)) {
			/* Gap found: flush [min_range, prev], start anew. */
			if (fill_res_range_qp_entry(msg, min_range, prev))
				goto err_loop;

			min_range = qp->qp_num;
		}
		prev = qp->qp_num;
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);

	/* Flush the final open range (no-op when min_range is still 0). */
	if (fill_res_range_qp_entry(msg, min_range, prev))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;

err_loop:
	rdma_restrack_put(res);
	xa_unlock(&rt->xa);
err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}
851
/*
 * Fill @msg with one SRQ restrack entry: SRQN, type, PDN, CQN (for types
 * that carry a CQ), the attached QP ranges and the owner.
 */
static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_srq *srq = container_of(res, struct ib_srq, res);
	struct ib_device *dev = srq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
		return -EMSGSIZE;

	/* Only SRQ types that have a completion queue report a CQN. */
	if (ib_srq_has_cq(srq->srq_type) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, srq->ext.cq->res.id))
		return -EMSGSIZE;

	if (fill_res_srq_qps(msg, srq))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return dev->ops.fill_res_srq_entry ?
		       dev->ops.fill_res_srq_entry(msg, srq) : 0;
}
887
/* Raw SRQ dump: delegated entirely to the driver, if it provides a hook. */
static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				  struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_srq *srq = container_of(res, struct ib_srq, res);
	const struct ib_device_ops *ops = &srq->device->ops;

	return ops->fill_res_srq_entry_raw ?
	       ops->fill_res_srq_entry_raw(msg, srq) : -EINVAL;
}
898
fill_stat_counter_mode(struct sk_buff * msg,struct rdma_counter * counter)899 static int fill_stat_counter_mode(struct sk_buff *msg,
900 struct rdma_counter *counter)
901 {
902 struct rdma_counter_mode *m = &counter->mode;
903
904 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
905 return -EMSGSIZE;
906
907 if (m->mode == RDMA_COUNTER_MODE_AUTO) {
908 if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
909 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
910 return -EMSGSIZE;
911
912 if ((m->mask & RDMA_COUNTER_MASK_PID) &&
913 fill_res_name_pid(msg, &counter->res))
914 return -EMSGSIZE;
915 }
916
917 return 0;
918 }
919
/* Emit one nested QP entry carrying only the local QP number. */
static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
	struct nlattr *entry;

	entry = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry)
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
		nla_nest_cancel(msg, entry);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, entry);
	return 0;
}
938
fill_stat_counter_qps(struct sk_buff * msg,struct rdma_counter * counter)939 static int fill_stat_counter_qps(struct sk_buff *msg,
940 struct rdma_counter *counter)
941 {
942 struct rdma_restrack_entry *res;
943 struct rdma_restrack_root *rt;
944 struct nlattr *table_attr;
945 struct ib_qp *qp = NULL;
946 unsigned long id = 0;
947 int ret = 0;
948
949 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
950 if (!table_attr)
951 return -EMSGSIZE;
952
953 rt = &counter->device->res[RDMA_RESTRACK_QP];
954 xa_lock(&rt->xa);
955 xa_for_each(&rt->xa, id, res) {
956 qp = container_of(res, struct ib_qp, res);
957 if (!qp->counter || (qp->counter->id != counter->id))
958 continue;
959
960 ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
961 if (ret)
962 goto err;
963 }
964
965 xa_unlock(&rt->xa);
966 nla_nest_end(msg, table_attr);
967 return 0;
968
969 err:
970 xa_unlock(&rt->xa);
971 nla_nest_cancel(msg, table_attr);
972 return ret;
973 }
974
/* Emit one nested name/value hardware-counter pair. Exported for drivers. */
int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
				 u64 value)
{
	struct nlattr *entry;

	entry = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
	if (!entry)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
			   name) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
			      value, RDMA_NLDEV_ATTR_PAD)) {
		nla_nest_cancel(msg, entry);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
999
/* Per-MR statistics entry: MR number plus optional driver attributes. */
static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	return dev->ops.fill_stat_mr_entry ?
	       dev->ops.fill_stat_mr_entry(msg, mr) : 0;
}
1016
fill_stat_counter_hwcounters(struct sk_buff * msg,struct rdma_counter * counter)1017 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
1018 struct rdma_counter *counter)
1019 {
1020 struct rdma_hw_stats *st = counter->stats;
1021 struct nlattr *table_attr;
1022 int i;
1023
1024 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1025 if (!table_attr)
1026 return -EMSGSIZE;
1027
1028 mutex_lock(&st->lock);
1029 for (i = 0; i < st->num_counters; i++) {
1030 if (test_bit(i, st->is_disabled))
1031 continue;
1032 if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
1033 st->value[i]))
1034 goto err;
1035 }
1036 mutex_unlock(&st->lock);
1037
1038 nla_nest_end(msg, table_attr);
1039 return 0;
1040
1041 err:
1042 mutex_unlock(&st->lock);
1043 nla_nest_cancel(msg, table_attr);
1044 return -EMSGSIZE;
1045 }
1046
/* Serialize one rdma_counter: port, id, mode, bound QPs and HW counters. */
static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
				  struct rdma_restrack_entry *res,
				  uint32_t port)
{
	struct rdma_counter *counter =
		container_of(res, struct rdma_counter, res);

	/* -EAGAIN tells the dump loop to skip counters on other ports. */
	if (port && port != counter->port)
		return -EAGAIN;

	/* Refresh first; stale values are dumped even if the query fails. */
	rdma_counter_query_stats(counter);

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
	    fill_stat_counter_mode(msg, counter) ||
	    fill_stat_counter_qps(msg, counter) ||
	    fill_stat_counter_hwcounters(msg, counter))
		return -EMSGSIZE;

	return 0;
}
1069
nldev_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1070 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1071 struct netlink_ext_ack *extack)
1072 {
1073 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1074 struct ib_device *device;
1075 struct sk_buff *msg;
1076 u32 index;
1077 int err;
1078
1079 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1080 nldev_policy, NL_VALIDATE_LIBERAL, extack);
1081 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1082 return -EINVAL;
1083
1084 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1085
1086 device = ib_device_get_by_index(sock_net(skb->sk), index);
1087 if (!device)
1088 return -EINVAL;
1089
1090 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1091 if (!msg) {
1092 err = -ENOMEM;
1093 goto err;
1094 }
1095
1096 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1097 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1098 0, 0);
1099 if (!nlh) {
1100 err = -EMSGSIZE;
1101 goto err_free;
1102 }
1103
1104 err = fill_dev_info(msg, device);
1105 if (err)
1106 goto err_free;
1107
1108 nlmsg_end(msg, nlh);
1109
1110 ib_device_put(device);
1111 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1112
1113 err_free:
1114 nlmsg_free(msg);
1115 err:
1116 ib_device_put(device);
1117 return err;
1118 }
1119
/*
 * Handle RDMA_NLDEV_CMD_SET: apply exactly one of the mutually exclusive
 * settings - rename the device, move it to another net namespace, or
 * toggle dynamic interrupt moderation (DIM).
 */
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 index;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
		char name[IB_DEVICE_NAME_MAX] = {};

		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
			    IB_DEVICE_NAME_MAX);
		/* Reject an empty name instead of attempting the rename. */
		if (strlen(name) == 0) {
			err = -EINVAL;
			goto done;
		}
		err = ib_device_rename(device, name);
		goto done;
	}

	if (tb[RDMA_NLDEV_NET_NS_FD]) {
		u32 ns_fd;

		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
		/*
		 * ib_device_set_netns_put() consumes the device reference,
		 * so jump past the ib_device_put() at "done".
		 */
		err = ib_device_set_netns_put(skb, device, ns_fd);
		goto put_done;
	}

	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
		u8 use_dim;

		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
		err = ib_device_set_dim(device, use_dim);
		goto done;
	}

done:
	ib_device_put(device);
put_done:
	return err;
}
1172
_nldev_get_dumpit(struct ib_device * device,struct sk_buff * skb,struct netlink_callback * cb,unsigned int idx)1173 static int _nldev_get_dumpit(struct ib_device *device,
1174 struct sk_buff *skb,
1175 struct netlink_callback *cb,
1176 unsigned int idx)
1177 {
1178 int start = cb->args[0];
1179 struct nlmsghdr *nlh;
1180
1181 if (idx < start)
1182 return 0;
1183
1184 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1185 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1186 0, NLM_F_MULTI);
1187
1188 if (!nlh || fill_dev_info(skb, device)) {
1189 nlmsg_cancel(skb, nlh);
1190 goto out;
1191 }
1192
1193 nlmsg_end(skb, nlh);
1194
1195 idx++;
1196
1197 out: cb->args[0] = idx;
1198 return skb->len;
1199 }
1200
nldev_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)1201 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
1202 {
1203 /*
1204 * There is no need to take lock, because
1205 * we are relying on ib_core's locking.
1206 */
1207 return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
1208 }
1209
/*
 * Handle RDMA_NLDEV_CMD_PORT_GET (doit): reply with the attributes of a
 * single port of a single device. Both device and port index are required.
 */
static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	u32 port;
	int err;

	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (err ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		err = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);
	if (!nlh) {
		err = -EMSGSIZE;
		goto err_free;
	}

	err = fill_port_info(msg, device, port, sock_net(skb->sk));
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);
	/* Device reference dropped before handing the reply to netlink. */
	ib_device_put(device);

	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}
1267
/*
 * Handle RDMA_NLDEV_CMD_PORT_GET (dumpit): iterate all ports of one
 * device, emitting one NLM_F_MULTI message per port and resuming from
 * cb->args[0] when the dump spans multiple invocations.
 */
static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	unsigned int p;

	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
	if (!device)
		return -EINVAL;

	rdma_for_each_port (device, p) {
		/*
		 * The dumpit function returns all information from specific
		 * index. This specific index is taken from the netlink
		 * messages request sent by user and it is available
		 * in cb->args[0].
		 *
		 * Usually, the user doesn't fill this field and it causes
		 * to return everything.
		 *
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		/* On overflow, drop the partial record and resume at idx. */
		if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	ib_device_put(device);
	cb->args[0] = idx;
	return skb->len;
}
1325
/*
 * Handle RDMA_NLDEV_CMD_RES_GET (doit): reply with the per-device resource
 * summary (counts of QPs, CQs, MRs, ...). Driver-internal resources are
 * included only when RDMA_NLDEV_ATTR_DRIVER_DETAILS is set.
 */
static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	bool show_details = false;
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int ret;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, 0);
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	ret = fill_res_info(msg, device, show_details);
	if (ret)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
1377
_nldev_res_get_dumpit(struct ib_device * device,struct sk_buff * skb,struct netlink_callback * cb,unsigned int idx)1378 static int _nldev_res_get_dumpit(struct ib_device *device,
1379 struct sk_buff *skb,
1380 struct netlink_callback *cb,
1381 unsigned int idx)
1382 {
1383 int start = cb->args[0];
1384 struct nlmsghdr *nlh;
1385
1386 if (idx < start)
1387 return 0;
1388
1389 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1390 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1391 0, NLM_F_MULTI);
1392
1393 if (!nlh || fill_res_info(skb, device, false)) {
1394 nlmsg_cancel(skb, nlh);
1395 goto out;
1396 }
1397 nlmsg_end(skb, nlh);
1398
1399 idx++;
1400
1401 out:
1402 cb->args[0] = idx;
1403 return skb->len;
1404 }
1405
nldev_res_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)1406 static int nldev_res_get_dumpit(struct sk_buff *skb,
1407 struct netlink_callback *cb)
1408 {
1409 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1410 }
1411
/*
 * Per-restrack-type description of the netlink attributes used when
 * serializing that resource type.
 */
struct nldev_fill_res_entry {
	enum rdma_nldev_attr nldev_attr;	/* nest holding the whole table */
	u8 flags;				/* enum nldev_res_flags */
	u32 entry;				/* nest wrapping a single object */
	u32 id;					/* attribute carrying the object id */
};
1418
enum nldev_res_flags {
	NLDEV_PER_DEV = 1 << 0,	/* resource is per-device, not per-port */
};
1422
/* Attribute mapping for each dumpable restrack resource type. */
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
	[RDMA_RESTRACK_CTX] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CTXN,
	},
	[RDMA_RESTRACK_SRQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_SRQN,
	},

};
1471
/*
 * Common doit handler for "get one resource object" requests. Looks up a
 * single restrack object by its id and lets @fill_func serialize it into
 * a freshly allocated reply message.
 */
static noinline_for_stack int
res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		    struct netlink_ext_ack *extack,
		    enum rdma_restrack_type res_type,
		    res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct ib_device *device;
	u32 index, id, port = 0;
	bool has_cap_net_admin;
	struct sk_buff *msg;
	int ret;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err;
		}
	}

	/* Per-device resources must not carry a port; per-port ones must. */
	if ((port && fe->flags & NLDEV_PER_DEV) ||
	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
		ret = -EINVAL;
		goto err;
	}

	id = nla_get_u32(tb[fe->id]);
	/* On success this takes a reference, dropped via rdma_restrack_put(). */
	res = rdma_restrack_get_byid(device, res_type, id);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err_get;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
			0, 0);

	if (!nlh || fill_nldev_handle(msg, device)) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);

	ret = fill_func(msg, has_cap_net_admin, res, port);
	if (ret)
		goto err_free;

	rdma_restrack_put(res);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err_get:
	rdma_restrack_put(res);
err:
	ib_device_put(device);
	return ret;
}
1553
/*
 * Common dumpit handler for resource tables. Walks the device's restrack
 * xarray for @res_type and serializes each object via @fill_func into a
 * nested table, resuming from cb->args[0] across invocations.
 *
 * The xarray lock is dropped around each fill_func() call (the object is
 * pinned by rdma_restrack_get() during that window) and re-taken at the
 * "again" label before advancing the iteration.
 */
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type,
				 res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	int err, ret = 0, idx = 0;
	bool show_details = false;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	struct ib_device *device;
	int start = cb->args[0];
	bool has_cap_net_admin;
	struct nlmsghdr *nlh;
	unsigned long id;
	u32 index, port = 0;
	bool filled = false;

	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	/*
	 * Right now, we are expecting the device index to get res information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
	 * if it doesn't exist, we will iterate over all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);

	/*
	 * If no PORT_INDEX is supplied, we will return all QPs from that device
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
			0, NLM_F_MULTI);

	if (!nlh || fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);

	rt = &device->res[res_type];
	xa_lock(&rt->xa);
	/*
	 * FIXME: if the skip ahead is something common this loop should
	 * use xas_for_each & xas_pause to optimize, we can have a lot of
	 * objects.
	 */
	xa_for_each(&rt->xa, id, res) {
		/* Driver-internal (RESTRACK_DD) objects need opt-in. */
		if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
			goto next;

		if (idx < start || !rdma_restrack_get(res))
			goto next;

		xa_unlock(&rt->xa);

		filled = true;

		entry_attr = nla_nest_start_noflag(skb, fe->entry);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			rdma_restrack_put(res);
			goto msg_full;
		}

		ret = fill_func(skb, has_cap_net_admin, res, port);

		rdma_restrack_put(res);

		if (ret) {
			nla_nest_cancel(skb, entry_attr);
			/* -EAGAIN means "skip this object", not a failure. */
			if (ret == -EMSGSIZE)
				goto msg_full;
			if (ret == -EAGAIN)
				goto again;
			goto res_err;
		}
		nla_nest_end(skb, entry_attr);
again:		xa_lock(&rt->xa);
next:		idx++;
	}
	xa_unlock(&rt->xa);

msg_full:
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	ib_device_put(device);
	return ret;
}
1693
/*
 * Generate the doit/dumpit handler pair that binds a restrack type to its
 * fill_res_<name>_entry() serializer via the common helpers above.
 */
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type,                    \
					     fill_res_##name##_entry);         \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,           \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type,             \
					   fill_res_##name##_entry);           \
	}

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
1721
/* Registered soft-device link types; link_ops_rwsem protects the list. */
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
1724
link_ops_get(const char * type)1725 static const struct rdma_link_ops *link_ops_get(const char *type)
1726 {
1727 const struct rdma_link_ops *ops;
1728
1729 list_for_each_entry(ops, &link_ops, list) {
1730 if (!strcmp(ops->type, type))
1731 goto out;
1732 }
1733 ops = NULL;
1734 out:
1735 return ops;
1736 }
1737
rdma_link_register(struct rdma_link_ops * ops)1738 void rdma_link_register(struct rdma_link_ops *ops)
1739 {
1740 down_write(&link_ops_rwsem);
1741 if (WARN_ON_ONCE(link_ops_get(ops->type)))
1742 goto out;
1743 list_add(&ops->list, &link_ops);
1744 out:
1745 up_write(&link_ops_rwsem);
1746 }
1747 EXPORT_SYMBOL(rdma_link_register);
1748
rdma_link_unregister(struct rdma_link_ops * ops)1749 void rdma_link_unregister(struct rdma_link_ops *ops)
1750 {
1751 down_write(&link_ops_rwsem);
1752 list_del(&ops->list);
1753 up_write(&link_ops_rwsem);
1754 }
1755 EXPORT_SYMBOL(rdma_link_unregister);
1756
/*
 * Handle RDMA_NLDEV_CMD_NEWLINK: create a soft RDMA device of the requested
 * link type (e.g. rxe, siw) on top of the named net device.
 */
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char ibdev_name[IB_DEVICE_NAME_MAX];
	const struct rdma_link_ops *ops;
	char ndev_name[IFNAMSIZ];
	struct net_device *ndev;
	char type[IFNAMSIZ];
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
		return -EINVAL;

	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
		    sizeof(ibdev_name));
	/* No name templates ('%') and no empty names for the new device. */
	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
		return -EINVAL;

	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
		    sizeof(ndev_name));

	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
	if (!ndev)
		return -ENODEV;

	down_read(&link_ops_rwsem);
	ops = link_ops_get(type);
#ifdef CONFIG_MODULES
	/*
	 * Unknown type: drop the semaphore, try to load the providing
	 * module, then retry the lookup under the re-acquired lock.
	 */
	if (!ops) {
		up_read(&link_ops_rwsem);
		request_module("rdma-link-%s", type);
		down_read(&link_ops_rwsem);
		ops = link_ops_get(type);
	}
#endif
	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
	up_read(&link_ops_rwsem);
	dev_put(ndev);

	return err;
}
1803
nldev_dellink(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1804 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1805 struct netlink_ext_ack *extack)
1806 {
1807 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1808 struct ib_device *device;
1809 u32 index;
1810 int err;
1811
1812 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1813 nldev_policy, extack);
1814 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1815 return -EINVAL;
1816
1817 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1818 device = ib_device_get_by_index(sock_net(skb->sk), index);
1819 if (!device)
1820 return -EINVAL;
1821
1822 if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1823 ib_device_put(device);
1824 return -EINVAL;
1825 }
1826
1827 ib_unregister_device_and_put(device);
1828 return 0;
1829 }
1830
/*
 * Handle RDMA_NLDEV_CMD_GET_CHARDEV: resolve the character device (dev_t,
 * ABI version, name) that a client (e.g. uverbs) exposes, optionally
 * scoped to a specific device and port.
 */
static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
	struct ib_client_nl_info data = {};
	struct ib_device *ibdev = NULL;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
			    NL_VALIDATE_LIBERAL, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
		return -EINVAL;

	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
		    sizeof(client_name));

	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
		if (!ibdev)
			return -EINVAL;

		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
			if (!rdma_is_port_valid(ibdev, data.port)) {
				err = -EINVAL;
				goto out_put;
			}
		} else {
			/* -1 means "not port specific" to the client hook. */
			data.port = -1;
		}
	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		/* A port without a device makes no sense. */
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out_put;
	}
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_GET_CHARDEV),
			0, 0);
	if (!nlh) {
		err = -EMSGSIZE;
		goto out_nlmsg;
	}

	data.nl_msg = msg;
	/* On success data.cdev holds a device reference we must put. */
	err = ib_get_client_nl_info(ibdev, client_name, &data);
	if (err)
		goto out_nlmsg;

	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
				huge_encode_dev(data.cdev->devt),
				RDMA_NLDEV_ATTR_PAD);
	if (err)
		goto out_data;
	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
				RDMA_NLDEV_ATTR_PAD);
	if (err)
		goto out_data;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
			   dev_name(data.cdev))) {
		err = -EMSGSIZE;
		goto out_data;
	}

	nlmsg_end(msg, nlh);
	put_device(data.cdev);
	if (ibdev)
		ib_device_put(ibdev);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

out_data:
	put_device(data.cdev);
out_nlmsg:
	nlmsg_free(msg);
out_put:
	if (ibdev)
		ib_device_put(ibdev);
	return err;
}
1918
nldev_sys_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1919 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1920 struct netlink_ext_ack *extack)
1921 {
1922 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1923 struct sk_buff *msg;
1924 int err;
1925
1926 err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1927 nldev_policy, NL_VALIDATE_LIBERAL, extack);
1928 if (err)
1929 return err;
1930
1931 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1932 if (!msg)
1933 return -ENOMEM;
1934
1935 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1936 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1937 RDMA_NLDEV_CMD_SYS_GET),
1938 0, 0);
1939 if (!nlh) {
1940 nlmsg_free(msg);
1941 return -EMSGSIZE;
1942 }
1943
1944 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1945 (u8)ib_devices_shared_netns);
1946 if (err) {
1947 nlmsg_free(msg);
1948 return err;
1949 }
1950
1951 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
1952 (u8)privileged_qkey);
1953 if (err) {
1954 nlmsg_free(msg);
1955 return err;
1956 }
1957
1958 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
1959 if (err) {
1960 nlmsg_free(msg);
1961 return err;
1962 }
1963 /*
1964 * Copy-on-fork is supported.
1965 * See commits:
1966 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1967 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1968 * for more details. Don't backport this without them.
1969 *
1970 * Return value ignored on purpose, assume copy-on-fork is not
1971 * supported in case of failure.
1972 */
1973 nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
1974
1975 nlmsg_end(msg, nlh);
1976 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1977 }
1978
nldev_set_sys_set_netns_doit(struct nlattr * tb[])1979 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
1980 {
1981 u8 enable;
1982 int err;
1983
1984 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1985 /* Only 0 and 1 are supported */
1986 if (enable > 1)
1987 return -EINVAL;
1988
1989 err = rdma_compatdev_set(enable);
1990 return err;
1991 }
1992
nldev_set_sys_set_pqkey_doit(struct nlattr * tb[])1993 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
1994 {
1995 u8 enable;
1996
1997 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
1998 /* Only 0 and 1 are supported */
1999 if (enable > 1)
2000 return -EINVAL;
2001
2002 privileged_qkey = enable;
2003 return 0;
2004 }
2005
nldev_set_sys_set_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2006 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2007 struct netlink_ext_ack *extack)
2008 {
2009 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2010 int err;
2011
2012 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2013 nldev_policy, extack);
2014 if (err)
2015 return -EINVAL;
2016
2017 if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
2018 return nldev_set_sys_set_netns_doit(tb);
2019
2020 if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
2021 return nldev_set_sys_set_pqkey_doit(tb);
2022
2023 return -EINVAL;
2024 }
2025
2026
nldev_stat_set_mode_doit(struct sk_buff * msg,struct netlink_ext_ack * extack,struct nlattr * tb[],struct ib_device * device,u32 port)2027 static int nldev_stat_set_mode_doit(struct sk_buff *msg,
2028 struct netlink_ext_ack *extack,
2029 struct nlattr *tb[],
2030 struct ib_device *device, u32 port)
2031 {
2032 u32 mode, mask = 0, qpn, cntn = 0;
2033 bool opcnt = false;
2034 int ret;
2035
2036 /* Currently only counter for QP is supported */
2037 if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2038 nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2039 return -EINVAL;
2040
2041 if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
2042 opcnt = !!nla_get_u8(
2043 tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
2044
2045 mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
2046 if (mode == RDMA_COUNTER_MODE_AUTO) {
2047 if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
2048 mask = nla_get_u32(
2049 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
2050 return rdma_counter_set_auto_mode(device, port, mask, opcnt,
2051 extack);
2052 }
2053
2054 if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
2055 return -EINVAL;
2056
2057 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2058 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
2059 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2060 ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
2061 if (ret)
2062 return ret;
2063 } else {
2064 ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
2065 if (ret)
2066 return ret;
2067 }
2068
2069 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2070 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2071 ret = -EMSGSIZE;
2072 goto err_fill;
2073 }
2074
2075 return 0;
2076
2077 err_fill:
2078 rdma_counter_unbind_qpn(device, port, qpn, cntn);
2079 return ret;
2080 }
2081
nldev_stat_set_counter_dynamic_doit(struct nlattr * tb[],struct ib_device * device,u32 port)2082 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
2083 struct ib_device *device,
2084 u32 port)
2085 {
2086 struct rdma_hw_stats *stats;
2087 struct nlattr *entry_attr;
2088 unsigned long *target;
2089 int rem, i, ret = 0;
2090 u32 index;
2091
2092 stats = ib_get_hw_stats_port(device, port);
2093 if (!stats)
2094 return -EINVAL;
2095
2096 target = kcalloc(BITS_TO_LONGS(stats->num_counters),
2097 sizeof(*stats->is_disabled), GFP_KERNEL);
2098 if (!target)
2099 return -ENOMEM;
2100
2101 nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
2102 rem) {
2103 index = nla_get_u32(entry_attr);
2104 if ((index >= stats->num_counters) ||
2105 !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
2106 ret = -EINVAL;
2107 goto out;
2108 }
2109
2110 set_bit(index, target);
2111 }
2112
2113 for (i = 0; i < stats->num_counters; i++) {
2114 if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
2115 continue;
2116
2117 ret = rdma_counter_modify(device, port, i, test_bit(i, target));
2118 if (ret)
2119 goto out;
2120 }
2121
2122 out:
2123 kfree(target);
2124 return ret;
2125 }
2126
nldev_stat_set_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2127 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2128 struct netlink_ext_ack *extack)
2129 {
2130 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2131 struct ib_device *device;
2132 struct sk_buff *msg;
2133 u32 index, port;
2134 int ret;
2135
2136 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2137 extack);
2138 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2139 !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2140 return -EINVAL;
2141
2142 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2143 device = ib_device_get_by_index(sock_net(skb->sk), index);
2144 if (!device)
2145 return -EINVAL;
2146
2147 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2148 if (!rdma_is_port_valid(device, port)) {
2149 ret = -EINVAL;
2150 goto err_put_device;
2151 }
2152
2153 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
2154 !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2155 ret = -EINVAL;
2156 goto err_put_device;
2157 }
2158
2159 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2160 if (!msg) {
2161 ret = -ENOMEM;
2162 goto err_put_device;
2163 }
2164 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2165 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2166 RDMA_NLDEV_CMD_STAT_SET),
2167 0, 0);
2168 if (!nlh || fill_nldev_handle(msg, device) ||
2169 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2170 ret = -EMSGSIZE;
2171 goto err_free_msg;
2172 }
2173
2174 if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
2175 ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
2176 if (ret)
2177 goto err_free_msg;
2178 }
2179
2180 if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2181 ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
2182 if (ret)
2183 goto err_free_msg;
2184 }
2185
2186 nlmsg_end(msg, nlh);
2187 ib_device_put(device);
2188 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2189
2190 err_free_msg:
2191 nlmsg_free(msg);
2192 err_put_device:
2193 ib_device_put(device);
2194 return ret;
2195 }
2196
nldev_stat_del_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2197 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2198 struct netlink_ext_ack *extack)
2199 {
2200 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2201 struct ib_device *device;
2202 struct sk_buff *msg;
2203 u32 index, port, qpn, cntn;
2204 int ret;
2205
2206 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2207 nldev_policy, extack);
2208 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2209 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
2210 !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
2211 !tb[RDMA_NLDEV_ATTR_RES_LQPN])
2212 return -EINVAL;
2213
2214 if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2215 return -EINVAL;
2216
2217 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2218 device = ib_device_get_by_index(sock_net(skb->sk), index);
2219 if (!device)
2220 return -EINVAL;
2221
2222 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2223 if (!rdma_is_port_valid(device, port)) {
2224 ret = -EINVAL;
2225 goto err;
2226 }
2227
2228 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2229 if (!msg) {
2230 ret = -ENOMEM;
2231 goto err;
2232 }
2233 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2234 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2235 RDMA_NLDEV_CMD_STAT_SET),
2236 0, 0);
2237 if (!nlh) {
2238 ret = -EMSGSIZE;
2239 goto err_fill;
2240 }
2241
2242 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2243 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2244 if (fill_nldev_handle(msg, device) ||
2245 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2246 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2247 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2248 ret = -EMSGSIZE;
2249 goto err_fill;
2250 }
2251
2252 ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2253 if (ret)
2254 goto err_fill;
2255
2256 nlmsg_end(msg, nlh);
2257 ib_device_put(device);
2258 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2259
2260 err_fill:
2261 nlmsg_free(msg);
2262 err:
2263 ib_device_put(device);
2264 return ret;
2265 }
2266
/*
 * Report the default (port-wide) hw counters of a device port: a nested
 * RDMA_NLDEV_ATTR_STAT_HWCOUNTERS table with one name/value entry per
 * enabled counter. Disabled counters are skipped. The per-port stats
 * are refreshed via ops.get_hw_stats() under stats->lock.
 */
static noinline_for_stack int
stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack,
			      struct nlattr *tb[])
{
	struct rdma_hw_stats *stats;
	struct nlattr *table_attr;
	struct ib_device *device;
	int ret, num_cnts, i;
	struct sk_buff *msg;
	u32 index, port;
	u64 v;

	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	/* The driver must actually implement per-port hw stats */
	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
		ret = -EINVAL;
		goto err;
	}

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	stats = ib_get_hw_stats_port(device, port);
	if (!stats) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);

	if (!nlh || fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	/* Hold the lock across the refresh and the walk of stats->value */
	mutex_lock(&stats->lock);

	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
	if (num_cnts < 0) {
		ret = -EINVAL;
		goto err_stats;
	}

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err_stats;
	}
	for (i = 0; i < num_cnts; i++) {
		if (test_bit(i, stats->is_disabled))
			continue;

		/* Port total plus whatever the bound counters report */
		v = stats->value[i] +
			rdma_counter_get_hwstat_value(device, port, i);
		if (rdma_nl_stat_hwcounter_entry(msg,
						 stats->descs[i].name, v)) {
			ret = -EMSGSIZE;
			goto err_table;
		}
	}
	nla_nest_end(msg, table_attr);

	mutex_unlock(&stats->lock);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_table:
	nla_nest_cancel(msg, table_attr);
err_stats:
	mutex_unlock(&stats->lock);
err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
2359
2360 static noinline_for_stack int
stat_get_doit_qp(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack,struct nlattr * tb[])2361 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2362 struct netlink_ext_ack *extack, struct nlattr *tb[])
2363
2364 {
2365 static enum rdma_nl_counter_mode mode;
2366 static enum rdma_nl_counter_mask mask;
2367 struct ib_device *device;
2368 struct sk_buff *msg;
2369 u32 index, port;
2370 bool opcnt;
2371 int ret;
2372
2373 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2374 return nldev_res_get_counter_doit(skb, nlh, extack);
2375
2376 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2377 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2378 return -EINVAL;
2379
2380 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2381 device = ib_device_get_by_index(sock_net(skb->sk), index);
2382 if (!device)
2383 return -EINVAL;
2384
2385 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2386 if (!rdma_is_port_valid(device, port)) {
2387 ret = -EINVAL;
2388 goto err;
2389 }
2390
2391 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2392 if (!msg) {
2393 ret = -ENOMEM;
2394 goto err;
2395 }
2396
2397 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2398 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2399 RDMA_NLDEV_CMD_STAT_GET),
2400 0, 0);
2401 if (!nlh) {
2402 ret = -EMSGSIZE;
2403 goto err_msg;
2404 }
2405
2406 ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
2407 if (ret)
2408 goto err_msg;
2409
2410 if (fill_nldev_handle(msg, device) ||
2411 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2412 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2413 ret = -EMSGSIZE;
2414 goto err_msg;
2415 }
2416
2417 if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2418 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2419 ret = -EMSGSIZE;
2420 goto err_msg;
2421 }
2422
2423 if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2424 nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
2425 ret = -EMSGSIZE;
2426 goto err_msg;
2427 }
2428
2429 nlmsg_end(msg, nlh);
2430 ib_device_put(device);
2431 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2432
2433 err_msg:
2434 nlmsg_free(msg);
2435 err:
2436 ib_device_put(device);
2437 return ret;
2438 }
2439
nldev_stat_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2440 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2441 struct netlink_ext_ack *extack)
2442 {
2443 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2444 int ret;
2445
2446 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2447 nldev_policy, NL_VALIDATE_LIBERAL, extack);
2448 if (ret)
2449 return -EINVAL;
2450
2451 if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2452 return stat_get_doit_default_counter(skb, nlh, extack, tb);
2453
2454 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2455 case RDMA_NLDEV_ATTR_RES_QP:
2456 ret = stat_get_doit_qp(skb, nlh, extack, tb);
2457 break;
2458 case RDMA_NLDEV_ATTR_RES_MR:
2459 ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2460 fill_stat_mr_entry);
2461 break;
2462 default:
2463 ret = -EINVAL;
2464 break;
2465 }
2466
2467 return ret;
2468 }
2469
nldev_stat_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)2470 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2471 struct netlink_callback *cb)
2472 {
2473 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2474 int ret;
2475
2476 ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2477 nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2478 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2479 return -EINVAL;
2480
2481 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2482 case RDMA_NLDEV_ATTR_RES_QP:
2483 ret = nldev_res_get_counter_dumpit(skb, cb);
2484 break;
2485 case RDMA_NLDEV_ATTR_RES_MR:
2486 ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2487 fill_stat_mr_entry);
2488 break;
2489 default:
2490 ret = -EINVAL;
2491 break;
2492 }
2493
2494 return ret;
2495 }
2496
nldev_stat_get_counter_status_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2497 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
2498 struct nlmsghdr *nlh,
2499 struct netlink_ext_ack *extack)
2500 {
2501 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
2502 struct rdma_hw_stats *stats;
2503 struct ib_device *device;
2504 struct sk_buff *msg;
2505 u32 devid, port;
2506 int ret, i;
2507
2508 ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2509 nldev_policy, NL_VALIDATE_LIBERAL, extack);
2510 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2511 !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2512 return -EINVAL;
2513
2514 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2515 device = ib_device_get_by_index(sock_net(skb->sk), devid);
2516 if (!device)
2517 return -EINVAL;
2518
2519 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2520 if (!rdma_is_port_valid(device, port)) {
2521 ret = -EINVAL;
2522 goto err;
2523 }
2524
2525 stats = ib_get_hw_stats_port(device, port);
2526 if (!stats) {
2527 ret = -EINVAL;
2528 goto err;
2529 }
2530
2531 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2532 if (!msg) {
2533 ret = -ENOMEM;
2534 goto err;
2535 }
2536
2537 nlh = nlmsg_put(
2538 msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2539 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
2540 0, 0);
2541
2542 ret = -EMSGSIZE;
2543 if (!nlh || fill_nldev_handle(msg, device) ||
2544 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
2545 goto err_msg;
2546
2547 table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2548 if (!table)
2549 goto err_msg;
2550
2551 mutex_lock(&stats->lock);
2552 for (i = 0; i < stats->num_counters; i++) {
2553 entry = nla_nest_start(msg,
2554 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
2555 if (!entry)
2556 goto err_msg_table;
2557
2558 if (nla_put_string(msg,
2559 RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
2560 stats->descs[i].name) ||
2561 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
2562 goto err_msg_entry;
2563
2564 if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
2565 (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
2566 !test_bit(i, stats->is_disabled))))
2567 goto err_msg_entry;
2568
2569 nla_nest_end(msg, entry);
2570 }
2571 mutex_unlock(&stats->lock);
2572
2573 nla_nest_end(msg, table);
2574 nlmsg_end(msg, nlh);
2575 ib_device_put(device);
2576 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2577
2578 err_msg_entry:
2579 nla_nest_cancel(msg, entry);
2580 err_msg_table:
2581 mutex_unlock(&stats->lock);
2582 nla_nest_cancel(msg, table);
2583 err_msg:
2584 nlmsg_free(msg);
2585 err:
2586 ib_device_put(device);
2587 return ret;
2588 }
2589
nldev_newdev(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2590 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
2591 struct netlink_ext_ack *extack)
2592 {
2593 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2594 enum rdma_nl_dev_type type;
2595 struct ib_device *parent;
2596 char name[IFNAMSIZ] = {};
2597 u32 parentid;
2598 int ret;
2599
2600 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2601 nldev_policy, extack);
2602 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2603 !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
2604 return -EINVAL;
2605
2606 nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
2607 type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
2608 parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2609 parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
2610 if (!parent)
2611 return -EINVAL;
2612
2613 ret = ib_add_sub_device(parent, type, name);
2614 ib_device_put(parent);
2615
2616 return ret;
2617 }
2618
nldev_deldev(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2619 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
2620 struct netlink_ext_ack *extack)
2621 {
2622 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2623 struct ib_device *device;
2624 u32 devid;
2625 int ret;
2626
2627 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2628 nldev_policy, extack);
2629 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2630 return -EINVAL;
2631
2632 devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2633 device = ib_device_get_by_index(sock_net(skb->sk), devid);
2634 if (!device)
2635 return -EINVAL;
2636
2637 return ib_del_sub_device_and_put(device);
2638 }
2639
/*
 * Dispatch table for RDMA_NL_NLDEV netlink commands, indexed by command
 * id. Each entry names the doit (request/response) and/or dump handler;
 * state-changing and raw-dump commands are marked RDMA_NL_ADMIN_PERM,
 * restricting them to privileged callers.
 */
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
		.doit = nldev_get_chardev,
	},
	[RDMA_NLDEV_CMD_SET] = {
		.doit = nldev_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_NEWLINK] = {
		.doit = nldev_newlink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELLINK] = {
		.doit = nldev_dellink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.doit = nldev_res_get_qp_doit,
		.dump = nldev_res_get_qp_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.doit = nldev_res_get_cm_id_doit,
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.doit = nldev_res_get_cq_doit,
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.doit = nldev_res_get_mr_doit,
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.doit = nldev_res_get_pd_doit,
		.dump = nldev_res_get_pd_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
		.doit = nldev_res_get_ctx_doit,
		.dump = nldev_res_get_ctx_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
		.doit = nldev_res_get_srq_doit,
		.dump = nldev_res_get_srq_dumpit,
	},
	[RDMA_NLDEV_CMD_SYS_GET] = {
		.doit = nldev_sys_get_doit,
	},
	[RDMA_NLDEV_CMD_SYS_SET] = {
		.doit = nldev_set_sys_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_SET] = {
		.doit = nldev_stat_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET] = {
		.doit = nldev_stat_get_doit,
		.dump = nldev_stat_get_dumpit,
	},
	[RDMA_NLDEV_CMD_STAT_DEL] = {
		.doit = nldev_stat_del_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
		.doit = nldev_res_get_qp_raw_doit,
		.dump = nldev_res_get_qp_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
		.doit = nldev_res_get_cq_raw_doit,
		.dump = nldev_res_get_cq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
		.doit = nldev_res_get_mr_raw_doit,
		.dump = nldev_res_get_mr_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
		.doit = nldev_res_get_srq_raw_doit,
		.dump = nldev_res_get_srq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
		.doit = nldev_stat_get_counter_status_doit,
	},
	[RDMA_NLDEV_CMD_NEWDEV] = {
		.doit = nldev_newdev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELDEV] = {
		.doit = nldev_deldev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};
2747
fill_mon_netdev_rename(struct sk_buff * msg,struct ib_device * device,u32 port,const struct net * net)2748 static int fill_mon_netdev_rename(struct sk_buff *msg,
2749 struct ib_device *device, u32 port,
2750 const struct net *net)
2751 {
2752 struct net_device *netdev = ib_device_get_netdev(device, port);
2753 int ret = 0;
2754
2755 if (!netdev || !net_eq(dev_net(netdev), net))
2756 goto out;
2757
2758 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
2759 if (ret)
2760 goto out;
2761 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
2762 out:
2763 dev_put(netdev);
2764 return ret;
2765 }
2766
fill_mon_netdev_association(struct sk_buff * msg,struct ib_device * device,u32 port,const struct net * net)2767 static int fill_mon_netdev_association(struct sk_buff *msg,
2768 struct ib_device *device, u32 port,
2769 const struct net *net)
2770 {
2771 struct net_device *netdev = ib_device_get_netdev(device, port);
2772 int ret = 0;
2773
2774 if (netdev && !net_eq(dev_net(netdev), net))
2775 goto out;
2776
2777 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
2778 if (ret)
2779 goto out;
2780
2781 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
2782 dev_name(&device->dev));
2783 if (ret)
2784 goto out;
2785
2786 ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
2787 if (ret)
2788 goto out;
2789
2790 if (netdev) {
2791 ret = nla_put_u32(msg,
2792 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
2793 if (ret)
2794 goto out;
2795
2796 ret = nla_put_string(msg,
2797 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
2798 }
2799
2800 out:
2801 dev_put(netdev);
2802 return ret;
2803 }
2804
rdma_nl_notify_err_msg(struct ib_device * device,u32 port_num,enum rdma_nl_notify_event_type type)2805 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
2806 enum rdma_nl_notify_event_type type)
2807 {
2808 struct net_device *netdev;
2809
2810 switch (type) {
2811 case RDMA_REGISTER_EVENT:
2812 dev_warn_ratelimited(&device->dev,
2813 "Failed to send RDMA monitor register device event\n");
2814 break;
2815 case RDMA_UNREGISTER_EVENT:
2816 dev_warn_ratelimited(&device->dev,
2817 "Failed to send RDMA monitor unregister device event\n");
2818 break;
2819 case RDMA_NETDEV_ATTACH_EVENT:
2820 netdev = ib_device_get_netdev(device, port_num);
2821 dev_warn_ratelimited(&device->dev,
2822 "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
2823 port_num, netdev->ifindex);
2824 dev_put(netdev);
2825 break;
2826 case RDMA_NETDEV_DETACH_EVENT:
2827 dev_warn_ratelimited(&device->dev,
2828 "Failed to send RDMA monitor netdev detach event: port %d\n",
2829 port_num);
2830 break;
2831 case RDMA_RENAME_EVENT:
2832 dev_warn_ratelimited(&device->dev,
2833 "Failed to send RDMA monitor rename device event\n");
2834 break;
2835
2836 case RDMA_NETDEV_RENAME_EVENT:
2837 netdev = ib_device_get_netdev(device, port_num);
2838 dev_warn_ratelimited(&device->dev,
2839 "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
2840 port_num, netdev->ifindex);
2841 dev_put(netdev);
2842 break;
2843 default:
2844 break;
2845 }
2846 }
2847
rdma_nl_notify_event(struct ib_device * device,u32 port_num,enum rdma_nl_notify_event_type type)2848 int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
2849 enum rdma_nl_notify_event_type type)
2850 {
2851 struct sk_buff *skb;
2852 int ret = -EMSGSIZE;
2853 struct net *net;
2854 void *nlh;
2855
2856 net = read_pnet(&device->coredev.rdma_net);
2857 if (!net)
2858 return -EINVAL;
2859
2860 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2861 if (!skb)
2862 return -ENOMEM;
2863 nlh = nlmsg_put(skb, 0, 0,
2864 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
2865 0, 0);
2866 if (!nlh)
2867 goto err_free;
2868
2869 switch (type) {
2870 case RDMA_REGISTER_EVENT:
2871 case RDMA_UNREGISTER_EVENT:
2872 case RDMA_RENAME_EVENT:
2873 ret = fill_nldev_handle(skb, device);
2874 if (ret)
2875 goto err_free;
2876 break;
2877 case RDMA_NETDEV_ATTACH_EVENT:
2878 case RDMA_NETDEV_DETACH_EVENT:
2879 ret = fill_mon_netdev_association(skb, device, port_num, net);
2880 if (ret)
2881 goto err_free;
2882 break;
2883 case RDMA_NETDEV_RENAME_EVENT:
2884 ret = fill_mon_netdev_rename(skb, device, port_num, net);
2885 if (ret)
2886 goto err_free;
2887 break;
2888 default:
2889 break;
2890 }
2891
2892 ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
2893 if (ret)
2894 goto err_free;
2895
2896 nlmsg_end(skb, nlh);
2897 ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
2898 if (ret && ret != -ESRCH) {
2899 skb = NULL; /* skb is freed in the netlink send-op handling */
2900 goto err_free;
2901 }
2902 return 0;
2903
2904 err_free:
2905 rdma_nl_notify_err_msg(device, port_num, type);
2906 nlmsg_free(skb);
2907 return ret;
2908 }
2909
nldev_init(void)2910 void __init nldev_init(void)
2911 {
2912 rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2913 }
2914
nldev_exit(void)2915 void nldev_exit(void)
2916 {
2917 rdma_nl_unregister(RDMA_NL_NLDEV);
2918 }
2919
2920 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2921