// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata), false);

	return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;

	migrate_disable();
	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage)
		goto out;

	bpf_local_storage_destroy(sk_storage);
out:
	rcu_read_unlock();
	migrate_enable();
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache, false);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}
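
/* Illustrative sketch (user space, not compiled as part of this file):
 * the map operations below are keyed by a socket file descriptor, so a
 * lookup against an SK_STORAGE map via libbpf looks roughly like the
 * following.  The value type and variable names are made up for the
 * example:
 *
 *	int sock_fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct my_val val;	// sized to match the map's value_size
 *
 *	if (!bpf_map_lookup_elem(map_fd, &sock_fd, &val))
 *		// val now holds this socket's storage
 *
 * bpf_map_update_elem() and bpf_map_delete_elem() take the same fd key
 * and land in the bpf_fd_sk_storage_*() callbacks below.
 */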

static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}

static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, false, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}

static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	migrate_disable();
	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding new element
		 * here can race with cleanup in bpf_local_storage_map_free.
		 * Try to grab map refcnt to make sure that it's still
		 * alive and prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				bpf_selem_free(copy_selem, smap, true);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();
	migrate_enable();

	/* In case of an error, don't free anything explicitly here, the
	 * caller is responsible to call bpf_sk_storage_free.
	 */

	return ret;
}
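
/* Illustrative sketch (BPF program side, not compiled as part of this
 * file): how the bpf_sk_storage_get()/bpf_sk_storage_delete() helpers
 * implemented below are typically used.  The map name, value struct and
 * attach point are made up for the example:
 *
 *	struct my_stg {			// hypothetical per-socket value
 *		__u64 pkts;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, struct my_stg);
 *	} sk_stg SEC(".maps");
 *
 *	SEC("sockops")
 *	int count_events(struct bpf_sock_ops *ctx)
 *	{
 *		struct my_stg *stg;
 *
 *		if (!ctx->sk)
 *			return 1;
 *		stg = bpf_sk_storage_get(&sk_stg, ctx->sk, NULL,
 *					 BPF_SK_STORAGE_GET_F_CREATE);
 *		if (stg)
 *			stg->pkts++;
 *		return 1;
 *	}
 *
 * The storage lives and dies with the socket (see bpf_sk_storage_free()
 * above); bpf_sk_storage_delete(&sk_stg, sk) is available when earlier
 * removal is wanted.
 */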

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add new elem to a going away sk.
	     * Otherwise, the new elem may become a leak
	     * (and also other memory issues during map
	     * destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, false, gfp_flags);
		/* sk must be a fullsock (guaranteed by verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	struct sock *sk = (struct sock *)owner;
	int optmem_max;

	optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
	.map_mem_usage = bpf_local_storage_map_mem_usage,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func = bpf_sk_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};

static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	if (prog->aux->dst_prog)
		return false;

	/* Ensure the tracing program is not tracing
	 * any bpf_sk_storage*() function and also
	 * use the bpf_sk_storage_(get|delete) helper.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no trace point */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
						     gfp_flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func = bpf_sk_storage_get_tracing,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
	.allowed = bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func = bpf_sk_storage_delete_tracing,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed = bpf_sk_storage_tracing_allowed,
};

struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(value_size);
}

void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
	 * the map_alloc_check() side also does.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		if (nla_len(nla) != sizeof(u32))
			return ERR_PTR(-EINVAL);
		nr_maps++;
	}

	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		int map_fd = nla_get_u32(nla);
		struct bpf_map *map = bpf_map_get(map_fd);

		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* It cannot exceed max nlattr's payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}

static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified. Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);

struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned skip_elems;
};
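
/* Descriptive note: the seq_file iterator below walks every bucket of the
 * local storage map and, because RCU is dropped between reads, it records
 * its position as the (bucket_id, skip_elems) pair in the info above so the
 * next call can resume by re-walking that bucket and skipping the elements
 * already shown.
 */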

static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)

static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}
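
/* Illustrative sketch (user space, not compiled as part of this file):
 * a "bpf_sk_storage_map" iterator is pinned to one map by passing the map
 * fd in the iterator link info, which bpf_iter_attach_map() below
 * validates.  Roughly, with libbpf (program/skeleton names are made up):
 *
 *	union bpf_iter_link_info linfo = { .map.map_fd = map_fd };
 *	LIBBPF_OPTS(bpf_iter_attach_opts, opts,
 *		    .link_info = &linfo,
 *		    .link_info_len = sizeof(linfo));
 *	struct bpf_link *link;
 *	int iter_fd;
 *	char buf[4096];
 *
 *	link = bpf_program__attach_iter(skel->progs.dump_sk_storage, &opts);
 *	iter_fd = bpf_iter_create(bpf_link__fd(link));
 *	while (read(iter_fd, buf, sizeof(buf)) > 0)
 *		;	// iterator program output is consumed here
 */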

static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start = bpf_sk_storage_map_seq_start,
	.next = bpf_sk_storage_map_seq_next,
	.stop = bpf_sk_storage_map_seq_stop,
	.show = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_sk_storage_map_seq_ops,
	.init_seq_private = bpf_iter_init_sk_storage_map,
	.fini_seq_private = bpf_iter_fini_sk_storage_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target = "bpf_sk_storage_map",
	.attach_target = bpf_iter_attach_map,
	.detach_target = bpf_iter_detach_map,
	.show_fdinfo = bpf_iter_map_show_fdinfo,
	.fill_link_info = bpf_iter_map_fill_link_info,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info = &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);