// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */

#include <errno.h>
#include <unistd.h>
#include <pthread.h>
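/* Standard-library headers for memset(), calloc(), rand(), strtol() and
 * printf() used below; they may also be pulled in indirectly through
 * test_maps.h, but are included here so the file stands on its own.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>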

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include <bpf_util.h>
#include <test_maps.h>

#include "map_percpu_stats.skel.h"

#define MAX_ENTRIES			16384
#define MAX_ENTRIES_HASH_OF_MAPS	64
#define N_THREADS			8
#define MAX_MAP_KEY_SIZE		4

static void map_info(int map_fd, struct bpf_map_info *info)
{
	__u32 len = sizeof(*info);
	int ret;

	memset(info, 0, sizeof(*info));

	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
}

static const char *map_type_to_s(__u32 type)
{
	switch (type) {
	case BPF_MAP_TYPE_HASH:
		return "HASH";
	case BPF_MAP_TYPE_PERCPU_HASH:
		return "PERCPU_HASH";
	case BPF_MAP_TYPE_LRU_HASH:
		return "LRU_HASH";
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		return "LRU_PERCPU_HASH";
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		return "BPF_MAP_TYPE_HASH_OF_MAPS";
	default:
		return "<define-me>";
	}
}

static __u32 map_count_elements(__u32 type, int map_fd)
{
	__u32 key = -1;
	int n = 0;

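	/* Starting from (__u32)-1, a key this test never inserts, makes
	 * bpf_map_get_next_key() return the first key in the map; the loop
	 * then walks every element until the helper returns -ENOENT.
	 */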
	while (!bpf_map_get_next_key(map_fd, &key, &key))
		n++;
	return n;
}

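/* Used as the 'batch' argument of delete_all_elements() below, so that
 * call sites read as delete_all_elements(..., BATCH) and (..., !BATCH).
 */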
#define BATCH	true

static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
{
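	/* 8KB per entry leaves enough room for the per-CPU values of up to
	 * 1024 CPUs (cf. the same sizing of 'val' in delete_all_elements()).
	 */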
	static __u8 values[(8 << 10) * MAX_ENTRIES];
	void *in_batch = NULL, *out_batch;
	__u32 save_count = count;
	int ret;

	ret = bpf_map_lookup_and_delete_batch(map_fd,
					      &in_batch, &out_batch,
					      keys, values, &count,
					      NULL);

	/*
	 * Despite what the uapi header says, lookup_and_delete_batch returns
	 * -ENOENT when it has successfully deleted all elements, so treat
	 * that case separately.
	 */
	CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
	      "error: %s\n", strerror(errno));

	CHECK(count != save_count,
	      "bpf_map_lookup_and_delete_batch",
	      "did not delete all elements: removed=%u expected=%u\n",
	      count, save_count);
}

static void delete_all_elements(__u32 type, int map_fd, bool batch)
{
	static __u8 val[8 << 10]; /* enough for 1024 CPUs */
	__u32 key = -1;
	void *keys;
	__u32 i, n;
	int ret;

	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));

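	/* Snapshot all keys first: deleting the current key while walking
	 * the map with bpf_map_get_next_key() would restart the iteration
	 * from the first element.
	 */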
	for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
		memcpy(keys + n * MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);

	if (batch) {
		/* Can't mix delete_batch and delete_and_lookup_batch because
		 * they have different semantics with respect to the keys
		 * argument. However, delete_batch utilizes map_delete_elem
		 * internally, so it is effectively covered by the non-batch
		 * scenario */
		delete_and_lookup_batch(map_fd, keys, n);
	} else {
		/* Intentionally mix delete and lookup_and_delete so we can test both */
		for (i = 0; i < n; i++) {
			void *keyp = keys + i * MAX_MAP_KEY_SIZE;

			if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
				ret = bpf_map_delete_elem(map_fd, keyp);
				CHECK(ret < 0, "bpf_map_delete_elem",
				      "error: key %u: %s\n", i, strerror(errno));
			} else {
				ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
				CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
				      "error: key %u: %s\n", i, strerror(errno));
			}
		}
	}

	free(keys);
}

static bool is_lru(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_LRU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool is_percpu(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

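/* Options shared by all updater threads: the map under test, how many keys
 * to upsert (each thread writes the same range [0, n)), and whether -ENOMEM
 * from bpf_map_update_elem() should be retried (see patch_map_thread()).
 */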
struct upsert_opts {
	__u32 map_type;
	int map_fd;
	__u32 n;
	bool retry_for_nomem;
};

static int create_small_hash(void)
{
	int map_fd;

	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
	      strerror(errno), "small");

	return map_fd;
}

static bool retry_for_nomem_fn(int err)
{
	return err == ENOMEM;
}

static void *patch_map_thread(void *arg)
{
	/* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
	static __u8 blob[8 << 10];
	struct upsert_opts *opts = arg;
	void *val_ptr;
	int val;
	int ret;
	int i;

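	/* All N_THREADS threads upsert the same key range [0, n), so the
	 * updates deliberately race with one another; which value wins per
	 * key is irrelevant, the test only checks the final element count.
	 */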
	for (i = 0; i < opts->n; i++) {
		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
			val = create_small_hash();
			val_ptr = &val;
		} else if (is_percpu(opts->map_type)) {
			val_ptr = blob;
		} else {
			val = rand();
			val_ptr = &val;
		}

		/* Retrying for roughly 2 seconds should be enough */
		if (opts->retry_for_nomem)
			ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
						   40, retry_for_nomem_fn);
		else
			ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));

		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
			close(val);
	}
	return NULL;
}

static void upsert_elements(struct upsert_opts *opts)
{
	pthread_t threads[N_THREADS];
	int ret;
	int i;

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_create(&threads[i], NULL, patch_map_thread, opts);
		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
	}

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_join(threads[i], NULL);
		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
	}
}

static __u32 read_cur_elements(int iter_fd)
{
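	/* The BPF iterator program (dump_bpf_map in the map_percpu_stats
	 * skeleton) emits the element count of the target map as a decimal
	 * string; read it from the iterator fd and parse it back.
	 */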
	char buf[64];
	ssize_t n;
	__u32 ret;

	n = read(iter_fd, buf, sizeof(buf) - 1);
	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
	buf[n] = '\0';

	errno = 0;
	ret = (__u32)strtol(buf, NULL, 10);
	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));

	return ret;
}

static __u32 get_cur_elements(int map_id)
{
	struct map_percpu_stats *skel;
	struct bpf_link *link;
	__u32 n_elements;
	int iter_fd;
	int ret;

	skel = map_percpu_stats__open();
	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s\n", strerror(errno));

	skel->bss->target_id = map_id;

	ret = map_percpu_stats__load(skel);
	CHECK(ret != 0, "map_percpu_stats__load", "error: %s\n", strerror(errno));

	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));

	n_elements = read_cur_elements(iter_fd);

	close(iter_fd);
	bpf_link__destroy(link);
	map_percpu_stats__destroy(skel);

	return n_elements;
}

static void check_expected_number_elements(__u32 n_inserted, int map_fd,
					   struct bpf_map_info *info)
{
	__u32 n_real;
	__u32 n_iter;

	/* Count the current number of elements in the map by iterating
	 * through all the map keys via bpf_map_get_next_key
	 */
	n_real = map_count_elements(info->type, map_fd);

	/* The "real" number of elements should be the same as the inserted
	 * number of elements in all cases except LRU maps, where some elements
	 * may have been evicted
	 */
	if (n_inserted == 0 || !is_lru(info->type))
		CHECK(n_inserted != n_real, "map_count_elements",
		      "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);

	/* Count the current number of elements in the map using an iterator */
	n_iter = get_cur_elements(info->id);

	/* Both counts should be the same, as all updates are over */
	CHECK(n_iter != n_real, "get_cur_elements",
	      "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
	      n_iter, n_real, map_type_to_s(info->type), info->map_flags);
}

static void __test(int map_fd)
{
	struct upsert_opts opts = {
		.map_fd = map_fd,
	};
	struct bpf_map_info info;

	map_info(map_fd, &info);
	opts.map_type = info.type;
	opts.n = info.max_entries;

	/* Reduce the number of elements we are updating such that we don't
	 * bump into -E2BIG from non-preallocated hash maps, but still will
	 * have some evictions for LRU maps */
	if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
		opts.n -= 512;
	else
		opts.n /= 2;

	/* The per-cpu bpf memory allocator may fail to allocate per-cpu
	 * pointers and may not refill its free llist in time, in which case
	 * bpf_map_update_elem() returns -ENOMEM. Retry the update to
	 * mitigate the problem temporarily.
	 */
	opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);

	/*
	 * Upsert keys [0, n) under some competition: with random values from
	 * N_THREADS threads. Check values, then delete all elements and check
	 * values again.
	 */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, !BATCH);
	check_expected_number_elements(0, map_fd, &info);

	/* Now do the same, but using batch delete operations */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, BATCH);
	check_expected_number_elements(0, map_fd, &info);

	close(map_fd);
}

static int map_create_opts(__u32 type, const char *name,
			   struct bpf_map_create_opts *map_opts,
			   __u32 key_size, __u32 val_size)
{
	int max_entries;
	int map_fd;

	if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
		max_entries = MAX_ENTRIES_HASH_OF_MAPS;
	else
		max_entries = MAX_ENTRIES;

	map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
	      strerror(errno), name);

	return map_fd;
}

static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
{
	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
}

static int create_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
}

static int create_percpu_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
}

static int create_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
}

static int create_percpu_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
}

static int create_lru_hash(__u32 type, __u32 map_flags)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);

	return map_create(type, "lru_hash", &map_opts);
}

static int create_hash_of_maps(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
		    .map_flags = BPF_F_NO_PREALLOC,
		    .inner_map_fd = create_small_hash(),
	);
	int ret;

	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
			      &map_opts, sizeof(int), sizeof(int));
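	/* The kernel uses inner_map_fd only as a template for the inner map
	 * metadata at creation time, so the fd can be closed once the outer
	 * map exists.
	 */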
	close(map_opts.inner_map_fd);
	return ret;
}

static void map_percpu_stats_hash(void)
{
	__test(create_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash(void)
{
	__test(create_percpu_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_prealloc(void)
{
	__test(create_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash_prealloc(void)
{
	__test(create_percpu_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_of_maps(void)
{
	__test(create_hash_of_maps());
	printf("test_%s:PASS\n", __func__);
}

void test_map_percpu_stats(void)
{
	map_percpu_stats_hash();
	map_percpu_stats_percpu_hash();
	map_percpu_stats_hash_prealloc();
	map_percpu_stats_percpu_hash_prealloc();
	map_percpu_stats_lru_hash();
	map_percpu_stats_lru_hash_no_common();
	map_percpu_stats_percpu_lru_hash();
	map_percpu_stats_percpu_lru_hash_no_common();
	map_percpu_stats_hash_of_maps();
}