1 // SPDX-License-Identifier: BSD-2-Clause 2 /* 3 * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * Links to Illumos.org for more information on kstat function: 28 * [1] https://illumos.org/man/1M/kstat 29 * [2] https://illumos.org/man/9f/kstat_create 30 */ 31 /* 32 * Copyright (c) 2024-2025, Klara, Inc. 33 * Copyright (c) 2024-2025, Syneto 34 */ 35 36 #include <sys/types.h> 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/systm.h> 40 #include <sys/malloc.h> 41 #include <sys/sysctl.h> 42 #include <sys/kstat.h> 43 #include <sys/sbuf.h> 44 #include <sys/zone.h> 45 46 static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics"); 47 48 SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics"); 49 50 void 51 __kstat_set_raw_ops(kstat_t *ksp, 52 int (*headers)(char *buf, size_t size), 53 int (*data)(char *buf, size_t size, void *data), 54 void *(*addr)(kstat_t *ksp, loff_t index)) 55 { 56 ksp->ks_raw_ops.headers = headers; 57 ksp->ks_raw_ops.data = data; 58 ksp->ks_raw_ops.addr = addr; 59 } 60 61 void 62 __kstat_set_seq_raw_ops(kstat_t *ksp, 63 int (*headers)(struct seq_file *f), 64 int (*data)(char *buf, size_t size, void *data), 65 void *(*addr)(kstat_t *ksp, loff_t index)) 66 { 67 ksp->ks_raw_ops.seq_headers = headers; 68 ksp->ks_raw_ops.data = data; 69 ksp->ks_raw_ops.addr = addr; 70 } 71 72 static int 73 kstat_default_update(kstat_t *ksp, int rw) 74 { 75 ASSERT3P(ksp, !=, NULL); 76 77 if (rw == KSTAT_WRITE) 78 return (EACCES); 79 80 return (0); 81 } 82 83 static int 84 kstat_resize_raw(kstat_t *ksp) 85 { 86 if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX) 87 return (ENOMEM); 88 89 free(ksp->ks_raw_buf, M_TEMP); 90 ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX); 91 ksp->ks_raw_buf = malloc(ksp->ks_raw_bufsize, M_TEMP, M_WAITOK); 92 93 return (0); 94 } 95 96 static void * 97 kstat_raw_default_addr(kstat_t *ksp, loff_t n) 98 { 99 if (n == 0) 100 return (ksp->ks_data); 101 return (NULL); 102 } 103 104 static int 105 kstat_sysctl(SYSCTL_HANDLER_ARGS) 106 { 107 kstat_t *ksp = arg1; 108 kstat_named_t *ksent; 109 uint64_t val; 110 111 ksent = ksp->ks_data; 112 /* Select the correct element */ 113 ksent += arg2; 114 /* Update the aggsums before reading */ 115 (void) ksp->ks_update(ksp, KSTAT_READ); 116 val = ksent->value.ui64; 117 118 return (sysctl_handle_64(oidp, &val, 0, req)); 119 } 120 121 static int 122 kstat_sysctl_string(SYSCTL_HANDLER_ARGS) 123 { 124 kstat_t *ksp = arg1; 125 kstat_named_t *ksent = ksp->ks_data; 126 char *val; 127 uint32_t len = 0; 128 129 /* Select the correct element */ 130 ksent += arg2; 131 /* Update the aggsums before reading */ 132 (void) ksp->ks_update(ksp, KSTAT_READ); 133 val = KSTAT_NAMED_STR_PTR(ksent); 134 len = KSTAT_NAMED_STR_BUFLEN(ksent); 135 val[len-1] = '\0'; 136 137 return (sysctl_handle_string(oidp, val, len, req)); 138 } 139 140 static int 141 kstat_sysctl_dataset(SYSCTL_HANDLER_ARGS) 142 { 143 kstat_t *ksp = arg1; 144 kstat_named_t *ksent; 145 kstat_named_t *ksent_ds; 146 uint64_t val; 147 char *ds_name; 148 uint32_t ds_len = 0; 149 150 ksent_ds = ksent = ksp->ks_data; 151 ds_name = KSTAT_NAMED_STR_PTR(ksent_ds); 152 ds_len = KSTAT_NAMED_STR_BUFLEN(ksent_ds); 153 ds_name[ds_len-1] = '\0'; 154 155 if (!zone_dataset_visible(ds_name, NULL)) { 156 return (EPERM); 157 } 158 159 /* Select the correct element */ 160 ksent += arg2; 161 /* Update the aggsums before reading */ 162 (void) ksp->ks_update(ksp, KSTAT_READ); 163 val = ksent->value.ui64; 164 165 return (sysctl_handle_64(oidp, &val, 0, req)); 166 } 167 168 static int 169 kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS) 170 { 171 kstat_t *ksp = arg1; 172 kstat_named_t *ksent = ksp->ks_data; 173 char *val; 174 uint32_t len = 0; 175 176 /* Select the correct element */ 177 ksent += arg2; 178 val = KSTAT_NAMED_STR_PTR(ksent); 179 len = KSTAT_NAMED_STR_BUFLEN(ksent); 180 val[len-1] = '\0'; 181 182 if (!zone_dataset_visible(val, NULL)) { 183 return (EPERM); 184 } 185 186 return (sysctl_handle_string(oidp, val, len, req)); 187 } 188 189 static int 190 kstat_sysctl_io(SYSCTL_HANDLER_ARGS) 191 { 192 struct sbuf sb; 193 kstat_t *ksp = arg1; 194 kstat_io_t *kip = ksp->ks_data; 195 int rc; 196 197 sbuf_new_for_sysctl(&sb, NULL, 0, req); 198 199 /* Update the aggsums before reading */ 200 (void) ksp->ks_update(ksp, KSTAT_READ); 201 202 /* though wlentime & friends are signed, they will never be negative */ 203 sbuf_printf(&sb, 204 "%-8llu %-8llu %-8u %-8u %-8llu %-8llu " 205 "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n", 206 kip->nread, kip->nwritten, 207 kip->reads, kip->writes, 208 kip->wtime, kip->wlentime, kip->wlastupdate, 209 kip->rtime, kip->rlentime, kip->rlastupdate, 210 kip->wcnt, kip->rcnt); 211 rc = sbuf_finish(&sb); 212 sbuf_delete(&sb); 213 return (rc); 214 } 215 216 static int 217 kstat_sysctl_raw(SYSCTL_HANDLER_ARGS) 218 { 219 struct sbuf sb; 220 void *data; 221 kstat_t *ksp = arg1; 222 void *(*addr_op)(kstat_t *ksp, loff_t index); 223 int n, has_header, rc = 0; 224 225 sbuf_new_for_sysctl(&sb, NULL, PAGE_SIZE, req); 226 227 if (ksp->ks_raw_ops.addr) 228 addr_op = ksp->ks_raw_ops.addr; 229 else 230 addr_op = kstat_raw_default_addr; 231 232 mutex_enter(ksp->ks_lock); 233 234 /* Update the aggsums before reading */ 235 (void) ksp->ks_update(ksp, KSTAT_READ); 236 237 ksp->ks_raw_bufsize = PAGE_SIZE; 238 ksp->ks_raw_buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK); 239 240 n = 0; 241 has_header = (ksp->ks_raw_ops.headers || 242 ksp->ks_raw_ops.seq_headers); 243 244 restart_headers: 245 if (ksp->ks_raw_ops.headers) { 246 rc = ksp->ks_raw_ops.headers( 247 ksp->ks_raw_buf, ksp->ks_raw_bufsize); 248 } else if (ksp->ks_raw_ops.seq_headers) { 249 struct seq_file f; 250 251 f.sf_buf = ksp->ks_raw_buf; 252 f.sf_size = ksp->ks_raw_bufsize; 253 rc = ksp->ks_raw_ops.seq_headers(&f); 254 } 255 if (has_header) { 256 if (rc == ENOMEM && !kstat_resize_raw(ksp)) 257 goto restart_headers; 258 if (rc == 0) { 259 sbuf_cat(&sb, "\n"); 260 sbuf_cat(&sb, ksp->ks_raw_buf); 261 } 262 } 263 264 while ((data = addr_op(ksp, n)) != NULL) { 265 restart: 266 if (ksp->ks_raw_ops.data) { 267 rc = ksp->ks_raw_ops.data(ksp->ks_raw_buf, 268 ksp->ks_raw_bufsize, data); 269 if (rc == ENOMEM && !kstat_resize_raw(ksp)) 270 goto restart; 271 if (rc == 0) 272 sbuf_cat(&sb, ksp->ks_raw_buf); 273 274 } else { 275 ASSERT3U(ksp->ks_ndata, ==, 1); 276 sbuf_hexdump(&sb, ksp->ks_data, 277 ksp->ks_data_size, NULL, 0); 278 } 279 n++; 280 } 281 free(ksp->ks_raw_buf, M_TEMP); 282 mutex_exit(ksp->ks_lock); 283 rc = sbuf_finish(&sb); 284 sbuf_delete(&sb); 285 return (rc); 286 } 287 288 kstat_t * 289 __kstat_create(const char *module, int instance, const char *name, 290 const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags) 291 { 292 char buf[KSTAT_STRLEN]; 293 struct sysctl_oid *root; 294 kstat_t *ksp; 295 char *p, *frag; 296 297 KASSERT(instance == 0, ("instance=%d", instance)); 298 if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) 299 ASSERT3U(ks_ndata, ==, 1); 300 301 if (class == NULL) 302 class = "misc"; 303 304 /* 305 * Allocate the main structure. We don't need to keep a copy of 306 * module in here, because it is only used for sysctl node creation 307 * done in this function. 308 */ 309 ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO); 310 311 ksp->ks_crtime = gethrtime(); 312 ksp->ks_snaptime = ksp->ks_crtime; 313 ksp->ks_instance = instance; 314 (void) strlcpy(ksp->ks_name, name, KSTAT_STRLEN); 315 (void) strlcpy(ksp->ks_class, class, KSTAT_STRLEN); 316 ksp->ks_type = ks_type; 317 ksp->ks_flags = flags; 318 ksp->ks_update = kstat_default_update; 319 320 mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL); 321 ksp->ks_lock = &ksp->ks_private_lock; 322 323 switch (ksp->ks_type) { 324 case KSTAT_TYPE_RAW: 325 ksp->ks_ndata = 1; 326 ksp->ks_data_size = ks_ndata; 327 break; 328 case KSTAT_TYPE_NAMED: 329 ksp->ks_ndata = ks_ndata; 330 ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t); 331 break; 332 case KSTAT_TYPE_INTR: 333 ksp->ks_ndata = ks_ndata; 334 ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t); 335 break; 336 case KSTAT_TYPE_IO: 337 ksp->ks_ndata = ks_ndata; 338 ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t); 339 break; 340 case KSTAT_TYPE_TIMER: 341 ksp->ks_ndata = ks_ndata; 342 ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t); 343 break; 344 default: 345 panic("Undefined kstat type %d\n", ksp->ks_type); 346 } 347 348 if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) 349 ksp->ks_data = NULL; 350 else 351 ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP); 352 353 sysctl_ctx_init(&ksp->ks_sysctl_ctx); 354 355 (void) strlcpy(buf, module, KSTAT_STRLEN); 356 357 /* 358 * Walk over the module name, splitting on '/', and create the 359 * intermediate nodes. 360 */ 361 root = NULL; 362 p = buf; 363 while ((frag = strsep(&p, "/")) != NULL) { 364 root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, root ? 365 SYSCTL_CHILDREN(root) : SYSCTL_STATIC_CHILDREN(_kstat), 366 OID_AUTO, frag, CTLFLAG_RW, 0, ""); 367 if (root == NULL) { 368 printf("%s: Cannot create kstat.%s tree!\n", 369 __func__, buf); 370 sysctl_ctx_free(&ksp->ks_sysctl_ctx); 371 free(ksp, M_KSTAT); 372 return (NULL); 373 } 374 if (p != NULL) 375 p[-1] = '.'; 376 } 377 378 root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root), 379 OID_AUTO, class, CTLFLAG_RW, 0, ""); 380 if (root == NULL) { 381 printf("%s: Cannot create kstat.%s.%s tree!\n", 382 __func__, buf, class); 383 sysctl_ctx_free(&ksp->ks_sysctl_ctx); 384 free(ksp, M_KSTAT); 385 return (NULL); 386 } 387 388 if (ksp->ks_type == KSTAT_TYPE_NAMED) { 389 root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, 390 SYSCTL_CHILDREN(root), 391 OID_AUTO, name, CTLFLAG_RW, 0, ""); 392 if (root == NULL) { 393 printf("%s: Cannot create kstat.%s.%s.%s tree!\n", 394 __func__, buf, class, name); 395 sysctl_ctx_free(&ksp->ks_sysctl_ctx); 396 free(ksp, M_KSTAT); 397 return (NULL); 398 } 399 } 400 401 ksp->ks_sysctl_root = root; 402 403 return (ksp); 404 } 405 406 static void 407 kstat_install_named(kstat_t *ksp) 408 { 409 kstat_named_t *ksent; 410 char *namelast; 411 int typelast; 412 413 ksent = ksp->ks_data; 414 415 VERIFY((ksp->ks_flags & KSTAT_FLAG_VIRTUAL) || ksent != NULL); 416 417 typelast = 0; 418 namelast = NULL; 419 420 for (int i = 0; i < ksp->ks_ndata; i++, ksent++) { 421 if (ksent->data_type != 0) { 422 typelast = ksent->data_type; 423 namelast = ksent->name; 424 425 /* 426 * If a sysctl with this name already exists on this on 427 * this root, first remove it by deleting it from its 428 * old context, and then destroying it. 429 */ 430 struct sysctl_oid *oid = NULL; 431 SYSCTL_FOREACH(oid, 432 SYSCTL_CHILDREN(ksp->ks_sysctl_root)) { 433 if (strcmp(oid->oid_name, namelast) == 0) { 434 kstat_t *oldksp = 435 (kstat_t *)oid->oid_arg1; 436 sysctl_ctx_entry_del( 437 &oldksp->ks_sysctl_ctx, oid); 438 sysctl_remove_oid(oid, 1, 0); 439 break; 440 } 441 } 442 } 443 444 switch (typelast) { 445 case KSTAT_DATA_CHAR: 446 /* Not Implemented */ 447 break; 448 case KSTAT_DATA_INT32: 449 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 450 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 451 OID_AUTO, namelast, 452 CTLTYPE_S32 | CTLFLAG_RD | CTLFLAG_MPSAFE, 453 ksp, i, kstat_sysctl, "I", namelast); 454 break; 455 case KSTAT_DATA_UINT32: 456 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 457 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 458 OID_AUTO, namelast, 459 CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE, 460 ksp, i, kstat_sysctl, "IU", namelast); 461 break; 462 case KSTAT_DATA_INT64: 463 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 464 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 465 OID_AUTO, namelast, 466 CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, 467 ksp, i, kstat_sysctl, "Q", namelast); 468 break; 469 case KSTAT_DATA_UINT64: 470 if (strcmp(ksp->ks_class, "dataset") == 0) { 471 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 472 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 473 OID_AUTO, namelast, 474 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, 475 ksp, i, kstat_sysctl_dataset, "QU", 476 namelast); 477 } else { 478 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 479 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 480 OID_AUTO, namelast, 481 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, 482 ksp, i, kstat_sysctl, "QU", namelast); 483 } 484 break; 485 case KSTAT_DATA_LONG: 486 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 487 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 488 OID_AUTO, namelast, 489 CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, 490 ksp, i, kstat_sysctl, "L", namelast); 491 break; 492 case KSTAT_DATA_ULONG: 493 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 494 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 495 OID_AUTO, namelast, 496 CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, 497 ksp, i, kstat_sysctl, "LU", namelast); 498 break; 499 case KSTAT_DATA_STRING: 500 if (strcmp(ksp->ks_class, "dataset") == 0) { 501 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 502 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 503 OID_AUTO, namelast, CTLTYPE_STRING | 504 CTLFLAG_RD | CTLFLAG_MPSAFE, 505 ksp, i, kstat_sysctl_dataset_string, "A", 506 namelast); 507 } else { 508 SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 509 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 510 OID_AUTO, namelast, CTLTYPE_STRING | 511 CTLFLAG_RD | CTLFLAG_MPSAFE, 512 ksp, i, kstat_sysctl_string, "A", 513 namelast); 514 } 515 break; 516 default: 517 panic("unsupported type: %d", typelast); 518 } 519 } 520 } 521 522 void 523 kstat_install(kstat_t *ksp) 524 { 525 struct sysctl_oid *root; 526 527 if (ksp->ks_ndata == UINT32_MAX) 528 VERIFY3U(ksp->ks_type, ==, KSTAT_TYPE_RAW); 529 530 switch (ksp->ks_type) { 531 case KSTAT_TYPE_NAMED: 532 return (kstat_install_named(ksp)); 533 case KSTAT_TYPE_RAW: 534 if (ksp->ks_raw_ops.data) { 535 root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 536 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 537 OID_AUTO, ksp->ks_name, CTLTYPE_STRING | CTLFLAG_RD 538 | CTLFLAG_MPSAFE | CTLFLAG_SKIP, 539 ksp, 0, kstat_sysctl_raw, "A", ksp->ks_name); 540 } else { 541 root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 542 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 543 OID_AUTO, ksp->ks_name, CTLTYPE_OPAQUE | CTLFLAG_RD 544 | CTLFLAG_MPSAFE | CTLFLAG_SKIP, 545 ksp, 0, kstat_sysctl_raw, "", ksp->ks_name); 546 } 547 break; 548 case KSTAT_TYPE_IO: 549 root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx, 550 SYSCTL_CHILDREN(ksp->ks_sysctl_root), 551 OID_AUTO, ksp->ks_name, 552 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 553 ksp, 0, kstat_sysctl_io, "A", ksp->ks_name); 554 break; 555 case KSTAT_TYPE_TIMER: 556 case KSTAT_TYPE_INTR: 557 default: 558 panic("unsupported kstat type %d\n", ksp->ks_type); 559 } 560 VERIFY3P(root, !=, NULL); 561 ksp->ks_sysctl_root = root; 562 } 563 564 void 565 kstat_delete(kstat_t *ksp) 566 { 567 568 sysctl_ctx_free(&ksp->ks_sysctl_ctx); 569 ksp->ks_lock = NULL; 570 mutex_destroy(&ksp->ks_private_lock); 571 if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) 572 kmem_free(ksp->ks_data, ksp->ks_data_size); 573 free(ksp, M_KSTAT); 574 } 575