// SPDX-License-Identifier: GPL-2.0+
//
// Scalability test comparing RCU vs other mechanisms
// for acquiring references on objects.
//
// Copyright (C) Google, 2020.
//
// Author: Joel Fernandes <joel@joelfernandes.org>

#define pr_fmt(fmt) fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/seq_buf.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/torture.h>
#include <linux/types.h>
#include <linux/sched/clock.h>

#include "rcu.h"

#define SCALE_FLAG "-ref-scale: "

#define SCALEOUT(s, x...) \
	pr_alert("%s" SCALE_FLAG s, scale_type, ## x)

#define VERBOSE_SCALEOUT(s, x...) \
	do { \
		if (verbose) \
			pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
	} while (0)

static atomic_t verbose_batch_ctr;

#define VERBOSE_SCALEOUT_BATCH(s, x...) \
	do { \
		if (verbose && \
		    (verbose_batched <= 0 || \
		     !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \
			schedule_timeout_uninterruptible(1); \
			pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
		} \
	} while (0)

#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)

MODULE_DESCRIPTION("Scalability test for object reference mechanisms");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");

static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock).");

torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");

// Number of seconds to extend warm-up and cool-down for multiple guest OSes
torture_param(long, guest_os_delay, 0,
	      "Number of seconds to extend warm-up/cool-down for multiple guest OSes.");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
	      "Holdoff time before test start (s)");
// Number of typesafe_lookup structures, that is, the degree of concurrency.
torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");

#ifdef MODULE
# define REFSCALE_SHUTDOWN 0
#else
# define REFSCALE_SHUTDOWN 1
#endif

torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
	      "Shutdown at end of scalability tests.");

struct reader_task {
	struct task_struct *task;
	int start_reader;
	wait_queue_head_t wq;
	u64 last_duration_ns;
};

static struct task_struct *shutdown_task;
static wait_queue_head_t shutdown_wq;

static struct task_struct *main_task;
static wait_queue_head_t main_wq;
static int shutdown_start;

static struct reader_task *reader_tasks;

// Number of readers that are part of the current experiment.
static atomic_t nreaders_exp;

// Used to wait for all threads to start.
static atomic_t n_init;
static atomic_t n_started;
static atomic_t n_warmedup;
static atomic_t n_cooleddown;

// Track which experiment is currently running.
static int exp_idx;

// Operations vector for selecting different types of tests.
struct ref_scale_ops {
	bool (*init)(void);
	void (*cleanup)(void);
	void (*readsection)(const int nloops);
	void (*delaysection)(const int nloops, const int udl, const int ndl);
	const char *name;
};

static const struct ref_scale_ops *cur_ops;

static void un_delay(const int udl, const int ndl)
{
	if (udl)
		udelay(udl);
	if (ndl)
		ndelay(ndl);
}

static void ref_rcu_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		rcu_read_unlock();
	}
}

static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		un_delay(udl, ndl);
		rcu_read_unlock();
	}
}

static bool rcu_sync_scale_init(void)
{
	return true;
}

static const struct ref_scale_ops rcu_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_rcu_read_section,
	.delaysection = ref_rcu_delay_section,
	.name = "rcu"
};

// Definitions for SRCU ref scale testing.
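// Unlike rcu_read_lock(), srcu_read_lock() returns a cookie (an index,
// or a per-CPU counter pointer for the srcu-fast variant) that must be
// passed to the matching srcu_read_unlock*() call, so the read-side
// sections below carry that extra value through each loop iteration.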
DEFINE_STATIC_SRCU(srcu_refctl_scale);
static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;

static void srcu_ref_scale_read_section(const int nloops)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}

static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}

static const struct ref_scale_ops srcu_ops = {
	.init = rcu_sync_scale_init,
	.readsection = srcu_ref_scale_read_section,
	.delaysection = srcu_ref_scale_delay_section,
	.name = "srcu"
};

static void srcu_fast_ref_scale_read_section(const int nloops)
{
	int i;
	struct srcu_ctr __percpu *scp;

	for (i = nloops; i >= 0; i--) {
		scp = srcu_read_lock_fast(srcu_ctlp);
		srcu_read_unlock_fast(srcu_ctlp, scp);
	}
}

static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	struct srcu_ctr __percpu *scp;

	for (i = nloops; i >= 0; i--) {
		scp = srcu_read_lock_fast(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock_fast(srcu_ctlp, scp);
	}
}

static const struct ref_scale_ops srcu_fast_ops = {
	.init = rcu_sync_scale_init,
	.readsection = srcu_fast_ref_scale_read_section,
	.delaysection = srcu_fast_ref_scale_delay_section,
	.name = "srcu-fast"
};

static void srcu_lite_ref_scale_read_section(const int nloops)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock_lite(srcu_ctlp);
		srcu_read_unlock_lite(srcu_ctlp, idx);
	}
}

static void srcu_lite_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock_lite(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock_lite(srcu_ctlp, idx);
	}
}

static const struct ref_scale_ops srcu_lite_ops = {
	.init = rcu_sync_scale_init,
	.readsection = srcu_lite_ref_scale_read_section,
	.delaysection = srcu_lite_ref_scale_delay_section,
	.name = "srcu-lite"
};

#ifdef CONFIG_TASKS_RCU

// Definitions for RCU Tasks ref scale testing: Empty read markers.
// These definitions also work for RCU Rude readers.
static void rcu_tasks_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--)
		continue;
}

static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--)
		un_delay(udl, ndl);
}

static const struct ref_scale_ops rcu_tasks_ops = {
	.init = rcu_sync_scale_init,
	.readsection = rcu_tasks_ref_scale_read_section,
	.delaysection = rcu_tasks_ref_scale_delay_section,
	.name = "rcu-tasks"
};

#define RCU_TASKS_OPS &rcu_tasks_ops,

#else // #ifdef CONFIG_TASKS_RCU

#define RCU_TASKS_OPS

#endif // #else // #ifdef CONFIG_TASKS_RCU

#ifdef CONFIG_TASKS_TRACE_RCU

// Definitions for RCU Tasks Trace ref scale testing.
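// Unlike the RCU Tasks and RCU Rude readers above, which have empty
// read-side markers, RCU Tasks Trace readers are bracketed by explicit
// rcu_read_lock_trace()/rcu_read_unlock_trace() calls.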
static void rcu_trace_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		rcu_read_unlock_trace();
	}
}

static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		un_delay(udl, ndl);
		rcu_read_unlock_trace();
	}
}

static const struct ref_scale_ops rcu_trace_ops = {
	.init = rcu_sync_scale_init,
	.readsection = rcu_trace_ref_scale_read_section,
	.delaysection = rcu_trace_ref_scale_delay_section,
	.name = "rcu-trace"
};

#define RCU_TRACE_OPS &rcu_trace_ops,

#else // #ifdef CONFIG_TASKS_TRACE_RCU

#define RCU_TRACE_OPS

#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU

// Definitions for reference count
static atomic_t refcnt;

static void ref_refcnt_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		atomic_dec(&refcnt);
	}
}

static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		un_delay(udl, ndl);
		atomic_dec(&refcnt);
	}
}

static const struct ref_scale_ops refcnt_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_refcnt_section,
	.delaysection = ref_refcnt_delay_section,
	.name = "refcnt"
};

// Definitions for rwlock
static rwlock_t test_rwlock;

static bool ref_rwlock_init(void)
{
	rwlock_init(&test_rwlock);
	return true;
}

static void ref_rwlock_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		read_unlock(&test_rwlock);
	}
}

static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		un_delay(udl, ndl);
		read_unlock(&test_rwlock);
	}
}

static const struct ref_scale_ops rwlock_ops = {
	.init = ref_rwlock_init,
	.readsection = ref_rwlock_section,
	.delaysection = ref_rwlock_delay_section,
	.name = "rwlock"
};

// Definitions for rwsem
static struct rw_semaphore test_rwsem;

static bool ref_rwsem_init(void)
{
	init_rwsem(&test_rwsem);
	return true;
}

static void ref_rwsem_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		up_read(&test_rwsem);
	}
}

static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		un_delay(udl, ndl);
		up_read(&test_rwsem);
	}
}

static const struct ref_scale_ops rwsem_ops = {
	.init = ref_rwsem_init,
	.readsection = ref_rwsem_section,
	.delaysection = ref_rwsem_delay_section,
	.name = "rwsem"
};

// Definitions for global spinlock
static DEFINE_RAW_SPINLOCK(test_lock);

static void ref_lock_section(const int nloops)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock(&test_lock);
		raw_spin_unlock(&test_lock);
	}
	preempt_enable();
}

static void ref_lock_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock(&test_lock);
		un_delay(udl, ndl);
		raw_spin_unlock(&test_lock);
	}
	preempt_enable();
}

static const struct ref_scale_ops lock_ops = {
	.readsection = ref_lock_section,
	.delaysection = ref_lock_delay_section,
	.name = "lock"
};

// Definitions for global irq-save spinlock

static void ref_lock_irq_section(const int nloops)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock_irqsave(&test_lock, flags);
		raw_spin_unlock_irqrestore(&test_lock, flags);
	}
	preempt_enable();
}

static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock_irqsave(&test_lock, flags);
		un_delay(udl, ndl);
		raw_spin_unlock_irqrestore(&test_lock, flags);
	}
	preempt_enable();
}

static const struct ref_scale_ops lock_irq_ops = {
	.readsection = ref_lock_irq_section,
	.delaysection = ref_lock_irq_delay_section,
	.name = "lock-irq"
};

// Definitions for acquire-release.
static DEFINE_PER_CPU(unsigned long, test_acqrel);

static void ref_acqrel_section(const int nloops)
{
	unsigned long x;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
		smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
	}
	preempt_enable();
}

static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned long x;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
		un_delay(udl, ndl);
		smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
	}
	preempt_enable();
}

static const struct ref_scale_ops acqrel_ops = {
	.readsection = ref_acqrel_section,
	.delaysection = ref_acqrel_delay_section,
	.name = "acqrel"
};

static volatile u64 stopopts;

static void ref_sched_clock_section(const int nloops)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--)
		x += sched_clock();
	preempt_enable();
	stopopts = x;
}

static void ref_sched_clock_delay_section(const int nloops, const int udl, const int ndl)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x += sched_clock();
		un_delay(udl, ndl);
	}
	preempt_enable();
	stopopts = x;
}

static const struct ref_scale_ops sched_clock_ops = {
	.readsection = ref_sched_clock_section,
	.delaysection = ref_sched_clock_delay_section,
	.name = "sched-clock"
};

static void ref_clock_section(const int nloops)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--)
		x += ktime_get_real_fast_ns();
	preempt_enable();
	stopopts = x;
}

static void ref_clock_delay_section(const int nloops, const int udl, const int ndl)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x += ktime_get_real_fast_ns();
		un_delay(udl, ndl);
	}
	preempt_enable();
	stopopts = x;
}

static const struct ref_scale_ops clock_ops = {
	.readsection = ref_clock_section,
	.delaysection = ref_clock_delay_section,
	.name = "clock"
};

static void ref_jiffies_section(const int nloops)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--)
		x += jiffies;
	preempt_enable();
	stopopts = x;
}

static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x += jiffies;
		un_delay(udl, ndl);
	}
	preempt_enable();
	stopopts = x;
}

static const struct ref_scale_ops jiffies_ops = {
	.readsection = ref_jiffies_section,
	.delaysection = ref_jiffies_delay_section,
	.name = "jiffies"
};

////////////////////////////////////////////////////////////////////////
//
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
//

// Item to look up in a typesafe manner. Array of pointers to these.
struct refscale_typesafe {
	atomic_t rts_refctr;  // Used by all flavors
	spinlock_t rts_lock;
	seqlock_t rts_seqlock;
	unsigned int a;
	unsigned int b;
};

static struct kmem_cache *typesafe_kmem_cachep;
static struct refscale_typesafe **rtsarray;
static long rtsarray_size;
static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand);
static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start);
static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start);

// Conditionally acquire an explicit in-structure reference count.
static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
	return atomic_inc_not_zero(&rtsp->rts_refctr);
}

// Unconditionally release an explicit in-structure reference count.
static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start)
{
	if (!atomic_dec_return(&rtsp->rts_refctr)) {
		WRITE_ONCE(rtsp->a, rtsp->a + 1);
		kmem_cache_free(typesafe_kmem_cachep, rtsp);
	}
	return true;
}

// Unconditionally acquire an explicit in-structure spinlock.
static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
	spin_lock(&rtsp->rts_lock);
	return true;
}

// Unconditionally release an explicit in-structure spinlock.
static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
	spin_unlock(&rtsp->rts_lock);
	return true;
}

// Unconditionally acquire an explicit in-structure sequence lock.
static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
	*start = read_seqbegin(&rtsp->rts_seqlock);
	return true;
}

// Conditionally release an explicit in-structure sequence lock. Return
// true if this release was successful, that is, if no retry is required.
static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
	return !read_seqretry(&rtsp->rts_seqlock, start);
}

// Do a read-side critical section with the specified delay in
// microseconds and nanoseconds inserted so as to increase probability
// of failure.
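// The critical section snapshots ->a, conditionally acquires the
// flavor-specific reference (refcount, spinlock, or seqlock), and
// restarts from the top if the acquisition fails or if ->a changed in
// the meantime, either of which indicates that the object was freed
// and possibly reused.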
static void typesafe_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned int a;
	unsigned int b;
	int i;
	long idx;
	struct refscale_typesafe *rtsp;
	unsigned int start;

	for (i = nloops; i >= 0; i--) {
		preempt_disable();
		idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size;
		preempt_enable();
retry:
		rcu_read_lock();
		rtsp = rcu_dereference(rtsarray[idx]);
		a = READ_ONCE(rtsp->a);
		if (!rts_acquire(rtsp, &start)) {
			rcu_read_unlock();
			goto retry;
		}
		if (a != READ_ONCE(rtsp->a)) {
			(void)rts_release(rtsp, start);
			rcu_read_unlock();
			goto retry;
		}
		un_delay(udl, ndl);
		b = READ_ONCE(rtsp->a);
		// Remember, seqlock read-side release can fail.
		if (!rts_release(rtsp, start)) {
			rcu_read_unlock();
			goto retry;
		}
		WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b);
		b = rtsp->b;
		rcu_read_unlock();
		WARN_ON_ONCE(a * a != b);
	}
}

// Because the acquisition and release methods are expensive, there
// is no point in optimizing away the un_delay() function's two checks.
// Thus simply define typesafe_read_section() as a simple wrapper around
// typesafe_delay_section().
static void typesafe_read_section(const int nloops)
{
	typesafe_delay_section(nloops, 0, 0);
}

// Allocate and initialize one refscale_typesafe structure.
static struct refscale_typesafe *typesafe_alloc_one(void)
{
	struct refscale_typesafe *rtsp;

	rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL);
	if (!rtsp)
		return NULL;
	atomic_set(&rtsp->rts_refctr, 1);
	WRITE_ONCE(rtsp->a, rtsp->a + 1);
	WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a);
	return rtsp;
}

// Slab-allocator constructor for refscale_typesafe structures created
// out of a new slab of system memory.
static void refscale_typesafe_ctor(void *rtsp_in)
{
	struct refscale_typesafe *rtsp = rtsp_in;

	spin_lock_init(&rtsp->rts_lock);
	seqlock_init(&rtsp->rts_seqlock);
	preempt_disable();
	rtsp->a = torture_random(this_cpu_ptr(&refscale_rand));
	preempt_enable();
}

static const struct ref_scale_ops typesafe_ref_ops;
static const struct ref_scale_ops typesafe_lock_ops;
static const struct ref_scale_ops typesafe_seqlock_ops;

// Initialize for a typesafe test.
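// The lookup_instances module parameter sets the number of objects in
// rtsarray: zero means one object per CPU, and a negative value means
// that many objects per CPU.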
static bool typesafe_init(void)
{
	long idx;
	long si = lookup_instances;

	typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe",
						 sizeof(struct refscale_typesafe), sizeof(void *),
						 SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor);
	if (!typesafe_kmem_cachep)
		return false;
	if (si < 0)
		si = -si * nr_cpu_ids;
	else if (si == 0)
		si = nr_cpu_ids;
	rtsarray_size = si;
	rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL);
	if (!rtsarray)
		return false;
	for (idx = 0; idx < rtsarray_size; idx++) {
		rtsarray[idx] = typesafe_alloc_one();
		if (!rtsarray[idx])
			return false;
	}
	if (cur_ops == &typesafe_ref_ops) {
		rts_acquire = typesafe_ref_acquire;
		rts_release = typesafe_ref_release;
	} else if (cur_ops == &typesafe_lock_ops) {
		rts_acquire = typesafe_lock_acquire;
		rts_release = typesafe_lock_release;
	} else if (cur_ops == &typesafe_seqlock_ops) {
		rts_acquire = typesafe_seqlock_acquire;
		rts_release = typesafe_seqlock_release;
	} else {
		WARN_ON_ONCE(1);
		return false;
	}
	return true;
}

// Clean up after a typesafe test.
static void typesafe_cleanup(void)
{
	long idx;

	if (rtsarray) {
		for (idx = 0; idx < rtsarray_size; idx++)
			kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]);
		kfree(rtsarray);
		rtsarray = NULL;
		rtsarray_size = 0;
	}
	kmem_cache_destroy(typesafe_kmem_cachep);
	typesafe_kmem_cachep = NULL;
	rts_acquire = NULL;
	rts_release = NULL;
}

// The typesafe_init() function distinguishes these structures by address.
static const struct ref_scale_ops typesafe_ref_ops = {
	.init = typesafe_init,
	.cleanup = typesafe_cleanup,
	.readsection = typesafe_read_section,
	.delaysection = typesafe_delay_section,
	.name = "typesafe_ref"
};

static const struct ref_scale_ops typesafe_lock_ops = {
	.init = typesafe_init,
	.cleanup = typesafe_cleanup,
	.readsection = typesafe_read_section,
	.delaysection = typesafe_delay_section,
	.name = "typesafe_lock"
};

static const struct ref_scale_ops typesafe_seqlock_ops = {
	.init = typesafe_init,
	.cleanup = typesafe_cleanup,
	.readsection = typesafe_read_section,
	.delaysection = typesafe_delay_section,
	.name = "typesafe_seqlock"
};

static void rcu_scale_one_reader(void)
{
	if (readdelay <= 0)
		cur_ops->readsection(loops);
	else
		cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
}

// Warm up the cache or, if needed, run a series of rcu_scale_one_reader()
// invocations to allow multiple rcuscale guest OSes to collect mutually
// valid data.
static void rcu_scale_warm_cool(void)
{
	unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1);

	do {
		rcu_scale_one_reader();
		cond_resched();
	} while (time_before(jiffies, jdone));
}

// Reader kthread. Repeatedly does empty RCU read-side
// critical section, minimizing update-side interference.
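// The timed pass runs with interrupts disabled and is bracketed by
// warm-up and cool-down invocations of rcu_scale_one_reader() so that
// each reader measures while all the other readers are also running.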
static int
ref_scale_reader(void *arg)
{
	unsigned long flags;
	long me = (long)arg;
	struct reader_task *rt = &(reader_tasks[me]);
	u64 start;
	s64 duration;

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me);
	WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_init);
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);
repeat:
	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id());

	// Wait for signal that this reader can start.
	wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
		   torture_must_stop());

	if (torture_must_stop())
		goto end;

	// Make sure that the CPU is affinitized appropriately during testing.
	WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids);

	WRITE_ONCE(rt->start_reader, 0);
	if (!atomic_dec_return(&n_started))
		while (atomic_read_acquire(&n_started))
			cpu_relax();

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d started", me, exp_idx);

	// To reduce noise, do an initial cache-warming invocation, check
	// in, and then keep warming until everyone has checked in.
	rcu_scale_one_reader();
	if (!atomic_dec_return(&n_warmedup))
		while (atomic_read_acquire(&n_warmedup))
			rcu_scale_one_reader();
	// Also keep interrupts disabled. This also has the effect
	// of preventing entries into the slow path for rcu_read_unlock().
	local_irq_save(flags);
	start = ktime_get_mono_fast_ns();

	rcu_scale_one_reader();

	duration = ktime_get_mono_fast_ns() - start;
	local_irq_restore(flags);

	rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
	// To reduce runtime-skew noise, do maintain-load invocations until
	// everyone is done.
	if (!atomic_dec_return(&n_cooleddown))
		while (atomic_read_acquire(&n_cooleddown))
			rcu_scale_one_reader();

	if (atomic_dec_and_test(&nreaders_exp))
		wake_up(&main_wq);

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)",
			       me, exp_idx, atomic_read(&nreaders_exp));

	if (!torture_must_stop())
		goto repeat;
end:
	torture_kthread_stopping("ref_scale_reader");
	return 0;
}

static void reset_readers(void)
{
	int i;
	struct reader_task *rt;

	for (i = 0; i < nreaders; i++) {
		rt = &(reader_tasks[i]);

		rt->last_duration_ns = 0;
	}
}

// Print the results of each reader and return the sum of all their durations.
static u64 process_durations(int n)
{
	int i;
	struct reader_task *rt;
	struct seq_buf s;
	char *buf;
	u64 sum = 0;

	buf = kmalloc(800 + 64, GFP_KERNEL);
	if (!buf)
		return 0;
	seq_buf_init(&s, buf, 800 + 64);

	seq_buf_printf(&s, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
		       exp_idx);

	for (i = 0; i < n && !torture_must_stop(); i++) {
		rt = &(reader_tasks[i]);

		if (i % 5 == 0)
			seq_buf_putc(&s, '\n');

		if (seq_buf_used(&s) >= 800) {
			pr_alert("%s", seq_buf_str(&s));
			seq_buf_clear(&s);
		}

		seq_buf_printf(&s, "%d: %llu\t", i, rt->last_duration_ns);

		sum += rt->last_duration_ns;
	}
	pr_alert("%s\n", seq_buf_str(&s));

	kfree(buf);
	return sum;
}

// The main_func() function is the main orchestrator: it performs a
// series of experiments. For each experiment, it orders all the
// readers involved to start, waits for them to finish, then reads
// their timestamps and starts the next experiment. Each experiment
// progresses from 1 concurrent reader to N of them, at which point
// all the timestamps are printed.
static int main_func(void *arg)
{
	int exp, r;
	char buf1[64];
	char *buf;
	u64 *result_avg;

	set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);

	VERBOSE_SCALEOUT("main_func task started");
	result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
	buf = kzalloc(800 + 64, GFP_KERNEL);
	if (!result_avg || !buf) {
		SCALEOUT_ERRSTRING("out of memory");
		goto oom_exit;
	}
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);

	// Wait for all threads to start.
	atomic_inc(&n_init);
	while (atomic_read(&n_init) < nreaders + 1)
		schedule_timeout_uninterruptible(1);

	// Start up the readers for each experiment.
	rcu_scale_warm_cool();
	for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
		if (torture_must_stop())
			goto end;

		reset_readers();
		atomic_set(&nreaders_exp, nreaders);
		atomic_set(&n_started, nreaders);
		atomic_set(&n_warmedup, nreaders);
		atomic_set(&n_cooleddown, nreaders);

		exp_idx = exp;

		for (r = 0; r < nreaders; r++) {
			smp_store_release(&reader_tasks[r].start_reader, 1);
			wake_up(&reader_tasks[r].wq);
		}

		VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers",
				 nreaders);

		wait_event(main_wq,
			   !atomic_read(&nreaders_exp) || torture_must_stop());

		VERBOSE_SCALEOUT("main_func: experiment ended");

		if (torture_must_stop())
			goto end;

		result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
	}
	rcu_scale_warm_cool();

	// Print the average of all experiments
	SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");

	pr_alert("Runs\tTime(ns)\n");
	for (exp = 0; exp < nruns; exp++) {
		u64 avg;
		u32 rem;

		avg = div_u64_rem(result_avg[exp], 1000, &rem);
		sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
		strcat(buf, buf1);
		if (strlen(buf) >= 800) {
			pr_alert("%s", buf);
			buf[0] = 0;
		}
	}

	pr_alert("%s", buf);

oom_exit:
	// This will shut down everything, including us.
	if (shutdown) {
		shutdown_start = 1;
		wake_up(&shutdown_wq);
	}

	// Wait for torture to stop us.
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);

end:
	torture_kthread_stopping("main_func");
	kfree(result_avg);
	kfree(buf);
	return 0;
}

static void
ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag)
{
	pr_alert("%s" SCALE_FLAG
		 "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
		 verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
}

static void
ref_scale_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nreaders; i++)
			torture_stop_kthread("ref_scale_reader",
					     reader_tasks[i].task);
	}
	kfree(reader_tasks);

	torture_stop_kthread("main_task", main_task);
	kfree(main_task);

	// Do scale-type-specific cleanup operations.
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}

// Shutdown kthread. Just waits to be awakened, then shuts down the system.
static int
ref_scale_shutdown(void *arg)
{
	wait_event_idle(shutdown_wq, shutdown_start);

	smp_mb(); // Wake before output.
	ref_scale_cleanup();
	kernel_power_off();

	return -EINVAL;
}

static int __init
ref_scale_init(void)
{
	long i;
	int firsterr = 0;
	static const struct ref_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS
		&refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops,
		&acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops,
		&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
	};

	if (!torture_init_begin(scale_type, verbose))
		return -EBUSY;

	for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
		cur_ops = scale_ops[i];
		if (strcmp(scale_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(scale_ops)) {
		pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
		pr_alert("rcu-scale types:");
		for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
			pr_cont(" %s", scale_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->init)
		if (!cur_ops->init()) {
			firsterr = -EUCLEAN;
			goto unwind;
		}

	ref_scale_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
						  shutdown_task);
		if (torture_init_error(firsterr))
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}

	// Reader tasks (default to ~75% of online CPUs).
	if (nreaders < 0)
		nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
	if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
		loops = 1;
	if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
		nreaders = 1;
	if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
		nruns = 1;
	reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (!reader_tasks) {
		SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}

	VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);

	for (i = 0; i < nreaders; i++) {
		init_waitqueue_head(&reader_tasks[i].wq);
		firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
						  reader_tasks[i].task);
		if (torture_init_error(firsterr))
			goto unwind;
	}

	// Main Task
	init_waitqueue_head(&main_wq);
	firsterr = torture_create_kthread(main_func, NULL, main_task);
	if (torture_init_error(firsterr))
		goto unwind;

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	ref_scale_cleanup();
	if (shutdown) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
		kernel_power_off();
	}
	return firsterr;
}

module_init(ref_scale_init);
module_exit(ref_scale_cleanup);