// SPDX-License-Identifier: GPL-2.0
/*
 * Kernel timekeeping code and accessor functions. Based on code from
 * timer.c, moved in commit 8524070b7982.
 */
#include <linux/audit.h>
#include <linux/clocksource.h>
#include <linux/compiler.h>
#include <linux/jiffies.h>
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/pvclock_gtod.h>
#include <linux/random.h>
#include <linux/sched/clock.h>
#include <linux/sched/loadavg.h>
#include <linux/static_key.h>
#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/timekeeper_internal.h>

#include <vdso/auxclock.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
#include "ntp_internal.h"

/* Action flags for timekeeping_update_from_shadow() */
#define TK_CLEAR_NTP		(1 << 0)
#define TK_CLOCK_WAS_SET	(1 << 1)

#define TK_UPDATE_ALL		(TK_CLEAR_NTP | TK_CLOCK_WAS_SET)

enum timekeeping_adv_mode {
	/* Update timekeeper when a tick has passed */
	TK_ADV_TICK,

	/* Update timekeeper on a direct frequency change */
	TK_ADV_FREQ
};

/*
 * The most important data for readout fits into a single 64 byte
 * cache line.
 */
struct tk_data {
	seqcount_raw_spinlock_t	seq;
	struct timekeeper	timekeeper;
	struct timekeeper	shadow_timekeeper;
	raw_spinlock_t		lock;
} ____cacheline_aligned;

static struct tk_data timekeeper_data[TIMEKEEPERS_MAX];

/* The core timekeeper */
#define tk_core (timekeeper_data[TIMEKEEPER_CORE])

#ifdef CONFIG_POSIX_AUX_CLOCKS
/* Map an aux timekeeper ID to its CLOCK_AUX clock id and read it out */
static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
{
	return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts);
}

/* True if @tk is one of the auxiliary timekeepers */
static inline bool tk_is_aux(const struct timekeeper *tk)
{
	return tk->id >= TIMEKEEPER_AUX_FIRST && tk->id <= TIMEKEEPER_AUX_LAST;
}
#else
static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
{
	return false;
}

static inline bool tk_is_aux(const struct timekeeper *tk)
{
	return false;
}
#endif

/* Store the aux offset both as ktime_t and as timespec64 for the VDSO */
static inline void tk_update_aux_offs(struct timekeeper *tk, ktime_t offs)
{
	tk->offs_aux = offs;
	tk->monotonic_to_aux = ktime_to_timespec64(offs);
}

/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;

/**
 * struct tk_fast - NMI safe timekeeper
 * @seq:	Sequence counter for protecting updates. The lowest bit
 *		is the index for the tk_read_base array
 * @base:	tk_read_base array. Access is indexed by the lowest bit of
 *		@seq.
 *
 * See @update_fast_timekeeper() below.
 */
struct tk_fast {
	seqcount_latch_t	seq;
	struct tk_read_base	base[2];
};

/* Suspend-time cycles value for halted fast timekeeper.
 */
static u64 cycles_at_suspend;

/*
 * Readout for the dummy boot/suspend clocksource: returns the frozen
 * snapshot while suspended, local_clock() nanoseconds otherwise.
 */
static u64 dummy_clock_read(struct clocksource *cs)
{
	if (timekeeping_suspended)
		return cycles_at_suspend;
	return local_clock();
}

static struct clocksource dummy_clock = {
	.read = dummy_clock_read,
};

/*
 * Boot time initialization which allows local_clock() to be utilized
 * during early boot when clocksources are not available. local_clock()
 * returns nanoseconds already so no conversion is required, hence mult=1
 * and shift=0. When the first proper clocksource is installed then
 * the fast time keepers are updated with the correct values.
 */
#define FAST_TK_INIT						\
	{							\
		.clock		= &dummy_clock,			\
		.mask		= CLOCKSOURCE_MASK(64),		\
		.mult		= 1,				\
		.shift		= 0,				\
	}

static struct tk_fast tk_fast_mono ____cacheline_aligned = {
	.seq     = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
	.base[0] = FAST_TK_INIT,
	.base[1] = FAST_TK_INIT,
};

static struct tk_fast tk_fast_raw ____cacheline_aligned = {
	.seq     = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
	.base[0] = FAST_TK_INIT,
	.base[1] = FAST_TK_INIT,
};

#ifdef CONFIG_POSIX_AUX_CLOCKS
static __init void tk_aux_setup(void);
static void tk_aux_update_clocksource(void);
static void tk_aux_advance(void);
#else
static inline void tk_aux_setup(void) { }
static inline void tk_aux_update_clocksource(void) { }
static inline void tk_aux_advance(void) { }
#endif

/* Acquire tk_core.lock with interrupts disabled; returns the saved flags */
unsigned long timekeeper_lock_irqsave(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&tk_core.lock, flags);
	return flags;
}

/* Release tk_core.lock and restore the flags from timekeeper_lock_irqsave() */
void timekeeper_unlock_irqrestore(unsigned long flags)
{
	raw_spin_unlock_irqrestore(&tk_core.lock, flags);
}

/*
 * Multigrain timestamps require tracking the latest fine-grained timestamp
 * that has been issued, and never returning a coarse-grained timestamp that is
 * earlier than that
 * value.
 *
 * mg_floor represents the latest fine-grained time that has been handed out as
 * a file timestamp on the system. This is tracked as a monotonic ktime_t, and
 * converted to a realtime clock value on an as-needed basis.
 *
 * Maintaining mg_floor ensures the multigrain interfaces never issue a
 * timestamp earlier than one that has been previously issued.
 *
 * The exception to this rule is when there is a backward realtime clock jump. If
 * such an event occurs, a timestamp can appear to be earlier than a previous one.
 */
static __cacheline_aligned_in_smp atomic64_t mg_floor;

/*
 * Fold whole seconds accumulated in the shifted xtime_nsec fields of both
 * read bases into the corresponding seconds counters.
 */
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
	while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
		tk->xtime_sec++;
	}
	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
		tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
		tk->raw_sec++;
	}
}

/* CLOCK_REALTIME in timespec64 form, nanoseconds taken from xtime_nsec */
static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
{
	struct timespec64 ts;

	ts.tv_sec = tk->xtime_sec;
	ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
	return ts;
}

/* Coarse CLOCK_REALTIME: uses the separately maintained coarse_nsec copy */
static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk)
{
	struct timespec64 ts;

	ts.tv_sec = tk->xtime_sec;
	ts.tv_nsec = tk->coarse_nsec;
	return ts;
}

/*
 * Update the nanoseconds part for the coarse time keepers. They can't rely
 * on xtime_nsec because xtime_nsec could be adjusted by a small negative
 * amount when the multiplication factor of the clock is adjusted, which
 * could cause the coarse clocks to go slightly backwards. See
 * timekeeping_apply_adjustment().
 * Thus we keep a separate copy for the coarse
 * clockids which only is updated when the clock has been set or we have
 * accumulated time.
 */
static inline void tk_update_coarse_nsecs(struct timekeeper *tk)
{
	tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
}

/* Set CLOCK_REALTIME to @ts and refresh the coarse nanoseconds copy */
static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec = ts->tv_sec;
	tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
	tk_update_coarse_nsecs(tk);
}

/* Advance CLOCK_REALTIME by @ts, normalize and refresh the coarse copy */
static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec += ts->tv_sec;
	tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
	tk_normalize_xtime(tk);
	tk_update_coarse_nsecs(tk);
}

static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
{
	struct timespec64 tmp;

	/*
	 * Verify consistency of: offset_real = -wall_to_monotonic
	 * before modifying anything
	 */
	set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
				  -tk->wall_to_monotonic.tv_nsec);
	WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
	tk->wall_to_monotonic = wtm;
	set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
	/* Paired with READ_ONCE() in ktime_mono_to_any() */
	WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp));
	WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)));
}

static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
	/* Paired with READ_ONCE() in ktime_mono_to_any() */
	WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta));
	/*
	 * Timespec representation for VDSO update to avoid 64bit division
	 * on every update.
	 */
	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}

#ifdef CONFIG_ARCH_WANTS_CLOCKSOURCE_READ_INLINE
#include <asm/clock_inlined.h>

static DEFINE_STATIC_KEY_FALSE(clocksource_read_inlined);

/*
 * tk_clock_read - atomic clocksource read() helper
 *
 * This helper is necessary to use in the read paths because, while the
 * seqcount ensures we don't return a bad value while structures are updated,
 * it doesn't protect from potential crashes. There is the possibility that
 * the tkr's clocksource may change between the read reference, and the
 * clock reference passed to the read function. This can cause crashes if
 * the wrong clocksource is passed to the wrong read function.
 * This isn't necessary to use when holding the tk_core.lock or doing
 * a read of the fast-timekeeper tkrs (which is protected by its own locking
 * and update logic).
 */
static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)
{
	struct clocksource *clock = READ_ONCE(tkr->clock);

	if (static_branch_likely(&clocksource_read_inlined))
		return arch_inlined_clocksource_read(clock);

	return clock->read(clock);
}

static inline void clocksource_disable_inline_read(void)
{
	static_branch_disable(&clocksource_read_inlined);
}

static inline void clocksource_enable_inline_read(void)
{
	static_branch_enable(&clocksource_read_inlined);
}
#else
static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)
{
	struct clocksource *clock = READ_ONCE(tkr->clock);

	return clock->read(clock);
}
static inline void clocksource_disable_inline_read(void) { }
static inline void clocksource_enable_inline_read(void) { }
#endif

/**
 * tk_setup_internals - Set up internals to use clocksource clock.
 *
 * @tk:		The target timekeeper to setup.
 * @clock:	Pointer to clocksource.
 *
 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
 * pair and interval request.
 *
 * Unless you're the timekeeping code, you should not be using this!
 */
static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
{
	u64 interval;
	u64 tmp, ntpinterval;
	struct clocksource *old_clock;

	++tk->cs_was_changed_seq;
	old_clock = tk->tkr_mono.clock;
	tk->tkr_mono.clock = clock;
	tk->tkr_mono.mask = clock->mask;
	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);

	tk->tkr_raw.clock = clock;
	tk->tkr_raw.mask = clock->mask;
	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;

	/* Do the ns -> cycle conversion first, using original mult */
	tmp = NTP_INTERVAL_LENGTH;
	tmp <<= clock->shift;
	ntpinterval = tmp;
	tmp += clock->mult/2;
	do_div(tmp, clock->mult);
	if (tmp == 0)
		tmp = 1;

	interval = (u64) tmp;
	tk->cycle_interval = interval;

	/* Go back from cycles -> shifted ns */
	tk->xtime_interval = interval * clock->mult;
	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
	tk->raw_interval = interval * clock->mult;

	/* if changing clocks, convert xtime_nsec shift units */
	if (old_clock) {
		int shift_change = clock->shift - old_clock->shift;
		if (shift_change < 0) {
			tk->tkr_mono.xtime_nsec >>= -shift_change;
			tk->tkr_raw.xtime_nsec >>= -shift_change;
		} else {
			tk->tkr_mono.xtime_nsec <<= shift_change;
			tk->tkr_raw.xtime_nsec <<= shift_change;
		}
	}

	tk->tkr_mono.shift = clock->shift;
	tk->tkr_raw.shift = clock->shift;

	tk->ntp_error = 0;
	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
	tk->ntp_tick = ntpinterval << tk->ntp_error_shift;

	/*
	 * The timekeeper keeps its own mult values for the currently
	 * active clocksource. These values will be adjusted via NTP
	 * to counteract clock drifting.
	 */
	tk->tkr_mono.mult = clock->mult;
	tk->tkr_raw.mult = clock->mult;
	tk->ntp_err_mult = 0;
	tk->skip_second_overflow = 0;

	tk->cs_id = clock->id;

	/* Coupled clockevent data */
	if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) &&
	    clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT) {
		/*
		 * Aim for an one hour maximum delta and use KHz to handle
		 * clocksources with a frequency above 4GHz correctly as
		 * the frequency argument of clocks_calc_mult_shift() is u32.
		 */
		clocks_calc_mult_shift(&tk->cs_ns_to_cyc_mult, &tk->cs_ns_to_cyc_shift,
				       NSEC_PER_MSEC, clock->freq_khz, 3600 * 1000);
		/*
		 * Initialize the conversion limit as the previous clocksource
		 * might have the same shift/mult pair so the quick check in
		 * tk_update_ns_to_cyc() fails to update it after a clocksource
		 * change leaving it effectively zero.
		 */
		tk->cs_ns_to_cyc_maxns = div_u64(clock->mask, tk->cs_ns_to_cyc_mult);
	}
}

/* Timekeeper helper functions. */

/*
 * Overflow-safe slow path for the cycles to nanoseconds conversion,
 * used when delta * mult would not fit into 64 bits.
 */
static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
{
	return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
}

static __always_inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
	/* Calculate the delta since the last update_wall_time() */
	u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;

	/*
	 * This detects both negative motion and the case where the delta
	 * overflows the multiplication with tkr->mult.
	 */
	if (unlikely(delta > tkr->clock->max_cycles)) {
		/*
		 * Handle clocksource inconsistency between CPUs to prevent
		 * time from going backwards by checking for the MSB of the
		 * mask being set in the delta.
		 */
		if (delta & ~(mask >> 1))
			return tkr->xtime_nsec >> tkr->shift;

		return delta_to_ns_safe(tkr, delta);
	}

	return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}

/* Read the clocksource and convert the cycle delta to nanoseconds */
static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
	return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
}

/**
 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
 * @tkr: Timekeeping readout base from which we take the update
 * @tkf: Pointer to NMI safe timekeeper
 *
 * We want to use this from any context including NMI and tracing /
 * instrumenting the timekeeping code itself.
 *
 * Employ the latch technique; see @write_seqcount_latch.
 *
 * So if a NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result in a
 * slightly wrong timestamp (a few nanoseconds). See
 * @ktime_get_mono_fast_ns.
 */
static void update_fast_timekeeper(const struct tk_read_base *tkr,
				   struct tk_fast *tkf)
{
	struct tk_read_base *base = tkf->base;

	/* Force readers off to base[1] */
	write_seqcount_latch_begin(&tkf->seq);

	/* Update base[0] */
	memcpy(base, tkr, sizeof(*base));

	/* Force readers back to base[0] */
	write_seqcount_latch(&tkf->seq);

	/* Update base[1] */
	memcpy(base + 1, base, sizeof(*base));

	write_seqcount_latch_end(&tkf->seq);
}

/* Latch-protocol reader: lowest seq bit selects the stable base copy */
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
	struct tk_read_base *tkr;
	unsigned int seq;
	u64 now;

	do {
		seq = read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);
		now = ktime_to_ns(tkr->base);
		now += timekeeping_get_ns(tkr);
	} while (read_seqcount_latch_retry(&tkf->seq, seq));

	return now;
}

/**
 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
 *
 * This
 * timestamp is not guaranteed to be monotonic across an update.
 * The timestamp is calculated by:
 *
 *	now = base_mono + clock_delta * slope
 *
 * So if the update lowers the slope, readers who are forced to the
 * not yet updated second array are still using the old steeper slope.
 *
 * tmono
 * ^
 * |    o  n
 * |   o n
 * |  u
 * | o
 * |o
 * |12345678---> reader order
 *
 * o = old slope
 * u = update
 * n = new slope
 *
 * So reader 6 will observe time going backwards versus reader 5.
 *
 * While other CPUs are likely to be able to observe that, the only way
 * for a CPU local observation is when an NMI hits in the middle of
 * the update. Timestamps taken from that NMI context might be ahead
 * of the following timestamps. Callers need to be aware of that and
 * deal with it.
 */
u64 notrace ktime_get_mono_fast_ns(void)
{
	return __ktime_get_fast_ns(&tk_fast_mono);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);

/**
 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
 *
 * Contrary to ktime_get_mono_fast_ns() this is always correct because the
 * conversion factor is not affected by NTP/PTP correction.
 */
u64 notrace ktime_get_raw_fast_ns(void)
{
	return __ktime_get_fast_ns(&tk_fast_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);

/**
 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
 *
 * To keep it NMI safe since we're accessing from tracing, we're not using a
 * separate timekeeper with updates to monotonic clock and boot offset
 * protected with seqcounts. This has the following minor side effects:
 *
 * (1) It's possible that a timestamp is taken after the boot offset is updated
 * but before the timekeeper is updated.
 * If this happens, the new boot offset
 * is added to the old timekeeping making the clock appear to update slightly
 * earlier:
 *	CPU 0					CPU 1
 *	timekeeping_inject_sleeptime64()
 *	__timekeeping_inject_sleeptime(tk, delta);
 *						timestamp();
 *	timekeeping_update_staged(tkd, TK_CLEAR_NTP...);
 *
 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
 * partially updated. Since the tk->offs_boot update is a rare event, this
 * should be a rare occurrence which postprocessing should be able to handle.
 *
 * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
 * apply as well.
 */
u64 notrace ktime_get_boot_fast_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	/* data_race(): offs_boot is read without serialization, see above */
	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);

/**
 * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
 *
 * The same limitations as described for ktime_get_boot_fast_ns() apply. The
 * mono time and the TAI offset are not read atomically which may yield wrong
 * readouts. However, an update of the TAI offset is a rare event e.g., caused
 * by settime or adjtimex with an offset. The user of this function has to deal
 * with the possibility of wrong timestamps in post processing.
 */
u64 notrace ktime_get_tai_fast_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
}
EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);

/**
 * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
 *
 * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
 */
u64 ktime_get_real_fast_ns(void)
{
	struct tk_fast *tkf = &tk_fast_mono;
	struct tk_read_base *tkr;
	u64 baser, delta;
	unsigned int seq;

	/* Same latch-read protocol as __ktime_get_fast_ns(), but based
	 * on base_real so the result is CLOCK_REALTIME nanoseconds. */
	do {
		seq = raw_read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);
		baser = ktime_to_ns(tkr->base_real);
		delta = timekeeping_get_ns(tkr);
	} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));

	return baser + delta;
}
EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);

/**
 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
 * @tk: Timekeeper to snapshot.
 *
 * It generally is unsafe to access the clocksource after timekeeping has been
 * suspended, so take a snapshot of the readout base of @tk and use it as the
 * fast timekeeper's readout base while suspended. It will return the same
 * number of cycles every time until timekeeping is resumed at which time the
 * proper readout base for the fast timekeeper will be restored automatically.
634 */ 635 static void halt_fast_timekeeper(const struct timekeeper *tk) 636 { 637 static struct tk_read_base tkr_dummy; 638 const struct tk_read_base *tkr = &tk->tkr_mono; 639 640 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 641 cycles_at_suspend = tk_clock_read(tkr); 642 tkr_dummy.clock = &dummy_clock; 643 tkr_dummy.base_real = tkr->base + tk->offs_real; 644 update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); 645 646 tkr = &tk->tkr_raw; 647 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 648 tkr_dummy.clock = &dummy_clock; 649 update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); 650 } 651 652 static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); 653 654 static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) 655 { 656 raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk); 657 } 658 659 /** 660 * pvclock_gtod_register_notifier - register a pvclock timedata update listener 661 * @nb: Pointer to the notifier block to register 662 */ 663 int pvclock_gtod_register_notifier(struct notifier_block *nb) 664 { 665 struct timekeeper *tk = &tk_core.timekeeper; 666 int ret; 667 668 guard(raw_spinlock_irqsave)(&tk_core.lock); 669 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); 670 update_pvclock_gtod(tk, true); 671 672 return ret; 673 } 674 EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier); 675 676 /** 677 * pvclock_gtod_unregister_notifier - unregister a pvclock 678 * timedata update listener 679 * @nb: Pointer to the notifier block to unregister 680 */ 681 int pvclock_gtod_unregister_notifier(struct notifier_block *nb) 682 { 683 guard(raw_spinlock_irqsave)(&tk_core.lock); 684 return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb); 685 } 686 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); 687 688 /* 689 * tk_update_leap_state - helper to update the next_leap_ktime 690 */ 691 static inline void tk_update_leap_state(struct timekeeper *tk) 692 { 693 tk->next_leap_ktime = ntp_get_next_leap(tk->id); 694 if (tk->next_leap_ktime != KTIME_MAX) 695 /* Convert 
to monotonic time */ 696 tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real); 697 } 698 699 /* 700 * Leap state update for both shadow and the real timekeeper 701 * Separate to spare a full memcpy() of the timekeeper. 702 */ 703 static void tk_update_leap_state_all(struct tk_data *tkd) 704 { 705 write_seqcount_begin(&tkd->seq); 706 tk_update_leap_state(&tkd->shadow_timekeeper); 707 tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime; 708 write_seqcount_end(&tkd->seq); 709 } 710 711 /* 712 * Update the ktime_t based scalar nsec members of the timekeeper 713 */ 714 static inline void tk_update_ktime_data(struct timekeeper *tk) 715 { 716 u64 seconds; 717 u32 nsec; 718 719 /* 720 * The xtime based monotonic readout is: 721 * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now(); 722 * The ktime based monotonic readout is: 723 * nsec = base_mono + now(); 724 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec 725 */ 726 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); 727 nsec = (u32) tk->wall_to_monotonic.tv_nsec; 728 tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); 729 730 /* 731 * The sum of the nanoseconds portions of xtime and 732 * wall_to_monotonic can be greater/equal one second. Take 733 * this into account before updating tk->ktime_sec. 
734 */ 735 nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); 736 if (nsec >= NSEC_PER_SEC) 737 seconds++; 738 tk->ktime_sec = seconds; 739 740 /* Update the monotonic raw base */ 741 tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC); 742 } 743 744 static inline void tk_update_ns_to_cyc(struct timekeeper *tks, struct timekeeper *tkc) 745 { 746 struct tk_read_base *tkrs = &tks->tkr_mono; 747 struct tk_read_base *tkrc = &tkc->tkr_mono; 748 unsigned int shift; 749 750 if (!IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) || 751 !(tkrs->clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT)) 752 return; 753 754 if (tkrs->mult == tkrc->mult && tkrs->shift == tkrc->shift) 755 return; 756 /* 757 * The conversion math is simple: 758 * 759 * CS::MULT (1 << NS_TO_CYC_SHIFT) 760 * --------------- = ---------------------- 761 * (1 << CS:SHIFT) NS_TO_CYC_MULT 762 * 763 * Ergo: 764 * 765 * NS_TO_CYC_MULT = (1 << (CS::SHIFT + NS_TO_CYC_SHIFT)) / CS::MULT 766 * 767 * NS_TO_CYC_SHIFT has been set up in tk_setup_internals() 768 */ 769 shift = tkrs->shift + tks->cs_ns_to_cyc_shift; 770 tks->cs_ns_to_cyc_mult = (u32)div_u64(1ULL << shift, tkrs->mult); 771 tks->cs_ns_to_cyc_maxns = div_u64(tkrs->clock->mask, tks->cs_ns_to_cyc_mult); 772 } 773 774 /* 775 * Restore the shadow timekeeper from the real timekeeper. 776 */ 777 static void timekeeping_restore_shadow(struct tk_data *tkd) 778 { 779 lockdep_assert_held(&tkd->lock); 780 memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper)); 781 } 782 783 static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action) 784 { 785 struct timekeeper *tk = &tkd->shadow_timekeeper; 786 787 lockdep_assert_held(&tkd->lock); 788 789 /* 790 * Block out readers before running the updates below because that 791 * updates VDSO and other time related infrastructure. 
Not blocking 792 * the readers might let a reader see time going backwards when 793 * reading from the VDSO after the VDSO update and then reading in 794 * the kernel from the timekeeper before that got updated. 795 */ 796 write_seqcount_begin(&tkd->seq); 797 798 if (action & TK_CLEAR_NTP) { 799 tk->ntp_error = 0; 800 ntp_clear(tk->id); 801 } 802 803 tk_update_leap_state(tk); 804 tk_update_ktime_data(tk); 805 tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; 806 807 if (tk->id == TIMEKEEPER_CORE) { 808 tk_update_ns_to_cyc(tk, &tkd->timekeeper); 809 update_vsyscall(tk); 810 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); 811 812 update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); 813 update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); 814 } else if (tk_is_aux(tk)) { 815 vdso_time_update_aux(tk); 816 } 817 818 if (action & TK_CLOCK_WAS_SET) 819 tk->clock_was_set_seq++; 820 821 /* 822 * Update the real timekeeper. 823 * 824 * We could avoid this memcpy() by switching pointers, but that has 825 * the downside that the reader side does not longer benefit from 826 * the cacheline optimized data layout of the timekeeper and requires 827 * another indirection. 828 */ 829 memcpy(&tkd->timekeeper, tk, sizeof(*tk)); 830 write_seqcount_end(&tkd->seq); 831 } 832 833 /** 834 * timekeeping_forward_now - update clock to the current time 835 * @tk: Pointer to the timekeeper to update 836 * 837 * Forward the current clock to update its state since the last call to 838 * update_wall_time(). This is useful before significant clock changes, 839 * as it avoids having to deal with this time offset explicitly. 
840 */ 841 static void timekeeping_forward_now(struct timekeeper *tk) 842 { 843 u64 cycle_now, delta; 844 845 cycle_now = tk_clock_read(&tk->tkr_mono); 846 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, 847 tk->tkr_mono.clock->max_raw_delta); 848 tk->tkr_mono.cycle_last = cycle_now; 849 tk->tkr_raw.cycle_last = cycle_now; 850 851 while (delta > 0) { 852 u64 max = tk->tkr_mono.clock->max_cycles; 853 u64 incr = delta < max ? delta : max; 854 855 tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult; 856 tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult; 857 tk_normalize_xtime(tk); 858 delta -= incr; 859 } 860 tk_update_coarse_nsecs(tk); 861 } 862 863 /* 864 * ktime_expiry_to_cycles - Convert a expiry time to clocksource cycles 865 * @id: Clocksource ID which is required for validity 866 * @expires_ns: Absolute CLOCK_MONOTONIC expiry time (nsecs) to be converted 867 * @cycles: Pointer to storage for corresponding absolute cycles value 868 * 869 * Convert a CLOCK_MONOTONIC based absolute expiry time to a cycles value 870 * based on the correlated clocksource of the clockevent device by using 871 * the base nanoseconds and cycles values of the last timekeeper update and 872 * converting the delta between @expires_ns and base nanoseconds to cycles. 873 * 874 * This only works for clockevent devices which are using a less than or 875 * equal comparator against the clocksource. 876 * 877 * Utilizing this avoids two clocksource reads for such devices, the 878 * ktime_get() in clockevents_program_event() to calculate the delta expiry 879 * value and the readout in the device::set_next_event() callback to 880 * convert the delta back to a absolute comparator value. 
881 * 882 * Returns: True if @id matches the current clocksource ID, false otherwise 883 */ 884 bool ktime_expiry_to_cycles(enum clocksource_ids id, ktime_t expires_ns, u64 *cycles) 885 { 886 struct timekeeper *tk = &tk_core.timekeeper; 887 struct tk_read_base *tkrm = &tk->tkr_mono; 888 ktime_t base_ns, delta_ns, max_ns; 889 u64 base_cycles, delta_cycles; 890 unsigned int seq; 891 u32 mult, shift; 892 893 /* 894 * Racy check to avoid the seqcount overhead when ID does not match. If 895 * the relevant clocksource is installed concurrently, then this will 896 * just delay the switch over to this mechanism until the next event is 897 * programmed. If the ID is not matching the clock events code will use 898 * the regular relative set_next_event() callback as before. 899 */ 900 if (data_race(tk->cs_id) != id) 901 return false; 902 903 do { 904 seq = read_seqcount_begin(&tk_core.seq); 905 906 if (tk->cs_id != id) 907 return false; 908 909 base_cycles = tkrm->cycle_last; 910 base_ns = tkrm->base + (tkrm->xtime_nsec >> tkrm->shift); 911 912 mult = tk->cs_ns_to_cyc_mult; 913 shift = tk->cs_ns_to_cyc_shift; 914 max_ns = tk->cs_ns_to_cyc_maxns; 915 916 } while (read_seqcount_retry(&tk_core.seq, seq)); 917 918 /* Prevent negative deltas and multiplication overflows */ 919 delta_ns = min(expires_ns - base_ns, max_ns); 920 delta_ns = max(delta_ns, 0); 921 922 /* Convert to cycles */ 923 delta_cycles = ((u64)delta_ns * mult) >> shift; 924 *cycles = base_cycles + delta_cycles; 925 return true; 926 } 927 928 /** 929 * ktime_get_real_ts64 - Returns the time of day in a timespec64. 930 * @ts: pointer to the timespec to be set 931 * 932 * Returns the time of day in a timespec64 (WARN if suspended). 
933 */ 934 void ktime_get_real_ts64(struct timespec64 *ts) 935 { 936 struct timekeeper *tk = &tk_core.timekeeper; 937 unsigned int seq; 938 u64 nsecs; 939 940 WARN_ON(timekeeping_suspended); 941 942 do { 943 seq = read_seqcount_begin(&tk_core.seq); 944 945 ts->tv_sec = tk->xtime_sec; 946 nsecs = timekeeping_get_ns(&tk->tkr_mono); 947 948 } while (read_seqcount_retry(&tk_core.seq, seq)); 949 950 ts->tv_nsec = 0; 951 timespec64_add_ns(ts, nsecs); 952 } 953 EXPORT_SYMBOL(ktime_get_real_ts64); 954 955 ktime_t ktime_get(void) 956 { 957 struct timekeeper *tk = &tk_core.timekeeper; 958 unsigned int seq; 959 ktime_t base; 960 u64 nsecs; 961 962 WARN_ON(timekeeping_suspended); 963 964 do { 965 seq = read_seqcount_begin(&tk_core.seq); 966 base = tk->tkr_mono.base; 967 nsecs = timekeeping_get_ns(&tk->tkr_mono); 968 969 } while (read_seqcount_retry(&tk_core.seq, seq)); 970 971 return ktime_add_ns(base, nsecs); 972 } 973 EXPORT_SYMBOL_GPL(ktime_get); 974 975 u32 ktime_get_resolution_ns(void) 976 { 977 struct timekeeper *tk = &tk_core.timekeeper; 978 unsigned int seq; 979 u32 nsecs; 980 981 WARN_ON(timekeeping_suspended); 982 983 do { 984 seq = read_seqcount_begin(&tk_core.seq); 985 nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift; 986 } while (read_seqcount_retry(&tk_core.seq, seq)); 987 988 return nsecs; 989 } 990 EXPORT_SYMBOL_GPL(ktime_get_resolution_ns); 991 992 static const ktime_t *const offsets[TK_OFFS_MAX] = { 993 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, 994 [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, 995 [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, 996 }; 997 998 ktime_t ktime_get_with_offset(enum tk_offsets offs) 999 { 1000 struct timekeeper *tk = &tk_core.timekeeper; 1001 const ktime_t *offset = offsets[offs]; 1002 unsigned int seq; 1003 ktime_t base; 1004 u64 nsecs; 1005 1006 WARN_ON(timekeeping_suspended); 1007 1008 do { 1009 seq = read_seqcount_begin(&tk_core.seq); 1010 base = ktime_add(tk->tkr_mono.base, *offset); 1011 nsecs = 
timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);

}
EXPORT_SYMBOL_GPL(ktime_get_with_offset);

/* Coarse variant: uses the cached coarse_nsec instead of reading the clock */
ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	const ktime_t *offset = offsets[offs];
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = ktime_add(tk->tkr_mono.base, *offset);
		nsecs = tk->coarse_nsec;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);

/**
 * ktime_mono_to_any() - convert monotonic time to any other time
 * @tmono: time to convert.
 * @offs: which offset to use
 */
ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
{
	const ktime_t *offset = offsets[offs];
	unsigned int seq;
	ktime_t tconv;

	if (IS_ENABLED(CONFIG_64BIT)) {
		/*
		 * 64-bit: the offset load is atomic. Paired with
		 * WRITE_ONCE()s in tk_set_wall_to_mono() and
		 * tk_update_sleep_time().
		 */
		return ktime_add(tmono, READ_ONCE(*offset));
	}

	/* 32-bit: a 64-bit offset read must be seqcount protected */
	do {
		seq = read_seqcount_begin(&tk_core.seq);
		tconv = ktime_add(tmono, *offset);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return tconv;
}
EXPORT_SYMBOL_GPL(ktime_mono_to_any);

/**
 * ktime_get_raw - Returns the raw monotonic time in ktime_t format
 */
ktime_t ktime_get_raw(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = tk->tkr_raw.base;
		nsecs = timekeeping_get_ns(&tk->tkr_raw);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_raw);

/**
 * ktime_get_ts64 - get the monotonic clock in timespec64 format
 * @ts: pointer to timespec variable
 *
 * The function calculates the monotonic clock from the realtime
 * clock and the wall_to_monotonic offset and stores the result
 * in normalized timespec64 format in the variable pointed to by @ts.
 */
void ktime_get_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	struct timespec64 tomono;
	unsigned int seq;
	u64 nsec;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		ts->tv_sec = tk->xtime_sec;
		nsec = timekeeping_get_ns(&tk->tkr_mono);
		tomono = tk->wall_to_monotonic;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	ts->tv_sec += tomono.tv_sec;
	ts->tv_nsec = 0;
	timespec64_add_ns(ts, nsec + tomono.tv_nsec);
}
EXPORT_SYMBOL_GPL(ktime_get_ts64);

/**
 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
 *
 * Returns the seconds portion of CLOCK_MONOTONIC with a single non
 * serialized read. tk->ktime_sec is of type 'unsigned long' so this
 * works on both 32 and 64 bit systems. On 32 bit systems the readout
 * covers ~136 years of uptime which should be enough to prevent
 * premature wrap arounds.
 */
time64_t ktime_get_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	WARN_ON(timekeeping_suspended);
	return tk->ktime_sec;
}
EXPORT_SYMBOL_GPL(ktime_get_seconds);

/**
 * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
 *
 * Returns the wall clock seconds since 1970.
 *
 * For 64bit systems the fast access to tk->xtime_sec is preserved. On
 * 32bit systems the access must be protected with the sequence
 * counter to provide "atomic" access to the 64bit tk->xtime_sec
 * value.
 */
time64_t ktime_get_real_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	time64_t seconds;
	unsigned int seq;

	if (IS_ENABLED(CONFIG_64BIT))
		return tk->xtime_sec;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		seconds = tk->xtime_sec;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return seconds;
}
EXPORT_SYMBOL_GPL(ktime_get_real_seconds);

/**
 * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds
 *
 * The same as ktime_get_real_seconds() but without the sequence counter
 * protection. This function is used in restricted contexts like the x86 MCE
 * handler and in KGDB. It's unprotected on 32-bit vs. concurrent half
 * completed modification and only to be used for such critical contexts.
 *
 * Returns: Racy snapshot of the CLOCK_REALTIME seconds value
 */
noinstr time64_t __ktime_get_real_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	return tk->xtime_sec;
}

/**
 * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
 * @systime_snapshot:	pointer to struct receiving the system time snapshot
 */
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base_raw;
	ktime_t base_real;
	ktime_t base_boot;
	u64 nsec_raw;
	u64 nsec_real;
	u64 now;

	WARN_ON_ONCE(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		/* One clock read feeds the mono, raw, real and boot timestamps */
		now = tk_clock_read(&tk->tkr_mono);
		systime_snapshot->cs_id = tk->tkr_mono.clock->id;
		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
		base_real = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		base_boot = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_boot);
		base_raw = tk->tkr_raw.base;
		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
		nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	systime_snapshot->cycles = now;
	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
	systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_snapshot);

/* Scale base by mult/div checking for overflow */
static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
{
	u64 tmp, rem;

	tmp = div64_u64_rem(*base, div, &rem);

	/* Refuse the scale if either partial product would overflow 64 bits */
	if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
	    ((int)sizeof(u64)*8 -
fls64(mult) < fls64(rem)))
		return -EOVERFLOW;
	tmp *= mult;

	rem = div64_u64(rem * mult, div);
	*base = tmp + rem;
	return 0;
}

/**
 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
 * @history:			Snapshot representing start of history
 * @partial_history_cycles:	Cycle offset into history (fractional part)
 * @total_history_cycles:	Total history length in cycles
 * @discontinuity:		True indicates clock was set on history period
 * @ts:				Cross timestamp that should be adjusted using
 *				partial/total ratio
 *
 * Helper function used by get_device_system_crosststamp() to correct the
 * crosstimestamp corresponding to the start of the current interval to the
 * system counter value (timestamp point) provided by the driver. The
 * total_history_* quantities are the total history starting at the provided
 * reference point and ending at the start of the current interval. The cycle
 * count between the driver timestamp point and the start of the current
 * interval is partial_history_cycles.
 */
static int adjust_historical_crosststamp(struct system_time_snapshot *history,
					 u64 partial_history_cycles,
					 u64 total_history_cycles,
					 bool discontinuity,
					 struct system_device_crosststamp *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 corr_raw, corr_real;
	bool interp_forward;
	int ret;

	if (total_history_cycles == 0 || partial_history_cycles == 0)
		return 0;

	/* Interpolate shortest distance from beginning or end of history */
	interp_forward = partial_history_cycles > total_history_cycles / 2;
	partial_history_cycles = interp_forward ?
		total_history_cycles - partial_history_cycles :
		partial_history_cycles;

	/*
	 * Scale the monotonic raw time delta by:
	 *	partial_history_cycles / total_history_cycles
	 */
	corr_raw = (u64)ktime_to_ns(
		ktime_sub(ts->sys_monoraw, history->raw));
	ret = scale64_check_overflow(partial_history_cycles,
				     total_history_cycles, &corr_raw);
	if (ret)
		return ret;

	/*
	 * If there is a discontinuity in the history, scale monotonic raw
	 * correction by:
	 *	mult(real)/mult(raw) yielding the realtime correction
	 * Otherwise, calculate the realtime correction similar to monotonic
	 * raw calculation
	 */
	if (discontinuity) {
		corr_real = mul_u64_u32_div
			(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
	} else {
		corr_real = (u64)ktime_to_ns(
			ktime_sub(ts->sys_realtime, history->real));
		ret = scale64_check_overflow(partial_history_cycles,
					     total_history_cycles, &corr_real);
		if (ret)
			return ret;
	}

	/* Fixup monotonic raw and real time values */
	if (interp_forward) {
		ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
		ts->sys_realtime = ktime_add_ns(history->real, corr_real);
	} else {
		ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
		ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
	}

	return 0;
}

/*
 * timestamp_in_interval - true if ts is chronologically in [start, end]
 *
 * True if ts occurs chronologically at or after start, and before or at end.
 */
static bool timestamp_in_interval(u64 start, u64 end, u64 ts)
{
	if (ts >= start && ts <= end)
		return true;
	/* Interval wrapped around the u64 counter: end is numerically below start */
	if (start > end && (ts >= start || ts <= end))
		return true;
	return false;
}

/* Scale *val by numerator/denominator without losing remainder precision */
static bool convert_clock(u64 *val, u32 numerator, u32 denominator)
{
	u64 rem, res;

	if (!numerator || !denominator)
		return false;

	res = div64_u64_rem(*val, denominator, &rem) * numerator;
	*val = res + div_u64(rem * numerator, denominator);
	return true;
}

/* Convert the counter value in @scv to timekeeper clocksource cycles */
static bool convert_base_to_cs(struct system_counterval_t *scv)
{
	struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
	struct clocksource_base *base;
	u32 num, den;

	/* The timestamp was taken from the time keeper clock source */
	if (cs->id == scv->cs_id)
		return true;

	/*
	 * Check whether cs_id matches the base clock. Prevent the compiler from
	 * re-evaluating @base as the clocksource might change concurrently.
	 */
	base = READ_ONCE(cs->base);
	if (!base || base->id != scv->cs_id)
		return false;

	num = scv->use_nsecs ? cs->freq_khz : base->numerator;
	den = scv->use_nsecs ? USEC_PER_SEC : base->denominator;

	if (!convert_clock(&scv->cycles, num, den))
		return false;

	scv->cycles += base->offset;
	return true;
}

/* Convert timekeeper clocksource cycles to base clock cycles */
static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id)
{
	struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
	struct clocksource_base *base;

	/*
	 * Check whether base_id matches the base clock. Prevent the compiler from
	 * re-evaluating @base as the clocksource might change concurrently.
	 */
	base = READ_ONCE(cs->base);
	if (!base || base->id != base_id)
		return false;

	*cycles -= base->offset;
	if (!convert_clock(cycles, base->denominator, base->numerator))
		return false;
	return true;
}

/* Convert a monotonic nanosecond delta to timekeeper clocksource cycles */
static bool convert_ns_to_cs(u64 *delta)
{
	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;

	/* Reject deltas whose shifted value would not fit into 64 bits */
	if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta))
		return false;

	*delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult);
	return true;
}

/**
 * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp
 * @treal:	CLOCK_REALTIME timestamp to convert
 * @base_id:	base clocksource id
 * @cycles:	pointer to store the converted base clock timestamp
 *
 * Converts a supplied, future realtime clock value to the corresponding base clock value.
 *
 * Return: true if the conversion is successful, false otherwise.
 */
bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 delta;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		/* Only future (relative to base_real) timestamps are convertible */
		if ((u64)treal < tk->tkr_mono.base_real)
			return false;
		delta = (u64)treal - tk->tkr_mono.base_real;
		if (!convert_ns_to_cs(&delta))
			return false;
		*cycles = tk->tkr_mono.cycle_last + delta;
		if (!convert_cs_to_base(cycles, base_id))
			return false;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return true;
}
EXPORT_SYMBOL_GPL(ktime_real_to_base_clock);

/**
 * get_device_system_crosststamp - Synchronously capture system/device timestamp
 * @get_time_fn:	Callback to get simultaneous device time and
 *			system counter from the device driver
 * @ctx:		Context passed to get_time_fn()
 * @history_begin:	Historical reference point used to interpolate system
 *			time when counter provided by the driver is before the current interval
 * @xtstamp:		Receives simultaneously captured system and device time
 *
 * Reads a timestamp from a device and correlates it to system time
 */
int get_device_system_crosststamp(int (*get_time_fn)
				  (ktime_t *device_time,
				   struct system_counterval_t *sys_counterval,
				   void *ctx),
				  void *ctx,
				  struct system_time_snapshot *history_begin,
				  struct system_device_crosststamp *xtstamp)
{
	struct system_counterval_t system_counterval = {};
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 cycles, now, interval_start;
	unsigned int clock_was_set_seq = 0;
	ktime_t base_real, base_raw;
	u64 nsec_real, nsec_raw;
	u8 cs_was_changed_seq;
	unsigned int seq;
	bool do_interp;
	int ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		/*
		 * Try to synchronously capture device time and a system
		 * counter
value calling back into the device driver
		 */
		ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
		if (ret)
			return ret;

		/*
		 * Verify that the clocksource ID associated with the captured
		 * system counter value is the same as for the currently
		 * installed timekeeper clocksource
		 */
		if (system_counterval.cs_id == CSID_GENERIC ||
		    !convert_base_to_cs(&system_counterval))
			return -ENODEV;
		cycles = system_counterval.cycles;

		/*
		 * Check whether the system counter value provided by the
		 * device driver is on the current timekeeping interval.
		 */
		now = tk_clock_read(&tk->tkr_mono);
		interval_start = tk->tkr_mono.cycle_last;
		if (!timestamp_in_interval(interval_start, now, cycles)) {
			/* Outside the interval: interpolate from the interval start */
			clock_was_set_seq = tk->clock_was_set_seq;
			cs_was_changed_seq = tk->cs_was_changed_seq;
			cycles = interval_start;
			do_interp = true;
		} else {
			do_interp = false;
		}

		base_real = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		base_raw = tk->tkr_raw.base;

		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles);
		nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
	xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);

	/*
	 * Interpolate if necessary, adjusting back from the start of the
	 * current interval
	 */
	if (do_interp) {
		u64 partial_history_cycles, total_history_cycles;
		bool discontinuity;

		/*
		 * Check that the counter value is not before the provided
		 * history reference and that the history doesn't cross a
		 * clocksource change
		 */
		if (!history_begin ||
		    !timestamp_in_interval(history_begin->cycles,
					   cycles, system_counterval.cycles) ||
		    history_begin->cs_was_changed_seq !=
cs_was_changed_seq)
			return -EINVAL;
		partial_history_cycles = cycles - system_counterval.cycles;
		total_history_cycles = cycles - history_begin->cycles;
		discontinuity =
			history_begin->clock_was_set_seq != clock_was_set_seq;

		ret = adjust_historical_crosststamp(history_begin,
						    partial_history_cycles,
						    total_history_cycles,
						    discontinuity, xtstamp);
		if (ret)
			return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(get_device_system_crosststamp);

/**
 * timekeeping_clocksource_has_base - Check whether the current clocksource
 *				      is based on a given base clock
 * @id:		base clocksource ID
 *
 * Note: The return value is a snapshot which can become invalid right
 * after the function returns.
 *
 * Return: true if the timekeeper clocksource has a base clock with @id,
 *	   false otherwise
 */
bool timekeeping_clocksource_has_base(enum clocksource_ids id)
{
	/*
	 * This is a snapshot, so no point in using the sequence
	 * count. Just prevent the compiler from re-evaluating @base as the
	 * clocksource might change concurrently.
	 */
	struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base);

	return base ? base->id == id : false;
}
EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base);

/**
 * do_settimeofday64 - Sets the time of day.
 * @ts: pointer to the timespec64 variable containing the new time
 *
 * Sets the time of day to the new time, updates NTP and notifies hrtimers
 */
int do_settimeofday64(const struct timespec64 *ts)
{
	struct timespec64 ts_delta, xt;

	if (!timespec64_valid_settod(ts))
		return -EINVAL;

	scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
		struct timekeeper *tks = &tk_core.shadow_timekeeper;

		timekeeping_forward_now(tks);

		xt = tk_xtime(tks);
		ts_delta = timespec64_sub(*ts, xt);

		/* Reject settings which would make CLOCK_MONOTONIC go backwards */
		if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) {
			timekeeping_restore_shadow(&tk_core);
			return -EINVAL;
		}

		tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta));
		tk_set_xtime(tks, ts);
		timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
	}

	/* Signal hrtimers about time change */
	clock_was_set(CLOCK_SET_WALL);

	audit_tk_injoffset(ts_delta);
	add_device_randomness(ts, sizeof(*ts));
	return 0;
}
EXPORT_SYMBOL(do_settimeofday64);

/* True when @tk is the core timekeeper (always true without aux clocks) */
static inline bool timekeeper_is_core_tk(struct timekeeper *tk)
{
	return !IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) || tk->id == TIMEKEEPER_CORE;
}

/**
 * __timekeeping_inject_offset - Adds or subtracts from the current time.
 * @tkd:	Pointer to the timekeeper to modify
 * @ts:		Pointer to the timespec variable containing the offset
 *
 * Adds or subtracts an offset value from the current time.
 */
static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespec64 *ts)
{
	struct timekeeper *tks = &tkd->shadow_timekeeper;
	struct timespec64 tmp;

	if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
		return -EINVAL;

	timekeeping_forward_now(tks);

	if (timekeeper_is_core_tk(tks)) {
		/* Make sure the proposed value is valid */
		tmp = timespec64_add(tk_xtime(tks), *ts);
		if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 ||
		    !timespec64_valid_settod(&tmp)) {
			timekeeping_restore_shadow(tkd);
			return -EINVAL;
		}

		tk_xtime_add(tks, ts);
		tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts));
	} else {
		/* Auxiliary timekeeper: adjust the per-clock offset instead */
		struct tk_read_base *tkr_mono = &tks->tkr_mono;
		ktime_t now, offs;

		/* Get the current time */
		now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono));
		/* Add the relative offset change */
		offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts));

		/* Prevent that the resulting time becomes negative */
		if (ktime_add(now, offs) < 0) {
			timekeeping_restore_shadow(tkd);
			return -EINVAL;
		}
		tk_update_aux_offs(tks, offs);
	}

	timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL);
	return 0;
}

/* Apply @ts as offset to the core timekeeper and notify hrtimers on success */
static int timekeeping_inject_offset(const struct timespec64 *ts)
{
	int ret;

	scoped_guard (raw_spinlock_irqsave, &tk_core.lock)
		ret = __timekeeping_inject_offset(&tk_core, ts);

	/* Signal hrtimers about time change */
	if (!ret)
		clock_was_set(CLOCK_SET_WALL);
	return ret;
}

/*
 * Indicates if there is an offset between the system clock and the hardware
 * clock/persistent clock/rtc.
 */
int persistent_clock_is_local;

/*
 * Adjust the time obtained from the CMOS to be UTC time instead of
 * local time.
 *
 * This is ugly, but preferable to the alternatives. Otherwise we
 * would either need to write a program to do it in /etc/rc (and risk
 * confusion if the program gets run more than once; it would also be
 * hard to make the program warp the clock precisely n hours) or
 * compile in the timezone information into the kernel. Bad, bad....
 *
 * - TYT, 1992-01-01
 *
 * The best thing to do is to keep the CMOS clock in universal time (UTC)
 * as real UNIX machines always do it. This avoids all headaches about
 * daylight saving times and warping kernel clocks.
 */
void timekeeping_warp_clock(void)
{
	if (sys_tz.tz_minuteswest != 0) {
		struct timespec64 adjust;

		persistent_clock_is_local = 1;
		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
		adjust.tv_nsec = 0;
		timekeeping_inject_offset(&adjust);
	}
}

/*
 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
 */
static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
{
	tk->tai_offset = tai_offset;
	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
}

/*
 * change_clocksource - Swaps clocksources if a new one is available
 *
 * Accumulates current time interval and initializes new clocksource
 */
static int change_clocksource(void *data)
{
	struct clocksource *new = data, *old = NULL;

	/*
	 * If the clocksource is in a module, get a module reference.
	 * Succeeds for built-in code (owner == NULL) as well. Abort if the
	 * reference can't be acquired.
	 */
	if (!try_module_get(new->owner))
		return 0;

	/* Abort if the device can't be enabled */
	if (new->enable && new->enable(new) != 0) {
		module_put(new->owner);
		return 0;
	}

	scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
		struct timekeeper *tks = &tk_core.shadow_timekeeper;

		timekeeping_forward_now(tks);
		old = tks->tkr_mono.clock;
		tk_setup_internals(tks, new);
		timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
	}

	tk_aux_update_clocksource();

	/* Release the previous clocksource outside of the timekeeper lock */
	if (old) {
		if (old->disable)
			old->disable(old);
		module_put(old->owner);
	}

	return 0;
}

/**
 * timekeeping_notify - Install a new clock source
 * @clock:	pointer to the clock source
 *
 * This function is called from clocksource.c after a new, better clock
 * source has been registered. The caller holds the clocksource_mutex.
 */
int timekeeping_notify(struct clocksource *clock)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	if (tk->tkr_mono.clock == clock)
		return 0;

	/* Disable inlined reads across the clocksource switch */
	clocksource_disable_inline_read();

	stop_machine(change_clocksource, clock, NULL);

	/*
	 * If the clocksource has been selected and supports inlined reads
	 * enable the branch.
	 */
	if (tk->tkr_mono.clock == clock && clock->flags & CLOCK_SOURCE_CAN_INLINE_READ)
		clocksource_enable_inline_read();

	tick_clock_notify();
	return tk->tkr_mono.clock == clock ?
0 : -1;
}

/**
 * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
 * @ts: pointer to the timespec64 to be set
 *
 * Returns the raw monotonic time (completely un-modified by ntp)
 */
void ktime_get_raw_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		ts->tv_sec = tk->raw_sec;
		nsecs = timekeeping_get_ns(&tk->tkr_raw);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	ts->tv_nsec = 0;
	timespec64_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(ktime_get_raw_ts64);

/**
 * ktime_get_clock_ts64 - Returns time of a clock in a timespec
 * @id:		POSIX clock ID of the clock to read
 * @ts:		Pointer to the timespec64 to be set
 *
 * The timestamp is invalidated (@ts->sec is set to -1) if the
 * clock @id is not available.
 */
void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts)
{
	/* Invalidate time stamp */
	ts->tv_sec = -1;
	ts->tv_nsec = 0;

	switch (id) {
	case CLOCK_REALTIME:
		ktime_get_real_ts64(ts);
		return;
	case CLOCK_MONOTONIC:
		ktime_get_ts64(ts);
		return;
	case CLOCK_MONOTONIC_RAW:
		ktime_get_raw_ts64(ts);
		return;
	case CLOCK_AUX ...
CLOCK_AUX_LAST:
		/* ktime_get_aux_ts64() invalidates @ts itself when unavailable */
		if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
			ktime_get_aux_ts64(id, ts);
		return;
	default:
		WARN_ON_ONCE(1);
	}
}
EXPORT_SYMBOL_GPL(ktime_get_clock_ts64);

/**
 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
 */
int timekeeping_valid_for_hres(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	int ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ret;
}

/**
 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
 */
u64 timekeeping_max_deferment(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ret = tk->tkr_mono.clock->max_idle_ns;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ret;
}

/**
 * read_persistent_clock64 - Return time from the persistent clock.
 * @ts: Pointer to the storage for the readout value
 *
 * Weak dummy function for arches that do not yet support it.
 * Reads the time from the battery backed persistent clock.
 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
 *
 * XXX - Do be sure to remove it once all arches implement it.
 */
void __weak read_persistent_clock64(struct timespec64 *ts)
{
	ts->tv_sec = 0;
	ts->tv_nsec = 0;
}

/**
 * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
 *					  from the boot.
 * @wall_time:	current time as returned by persistent clock
 * @boot_offset: offset that is defined as wall_time - boot_time
 *
 * Weak dummy function for arches that do not yet support it.
 *
 * The default function calculates offset based on the current value of
 * local_clock(). This way architectures that support sched_clock() but don't
 * support dedicated boot time clock will provide the best estimate of the
 * boot time.
 */
void __weak __init
read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
				     struct timespec64 *boot_offset)
{
	read_persistent_clock64(wall_time);
	*boot_offset = ns_to_timespec64(local_clock());
}

/* Initialize lock, seqcount, IDs and clock validity of a timekeeper pair */
static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id, bool valid)
{
	raw_spin_lock_init(&tkd->lock);
	seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock);
	tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id;
	tkd->timekeeper.clock_valid = tkd->shadow_timekeeper.clock_valid = valid;
}

/*
 * Flag reflecting whether timekeeping_resume() has injected sleeptime.
 *
 * The flag starts off false and is only set when a suspend reaches
 * timekeeping_suspend(), timekeeping_resume() sets it to false when the
 * timekeeper clocksource is not stopping across suspend and has been
 * used to update sleep time. If the timekeeper clocksource has stopped
 * then the flag stays true and is used by the RTC resume code to decide
 * whether sleeptime must be injected and if so the flag gets false then.
 *
 * If a suspend fails before reaching timekeeping_resume() then the flag
 * stays false and prevents erroneous sleeptime injection.
 */
static bool suspend_timing_needed;

/* Flag for if there is a persistent clock on this platform */
static bool persistent_clock_exists;

/*
 * timekeeping_init - Initializes the clocksource and common timekeeping values
 */
void __init timekeeping_init(void)
{
	struct timespec64 wall_time, boot_offset, wall_to_mono;
	struct timekeeper *tks = &tk_core.shadow_timekeeper;
	struct clocksource *clock;

	tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true);
	tk_aux_setup();

	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
	if (timespec64_valid_settod(&wall_time) &&
	    timespec64_to_ns(&wall_time) > 0) {
		persistent_clock_exists = true;
	} else if (timespec64_to_ns(&wall_time) != 0) {
		pr_warn("Persistent clock returned invalid value");
		wall_time = (struct timespec64){0};
	}

	/* Boot time cannot be before the persistent clock readout */
	if (timespec64_compare(&wall_time, &boot_offset) < 0)
		boot_offset = (struct timespec64){0};

	/*
	 * We want to set wall_to_mono, so the following is true:
	 * wall time + wall_to_mono = boot time
	 */
	wall_to_mono = timespec64_sub(boot_offset, wall_time);

	guard(raw_spinlock_irqsave)(&tk_core.lock);

	ntp_init();

	clock = clocksource_default_clock();
	if (clock->enable)
		clock->enable(clock);
	tk_setup_internals(tks, clock);

	tk_set_xtime(tks, &wall_time);
	tks->raw_sec = 0;

	tk_set_wall_to_mono(tks, wall_to_mono);

	timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
}

/* time in seconds when suspend began for persistent clock */
static struct timespec64 timekeeping_suspend_time;

/**
 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
 * @tk:		Pointer to the timekeeper to be updated
 * @delta:	Pointer to the delta value in timespec64 format
 *
 * Takes a timespec offset measuring a suspend
 * interval and properly
 * adds the sleep offset to the timekeeping variables.
 *
 * Caller must hold the timekeeper lock; @tk is expected to be the shadow
 * timekeeper, advanced via timekeeping_forward_now() beforehand.
 */
static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
					   const struct timespec64 *delta)
{
	if (!timespec64_valid_strict(delta)) {
		/* printk_deferred: we may be deep in the suspend/resume path */
		printk_deferred(KERN_WARNING
				"__timekeeping_inject_sleeptime: Invalid "
				"sleep delta value!\n");
		return;
	}
	/* Move wall time forward and monotonic offset back by @delta */
	tk_xtime_add(tk, delta);
	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
	tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
	tk_debug_account_sleep_time(delta);
}

#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
/*
 * We have three kinds of time sources to use for sleep time
 * injection, the preference order is:
 * 1) non-stop clocksource
 * 2) persistent clock (ie: RTC accessible when irqs are off)
 * 3) RTC
 *
 * 1) and 2) are used by timekeeping, 3) by RTC subsystem.
 * If system has neither 1) nor 2), 3) will be used finally.
 *
 *
 * If timekeeping has injected sleeptime via either 1) or 2),
 * 3) becomes needless, so in this case we don't need to call
 * rtc_resume(), and this is what timekeeping_rtc_skipresume()
 * means.
 */
bool timekeeping_rtc_skipresume(void)
{
	return !suspend_timing_needed;
}

/*
 * 1) can be determined whether to use or not only when doing
 * timekeeping_resume() which is invoked after rtc_suspend(),
 * so we can't skip rtc_suspend() surely if system has 1).
 *
 * But if system has 2), 2) will definitely be used, so in this
 * case we don't need to call rtc_suspend(), and this is what
 * timekeeping_rtc_skipsuspend() means.
 */
bool timekeeping_rtc_skipsuspend(void)
{
	return persistent_clock_exists;
}

/**
 * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
 * @delta: pointer to a timespec64 delta value
 *
 * This hook is for architectures that cannot support read_persistent_clock64
 * because their RTC/persistent clock is only accessible when irqs are enabled,
 * and also don't have an effective nonstop clocksource.
 *
 * This function should only be called by rtc_resume(), and allows
 * a suspend offset to be injected into the timekeeping values.
 */
void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
{
	scoped_guard(raw_spinlock_irqsave, &tk_core.lock) {
		struct timekeeper *tks = &tk_core.shadow_timekeeper;

		/* The RTC path is injecting the sleep time now */
		suspend_timing_needed = false;
		timekeeping_forward_now(tks);
		__timekeeping_inject_sleeptime(tks, delta);
		timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
	}

	/* Signal hrtimers about time change */
	clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
}
#endif

/**
 * timekeeping_resume - Resumes the generic timekeeping subsystem.
 */
void timekeeping_resume(void)
{
	struct timekeeper *tks = &tk_core.shadow_timekeeper;
	struct clocksource *clock = tks->tkr_mono.clock;
	struct timespec64 ts_new, ts_delta;
	bool inject_sleeptime = false;
	u64 cycle_now, nsec;
	unsigned long flags;

	read_persistent_clock64(&ts_new);

	clockevents_resume();
	clocksource_resume();

	raw_spin_lock_irqsave(&tk_core.lock, flags);

	/*
	 * After system resumes, we need to calculate the suspended time and
	 * compensate it for the OS time. There are 3 sources that could be
	 * used: Nonstop clocksource during suspend, persistent clock and rtc
	 * device.
	 *
	 * One specific platform may have 1 or 2 or all of them, and the
	 * preference will be:
	 *	suspend-nonstop clocksource -> persistent clock -> rtc
	 * The less preferred source will only be tried if there is no better
	 * usable source. The rtc part is handled separately in rtc core code.
	 */
	cycle_now = tk_clock_read(&tks->tkr_mono);
	nsec = clocksource_stop_suspend_timing(clock, cycle_now);
	if (nsec > 0) {
		/* The clocksource kept running across suspend */
		ts_delta = ns_to_timespec64(nsec);
		inject_sleeptime = true;
	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
		/* Fall back to the persistent clock delta */
		ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
		inject_sleeptime = true;
	}

	if (inject_sleeptime) {
		/* Sleep time handled here, so the RTC resume path can skip it */
		suspend_timing_needed = false;
		__timekeeping_inject_sleeptime(tks, &ts_delta);
	}

	/* Re-base the last cycle value */
	tks->tkr_mono.cycle_last = cycle_now;
	tks->tkr_raw.cycle_last = cycle_now;

	tks->ntp_error = 0;
	timekeeping_suspended = 0;
	timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
	raw_spin_unlock_irqrestore(&tk_core.lock, flags);

	touch_softlockup_watchdog();

	/* Resume the clockevent device(s) and hrtimers */
	tick_resume();
	/* Notify timerfd as resume is equivalent to clock_was_set() */
	timerfd_resume();
}

/* Syscore callback thin wrapper; @data is unused */
static void timekeeping_syscore_resume(void *data)
{
	timekeeping_resume();
}

int timekeeping_suspend(void)
{
	struct timekeeper *tks = &tk_core.shadow_timekeeper;
	struct timespec64 delta, delta_delta;
	static struct timespec64 old_delta;
	struct clocksource *curr_clock;
	unsigned long flags;
	u64 cycle_now;

	read_persistent_clock64(&timekeeping_suspend_time);

	/*
	 * On some systems the persistent_clock can not be detected at
	 * timekeeping_init by its return value, so if we see a valid
	 * value returned, update the persistent_clock_exists flag.
	 */
	if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
		persistent_clock_exists = true;

	/* Assume sleep time must be injected until resume proves otherwise */
	suspend_timing_needed = true;

	raw_spin_lock_irqsave(&tk_core.lock, flags);
	timekeeping_forward_now(tks);
	timekeeping_suspended = 1;

	/*
	 * Since we've called forward_now, cycle_last stores the value
	 * just read from the current clocksource. Save this to potentially
	 * use in suspend timing.
	 */
	curr_clock = tks->tkr_mono.clock;
	cycle_now = tks->tkr_mono.cycle_last;
	clocksource_start_suspend_timing(curr_clock, cycle_now);

	if (persistent_clock_exists) {
		/*
		 * To avoid drift caused by repeated suspend/resumes,
		 * which each can add ~1 second drift error,
		 * try to compensate so the difference in system time
		 * and persistent_clock time stays close to constant.
		 */
		delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time);
		delta_delta = timespec64_sub(delta, old_delta);
		if (abs(delta_delta.tv_sec) >= 2) {
			/*
			 * if delta_delta is too large, assume time correction
			 * has occurred and set old_delta to the current delta.
			 */
			old_delta = delta;
		} else {
			/* Otherwise try to adjust old_system to compensate */
			timekeeping_suspend_time =
				timespec64_add(timekeeping_suspend_time, delta_delta);
		}
	}

	timekeeping_update_from_shadow(&tk_core, 0);
	halt_fast_timekeeper(tks);
	raw_spin_unlock_irqrestore(&tk_core.lock, flags);

	tick_suspend();
	clocksource_suspend();
	clockevents_suspend();

	return 0;
}

/* Syscore callback thin wrapper; @data is unused */
static int timekeeping_syscore_suspend(void *data)
{
	return timekeeping_suspend();
}

/* sysfs resume/suspend bits for timekeeping */
static const struct syscore_ops timekeeping_syscore_ops = {
	.resume		= timekeeping_syscore_resume,
	.suspend	= timekeeping_syscore_suspend,
};

static struct syscore timekeeping_syscore = {
	.ops = &timekeeping_syscore_ops,
};

static int __init timekeeping_init_ops(void)
{
	register_syscore(&timekeeping_syscore);
	return 0;
}
device_initcall(timekeeping_init_ops);

/*
 * Apply a multiplier adjustment to the timekeeper
 *
 * Caller must hold the timekeeper lock and operate on the shadow
 * timekeeper; mult, xtime_interval and xtime_nsec are updated together.
 */
static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
							 s64 offset,
							 s32 mult_adj)
{
	s64 interval = tk->cycle_interval;

	if (mult_adj == 0) {
		return;
	} else if (mult_adj == -1) {
		interval = -interval;
		offset = -offset;
	} else if (mult_adj != 1) {
		interval *= mult_adj;
		offset *= mult_adj;
	}

	/*
	 * So the following can be confusing.
	 *
	 * To keep things simple, lets assume mult_adj == 1 for now.
	 *
	 * When mult_adj != 1, remember that the interval and offset values
	 * have been appropriately scaled so the math is the same.
	 *
	 * The basic idea here is that we're increasing the multiplier
	 * by one, this causes the xtime_interval to be incremented by
	 * one cycle_interval. This is because:
	 *	xtime_interval = cycle_interval * mult
	 * So if mult is being incremented by one:
	 *	xtime_interval = cycle_interval * (mult + 1)
	 * It's the same as:
	 *	xtime_interval = (cycle_interval * mult) + cycle_interval
	 * Which can be shortened to:
	 *	xtime_interval += cycle_interval
	 *
	 * So offset stores the non-accumulated cycles. Thus the current
	 * time (in shifted nanoseconds) is:
	 *	now = (offset * adj) + xtime_nsec
	 * Now, even though we're adjusting the clock frequency, we have
	 * to keep time consistent. In other words, we can't jump back
	 * in time, and we also want to avoid jumping forward in time.
	 *
	 * So given the same offset value, we need the time to be the same
	 * both before and after the freq adjustment.
	 *	now = (offset * adj_1) + xtime_nsec_1
	 *	now = (offset * adj_2) + xtime_nsec_2
	 * So:
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * adj_2) + xtime_nsec_2
	 * And we know:
	 *	adj_2 = adj_1 + 1
	 * So:
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * (adj_1+1)) + xtime_nsec_2
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * adj_1) + offset + xtime_nsec_2
	 * Canceling the sides:
	 *	xtime_nsec_1 = offset + xtime_nsec_2
	 * Which gives us:
	 *	xtime_nsec_2 = xtime_nsec_1 - offset
	 * Which simplifies to:
	 *	xtime_nsec -= offset
	 */
	if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
		/* NTP adjustment caused clocksource mult overflow */
		WARN_ON_ONCE(1);
		return;
	}

	tk->tkr_mono.mult += mult_adj;
	tk->xtime_interval += interval;
	tk->tkr_mono.xtime_nsec -= offset;
}

/*
 * Adjust the timekeeper's multiplier to the correct frequency
 * and also to reduce the accumulated error value.
 */
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
	u64 ntp_tl = ntp_tick_length(tk->id);
	u32 mult;

	/*
	 * Determine the multiplier from the current NTP tick length.
	 * Avoid expensive division when the tick length doesn't change.
	 */
	if (likely(tk->ntp_tick == ntp_tl)) {
		mult = tk->tkr_mono.mult - tk->ntp_err_mult;
	} else {
		tk->ntp_tick = ntp_tl;
		mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
				 tk->xtime_remainder, tk->cycle_interval);
	}

	/*
	 * If the clock is behind the NTP time, increase the multiplier by 1
	 * to catch up with it. If it's ahead and there was a remainder in the
	 * tick division, the clock will slow down. Otherwise it will stay
	 * ahead until the tick length changes to a non-divisible value.
	 */
	tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
	mult += tk->ntp_err_mult;

	timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);

	/* Warn once if NTP has pushed mult beyond the clocksource's limit */
	if (unlikely(tk->tkr_mono.clock->maxadj &&
		     (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
			> tk->tkr_mono.clock->maxadj))) {
		printk_once(KERN_WARNING
			"Adjusting %s more than 11%% (%ld vs %ld)\n",
			tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
			(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
	}

	/*
	 * It may be possible that when we entered this function, xtime_nsec
	 * was very small. Further, if we're slightly speeding the clocksource
	 * in the code above, it's possible the required corrective factor to
	 * xtime_nsec could cause it to underflow.
	 *
	 * Now, since we have already accumulated the second and the NTP
	 * subsystem has been notified via second_overflow(), we need to skip
	 * the next update.
	 */
	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
		tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
						tk->tkr_mono.shift;
		tk->xtime_sec--;
		tk->skip_second_overflow = 1;
	}
}

/*
 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
 *
 * Helper function that accumulates the nsecs greater than a second
 * from the xtime_nsec field to the xtime_secs field.
 * It also calls into the NTP code to handle leapsecond processing.
 *
 * Returns TK_CLOCK_WAS_SET if a leap second was applied, 0 otherwise.
 */
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
	u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
	unsigned int clock_set = 0;

	while (tk->tkr_mono.xtime_nsec >= nsecps) {
		int leap;

		tk->tkr_mono.xtime_nsec -= nsecps;
		tk->xtime_sec++;

		/*
		 * Skip NTP update if this second was accumulated before,
		 * i.e. xtime_nsec underflowed in timekeeping_adjust()
		 */
		if (unlikely(tk->skip_second_overflow)) {
			tk->skip_second_overflow = 0;
			continue;
		}

		/* Figure out if it's a leap sec and apply if needed */
		leap = second_overflow(tk->id, tk->xtime_sec);
		if (unlikely(leap)) {
			struct timespec64 ts;

			tk->xtime_sec += leap;

			ts.tv_sec = leap;
			ts.tv_nsec = 0;
			tk_set_wall_to_mono(tk,
				timespec64_sub(tk->wall_to_monotonic, ts));

			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);

			clock_set = TK_CLOCK_WAS_SET;
		}
	}
	return clock_set;
}

/*
 * logarithmic_accumulation - shifted accumulation of cycles
 *
 * This function accumulates a shifted interval of cycles into
 * a shifted interval nanoseconds. Allows for O(log) accumulation
 * loop.
 *
 * Returns the unconsumed cycles.
 */
static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
				    u32 shift, unsigned int *clock_set)
{
	u64 interval = tk->cycle_interval << shift;
	u64 snsec_per_sec;

	/* If the offset is smaller than a shifted interval, do nothing */
	if (offset < interval)
		return offset;

	/* Accumulate one shifted interval */
	offset -= interval;
	tk->tkr_mono.cycle_last += interval;
	tk->tkr_raw.cycle_last += interval;

	tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
	*clock_set |= accumulate_nsecs_to_secs(tk);

	/* Accumulate raw time */
	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
		tk->raw_sec++;
	}

	/* Accumulate error between NTP and clock interval */
	tk->ntp_error += tk->ntp_tick << shift;
	tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
						(tk->ntp_error_shift + shift);

	return offset;
}

/*
 * timekeeping_advance - Updates the timekeeper to the current time and
 * current NTP tick length
 *
 * Caller must hold tkd->lock. Works on the shadow timekeeper and
 * publishes the result via timekeeping_update_from_shadow(). Returns
 * true if the clock was stepped (caller must notify hrtimers).
 */
static bool __timekeeping_advance(struct tk_data *tkd, enum timekeeping_adv_mode mode)
{
	struct timekeeper *tk = &tkd->shadow_timekeeper;
	struct timekeeper *real_tk = &tkd->timekeeper;
	unsigned int clock_set = 0;
	int shift = 0, maxshift;
	u64 offset, orig_offset;

	/* Make sure we're fully resumed: */
	if (unlikely(timekeeping_suspended))
		return false;

	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
				   tk->tkr_mono.clock->max_raw_delta);
	orig_offset = offset;
	/* Check if there's really nothing to do */
	if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
		return false;

	/*
	 * With NO_HZ we may have to accumulate many cycle_intervals
	 * (think "ticks") worth of time at once. To do this efficiently,
	 * we calculate the largest doubling multiple of cycle_intervals
	 * that is smaller than the offset. We then accumulate that
	 * chunk in one go, and then try to consume the next smaller
	 * doubled multiple.
	 */
	shift = ilog2(offset) - ilog2(tk->cycle_interval);
	shift = max(0, shift);
	/* Bound shift to one less than what overflows tick_length */
	maxshift = (64 - (ilog2(ntp_tick_length(tk->id)) + 1)) - 1;
	shift = min(shift, maxshift);
	while (offset >= tk->cycle_interval) {
		offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
		if (offset < tk->cycle_interval<<shift)
			shift--;
	}

	/* Adjust the multiplier to correct NTP error */
	timekeeping_adjust(tk, offset);

	/*
	 * Finally, make sure that after the rounding
	 * xtime_nsec isn't larger than NSEC_PER_SEC
	 */
	clock_set |= accumulate_nsecs_to_secs(tk);

	/*
	 * To avoid inconsistencies caused by adjtimex TK_ADV_FREQ calls
	 * making small negative adjustments to the base xtime_nsec
	 * value, only update the coarse clocks if we accumulated time
	 */
	if (orig_offset != offset)
		tk_update_coarse_nsecs(tk);

	timekeeping_update_from_shadow(tkd, clock_set);

	return !!clock_set;
}

/* Lock-taking wrapper around __timekeeping_advance() for the core timekeeper */
static bool timekeeping_advance(enum timekeeping_adv_mode mode)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	return __timekeeping_advance(&tk_core, mode);
}

/**
 * update_wall_time - Uses the current clocksource to increment the wall time
 *
 * It also updates the enabled auxiliary clock timekeepers
 */
void update_wall_time(void)
{
	if (timekeeping_advance(TK_ADV_TICK))
		clock_was_set_delayed();
	tk_aux_advance();
}

/**
 * getboottime64 - Return the real time
 * of system boot.
 * @ts: pointer to the timespec64 to be set
 *
 * Returns the wall-time of boot in a timespec64.
 *
 * This is based on the wall_to_monotonic offset and the total suspend
 * time. Calls to settimeofday will affect the value returned (which
 * basically means that however wrong your real time clock is at boot time,
 * you get the right time here).
 */
void getboottime64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	/* boot time = realtime offset - accumulated boot (suspend) offset */
	ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);

	*ts = ktime_to_timespec64(t);
}
EXPORT_SYMBOL_GPL(getboottime64);

/*
 * ktime_get_coarse_real_ts64 - Fill @ts with the coarse-grained
 * (tick-granular) CLOCK_REALTIME value, using a lockless seqcount
 * read loop against concurrent timekeeper updates.
 */
void ktime_get_coarse_real_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		*ts = tk_xtime_coarse(tk);
	} while (read_seqcount_retry(&tk_core.seq, seq));
}
EXPORT_SYMBOL(ktime_get_coarse_real_ts64);

/**
 * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor
 * @ts: timespec64 to be filled
 *
 * Fetch the global mg_floor value, convert it to realtime and compare it
 * to the current coarse-grained time. Fill @ts with whichever is
 * latest. Note that this is a filesystem-specific interface and should be
 * avoided outside of that context.
 */
void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	/* Floor is stored as a monotonic value; converted below via offs_real */
	u64 floor = atomic64_read(&mg_floor);
	ktime_t f_real, offset, coarse;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		*ts = tk_xtime_coarse(tk);
		offset = tk_core.timekeeper.offs_real;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	coarse = timespec64_to_ktime(*ts);
	f_real = ktime_add(floor, offset);
	if (ktime_after(f_real, coarse))
		*ts = ktime_to_timespec64(f_real);
}

/**
 * ktime_get_real_ts64_mg - attempt to update floor value and return result
 * @ts: pointer to the timespec to be set
 *
 * Get a monotonic fine-grained time value and attempt to swap it into
 * mg_floor. If that succeeds then accept the new floor value. If it fails
 * then another task raced in during the interim time and updated the
 * floor. Since any update to the floor must be later than the previous
 * floor, either outcome is acceptable.
 *
 * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(),
 * and determining that the resulting coarse-grained timestamp did not effect
 * a change in ctime. Any more recent floor value would effect a change to
 * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure.
 *
 * @ts will be filled with the latest floor value, regardless of the outcome of
 * the cmpxchg. Note that this is a filesystem specific interface and should be
 * avoided outside of that context.
 */
void ktime_get_real_ts64_mg(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	ktime_t old = atomic64_read(&mg_floor);
	ktime_t offset, mono;
	unsigned int seq;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ts->tv_sec = tk->xtime_sec;
		mono = tk->tkr_mono.base;
		nsecs = timekeeping_get_ns(&tk->tkr_mono);
		offset = tk_core.timekeeper.offs_real;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	mono = ktime_add_ns(mono, nsecs);

	/*
	 * Attempt to update the floor with the new time value. As any
	 * update must be later than the existing floor, and would effect
	 * a change to ctime from the perspective of the current task,
	 * accept the resulting floor value regardless of the outcome of
	 * the swap.
	 */
	if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
		ts->tv_nsec = 0;
		timespec64_add_ns(ts, nsecs);
		timekeeping_inc_mg_floor_swaps();
	} else {
		/*
		 * Another task changed mg_floor since "old" was fetched.
		 * "old" has been updated with the latest value of "mg_floor".
		 * That value is newer than the previous floor value, which
		 * is enough to effect a change to ctime. Accept it.
		 */
		*ts = ktime_to_timespec64(ktime_add(old, offset));
	}
}

/*
 * ktime_get_coarse_ts64 - Fill @ts with the coarse-grained CLOCK_MONOTONIC
 * value (coarse realtime plus wall_to_monotonic), read locklessly under
 * the timekeeper seqcount.
 */
void ktime_get_coarse_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	struct timespec64 now, mono;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		now = tk_xtime_coarse(tk);
		mono = tk->wall_to_monotonic;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
				  now.tv_nsec + mono.tv_nsec);
}
EXPORT_SYMBOL(ktime_get_coarse_ts64);

/*
 * Must hold jiffies_lock
 */
void do_timer(unsigned long ticks)
{
	jiffies_64 += ticks;
	calc_global_load();
}

/**
 * ktime_get_update_offsets_now - hrtimer helper
 * @cwsseq:	pointer to check and store the clock was set sequence number
 * @offs_real:	pointer to storage for monotonic -> realtime offset
 * @offs_boot:	pointer to storage for monotonic -> boottime offset
 * @offs_tai:	pointer to storage for monotonic -> clock tai offset
 *
 * Returns current monotonic time and updates the offsets if the
 * sequence number in @cwsseq and timekeeper.clock_was_set_seq are
 * different.
 *
 * Called from hrtimer_interrupt() or retrigger_next_event()
 */
ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
				     ktime_t *offs_boot, ktime_t *offs_tai)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		base = tk->tkr_mono.base;
		nsecs = timekeeping_get_ns(&tk->tkr_mono);
		base = ktime_add_ns(base, nsecs);

		/* Only refresh the offsets when the clock was set in between */
		if (*cwsseq != tk->clock_was_set_seq) {
			*cwsseq = tk->clock_was_set_seq;
			*offs_real = tk->offs_real;
			*offs_boot = tk->offs_boot;
			*offs_tai = tk->offs_tai;
		}

		/* Handle leapsecond insertion adjustments */
		if (unlikely(base >= tk->next_leap_ktime))
			*offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return base;
}

/*
 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
 *
 * Pure validation: checks mode combinations, permissions and value ranges,
 * with extra restrictions when @aux_clock is true. Returns 0 on success or
 * a negative errno.
 */
static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux_clock)
{
	if (txc->modes & ADJ_ADJTIME) {
		/* singleshot must not be used with any other mode bits */
		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
			return -EINVAL;
		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
		    !capable(CAP_SYS_TIME))
			return -EPERM;
	} else {
		/* In order to modify anything, you gotta be super-user! */
		if (txc->modes && !capable(CAP_SYS_TIME))
			return -EPERM;
		/*
		 * if the quartz is off by more than 10% then
		 * something is VERY wrong!
		 */
		if (txc->modes & ADJ_TICK &&
		    (txc->tick < 900000/USER_HZ ||
		     txc->tick > 1100000/USER_HZ))
			return -EINVAL;
	}

	if (txc->modes & ADJ_SETOFFSET) {
		/* In order to inject time, you gotta be super-user! */
		if (!capable(CAP_SYS_TIME))
			return -EPERM;

		/*
		 * Validate if a timespec/timeval used to inject a time
		 * offset is valid. Offsets can be positive or negative, so
		 * we don't check tv_sec. The value of the timeval/timespec
		 * is the sum of its fields, but *NOTE*:
		 * The field tv_usec/tv_nsec must always be non-negative and
		 * we can't have more nanoseconds/microseconds than a second.
		 */
		if (txc->time.tv_usec < 0)
			return -EINVAL;

		if (txc->modes & ADJ_NANO) {
			if (txc->time.tv_usec >= NSEC_PER_SEC)
				return -EINVAL;
		} else {
			if (txc->time.tv_usec >= USEC_PER_SEC)
				return -EINVAL;
		}
	}

	/*
	 * Check for potential multiplication overflows that can
	 * only happen on 64-bit systems:
	 */
	if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
		if (LLONG_MIN / PPM_SCALE > txc->freq)
			return -EINVAL;
		if (LLONG_MAX / PPM_SCALE < txc->freq)
			return -EINVAL;
	}

	if (aux_clock) {
		/* Auxiliary clocks are similar to TAI and do not have leap seconds */
		if (txc->modes & ADJ_STATUS &&
		    txc->status & (STA_INS | STA_DEL))
			return -EINVAL;

		/* No TAI offset setting */
		if (txc->modes & ADJ_TAI)
			return -EINVAL;

		/* No PPS support either */
		if (txc->modes & ADJ_STATUS &&
		    txc->status & (STA_PPSFREQ | STA_PPSTIME))
			return -EINVAL;
	}

	return 0;
}

/**
 * random_get_entropy_fallback - Returns the raw clock source value,
 * used by random.c for platforms with no valid random_get_entropy().
 */
unsigned long random_get_entropy_fallback(void)
{
	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
	struct clocksource *clock = READ_ONCE(tkr->clock);

	if (unlikely(timekeeping_suspended || !clock))
		return 0;
	return clock->read(clock);
}
EXPORT_SYMBOL_GPL(random_get_entropy_fallback);

/*
 * Collects the side effects of an adjtimex operation so the caller can
 * emit audit records and notifications after dropping the timekeeper lock.
 */
struct adjtimex_result {
	struct audit_ntp_data	ad;		/* NTP audit data */
	struct timespec64	delta;		/* Injected offset (ADJ_SETOFFSET) */
	bool			clock_set;	/* Clock was stepped; notify hrtimers */
};

/*
 * __do_adjtimex - Validate and apply an adjtimex request to a timekeeper.
 * Works for both the core timekeeper and auxiliary clocks; acquires
 * tkd->lock internally. Returns a negative errno on failure, otherwise
 * the ntp_adjtimex() return value.
 */
static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc,
			 struct adjtimex_result *result)
{
	struct timekeeper *tks = &tkd->shadow_timekeeper;
	bool aux_clock = !timekeeper_is_core_tk(tks);
	struct timespec64 ts;
	s32 orig_tai, tai;
	int ret;

	/* Validate the data before disabling interrupts */
	ret = timekeeping_validate_timex(txc, aux_clock);
	if (ret)
		return ret;
	add_device_randomness(txc, sizeof(*txc));

	if (!aux_clock)
		ktime_get_real_ts64(&ts);
	else
		tk_get_aux_ts64(tkd->timekeeper.id, &ts);

	add_device_randomness(&ts, sizeof(ts));

	guard(raw_spinlock_irqsave)(&tkd->lock);

	if (!tks->clock_valid)
		return -ENODEV;

	if (txc->modes & ADJ_SETOFFSET) {
		result->delta.tv_sec = txc->time.tv_sec;
		result->delta.tv_nsec = txc->time.tv_usec;
		/* Without ADJ_NANO, tv_usec is microseconds: scale to ns */
		if (!(txc->modes & ADJ_NANO))
			result->delta.tv_nsec *= 1000;
		ret = __timekeeping_inject_offset(tkd, &result->delta);
		if (ret)
			return ret;
		result->clock_set = true;
	}

	orig_tai = tai = tks->tai_offset;
	ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &result->ad);

	if (tai != orig_tai) {
		__timekeeping_set_tai_offset(tks, tai);
		timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET);
		result->clock_set = true;
	} else {
		tk_update_leap_state_all(tkd);
	}

	/* Update the multiplier immediately if frequency was set directly */
	if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
		result->clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ);

	return ret;
}

/**
 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
 * @txc: Pointer to kernel_timex structure containing NTP parameters
 */
int do_adjtimex(struct __kernel_timex *txc)
{
	struct adjtimex_result result = { };
	int ret;

	ret = __do_adjtimex(&tk_core, txc, &result);
	if (ret < 0)
		return ret;

	/* Audit and notifications happen outside the timekeeper lock */
	if (txc->modes & ADJ_SETOFFSET)
		audit_tk_injoffset(result.delta);

	audit_ntp_log(&result.ad);

	if (result.clock_set)
		clock_was_set(CLOCK_SET_WALL);

	ntp_notify_cmos_timer(result.delta.tv_sec != 0);

	return ret;
}

/*
 * Invoked from NTP with the time keeper lock held, so lockless access is
 * fine.
 */
long ktime_get_ntp_seconds(unsigned int id)
{
	return timekeeper_data[id].timekeeper.xtime_sec;
}

#ifdef CONFIG_NTP_PPS
/**
 * hardpps() - Accessor function to NTP __hardpps function
 * @phase_ts: Pointer to timespec64 structure representing phase timestamp
 * @raw_ts: Pointer to timespec64 structure representing raw timestamp
 */
void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	__hardpps(phase_ts, raw_ts);
}
EXPORT_SYMBOL(hardpps);
#endif /* CONFIG_NTP_PPS */

#ifdef CONFIG_POSIX_AUX_CLOCKS
#include "posix-timers.h"

/*
 * Bitmap for the activated auxiliary timekeepers to allow lockless quick
 * checks in the hot paths without touching extra cache lines. If set, then
 * the state of the corresponding timekeeper has to be re-checked under
 * timekeeper::lock.
 */
static unsigned long aux_timekeepers;

/* Map a CLOCK_AUX clock id to its index in timekeeper_data[] */
static inline unsigned int clockid_to_tkid(unsigned int id)
{
	return TIMEKEEPER_AUX_FIRST + id - CLOCK_AUX;
}

/* Look up the tk_data for an auxiliary clock id; NULL if @id is invalid */
static inline struct tk_data *aux_get_tk_data(clockid_t id)
{
	if (!clockid_aux_valid(id))
		return NULL;
	return &timekeeper_data[clockid_to_tkid(id)];
}

/* Invoked from timekeeping after a clocksource change */
static void tk_aux_update_clocksource(void)
{
	unsigned long active = READ_ONCE(aux_timekeepers);
	unsigned int id;

	for_each_set_bit(id, &active, BITS_PER_LONG) {
		struct tk_data *tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST];
		struct timekeeper *tks = &tkd->shadow_timekeeper;

		/* Re-check validity under the lock; the bitmap is only a hint */
		guard(raw_spinlock_irqsave)(&tkd->lock);
		if (!tks->clock_valid)
			continue;

		/* Consume remaining cycles on the old clock before switching */
		timekeeping_forward_now(tks);
		tk_setup_internals(tks, tk_core.timekeeper.tkr_raw.clock);
		timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL);
	}
}

/* Advance all active auxiliary timekeepers; called from update_wall_time() */
static void tk_aux_advance(void)
{
	unsigned long active = READ_ONCE(aux_timekeepers);
	unsigned int id;

	/* Lockless quick check to avoid extra cache lines */
	for_each_set_bit(id, &active, BITS_PER_LONG) {
		struct tk_data *aux_tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST];

		guard(raw_spinlock)(&aux_tkd->lock);
		if (aux_tkd->shadow_timekeeper.clock_valid)
			__timekeeping_advance(aux_tkd, TK_ADV_TICK);
	}
}

/**
 * ktime_get_aux - Get time for an AUX clock
 * @id:	ID of the clock to read (CLOCK_AUX...)
 * @kt:	Pointer to ktime_t to store the time stamp
 *
 * Returns:	True if the timestamp is valid, false otherwise
 */
bool ktime_get_aux(clockid_t id, ktime_t *kt)
{
	struct tk_data *aux_tkd = aux_get_tk_data(id);
	struct timekeeper *aux_tk;
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	if (!aux_tkd)
		return false;

	aux_tk = &aux_tkd->timekeeper;
	/* Seqcount protected read of base and conversion data */
	do {
		seq = read_seqcount_begin(&aux_tkd->seq);
		if (!aux_tk->clock_valid)
			return false;

		/* AUX time is monotonic base plus the per-clock offset */
		base = ktime_add(aux_tk->tkr_mono.base, aux_tk->offs_aux);
		nsecs = timekeeping_get_ns(&aux_tk->tkr_mono);
	} while (read_seqcount_retry(&aux_tkd->seq, seq));

	*kt = ktime_add_ns(base, nsecs);
	return true;
}
EXPORT_SYMBOL_GPL(ktime_get_aux);

/**
 * ktime_get_aux_ts64 - Get time for an AUX clock
 * @id:	ID of the clock to read (CLOCK_AUX...)
 * @ts:	Pointer to timespec64 to store the time stamp
 *
 * Returns:	True if the timestamp is valid, false otherwise
 */
bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts)
{
	ktime_t now;

	if (!ktime_get_aux(id, &now))
		return false;
	*ts = ktime_to_timespec64(now);
	return true;
}
EXPORT_SYMBOL_GPL(ktime_get_aux_ts64);

/* clock_getres() for auxiliary clocks: fixed resolution, split into sec/nsec */
static int aux_get_res(clockid_t id, struct timespec64 *tp)
{
	if (!clockid_aux_valid(id))
		return -ENODEV;

	tp->tv_sec = aux_clock_resolution_ns() / NSEC_PER_SEC;
	tp->tv_nsec = aux_clock_resolution_ns() % NSEC_PER_SEC;
	return 0;
}

/* clock_gettime() for auxiliary clocks; -ENODEV if the clock is not enabled/valid */
static int aux_get_timespec(clockid_t id, struct timespec64 *tp)
{
	return ktime_get_aux_ts64(id, tp) ? 0 : -ENODEV;
}

/* clock_settime() for auxiliary clocks */
static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew)
{
	struct tk_data *aux_tkd = aux_get_tk_data(id);
	struct timekeeper *aux_tks;
	ktime_t tnow, nsecs;

	if (!timespec64_valid_settod(tnew))
		return -EINVAL;
	if (!aux_tkd)
		return -ENODEV;

	aux_tks = &aux_tkd->shadow_timekeeper;

	/* All updates operate on the shadow timekeeper under the lock */
	guard(raw_spinlock_irq)(&aux_tkd->lock);
	if (!aux_tks->clock_valid)
		return -ENODEV;

	/* Forward the timekeeper base time */
	timekeeping_forward_now(aux_tks);
	/*
	 * Get the updated base time. tkr_mono.base has not been
	 * updated yet, so do that first. That makes the update
	 * in timekeeping_update_from_shadow() redundant, but
	 * that's harmless. After that @tnow can be calculated
	 * by using tkr_mono::cycle_last, which has been set
	 * by timekeeping_forward_now().
	 */
	tk_update_ktime_data(aux_tks);
	nsecs = timekeeping_cycles_to_ns(&aux_tks->tkr_mono, aux_tks->tkr_mono.cycle_last);
	tnow = ktime_add(aux_tks->tkr_mono.base, nsecs);

	/*
	 * Calculate the new AUX offset as delta to @tnow ("monotonic").
	 * That avoids all the tk::xtime back and forth conversions as
	 * xtime ("realtime") is not applicable for auxiliary clocks and
	 * kept in sync with "monotonic".
	 */
	tk_update_aux_offs(aux_tks, ktime_sub(timespec64_to_ktime(*tnew), tnow));

	timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL);
	return 0;
}

/* clock_adjtime() for auxiliary clocks */
static int aux_clock_adj(const clockid_t id, struct __kernel_timex *txc)
{
	struct tk_data *aux_tkd = aux_get_tk_data(id);
	struct adjtimex_result result = { };

	if (!aux_tkd)
		return -ENODEV;

	/*
	 * @result is ignored for now as there are neither hrtimers nor a
	 * RTC related to auxiliary clocks for now.
	 */
	return __do_adjtimex(aux_tkd, txc, &result);
}

/* POSIX clock operations for the auxiliary clocks */
const struct k_clock clock_aux = {
	.clock_getres		= aux_get_res,
	.clock_get_timespec	= aux_get_timespec,
	.clock_set		= aux_clock_set,
	.clock_adj		= aux_clock_adj,
};

/* Initialize and activate the timekeeper of an auxiliary clock */
static void aux_clock_enable(clockid_t id)
{
	struct tk_read_base *tkr_raw = &tk_core.timekeeper.tkr_raw;
	struct tk_data *aux_tkd = aux_get_tk_data(id);
	struct timekeeper *aux_tks = &aux_tkd->shadow_timekeeper;

	/* Prevent the core timekeeper from changing. */
	guard(raw_spinlock_irq)(&tk_core.lock);

	/*
	 * Setup the auxiliary clock assuming that the raw core timekeeper
	 * clock frequency conversion is close enough. Userspace has to
	 * adjust for the deviation via clock_adjtime(2).
	 */
	guard(raw_spinlock_nested)(&aux_tkd->lock);

	/* Remove leftovers of a previous registration */
	memset(aux_tks, 0, sizeof(*aux_tks));
	/* Restore the timekeeper id */
	aux_tks->id = aux_tkd->timekeeper.id;
	/* Setup the timekeeper based on the current system clocksource */
	tk_setup_internals(aux_tks, tkr_raw->clock);

	/* Mark it valid and set it live */
	aux_tks->clock_valid = true;
	timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL);
}

/* Invalidate the timekeeper of an auxiliary clock and make that visible */
static void aux_clock_disable(clockid_t id)
{
	struct tk_data *aux_tkd = aux_get_tk_data(id);

	guard(raw_spinlock_irq)(&aux_tkd->lock);
	aux_tkd->shadow_timekeeper.clock_valid = false;
	timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL);
}

/* Serializes sysfs initiated enable/disable of auxiliary clocks */
static DEFINE_MUTEX(aux_clock_mutex);

/* sysfs store: enable or disable the auxiliary clock named by the kobject */
static ssize_t aux_clock_enable_store(struct kobject *kobj, struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	/* Lazy atoi() as name is "0..7" */
	int id = kobj->name[0] & 0x7;
	bool enable;

	if (!capable(CAP_SYS_TIME))
		return -EPERM;

	if (kstrtobool(buf, &enable) < 0)
		return -EINVAL;

	guard(mutex)(&aux_clock_mutex);
	/* Nothing to do when the requested state is already set */
	if (enable == test_bit(id, &aux_timekeepers))
		return count;

	if (enable) {
		aux_clock_enable(CLOCK_AUX + id);
		set_bit(id, &aux_timekeepers);
	} else {
		aux_clock_disable(CLOCK_AUX + id);
		clear_bit(id, &aux_timekeepers);
	}
	return count;
}

/* sysfs show: report whether the auxiliary clock is enabled (lockless snapshot) */
static ssize_t aux_clock_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	unsigned long active = READ_ONCE(aux_timekeepers);
	/* Lazy atoi() as name is "0..7" */
	int id = kobj->name[0] & 0x7;

	return sysfs_emit(buf, "%d\n", test_bit(id, &active));
}

static struct kobj_attribute aux_clock_enable_attr = __ATTR_RW(aux_clock_enable);

static struct attribute *aux_clock_enable_attrs[] = {
	&aux_clock_enable_attr.attr,
	NULL
};

static const struct attribute_group aux_clock_enable_attr_group = {
	.attrs = aux_clock_enable_attrs,
};

/*
 * Create /sys/kernel/time/aux_clocks/<0..7>/aux_clock_enable for the
 * auxiliary clocks.
 *
 * NOTE(review): the per-clock 'clk' kobjects are not released on the
 * error path (only @auxo and @tko are put) - confirm whether this
 * one-time init failure leak is acceptable.
 */
static int __init tk_aux_sysfs_init(void)
{
	struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj);
	int ret = -ENOMEM;

	if (!tko)
		return ret;

	auxo = kobject_create_and_add("aux_clocks", tko);
	if (!auxo)
		goto err_clean;

	for (int i = 0; i < MAX_AUX_CLOCKS; i++) {
		/* Single digit name, implicitly NUL terminated */
		char id[2] = { [0] = '0' + i, };
		struct kobject *clk = kobject_create_and_add(id, auxo);

		if (!clk) {
			ret = -ENOMEM;
			goto err_clean;
		}

		ret = sysfs_create_group(clk, &aux_clock_enable_attr_group);
		if (ret)
			goto err_clean;
	}
	return 0;

err_clean:
	kobject_put(auxo);
	kobject_put(tko);
	return ret;
}
late_initcall(tk_aux_sysfs_init);

/* Early boot setup of the auxiliary timekeeper data structures */
static __init void tk_aux_setup(void)
{
	for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++)
		tkd_basic_setup(&timekeeper_data[i], i, false);
}
#endif /* CONFIG_POSIX_AUX_CLOCKS */