Lines Matching +full:assigned +full:- +full:resolution +full:- +full:bits
1 /* SPDX-License-Identifier: GPL-2.0 */
113 * Helpers for converting nanosecond timing to jiffy resolution
118 * Increase resolution of nice-level calculations for 64-bit architectures.
119 * The extra resolution improves shares distribution and load balancing of
120 * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup
121 * hierarchies, especially on larger systems. This is not a user-visible change
122 * and does not change the user-interface for setting shares/weights.
124 * We increase resolution only if we have enough bits to allow this increased
125 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
129 * increase coverage and consistency always enable it on 64-bit platforms.
149 * independent resolution, but they should be well calibrated. We use
153 * scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
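For context, the scale_load()/scale_load_down() helpers this invariant refers to look roughly like the mainline definitions below (sketch from memory; details vary by kernel version):

#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w)	\
({ \
	unsigned long __w = (w); \
	if (__w) \
		__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
	__w; \
})
#else
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		(w)
# define scale_load_down(w)	(w)
#endif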
160 * 10 -> just above 1us
161 * 9 -> just above 0.5us
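These two lines are from the comment above DL_SCALE, the single value that sets SCHED_DEADLINE's internal math precision; in mainline it resolves simply to:

#define DL_SCALE	10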
196 return idle_policy(p->policy); in task_has_idle_policy()
201 return rt_policy(p->policy); in task_has_rt_policy()
206 return dl_policy(p->policy); in task_has_dl_policy()
213 s64 diff = sample - *avg; in update_avg()
219 * is UB; cap at size-1.
222 (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
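For reference, the two fragments above come from update_avg() and the shr_bound() macro, which in full read roughly as follows (sketch matching mainline):

static inline void update_avg(u64 *avg, u64 sample)
{
	s64 diff = sample - *avg;

	*avg += diff / 8;	/* exponential moving average with 1/8 gain */
}

/*
 * Shifting a value by an exponent greater *or equal* to the size of said value
 * is UB; cap at size-1.
 */
#define shr_bound(val, shift)						\
	(val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))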
243 return unlikely(dl_se->flags & SCHED_FLAG_SUGOV); in dl_entity_is_special()
256 dl_time_before(a->deadline, b->deadline); in dl_entity_preempt()
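The dl_entity_preempt() helper the line above belongs to reads roughly as below: a special (SUGOV) entity always wins, otherwise the earlier deadline wins (sketch; argument constness varies by version):

static inline bool dl_entity_preempt(const struct sched_dl_entity *a,
				     const struct sched_dl_entity *b)
{
	return dl_entity_is_special(a) ||
	       dl_time_before(a->deadline, b->deadline);
}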
260 * This is the priority-queue data structure of the RT scheduling class:
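The structure this comment introduces is the classic bitmap-plus-list-array priority queue (as in mainline):

struct rt_prio_array {
	DECLARE_BITMAP(bitmap, MAX_RT_PRIO + 1); /* include 1 bit for delimiter */
	struct list_head queue[MAX_RT_PRIO];
};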
282 * To keep the bandwidth of -deadline tasks under control
284 * - store the maximum -deadline bandwidth of each cpu;
285 * - cache the fraction of bandwidth that is currently allocated in
289 * one used for RT-throttling (rt_bandwidth), with the main difference
296 * - bw (< 100%) is the deadline bandwidth of each CPU;
297 * - total_bw is the currently allocated bandwidth in each root domain;
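The struct described by this comment is small; in mainline it is essentially:

struct dl_bw {
	raw_spinlock_t	lock;
	u64		bw;
	u64		total_bw;
};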
320 * dl_se::rq -- runqueue we belong to.
322 * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
325 * dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
328 * dl_server_update() -- called from update_curr_common(), propagates runtime
332 * dl_server_stop() -- start/stop the server when it has (no) tasks.
334 * dl_server_init() -- initializes the server.
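The interface sketched by these comment lines corresponds, in recent kernels, to declarations along these lines (hedged sketch; exact prototypes vary by version):

extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
			   dl_server_has_tasks_f has_tasks,
			   dl_server_pick_f pick);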
421 /* The two decimal precision [%] value requested from user-space */
439 * (The default weight is 1024 - so there's no practical
535 * applicable for 32-bit architectures.
570 /* CFS-related fields in a runqueue */
630 * Where f(tg) is the recursive weight fraction assigned to
644 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
688 /* Real-Time classes' related field in a runqueue: */
724 return rt_rq->rt_queued && rt_rq->rt_nr_running; in rt_rq_is_runnable()
750 * an rb-tree, ordered by tasks' deadlines, with caching
765 * Utilization of the tasks "assigned" to this runqueue (including
771 * runqueue (inactive utilization = this_bw - running_bw).
791 #define entity_is_task(se) (!se->my_q)
796 se->runnable_weight = se->my_q->h_nr_running; in se_update_runnable()
802 return !!se->on_rq; in se_runnable()
804 return se->runnable_weight; in se_runnable()
814 return !!se->on_rq; in se_runnable()
820 * XXX we want to get rid of these helpers and use the full load resolution.
824 return scale_load_down(se->load.weight); in se_weight()
841 #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
844 * We add the notion of a root-domain which will be used to define per-domain
847 * exclusive cpuset is created, we also create and attach a new root-domain
860 * - More than one runnable task
861 * - Running task is misfit
865 /* Indicate one or more cpus over-utilized (tipping point) */
870 * than one runnable -deadline task (as it is below for RT tasks).
909 * NULL-terminated list of performance domains intersecting with the
928 * struct uclamp_bucket - Utilization clamp bucket
937 unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
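Worked example of the bit split above: with SCHED_CAPACITY_SCALE == 1024, bits_per(1024) == 11, so on a 64-bit kernel 'value' takes 11 bits and 'tasks' the remaining 53 bits of the unsigned long. The full struct (as in mainline) is:

struct uclamp_bucket {
	unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
	unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
};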
941 * struct uclamp_rq - rq's utilization clamp
953 * - for util_min: we want to run the CPU at least at the max of the minimum
955 * - for util_max: we want to allow the CPU to run up to the max of the
960 * the metrics required to compute all the per-rq utilization clamp values.
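The per-rq clamp bookkeeping this comment describes boils down to one aggregated value plus a bucket array (sketch; field set varies by version):

struct uclamp_rq {
	unsigned int		value;
	struct uclamp_bucket	bucket[UCLAMP_BUCKETS];
};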
977 * This is the main, per-CPU runqueue data structure.
1132 /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
1165 /* shared state -- careful with sched_core_cpu_deactivate() */
1189 return cfs_rq->rq; in rq_of()
1203 return rq->cpu; in cpu_of()
1214 return p->migration_disabled; in is_migration_disabled()
1225 #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
1236 return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled; in sched_core_enabled()
1246 * stable unless you actually hold a relevant rq->__lock.
1251 return &rq->core->__lock; in rq_lockp()
1253 return &rq->__lock; in rq_lockp()
1258 if (rq->core_enabled) in __rq_lockp()
1259 return &rq->core->__lock; in __rq_lockp()
1261 return &rq->__lock; in __rq_lockp()
1280 return rq->core->core_cookie == p->core_cookie; in sched_cpu_cookie_match()
1303 return idle_core || rq->core->core_cookie == p->core_cookie; in sched_core_cookie_match()
1316 for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) { in sched_group_cookie_match()
1325 return !RB_EMPTY_NODE(&p->core_node); in sched_core_enqueued()
1348 return &rq->__lock; in rq_lockp()
1353 return &rq->__lock; in __rq_lockp()
1441 return p->se.cfs_rq; in task_cfs_rq()
1447 return se->cfs_rq; in cfs_rq_of()
1453 return grp->my_q; in group_cfs_rq()
1462 return &task_rq(p)->cfs; in task_cfs_rq()
1470 return &rq->cfs; in cfs_rq_of()
1483 * rq::clock_update_flags bits
1485 * %RQCF_REQ_SKIP - will request skipping of clock update on the next
1489 * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
1492 * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
1499 * if (rq->clock_update_flags >= RQCF_UPDATED)
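For reference, the flag values behind this comparison are consecutive powers of two (as in mainline), which is why the left-shift promotion and the ordered comparison work:

#define RQCF_REQ_SKIP	0x01
#define RQCF_ACT_SKIP	0x02
#define RQCF_UPDATED	0x04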
1515 SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP); in assert_clock_updated()
1523 return rq->clock; in rq_clock()
1531 return rq->clock_task; in rq_clock_task()
1555 rq->clock_update_flags |= RQCF_REQ_SKIP; in rq_clock_skip_update()
1565 rq->clock_update_flags &= ~RQCF_REQ_SKIP; in rq_clock_cancel_skipupdate()
1575 * to clear RQCF_ACT_SKIP of rq->clock_update_flags.
1580 SCHED_WARN_ON(rq->clock_update_flags & RQCF_ACT_SKIP); in rq_clock_start_loop_update()
1581 rq->clock_update_flags |= RQCF_ACT_SKIP; in rq_clock_start_loop_update()
1587 rq->clock_update_flags &= ~RQCF_ACT_SKIP; in rq_clock_stop_loop_update()
1611 * copy of the (on-stack) 'struct rq_flags rf'.
1613 * Also see Documentation/locking/lockdep-design.rst.
1617 rf->cookie = lockdep_pin_lock(__rq_lockp(rq)); in rq_pin_lock()
1620 rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in rq_pin_lock()
1621 rf->clock_update_flags = 0; in rq_pin_lock()
1623 SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback); in rq_pin_lock()
1631 if (rq->clock_update_flags > RQCF_ACT_SKIP) in rq_unpin_lock()
1632 rf->clock_update_flags = RQCF_UPDATED; in rq_unpin_lock()
1635 lockdep_unpin_lock(__rq_lockp(rq), rf->cookie); in rq_unpin_lock()
1640 lockdep_repin_lock(__rq_lockp(rq), rf->cookie); in rq_repin_lock()
1646 rq->clock_update_flags |= rf->clock_update_flags; in rq_repin_lock()
1651 __acquires(rq->lock);
1654 __acquires(p->pi_lock)
1655 __acquires(rq->lock);
1658 __releases(rq->lock) in __task_rq_unlock()
1666 __releases(rq->lock) in task_rq_unlock()
1667 __releases(p->pi_lock) in task_rq_unlock()
1671 raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); in task_rq_unlock()
1675 _T->rq = task_rq_lock(_T->lock, &_T->rf),
1676 task_rq_unlock(_T->rq, _T->lock, &_T->rf),
1681 __acquires(rq->lock) in rq_lock_irqsave()
1683 raw_spin_rq_lock_irqsave(rq, rf->flags); in rq_lock_irqsave()
1689 __acquires(rq->lock) in rq_lock_irq()
1697 __acquires(rq->lock) in rq_lock()
1705 __releases(rq->lock) in rq_unlock_irqrestore()
1708 raw_spin_rq_unlock_irqrestore(rq, rf->flags); in rq_unlock_irqrestore()
1713 __releases(rq->lock) in rq_unlock_irq()
1721 __releases(rq->lock) in rq_unlock()
1728 rq_lock(_T->lock, &_T->rf),
1729 rq_unlock(_T->lock, &_T->rf),
1733 rq_lock_irq(_T->lock, &_T->rf),
1734 rq_unlock_irq(_T->lock, &_T->rf),
1738 rq_lock_irqsave(_T->lock, &_T->rf),
1739 rq_unlock_irqrestore(_T->lock, &_T->rf),
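Illustrative only (not from the file): with the guard classes defined above, a caller can take and release an rq lock scoped to a block, e.g.:

static void example_with_rq_guard(struct rq *rq)
{
	guard(rq_lock_irqsave)(rq);	/* rq->__lock held and pinned until end of scope */
	update_rq_clock(rq);
}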
1744 __acquires(rq->lock) in this_rq_lock_irq()
1813 if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) in queue_balance_callback()
1816 head->func = func; in queue_balance_callback()
1817 head->next = rq->balance_callback; in queue_balance_callback()
1818 rq->balance_callback = head; in queue_balance_callback()
1826 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
1830 * preempt-disabled sections.
1833 for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
1834 __sd; __sd = __sd->parent)
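Illustrative only: walking a CPU's domain hierarchy with the macro above, inside an RCU read-side section as the comment requires:

static void example_walk_domains(int cpu)
{
	struct sched_domain *sd;

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_SHARE_CPUCAPACITY)
			break;		/* e.g. stop at the SMT level */
	}
	rcu_read_unlock();
}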
1844 * highest_flag_domain - Return highest sched_domain containing flag.
1858 if (sd->flags & flag) { in highest_flag_domain()
1879 if (sd->flags & flag) in lowest_flag_domain()
1909 unsigned long min_capacity; /* Min per-CPU capacity in group */
1910 unsigned long max_capacity; /* Max per-CPU capacity in group */
1943 return to_cpumask(sg->cpumask); in sched_group_span()
1951 return to_cpumask(sg->sgc->cpumask); in group_balance_mask()
1972 if (!p->user_cpus_ptr) in task_user_cpus()
1974 return p->user_cpus_ptr; in task_user_cpus()
2015 * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
2023 return p->sched_task_group; in task_group()
2034 set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]); in set_task_rq()
2035 p->se.cfs_rq = tg->cfs_rq[cpu]; in set_task_rq()
2036 p->se.parent = tg->se[cpu]; in set_task_rq()
2037 p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0; in set_task_rq()
2041 p->rt.rt_rq = tg->rt_rq[cpu]; in set_task_rq()
2042 p->rt.parent = tg->rt_se[cpu]; in set_task_rq()
2061 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be in __set_task_cpu()
2063 * per-task data have been completed by this moment. in __set_task_cpu()
2066 WRITE_ONCE(task_thread_info(p)->cpu, cpu); in __set_task_cpu()
2067 p->wake_cpu = cpu; in __set_task_cpu()
2093 * To support run-time toggling of sched features, all the translation units
2153 return rq->curr == p; in task_current()
2159 return p->on_cpu; in task_on_cpu()
2167 return p->on_rq == TASK_ON_RQ_QUEUED; in task_on_rq_queued()
2172 return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; in task_on_rq_migrating()
2208 * DEQUEUE_SLEEP - task is no longer runnable
2209 * ENQUEUE_WAKEUP - task just became runnable
2211 * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
2215 * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
2218 * NOCLOCK - skip the update_rq_clock() (avoids double updates)
2220 * MIGRATION - p->on_rq == TASK_ON_RQ_MIGRATING (used for DEADLINE)
2222 * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
2223 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
2224 * ENQUEUE_MIGRATED - the task was migrated during wakeup
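The flags documented above are bit values; in mainline they look roughly like this (exact values and newer additions vary by version):

#define DEQUEUE_SLEEP		0x01
#define DEQUEUE_SAVE		0x02 /* Matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE		0x04 /* Matches ENQUEUE_MOVE */
#define DEQUEUE_NOCLOCK		0x08 /* Matches ENQUEUE_NOCLOCK */

#define ENQUEUE_WAKEUP		0x01
#define ENQUEUE_RESTORE		0x02
#define ENQUEUE_MOVE		0x04
#define ENQUEUE_NOCLOCK		0x08

#define ENQUEUE_HEAD		0x10
#define ENQUEUE_REPLENISH	0x20
#define ENQUEUE_MIGRATED	0x40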
2249 #define RETRY_TASK ((void *)-1UL)
2300 * The switched_from() call is allowed to drop rq->lock, therefore we
2302 * rq->lock. They are however serialized by p->pi_lock.
2325 WARN_ON_ONCE(rq->curr != prev); in put_prev_task()
2326 prev->sched_class->put_prev_task(rq, prev); in put_prev_task()
2331 next->sched_class->set_next_task(rq, next, false); in set_next_task()
2339 * include/asm-generic/vmlinux.lds.h
2350 /* Defined in include/asm-generic/vmlinux.lds.h */
2370 return rq->stop && task_on_rq_queued(rq->stop); in sched_stop_runnable()
2375 return rq->dl.dl_nr_running > 0; in sched_dl_runnable()
2380 return rq->rt.rt_queued > 0; in sched_rt_runnable()
2385 return rq->cfs.nr_running > 0; in sched_fair_runnable()
2406 struct task_struct *p = rq->curr; in get_push_task()
2410 if (rq->push_busy) in get_push_task()
2413 if (p->nr_cpus_allowed == 1) in get_push_task()
2416 if (p->migration_disabled) in get_push_task()
2419 rq->push_busy = true; in get_push_task()
2431 rq->idle_state = idle_state; in idle_set_state()
2438 return rq->idle_state; in idle_get_state()
2477 #define MAX_BW_BITS (64 - BW_SHIFT)
2478 #define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
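These build on the deadline-bandwidth fixed-point shift defined just above them in the source; with BW_SHIFT == 20, MAX_BW_BITS is 64 - 20 = 44, so MAX_BW is 2^44 - 1:

#define BW_SHIFT	20
#define BW_UNIT		(1 << BW_SHIFT)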
2512 unsigned prev_nr = rq->nr_running; in add_nr_running()
2514 rq->nr_running = prev_nr + count; in add_nr_running()
2520 if (prev_nr < 2 && rq->nr_running >= 2) { in add_nr_running()
2521 if (!READ_ONCE(rq->rd->overload)) in add_nr_running()
2522 WRITE_ONCE(rq->rd->overload, 1); in add_nr_running()
2531 rq->nr_running -= count; in sub_nr_running()
2533 call_trace_sched_update_nr_running(rq, -count); in sub_nr_running()
2573 * - enabled by features
2574 * - hrtimer is actually high res
2580 return hrtimer_is_hres_active(&rq->hrtick_timer); in hrtick_enabled()
2627 * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
2633 * f_curr / f_max * SCHED_CAPACITY_SCALE
2648 * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
2652 rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in double_rq_clock_clear_update()
2655 rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in double_rq_clock_clear_update()
2674 * In order to not have {0,2},{1,3} turn into an AB-BA, in rq_order_less()
2675 * order by core-id first and cpu-id second. in rq_order_less()
2679 * double_rq_lock(0,3); will take core-0, core-1 lock in rq_order_less()
2680 * double_rq_lock(1,2); will take core-1, core-0 lock in rq_order_less()
2682 * when only cpu-id is considered. in rq_order_less()
2684 if (rq1->core->cpu < rq2->core->cpu) in rq_order_less()
2686 if (rq1->core->cpu > rq2->core->cpu) in rq_order_less()
2690 * __sched_core_flip() relies on SMT having cpu-id lock order. in rq_order_less()
2693 return rq1->cpu < rq2->cpu; in rq_order_less()
2701 * fair double_lock_balance: Safely acquires both rq->locks in a fair
2709 __releases(this_rq->lock) in _double_lock_balance()
2710 __acquires(busiest->lock) in _double_lock_balance()
2711 __acquires(this_rq->lock) in _double_lock_balance()
2723 * already in proper order on entry. This favors lower CPU-ids and will
2728 __releases(this_rq->lock) in _double_lock_balance()
2729 __acquires(busiest->lock) in _double_lock_balance()
2730 __acquires(this_rq->lock) in _double_lock_balance()
2753 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
2763 __releases(busiest->lock) in double_unlock_balance()
2767 lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_); in double_unlock_balance()
2804 double_raw_lock(_T->lock, _T->lock2),
2805 double_raw_unlock(_T->lock, _T->lock2))
2808 * double_rq_unlock - safely unlock two runqueues
2814 __releases(rq1->lock) in double_rq_unlock()
2815 __releases(rq2->lock) in double_rq_unlock()
2820 __release(rq2->lock); in double_rq_unlock()
2831 * double_rq_lock - safely lock two runqueues
2837 __acquires(rq1->lock) in double_rq_lock()
2838 __acquires(rq2->lock) in double_rq_lock()
2843 __acquire(rq2->lock); /* Fake it out ;) */ in double_rq_lock()
2848 * double_rq_unlock - safely unlock two runqueues
2854 __releases(rq1->lock) in double_rq_unlock()
2855 __releases(rq2->lock) in double_rq_unlock()
2859 __release(rq2->lock); in double_rq_unlock()
2865 double_rq_lock(_T->lock, _T->lock2),
2866 double_rq_unlock(_T->lock, _T->lock2))
2918 #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
2953 seq = __u64_stats_fetch_begin(&irqtime->sync); in irq_time_read()
2954 total = irqtime->total; in irq_time_read()
2955 } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); in irq_time_read()
2965 * cpufreq_update_util - Take a note about CPU utilization changes.
2972 * It can only be called from RCU-sched read-side critical sections.
2983 * but that really is a band-aid. Going forward it should be replaced with
2993 data->func(data, rq_clock(rq), flags); in cpufreq_update_util()
3023 * (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
3029 return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT); in dl_task_fits_capacity()
3034 return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; in cpu_bw_dl()
3039 return READ_ONCE(rq->avg_dl.util_avg); in cpu_util_dl()
3048 return READ_ONCE(rq->avg_rt.util_avg); in cpu_util_rt()
3058 return READ_ONCE(rq->uclamp[clamp_id].value); in uclamp_rq_get()
3064 WRITE_ONCE(rq->uclamp[clamp_id].value, value); in uclamp_rq_set()
3069 return rq->uclamp_flags & UCLAMP_FLAG_IDLE; in uclamp_rq_is_idle()
3082 max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); in uclamp_rq_is_capped()
3092 * Returns true if userspace opted-in to use uclamp and aggregation at rq level
3139 return rq->avg_irq.util_avg; in cpu_util_irq()
3145 util *= (max - irq); in scale_irq_capacity()
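The line above is the core of scale_irq_capacity(), which in full is roughly:

static inline
unsigned long scale_irq_capacity(unsigned long util, unsigned long irq,
				 unsigned long max)
{
	util *= (max - irq);	/* scale util by the non-IRQ fraction of capacity */
	util /= max;

	return util;
}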
3166 #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
3187 * - prior user-space memory accesses and store to rq->membarrier_state,
3188 * - store to rq->membarrier_state and following user-space memory accesses.
3189 * In the same way it provides those guarantees around store to rq->curr.
3200 membarrier_state = atomic_read(&next_mm->membarrier_state); in membarrier_switch_mm()
3201 if (READ_ONCE(rq->membarrier_state) == membarrier_state) in membarrier_switch_mm()
3204 WRITE_ONCE(rq->membarrier_state, membarrier_state); in membarrier_switch_mm()
3217 if (!(p->flags & PF_KTHREAD)) in is_per_cpu_kthread()
3220 if (p->nr_cpus_allowed != 1) in is_per_cpu_kthread()
3259 * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
3268 struct mm_struct *mm = t->mm; in mm_cid_put_lazy()
3269 struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; in mm_cid_put_lazy()
3273 cid = __this_cpu_read(pcpu_cid->cid); in mm_cid_put_lazy()
3275 !try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET)) in mm_cid_put_lazy()
3282 struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; in mm_cid_pcpu_unset()
3286 cid = __this_cpu_read(pcpu_cid->cid); in mm_cid_pcpu_unset()
3291 * Attempt transition from valid or lazy-put to unset. in mm_cid_pcpu_unset()
3293 res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET); in mm_cid_pcpu_unset()
3320 * filled. This only happens during concurrent remote-clear in __mm_cid_try_get()
3330 return -1; in __mm_cid_try_get()
3336 * with the per-cpu cid value, allowing to estimate how recently it was used.
3340 struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq)); in mm_cid_snapshot_time()
3343 WRITE_ONCE(pcpu_cid->time, rq->clock); in mm_cid_snapshot_time()
3351 * All allocations (even those using the cid_lock) are lock-free. If in __mm_cid_get()
3400 struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; in mm_cid_get()
3406 cid = __this_cpu_read(pcpu_cid->cid); in mm_cid_get()
3412 if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET)) in mm_cid_get()
3416 __this_cpu_write(pcpu_cid->cid, cid); in mm_cid_get()
3425 * Provide a memory barrier between rq->curr store and load of in switch_mm_cid()
3426 * {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition. in switch_mm_cid()
3430 if (!next->mm) { // to kernel in switch_mm_cid()
3432 * user -> kernel transition does not guarantee a barrier, but in switch_mm_cid()
3436 if (prev->mm) // from user in switch_mm_cid()
3439 * kernel -> kernel transition does not change rq->curr->mm in switch_mm_cid()
3444 * kernel -> user transition does not provide a barrier in switch_mm_cid()
3445 * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu]. in switch_mm_cid()
3448 if (!prev->mm) // from kernel in switch_mm_cid()
3451 * user -> user transition guarantees a memory barrier through in switch_mm_cid()
3452 * switch_mm() when current->mm changes. If current->mm is in switch_mm_cid()
3456 if (prev->mm_cid_active) { in switch_mm_cid()
3457 mm_cid_snapshot_time(rq, prev->mm); in switch_mm_cid()
3459 prev->mm_cid = -1; in switch_mm_cid()
3461 if (next->mm_cid_active) in switch_mm_cid()
3462 next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm); in switch_mm_cid()