Lines Matching +full:cs +full:- +full:0
7 * Copyright (C) 2004-2007 Silicon Graphics, Inc.
11 * sysfs is Copyright (c) 2001-3 Patrick Mochel
13 * 2003-10-10 Written by Simon Derr.
14 * 2003-10-22 Updates by Stephen Hemminger.
15 * 2004 May-July Rework by Paul Jackson.
54 * node binding, add this key to provide a quick low-cost judgment
72 PERR_NONE = 0,
102 * The user-configured masks can only be changed by writing to
116 * The user-configured masks are always the same as the effective masks.
119 /* user-configured CPUs and Memory Nodes allowed to tasks */
136 * effective_xcpus may be distributed to sub-partitions below & hence
149 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
150 * - A new cpuset's old_mems_allowed is initialized when some
152 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
162 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
172 /* number of valid sub-partitions */
180 * use_parent_ecpus - set if using parent's effective_cpus
181 * child_ecpus_count - # of children with use_parent_ecpus set
205 * Exclusive CPUs distributed out to sub-partitions of top_cpuset
220 * 0 - member (not a partition root)
221 * 1 - partition root
222 * 2 - partition root without load balancing (isolated)
223 * -1 - invalid partition root
224 * -2 - invalid isolated partition root
226 #define PRS_MEMBER 0
229 #define PRS_INVALID_ROOT -1
230 #define PRS_INVALID_ISOLATED -2
234 return prs_state < 0; in is_prs_invalid()
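The matched lines above spell out the partition_root_state encoding: valid states are non-negative (0 member, 1 root, 2 isolated), and invalidating a partition root simply negates the value, so is_prs_invalid() is a sign test and make_partition_invalid() a negation. A minimal standalone sketch of that encoding, assuming the PRS_ROOT/PRS_ISOLATED names follow the values listed in the comment (illustration only, not the kernel source):

    /* Sketch of the partition_root_state sign encoding shown above. */
    #include <stdio.h>

    #define PRS_MEMBER            0
    #define PRS_ROOT              1   /* value from the comment above */
    #define PRS_ISOLATED          2   /* value from the comment above */
    #define PRS_INVALID_ROOT     -1
    #define PRS_INVALID_ISOLATED -2

    static int is_prs_invalid(int prs_state)
    {
        return prs_state < 0;
    }

    /* Invalidation keeps the flavor (root vs isolated) by negating. */
    static void make_partition_invalid_demo(int *prs)
    {
        if (*prs > 0)
            *prs = -*prs;
    }

    int main(void)
    {
        int prs = PRS_ISOLATED;

        make_partition_invalid_demo(&prs);
        printf("prs=%d invalid=%d\n", prs, is_prs_invalid(prs)); /* prs=-2 invalid=1 */
        return 0;
    }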
257 static inline struct cpuset *parent_cs(struct cpuset *cs) in parent_cs() argument
259 return css_cs(cs->css.parent); in parent_cs()
264 struct cpuset *cs = task_cs(p); in inc_dl_tasks_cs() local
266 cs->nr_deadline_tasks++; in inc_dl_tasks_cs()
271 struct cpuset *cs = task_cs(p); in dec_dl_tasks_cs() local
273 cs->nr_deadline_tasks--; in dec_dl_tasks_cs()
289 static inline bool is_cpuset_online(struct cpuset *cs) in is_cpuset_online() argument
291 return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); in is_cpuset_online()
294 static inline int is_cpu_exclusive(const struct cpuset *cs) in is_cpu_exclusive() argument
296 return test_bit(CS_CPU_EXCLUSIVE, &cs->flags); in is_cpu_exclusive()
299 static inline int is_mem_exclusive(const struct cpuset *cs) in is_mem_exclusive() argument
301 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); in is_mem_exclusive()
304 static inline int is_mem_hardwall(const struct cpuset *cs) in is_mem_hardwall() argument
306 return test_bit(CS_MEM_HARDWALL, &cs->flags); in is_mem_hardwall()
309 static inline int is_sched_load_balance(const struct cpuset *cs) in is_sched_load_balance() argument
311 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in is_sched_load_balance()
314 static inline int is_memory_migrate(const struct cpuset *cs) in is_memory_migrate() argument
316 return test_bit(CS_MEMORY_MIGRATE, &cs->flags); in is_memory_migrate()
319 static inline int is_spread_page(const struct cpuset *cs) in is_spread_page() argument
321 return test_bit(CS_SPREAD_PAGE, &cs->flags); in is_spread_page()
324 static inline int is_spread_slab(const struct cpuset *cs) in is_spread_slab() argument
326 return test_bit(CS_SPREAD_SLAB, &cs->flags); in is_spread_slab()
329 static inline int is_partition_valid(const struct cpuset *cs) in is_partition_valid() argument
331 return cs->partition_root_state > 0; in is_partition_valid()
334 static inline int is_partition_invalid(const struct cpuset *cs) in is_partition_invalid() argument
336 return cs->partition_root_state < 0; in is_partition_invalid()
342 static inline void make_partition_invalid(struct cpuset *cs) in make_partition_invalid() argument
344 if (cs->partition_root_state > 0) in make_partition_invalid()
345 cs->partition_root_state = -cs->partition_root_state; in make_partition_invalid()
351 static inline void notify_partition_change(struct cpuset *cs, int old_prs) in notify_partition_change() argument
353 if (old_prs == cs->partition_root_state) in notify_partition_change()
355 cgroup_file_notify(&cs->partition_file); in notify_partition_change()
358 if (is_partition_valid(cs)) in notify_partition_change()
359 WRITE_ONCE(cs->prs_err, PERR_NONE); in notify_partition_change()
370 * cpuset_for_each_child - traverse online children of a cpuset
379 css_for_each_child((pos_css), &(parent_cs)->css) \
383 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
394 css_for_each_descendant_pre((pos_css), &(root_cs)->css) \
398 * There are two global locks guarding cpuset structures - cpuset_mutex and
404 * paths that rely on priority inheritance (e.g. scheduler - on RT) for
421 * If a task is only holding callback_lock, then it has read-only
429 * small pieces of code, such as when reading out possibly multi-word
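The locking comments above name the two global locks, cpuset_mutex and callback_lock, with the spinlock alone sufficing for read-only access to small pieces of data. A userspace analogue of that discipline, sketched with pthreads purely for illustration (the kernel uses its own mutex and spinlock primitives, not these):

    /* Writers take the big lock, then the short-held lock to publish;
     * readers needing only a consistent snapshot take the short lock. */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER; /* "cpuset_mutex" role */
    static pthread_spinlock_t snap_lock;                         /* "callback_lock" role */
    static uint64_t effective_cpus = 0x0f;

    static void writer_update(uint64_t newmask)
    {
        pthread_mutex_lock(&big_lock);      /* serialize all modifications */
        pthread_spin_lock(&snap_lock);      /* publish atomically to readers */
        effective_cpus = newmask;
        pthread_spin_unlock(&snap_lock);
        pthread_mutex_unlock(&big_lock);
    }

    static uint64_t reader_snapshot(void)
    {
        uint64_t val;

        pthread_spin_lock(&snap_lock);      /* read-only access needs only this */
        val = effective_cpus;
        pthread_spin_unlock(&snap_lock);
        return val;
    }

    int main(void)
    {
        pthread_spin_init(&snap_lock, PTHREAD_PROCESS_PRIVATE);
        writer_update(0xf0);
        printf("%#lx\n", (unsigned long)reader_snapshot());
        pthread_spin_destroy(&snap_lock);
        return 0;
    }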
482 (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); in is_in_v2_mode()
486 * partition_is_populated - check if partition has tasks
487 * @cs: partition root to be checked
491 * It is assumed that @cs is a valid partition root. @excluded_child should
492 * be non-NULL when this cpuset is going to become a partition itself.
494 static inline bool partition_is_populated(struct cpuset *cs, in partition_is_populated() argument
500 if (cs->css.cgroup->nr_populated_csets) in partition_is_populated()
502 if (!excluded_child && !cs->nr_subparts) in partition_is_populated()
503 return cgroup_is_populated(cs->css.cgroup); in partition_is_populated()
506 cpuset_for_each_child(child, css, cs) { in partition_is_populated()
511 if (cgroup_is_populated(child->css.cgroup)) { in partition_is_populated()
526 * One way or another, we guarantee to return some non-empty subset
535 struct cpuset *cs; in guarantee_online_cpus() local
541 cs = task_cs(tsk); in guarantee_online_cpus()
543 while (!cpumask_intersects(cs->effective_cpus, pmask)) { in guarantee_online_cpus()
544 cs = parent_cs(cs); in guarantee_online_cpus()
545 if (unlikely(!cs)) { in guarantee_online_cpus()
556 cpumask_and(pmask, pmask, cs->effective_cpus); in guarantee_online_cpus()
568 * One way or another, we guarantee to return some non-empty subset
573 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) in guarantee_online_mems() argument
575 while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) in guarantee_online_mems()
576 cs = parent_cs(cs); in guarantee_online_mems()
577 nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); in guarantee_online_mems()
586 static void cpuset_update_task_spread_flags(struct cpuset *cs, in cpuset_update_task_spread_flags() argument
592 if (is_spread_page(cs)) in cpuset_update_task_spread_flags()
597 if (is_spread_slab(cs)) in cpuset_update_task_spread_flags()
604 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
613 return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && in is_cpuset_subset()
614 nodes_subset(p->mems_allowed, q->mems_allowed) && in is_cpuset_subset()
620 * alloc_cpumasks - allocate three cpumasks for cpuset
621 * @cs: the cpuset that has cpumasks to be allocated.
623 * Return: 0 if successful, -ENOMEM otherwise.
625 * Only one of the two input arguments should be non-NULL.
627 static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) in alloc_cpumasks() argument
631 if (cs) { in alloc_cpumasks()
632 pmask1 = &cs->cpus_allowed; in alloc_cpumasks()
633 pmask2 = &cs->effective_cpus; in alloc_cpumasks()
634 pmask3 = &cs->effective_xcpus; in alloc_cpumasks()
635 pmask4 = &cs->exclusive_cpus; in alloc_cpumasks()
637 pmask1 = &tmp->new_cpus; in alloc_cpumasks()
638 pmask2 = &tmp->addmask; in alloc_cpumasks()
639 pmask3 = &tmp->delmask; in alloc_cpumasks()
644 return -ENOMEM; in alloc_cpumasks()
656 return 0; in alloc_cpumasks()
664 return -ENOMEM; in alloc_cpumasks()
668 * free_cpumasks - free cpumasks in a tmpmasks structure
669 * @cs: the cpuset that has cpumasks to be freed.
672 static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) in free_cpumasks() argument
674 if (cs) { in free_cpumasks()
675 free_cpumask_var(cs->cpus_allowed); in free_cpumasks()
676 free_cpumask_var(cs->effective_cpus); in free_cpumasks()
677 free_cpumask_var(cs->effective_xcpus); in free_cpumasks()
678 free_cpumask_var(cs->exclusive_cpus); in free_cpumasks()
681 free_cpumask_var(tmp->new_cpus); in free_cpumasks()
682 free_cpumask_var(tmp->addmask); in free_cpumasks()
683 free_cpumask_var(tmp->delmask); in free_cpumasks()
688 * alloc_trial_cpuset - allocate a trial cpuset
689 * @cs: the cpuset that the trial cpuset duplicates
691 static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) in alloc_trial_cpuset() argument
695 trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL); in alloc_trial_cpuset()
704 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); in alloc_trial_cpuset()
705 cpumask_copy(trial->effective_cpus, cs->effective_cpus); in alloc_trial_cpuset()
706 cpumask_copy(trial->effective_xcpus, cs->effective_xcpus); in alloc_trial_cpuset()
707 cpumask_copy(trial->exclusive_cpus, cs->exclusive_cpus); in alloc_trial_cpuset()
712 * free_cpuset - free the cpuset
713 * @cs: the cpuset to be freed
715 static inline void free_cpuset(struct cpuset *cs) in free_cpuset() argument
717 free_cpumasks(cs, NULL); in free_cpuset()
718 kfree(cs); in free_cpuset()
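alloc_trial_cpuset()/free_cpuset() above support the copy-validate-commit pattern used by the update paths later in the file: duplicate the cpuset, apply the requested change to the duplicate, run validate_change(), and copy the new masks back only on success. A minimal standalone sketch of that pattern, with plain bitmasks and an illustrative validate() stand-in (not the kernel's validate_change()):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct cs { uint64_t cpus_allowed; uint64_t parent_cpus; };

    /* Illustrative check: e.g. refuse a change that escapes the parent. */
    static int validate(const struct cs *cur, const struct cs *trial)
    {
        (void)cur;
        if (trial->cpus_allowed & ~trial->parent_cpus)
            return -EINVAL;
        return 0;
    }

    static int update_cpus(struct cs *cur, uint64_t new_cpus)
    {
        struct cs trial;
        int err;

        memcpy(&trial, cur, sizeof(trial));     /* the "trial" duplicate */
        trial.cpus_allowed = new_cpus;

        err = validate(cur, &trial);
        if (err)
            return err;                         /* nothing committed */

        cur->cpus_allowed = trial.cpus_allowed; /* commit only on success */
        return 0;
    }

    int main(void)
    {
        struct cs cs = { .cpus_allowed = 0x3, .parent_cpus = 0xf };

        printf("%d\n", update_cpus(&cs, 0x0c)); /* 0: within parent     */
        printf("%d\n", update_cpus(&cs, 0xf0)); /* -EINVAL: rejected    */
        return 0;
    }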
721 static inline struct cpumask *fetch_xcpus(struct cpuset *cs) in fetch_xcpus() argument
723 return !cpumask_empty(cs->exclusive_cpus) ? cs->exclusive_cpus : in fetch_xcpus()
724 cpumask_empty(cs->effective_xcpus) ? cs->cpus_allowed in fetch_xcpus()
725 : cs->effective_xcpus; in fetch_xcpus()
729 * cpusets_are_exclusive() - check if two cpusets are exclusive
744 * validate_change_legacy() - Validate conditions specific to legacy (v1)
756 ret = -EBUSY; in validate_change_legacy()
762 ret = -EACCES; in validate_change_legacy()
767 ret = 0; in validate_change_legacy()
773 * validate_change() - Used to validate that any proposed cpuset change
781 * 'cur' is the address of an actual, in-use cpuset. Operations
789 * Return 0 if valid, -errno if not.
796 int ret = 0; in validate_change()
812 * Cpusets with tasks - existing or newly being attached - can't in validate_change()
815 ret = -ENOSPC; in validate_change()
816 if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { in validate_change()
817 if (!cpumask_empty(cur->cpus_allowed) && in validate_change()
818 cpumask_empty(trial->cpus_allowed)) in validate_change()
820 if (!nodes_empty(cur->mems_allowed) && in validate_change()
821 nodes_empty(trial->mems_allowed)) in validate_change()
829 ret = -EBUSY; in validate_change()
831 !cpuset_cpumask_can_shrink(cur->cpus_allowed, in validate_change()
832 trial->cpus_allowed)) in validate_change()
839 ret = -EINVAL; in validate_change()
848 nodes_intersects(trial->mems_allowed, c->mems_allowed)) in validate_change()
852 ret = 0; in validate_change()
865 return cpumask_intersects(a->effective_cpus, b->effective_cpus); in cpusets_overlap()
871 if (dattr->relax_domain_level < c->relax_domain_level) in update_domain_attr()
872 dattr->relax_domain_level = c->relax_domain_level; in update_domain_attr()
885 if (cpumask_empty(cp->cpus_allowed)) { in update_domain_attr_tree()
899 /* jump label reference count + the top-level cpuset */ in nr_cpusets()
907 * A 'partial partition' is a set of non-overlapping subsets whose
914 * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
925 * cp - cpuset pointer, used (together with pos_css) to perform a
926 * top-down scan of all cpusets. For our purposes, rebuilding
929 * csa - (for CpuSet Array) Array of pointers to all the cpusets
936 * doms - Conversion of 'csa' to an array of cpumasks, for passing to
959 struct cpuset *cp; /* top-down scan of cpusets */ in generate_sched_domains()
965 int ndoms = 0; /* number of sched domains in result */ in generate_sched_domains()
986 cpumask_and(doms[0], top_cpuset.effective_cpus, in generate_sched_domains()
995 csn = 0; in generate_sched_domains()
1011 * If root is load-balancing, we can skip @cp if it in generate_sched_domains()
1014 if (!cpumask_empty(cp->cpus_allowed) && in generate_sched_domains()
1016 cpumask_intersects(cp->cpus_allowed, in generate_sched_domains()
1021 cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus)) in generate_sched_domains()
1025 !cpumask_empty(cp->effective_cpus)) in generate_sched_domains()
1034 for (i = 0; i < csn; i++) in generate_sched_domains()
1035 csa[i]->pn = i; in generate_sched_domains()
1040 for (i = 0; i < csn; i++) { in generate_sched_domains()
1042 int apn = a->pn; in generate_sched_domains()
1044 for (j = 0; j < csn; j++) { in generate_sched_domains()
1046 int bpn = b->pn; in generate_sched_domains()
1049 for (k = 0; k < csn; k++) { in generate_sched_domains()
1052 if (c->pn == bpn) in generate_sched_domains()
1053 c->pn = apn; in generate_sched_domains()
1055 ndoms--; /* one less element */ in generate_sched_domains()
1076 for (nslot = 0, i = 0; i < csn; i++) { in generate_sched_domains()
1079 int apn = a->pn; in generate_sched_domains()
1081 if (apn < 0) { in generate_sched_domains()
1093 warnings--; in generate_sched_domains()
1104 if (apn == b->pn) { in generate_sched_domains()
1105 cpumask_or(dp, dp, b->effective_cpus); in generate_sched_domains()
1111 b->pn = -1; in generate_sched_domains()
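The loops above merge overlapping cpusets into partitions by unifying their pn labels and decrementing ndoms on every merge; each surviving label later becomes one sched domain built as the union of its members' effective_cpus. A simplified standalone sketch of that merging step, using uint64_t bitmasks in place of cpumasks (illustration only; the kernel version also rescans after each merge):

    #include <stdint.h>
    #include <stdio.h>

    struct set { uint64_t cpus; int pn; };

    static int merge_partitions(struct set *csa, int csn)
    {
        int i, j, k, ndoms = csn;

        for (i = 0; i < csn; i++)
            csa[i].pn = i;              /* every set starts alone */

        for (i = 0; i < csn; i++) {
            for (j = 0; j < csn; j++) {
                if (csa[i].pn != csa[j].pn &&
                    (csa[i].cpus & csa[j].cpus)) {
                    int bpn = csa[j].pn;

                    for (k = 0; k < csn; k++)
                        if (csa[k].pn == bpn)
                            csa[k].pn = csa[i].pn;
                    ndoms--;            /* one less partition */
                }
            }
        }
        return ndoms;
    }

    int main(void)
    {
        struct set csa[] = {
            { 0x0f, 0 },    /* CPUs 0-3                              */
            { 0x18, 0 },    /* CPUs 3-4: overlaps the first set      */
            { 0xc0, 0 },    /* CPUs 6-7: disjoint, its own domain    */
        };
        int ndoms = merge_partitions(csa, 3);

        printf("ndoms=%d\n", ndoms);    /* prints ndoms=2 */
        return 0;
    }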
1133 static void dl_update_tasks_root_domain(struct cpuset *cs) in dl_update_tasks_root_domain() argument
1138 if (cs->nr_deadline_tasks == 0) in dl_update_tasks_root_domain()
1141 css_task_iter_start(&cs->css, 0, &it); in dl_update_tasks_root_domain()
1151 struct cpuset *cs = NULL; in dl_rebuild_rd_accounting() local
1166 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { in dl_rebuild_rd_accounting()
1168 if (cpumask_empty(cs->effective_cpus)) { in dl_rebuild_rd_accounting()
1173 css_get(&cs->css); in dl_rebuild_rd_accounting()
1177 dl_update_tasks_root_domain(cs); in dl_rebuild_rd_accounting()
1180 css_put(&cs->css); in dl_rebuild_rd_accounting()
1198 * If the flag 'sched_load_balance' of any cpuset with non-empty
1200 * which has that flag enabled, or if any cpuset with a non-empty
1211 struct cpuset *cs; in rebuild_sched_domains_locked() local
1237 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { in rebuild_sched_domains_locked()
1238 if (!is_partition_valid(cs)) { in rebuild_sched_domains_locked()
1242 if (!cpumask_subset(cs->effective_cpus, in rebuild_sched_domains_locked()
1273 * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
1274 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
1277 * Iterate through each task of @cs updating its cpus_allowed to the
1283 static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) in update_tasks_cpumask() argument
1287 bool top_cs = cs == &top_cpuset; in update_tasks_cpumask()
1289 css_task_iter_start(&cs->css, 0, &it); in update_tasks_cpumask()
1301 cpumask_and(new_cpus, possible_mask, cs->effective_cpus); in update_tasks_cpumask()
1309 * compute_effective_cpumask - Compute the effective cpumask of the cpuset
1311 * @cs: the cpuset that needs to recompute the new effective_cpus mask
1317 struct cpuset *cs, struct cpuset *parent) in compute_effective_cpumask() argument
1319 cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus); in compute_effective_cpumask()
1333 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1335 static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
1341 * Return: 0 if successful, an error code otherwise
1343 static int update_partition_exclusive(struct cpuset *cs, int new_prs) in update_partition_exclusive() argument
1345 bool exclusive = (new_prs > 0); in update_partition_exclusive()
1347 if (exclusive && !is_cpu_exclusive(cs)) { in update_partition_exclusive()
1348 if (update_flag(CS_CPU_EXCLUSIVE, cs, 1)) in update_partition_exclusive()
1350 } else if (!exclusive && is_cpu_exclusive(cs)) { in update_partition_exclusive()
1352 update_flag(CS_CPU_EXCLUSIVE, cs, 0); in update_partition_exclusive()
1354 return 0; in update_partition_exclusive()
1364 static void update_partition_sd_lb(struct cpuset *cs, int old_prs) in update_partition_sd_lb() argument
1366 int new_prs = cs->partition_root_state; in update_partition_sd_lb()
1367 bool rebuild_domains = (new_prs > 0) || (old_prs > 0); in update_partition_sd_lb()
1371 * If cs is not a valid partition root, the load balance state in update_partition_sd_lb()
1374 if (new_prs > 0) { in update_partition_sd_lb()
1377 new_lb = is_sched_load_balance(parent_cs(cs)); in update_partition_sd_lb()
1379 if (new_lb != !!is_sched_load_balance(cs)) { in update_partition_sd_lb()
1382 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1384 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1392 * tasks_nocpu_error - Return true if tasks will have no effective_cpus
1394 static bool tasks_nocpu_error(struct cpuset *parent, struct cpuset *cs, in tasks_nocpu_error() argument
1398 * A populated partition (cs or parent) can't have empty effective_cpus in tasks_nocpu_error()
1400 return (cpumask_subset(parent->effective_cpus, xcpus) && in tasks_nocpu_error()
1401 partition_is_populated(parent, cs)) || in tasks_nocpu_error()
1403 partition_is_populated(cs, NULL)); in tasks_nocpu_error()
1406 static void reset_partition_data(struct cpuset *cs) in reset_partition_data() argument
1408 struct cpuset *parent = parent_cs(cs); in reset_partition_data()
1415 cs->nr_subparts = 0; in reset_partition_data()
1416 if (cpumask_empty(cs->exclusive_cpus)) { in reset_partition_data()
1417 cpumask_clear(cs->effective_xcpus); in reset_partition_data()
1418 if (is_cpu_exclusive(cs)) in reset_partition_data()
1419 clear_bit(CS_CPU_EXCLUSIVE, &cs->flags); in reset_partition_data()
1421 if (!cpumask_and(cs->effective_cpus, in reset_partition_data()
1422 parent->effective_cpus, cs->cpus_allowed)) { in reset_partition_data()
1423 cs->use_parent_ecpus = true; in reset_partition_data()
1424 parent->child_ecpus_count++; in reset_partition_data()
1425 cpumask_copy(cs->effective_cpus, parent->effective_cpus); in reset_partition_data()
1430 * partition_xcpus_newstate - Exclusive CPUs state change
1445 * partition_xcpus_add - Add new exclusive CPUs to partition
1458 WARN_ON_ONCE(new_prs < 0); in partition_xcpus_add()
1467 isolcpus_updated = (new_prs != parent->partition_root_state); in partition_xcpus_add()
1469 partition_xcpus_newstate(parent->partition_root_state, new_prs, in partition_xcpus_add()
1472 cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); in partition_xcpus_add()
1477 * partition_xcpus_del - Remove exclusive CPUs from partition
1490 WARN_ON_ONCE(old_prs < 0); in partition_xcpus_del()
1498 isolcpus_updated = (old_prs != parent->partition_root_state); in partition_xcpus_del()
1500 partition_xcpus_newstate(old_prs, parent->partition_root_state, in partition_xcpus_del()
1504 cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); in partition_xcpus_del()
1518 WARN_ON_ONCE(ret < 0); in update_unbound_workqueue_cpumask()
1522 * cpuset_cpu_is_isolated - Check if the given CPU is isolated
1533 * compute_effective_exclusive_cpumask - compute effective exclusive CPUs
1534 * @cs: cpuset
1541 static bool compute_effective_exclusive_cpumask(struct cpuset *cs, in compute_effective_exclusive_cpumask() argument
1544 struct cpuset *parent = parent_cs(cs); in compute_effective_exclusive_cpumask()
1547 xcpus = cs->effective_xcpus; in compute_effective_exclusive_cpumask()
1549 if (!cpumask_empty(cs->exclusive_cpus)) in compute_effective_exclusive_cpumask()
1550 cpumask_and(xcpus, cs->exclusive_cpus, cs->cpus_allowed); in compute_effective_exclusive_cpumask()
1552 cpumask_copy(xcpus, cs->cpus_allowed); in compute_effective_exclusive_cpumask()
1554 return cpumask_and(xcpus, xcpus, parent->effective_xcpus); in compute_effective_exclusive_cpumask()
1557 static inline bool is_remote_partition(struct cpuset *cs) in is_remote_partition() argument
1559 return !list_empty(&cs->remote_sibling); in is_remote_partition()
1562 static inline bool is_local_partition(struct cpuset *cs) in is_local_partition() argument
1564 return is_partition_valid(cs) && !is_remote_partition(cs); in is_local_partition()
1568 * remote_partition_enable - Enable current cpuset as a remote partition root
1569 * @cs: the cpuset to update
1572 * Return: 1 if successful, 0 if error
1577 static int remote_partition_enable(struct cpuset *cs, int new_prs, in remote_partition_enable() argument
1586 return 0; in remote_partition_enable()
1596 compute_effective_exclusive_cpumask(cs, tmp->new_cpus); in remote_partition_enable()
1597 if (cpumask_empty(tmp->new_cpus) || in remote_partition_enable()
1598 cpumask_intersects(tmp->new_cpus, subpartitions_cpus) || in remote_partition_enable()
1599 cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus)) in remote_partition_enable()
1600 return 0; in remote_partition_enable()
1603 isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); in remote_partition_enable()
1604 list_add(&cs->remote_sibling, &remote_children); in remote_partition_enable()
1605 if (cs->use_parent_ecpus) { in remote_partition_enable()
1606 struct cpuset *parent = parent_cs(cs); in remote_partition_enable()
1608 cs->use_parent_ecpus = false; in remote_partition_enable()
1609 parent->child_ecpus_count--; in remote_partition_enable()
1617 update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_partition_enable()
1623 * remote_partition_disable - Remove current cpuset from remote partition list
1624 * @cs: the cpuset to update
1631 static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) in remote_partition_disable() argument
1635 compute_effective_exclusive_cpumask(cs, tmp->new_cpus); in remote_partition_disable()
1636 WARN_ON_ONCE(!is_remote_partition(cs)); in remote_partition_disable()
1637 WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); in remote_partition_disable()
1640 list_del_init(&cs->remote_sibling); in remote_partition_disable()
1641 isolcpus_updated = partition_xcpus_del(cs->partition_root_state, in remote_partition_disable()
1642 NULL, tmp->new_cpus); in remote_partition_disable()
1643 cs->partition_root_state = -cs->partition_root_state; in remote_partition_disable()
1644 if (!cs->prs_err) in remote_partition_disable()
1645 cs->prs_err = PERR_INVCPUS; in remote_partition_disable()
1646 reset_partition_data(cs); in remote_partition_disable()
1653 update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_partition_disable()
1658 * remote_cpus_update - cpus_exclusive change of remote partition
1659 * @cs: the cpuset to be updated
1666 static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, in remote_cpus_update() argument
1670 int prs = cs->partition_root_state; in remote_cpus_update()
1671 int isolcpus_updated = 0; in remote_cpus_update()
1673 if (WARN_ON_ONCE(!is_remote_partition(cs))) in remote_cpus_update()
1676 WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); in remote_cpus_update()
1681 adding = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus); in remote_cpus_update()
1682 deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask); in remote_cpus_update()
1690 cpumask_intersects(tmp->addmask, subpartitions_cpus) || in remote_cpus_update()
1691 cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))) in remote_cpus_update()
1696 isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); in remote_cpus_update()
1698 isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); in remote_cpus_update()
1705 update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_cpus_update()
1710 remote_partition_disable(cs, tmp); in remote_cpus_update()
1714 * remote_partition_check - check if a child remote partition needs update
1715 * @cs: the cpuset to be updated
1720 * This should be called before the given cs has updated its cpus_allowed
1723 static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, in remote_partition_check() argument
1727 int disable_cnt = 0; in remote_partition_check()
1732 if (!cpumask_andnot(delmask, cs->effective_xcpus, newmask) || in remote_partition_check()
1746 if (cpumask_intersects(child->effective_cpus, delmask)) { in remote_partition_check()
1755 * prstate_housekeeping_conflict - check for partition & housekeeping conflicts
1775 * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
1776 * @cs: The cpuset that requests change in partition root state
1780 * Return: 0 or a partition root state error code
1782 * For partcmd_enable*, the cpuset is being transformed from a non-partition
1785 * parent's effective_cpus. The function will return 0 if all the CPUs listed
1789 * root back to a non-partition root. Any CPUs in effective_xcpus will be
1790 * given back to parent's effective_cpus. 0 will always be returned.
1811 static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, in update_parent_effective_cpumask() argument
1815 struct cpuset *parent = parent_cs(cs); in update_parent_effective_cpumask()
1820 int subparts_delta = 0; in update_parent_effective_cpumask()
1821 struct cpumask *xcpus; /* cs effective_xcpus */ in update_parent_effective_cpumask()
1822 int isolcpus_updated = 0; in update_parent_effective_cpumask()
1832 old_prs = new_prs = cs->partition_root_state; in update_parent_effective_cpumask()
1833 xcpus = !cpumask_empty(cs->exclusive_cpus) in update_parent_effective_cpumask()
1834 ? cs->effective_xcpus : cs->cpus_allowed; in update_parent_effective_cpumask()
1838 return 0; in update_parent_effective_cpumask()
1844 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1845 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1846 if (old_prs > 0) { in update_parent_effective_cpumask()
1847 new_prs = -old_prs; in update_parent_effective_cpumask()
1848 subparts_delta--; in update_parent_effective_cpumask()
1862 if (!newmask && cpumask_empty(cs->cpus_allowed)) in update_parent_effective_cpumask()
1865 nocpu = tasks_nocpu_error(parent, cs, xcpus); in update_parent_effective_cpumask()
1874 !cpumask_intersects(xcpus, parent->effective_xcpus)) in update_parent_effective_cpumask()
1887 cpumask_copy(tmp->delmask, xcpus); in update_parent_effective_cpumask()
1897 cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1899 subparts_delta--; in update_parent_effective_cpumask()
1917 * & parent->effective_xcpus in update_parent_effective_cpumask()
1919 * & parent->effective_xcpus in update_parent_effective_cpumask()
1922 * delmask = newmask & parent->effective_xcpus in update_parent_effective_cpumask()
1926 deleting = cpumask_and(tmp->delmask, in update_parent_effective_cpumask()
1927 newmask, parent->effective_xcpus); in update_parent_effective_cpumask()
1929 cpumask_andnot(tmp->addmask, xcpus, newmask); in update_parent_effective_cpumask()
1930 adding = cpumask_and(tmp->addmask, tmp->addmask, in update_parent_effective_cpumask()
1931 parent->effective_xcpus); in update_parent_effective_cpumask()
1933 cpumask_andnot(tmp->delmask, newmask, xcpus); in update_parent_effective_cpumask()
1934 deleting = cpumask_and(tmp->delmask, tmp->delmask, in update_parent_effective_cpumask()
1935 parent->effective_xcpus); in update_parent_effective_cpumask()
1942 !cpumask_intersects(tmp->addmask, cpu_active_mask))) { in update_parent_effective_cpumask()
1945 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1946 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1952 * delmask = effective_xcpus & parent->effective_cpus in update_parent_effective_cpumask()
1967 if (is_partition_valid(cs)) in update_parent_effective_cpumask()
1968 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1969 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1970 } else if (is_partition_invalid(cs) && in update_parent_effective_cpumask()
1971 cpumask_subset(xcpus, parent->effective_xcpus)) { in update_parent_effective_cpumask()
1982 if (child == cs) in update_parent_effective_cpumask()
1984 if (!cpusets_are_exclusive(cs, child)) { in update_parent_effective_cpumask()
1991 deleting = cpumask_and(tmp->delmask, in update_parent_effective_cpumask()
1992 xcpus, parent->effective_cpus); in update_parent_effective_cpumask()
2000 WRITE_ONCE(cs->prs_err, part_error); in update_parent_effective_cpumask()
2007 switch (cs->partition_root_state) { in update_parent_effective_cpumask()
2011 new_prs = -old_prs; in update_parent_effective_cpumask()
2012 subparts_delta--; in update_parent_effective_cpumask()
2018 new_prs = -old_prs; in update_parent_effective_cpumask()
2026 return 0; in update_parent_effective_cpumask()
2032 * CPU lists in cs haven't been updated yet. So defer it to later. in update_parent_effective_cpumask()
2035 int err = update_partition_exclusive(cs, new_prs); in update_parent_effective_cpumask()
2050 cs->partition_root_state = new_prs; in update_parent_effective_cpumask()
2051 if (new_prs <= 0) in update_parent_effective_cpumask()
2052 cs->nr_subparts = 0; in update_parent_effective_cpumask()
2055 * Adding to parent's effective_cpus means deleting CPUs from cs in update_parent_effective_cpumask()
2060 tmp->addmask); in update_parent_effective_cpumask()
2063 tmp->delmask); in update_parent_effective_cpumask()
2066 parent->nr_subparts += subparts_delta; in update_parent_effective_cpumask()
2067 WARN_ON_ONCE(parent->nr_subparts < 0); in update_parent_effective_cpumask()
2073 update_partition_exclusive(cs, new_prs); in update_parent_effective_cpumask()
2076 update_tasks_cpumask(parent, tmp->addmask); in update_parent_effective_cpumask()
2077 update_sibling_cpumasks(parent, cs, tmp); in update_parent_effective_cpumask()
2087 update_partition_sd_lb(cs, old_prs); in update_parent_effective_cpumask()
2091 notify_partition_change(cs, old_prs); in update_parent_effective_cpumask()
2092 return 0; in update_parent_effective_cpumask()
2096 * compute_partition_effective_cpumask - compute effective_cpus for partition
2097 * @cs: partition root cpuset
2111 static void compute_partition_effective_cpumask(struct cpuset *cs, in compute_partition_effective_cpumask() argument
2116 bool populated = partition_is_populated(cs, NULL); in compute_partition_effective_cpumask()
2126 compute_effective_exclusive_cpumask(cs, new_ecpus); in compute_partition_effective_cpumask()
2130 cpuset_for_each_child(child, css, cs) { in compute_partition_effective_cpumask()
2134 child->prs_err = 0; in compute_partition_effective_cpumask()
2135 if (!cpumask_subset(child->effective_xcpus, in compute_partition_effective_cpumask()
2136 cs->effective_xcpus)) in compute_partition_effective_cpumask()
2137 child->prs_err = PERR_INVCPUS; in compute_partition_effective_cpumask()
2139 cpumask_subset(new_ecpus, child->effective_xcpus)) in compute_partition_effective_cpumask()
2140 child->prs_err = PERR_NOCPUS; in compute_partition_effective_cpumask()
2142 if (child->prs_err) { in compute_partition_effective_cpumask()
2143 int old_prs = child->partition_root_state; in compute_partition_effective_cpumask()
2150 cs->nr_subparts--; in compute_partition_effective_cpumask()
2151 child->nr_subparts = 0; in compute_partition_effective_cpumask()
2157 child->effective_xcpus); in compute_partition_effective_cpumask()
2165 #define HIER_CHECKALL 0x01 /* Check all cpusets with no skipping */
2166 #define HIER_NO_SD_REBUILD 0x02 /* Don't rebuild sched domains */
2169 * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
2170 * @cs: the cpuset to consider
2181 static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, in update_cpumasks_hier() argument
2190 cpuset_for_each_descendant_pre(cp, pos_css, cs) { in update_cpumasks_hier()
2197 * directly from top cpuset unless it is cs. in update_cpumasks_hier()
2199 if (remote && (cp != cs)) { in update_cpumasks_hier()
2208 if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) { in update_cpumasks_hier()
2214 old_prs = new_prs = cp->partition_root_state; in update_cpumasks_hier()
2217 compute_partition_effective_cpumask(cp, tmp->new_cpus); in update_cpumasks_hier()
2219 compute_effective_cpumask(tmp->new_cpus, cp, parent); in update_cpumasks_hier()
2226 if (is_partition_valid(cp) && cpumask_empty(tmp->new_cpus)) { in update_cpumasks_hier()
2237 if (is_in_v2_mode() && !remote && cpumask_empty(tmp->new_cpus)) { in update_cpumasks_hier()
2238 cpumask_copy(tmp->new_cpus, parent->effective_cpus); in update_cpumasks_hier()
2239 if (!cp->use_parent_ecpus) { in update_cpumasks_hier()
2240 cp->use_parent_ecpus = true; in update_cpumasks_hier()
2241 parent->child_ecpus_count++; in update_cpumasks_hier()
2243 } else if (cp->use_parent_ecpus) { in update_cpumasks_hier()
2244 cp->use_parent_ecpus = false; in update_cpumasks_hier()
2245 WARN_ON_ONCE(!parent->child_ecpus_count); in update_cpumasks_hier()
2246 parent->child_ecpus_count--; in update_cpumasks_hier()
2259 if (!cp->partition_root_state && !(flags & HIER_CHECKALL) && in update_cpumasks_hier()
2260 cpumask_equal(tmp->new_cpus, cp->effective_cpus) && in update_cpumasks_hier()
2270 * for cs already in update_cpumask(). We should also call in update_cpumasks_hier()
2274 if ((cp != cs) && old_prs) { in update_cpumasks_hier()
2275 switch (parent->partition_root_state) { in update_cpumasks_hier()
2288 new_prs = -cp->partition_root_state; in update_cpumasks_hier()
2289 WRITE_ONCE(cp->prs_err, in update_cpumasks_hier()
2296 if (!css_tryget_online(&cp->css)) in update_cpumasks_hier()
2306 new_prs = cp->partition_root_state; in update_cpumasks_hier()
2310 cpumask_copy(cp->effective_cpus, tmp->new_cpus); in update_cpumasks_hier()
2311 cp->partition_root_state = new_prs; in update_cpumasks_hier()
2316 if ((new_prs > 0) && cpumask_empty(cp->exclusive_cpus)) in update_cpumasks_hier()
2317 cpumask_and(cp->effective_xcpus, in update_cpumasks_hier()
2318 cp->cpus_allowed, parent->effective_xcpus); in update_cpumasks_hier()
2319 else if (new_prs < 0) in update_cpumasks_hier()
2326 !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); in update_cpumasks_hier()
2328 update_tasks_cpumask(cp, cp->effective_cpus); in update_cpumasks_hier()
2339 set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
2341 clear_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
2345 * On legacy hierarchy, if the effective cpumask of any non- in update_cpumasks_hier()
2350 if (!cpumask_empty(cp->cpus_allowed) && in update_cpumasks_hier()
2357 css_put(&cp->css); in update_cpumasks_hier()
2366 * update_sibling_cpumasks - Update siblings' cpumasks
2368 * @cs: Current cpuset
2371 static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, in update_sibling_cpumasks() argument
2396 if (sibling == cs) in update_sibling_cpumasks()
2398 if (!sibling->use_parent_ecpus && in update_sibling_cpumasks()
2400 compute_effective_cpumask(tmp->new_cpus, sibling, in update_sibling_cpumasks()
2402 if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus)) in update_sibling_cpumasks()
2405 if (!css_tryget_online(&sibling->css)) in update_sibling_cpumasks()
2411 css_put(&sibling->css); in update_sibling_cpumasks()
2417 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
2418 * @cs: the cpuset to consider
2422 static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, in update_cpumask() argument
2427 struct cpuset *parent = parent_cs(cs); in update_cpumask()
2429 int hier_flags = 0; in update_cpumask()
2430 int old_prs = cs->partition_root_state; in update_cpumask()
2432 /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ in update_cpumask()
2433 if (cs == &top_cpuset) in update_cpumask()
2434 return -EACCES; in update_cpumask()
2443 cpumask_clear(trialcs->cpus_allowed); in update_cpumask()
2444 cpumask_clear(trialcs->effective_xcpus); in update_cpumask()
2446 retval = cpulist_parse(buf, trialcs->cpus_allowed); in update_cpumask()
2447 if (retval < 0) in update_cpumask()
2450 if (!cpumask_subset(trialcs->cpus_allowed, in update_cpumask()
2452 return -EINVAL; in update_cpumask()
2457 * trialcs->effective_xcpus is used as a temporary cpumask in update_cpumask()
2460 if (!cpumask_empty(trialcs->exclusive_cpus) || is_partition_valid(cs)) in update_cpumask()
2465 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) in update_cpumask()
2466 return 0; in update_cpumask()
2469 return -ENOMEM; in update_cpumask()
2472 if (is_partition_valid(cs) && in update_cpumask()
2473 cpumask_empty(trialcs->effective_xcpus)) { in update_cpumask()
2475 cs->prs_err = PERR_INVCPUS; in update_cpumask()
2476 } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { in update_cpumask()
2478 cs->prs_err = PERR_HKEEPING; in update_cpumask()
2479 } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { in update_cpumask()
2481 cs->prs_err = PERR_NOCPUS; in update_cpumask()
2489 if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus)) in update_cpumask()
2492 retval = validate_change(cs, trialcs); in update_cpumask()
2494 if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { in update_cpumask()
2499 * The -EINVAL error code indicates that partition sibling in update_cpumask()
2511 cpumask_intersects(xcpus, cp->effective_xcpus)) { in update_cpumask()
2518 retval = 0; in update_cpumask()
2521 if (retval < 0) in update_cpumask()
2524 if (is_partition_valid(cs) || in update_cpumask()
2525 (is_partition_invalid(cs) && !invalidate)) { in update_cpumask()
2526 struct cpumask *xcpus = trialcs->effective_xcpus; in update_cpumask()
2528 if (cpumask_empty(xcpus) && is_partition_invalid(cs)) in update_cpumask()
2529 xcpus = trialcs->cpus_allowed; in update_cpumask()
2534 if (is_remote_partition(cs)) in update_cpumask()
2535 remote_cpus_update(cs, xcpus, &tmp); in update_cpumask()
2537 update_parent_effective_cpumask(cs, partcmd_invalidate, in update_cpumask()
2540 update_parent_effective_cpumask(cs, partcmd_update, in update_cpumask()
2542 } else if (!cpumask_empty(cs->exclusive_cpus)) { in update_cpumask()
2544 * Use trialcs->effective_cpus as a temp cpumask in update_cpumask()
2546 remote_partition_check(cs, trialcs->effective_xcpus, in update_cpumask()
2547 trialcs->effective_cpus, &tmp); in update_cpumask()
2551 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); in update_cpumask()
2552 cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); in update_cpumask()
2553 if ((old_prs > 0) && !is_partition_valid(cs)) in update_cpumask()
2554 reset_partition_data(cs); in update_cpumask()
2558 update_cpumasks_hier(cs, &tmp, hier_flags); in update_cpumask()
2561 if (cs->partition_root_state) in update_cpumask()
2562 update_partition_sd_lb(cs, old_prs); in update_cpumask()
2569 * update_exclusive_cpumask - update the exclusive_cpus mask of a cpuset
2570 * @cs: the cpuset to consider
2574 * The tasks' cpumask will be updated if cs is a valid partition root.
2576 static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, in update_exclusive_cpumask() argument
2581 struct cpuset *parent = parent_cs(cs); in update_exclusive_cpumask()
2583 int hier_flags = 0; in update_exclusive_cpumask()
2584 int old_prs = cs->partition_root_state; in update_exclusive_cpumask()
2587 cpumask_clear(trialcs->exclusive_cpus); in update_exclusive_cpumask()
2588 cpumask_clear(trialcs->effective_xcpus); in update_exclusive_cpumask()
2590 retval = cpulist_parse(buf, trialcs->exclusive_cpus); in update_exclusive_cpumask()
2591 if (retval < 0) in update_exclusive_cpumask()
2593 if (!is_cpu_exclusive(cs)) in update_exclusive_cpumask()
2594 set_bit(CS_CPU_EXCLUSIVE, &trialcs->flags); in update_exclusive_cpumask()
2598 if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) in update_exclusive_cpumask()
2599 return 0; in update_exclusive_cpumask()
2608 if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus)) in update_exclusive_cpumask()
2611 retval = validate_change(cs, trialcs); in update_exclusive_cpumask()
2616 return -ENOMEM; in update_exclusive_cpumask()
2619 if (cpumask_empty(trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2621 cs->prs_err = PERR_INVCPUS; in update_exclusive_cpumask()
2622 } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2624 cs->prs_err = PERR_HKEEPING; in update_exclusive_cpumask()
2625 } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2627 cs->prs_err = PERR_NOCPUS; in update_exclusive_cpumask()
2630 if (is_remote_partition(cs)) { in update_exclusive_cpumask()
2632 remote_partition_disable(cs, &tmp); in update_exclusive_cpumask()
2634 remote_cpus_update(cs, trialcs->effective_xcpus, in update_exclusive_cpumask()
2637 update_parent_effective_cpumask(cs, partcmd_invalidate, in update_exclusive_cpumask()
2640 update_parent_effective_cpumask(cs, partcmd_update, in update_exclusive_cpumask()
2641 trialcs->effective_xcpus, &tmp); in update_exclusive_cpumask()
2643 } else if (!cpumask_empty(trialcs->exclusive_cpus)) { in update_exclusive_cpumask()
2645 * Use trialcs->effective_cpus as a temp cpumask in update_exclusive_cpumask()
2647 remote_partition_check(cs, trialcs->effective_xcpus, in update_exclusive_cpumask()
2648 trialcs->effective_cpus, &tmp); in update_exclusive_cpumask()
2651 cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus); in update_exclusive_cpumask()
2652 cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); in update_exclusive_cpumask()
2653 if ((old_prs > 0) && !is_partition_valid(cs)) in update_exclusive_cpumask()
2654 reset_partition_data(cs); in update_exclusive_cpumask()
2662 if (is_partition_valid(cs) || hier_flags) in update_exclusive_cpumask()
2663 update_cpumasks_hier(cs, &tmp, hier_flags); in update_exclusive_cpumask()
2666 if (cs->partition_root_state) in update_exclusive_cpumask()
2667 update_partition_sd_lb(cs, old_prs); in update_exclusive_cpumask()
2670 return 0; in update_exclusive_cpumask()
2694 do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); in cpuset_migrate_mm_workfn()
2695 mmput(mwork->mm); in cpuset_migrate_mm_workfn()
2711 mwork->mm = mm; in cpuset_migrate_mm()
2712 mwork->from = *from; in cpuset_migrate_mm()
2713 mwork->to = *to; in cpuset_migrate_mm()
2714 INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); in cpuset_migrate_mm()
2715 queue_work(cpuset_migrate_mm_wq, &mwork->work); in cpuset_migrate_mm()
2727 * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
2731 * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
2742 write_seqcount_begin(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
2744 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); in cpuset_change_task_nodemask()
2746 tsk->mems_allowed = *newmems; in cpuset_change_task_nodemask()
2748 write_seqcount_end(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
2757 * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
2758 * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
2760 * Iterate through each task of @cs updating its mems_allowed to the
2764 static void update_tasks_nodemask(struct cpuset *cs) in update_tasks_nodemask() argument
2770 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ in update_tasks_nodemask()
2772 guarantee_online_mems(cs, &newmems); in update_tasks_nodemask()
2776 * take while holding tasklist_lock. Forks can happen - the in update_tasks_nodemask()
2784 css_task_iter_start(&cs->css, 0, &it); in update_tasks_nodemask()
2795 migrate = is_memory_migrate(cs); in update_tasks_nodemask()
2797 mpol_rebind_mm(mm, &cs->mems_allowed); in update_tasks_nodemask()
2799 cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); in update_tasks_nodemask()
2807 * cs->old_mems_allowed. in update_tasks_nodemask()
2809 cs->old_mems_allowed = newmems; in update_tasks_nodemask()
2816 * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
2817 * @cs: the cpuset to consider
2827 static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) in update_nodemasks_hier() argument
2833 cpuset_for_each_descendant_pre(cp, pos_css, cs) { in update_nodemasks_hier()
2836 nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); in update_nodemasks_hier()
2843 *new_mems = parent->effective_mems; in update_nodemasks_hier()
2846 if (nodes_equal(*new_mems, cp->effective_mems)) { in update_nodemasks_hier()
2851 if (!css_tryget_online(&cp->css)) in update_nodemasks_hier()
2856 cp->effective_mems = *new_mems; in update_nodemasks_hier()
2860 !nodes_equal(cp->mems_allowed, cp->effective_mems)); in update_nodemasks_hier()
2865 css_put(&cp->css); in update_nodemasks_hier()
2879 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
2880 * lock each such task's mm->mmap_lock, scan its vma's and rebind
2883 static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, in update_nodemask() argument
2890 * it's read-only in update_nodemask()
2892 if (cs == &top_cpuset) { in update_nodemask()
2893 retval = -EACCES; in update_nodemask()
2904 nodes_clear(trialcs->mems_allowed); in update_nodemask()
2906 retval = nodelist_parse(buf, trialcs->mems_allowed); in update_nodemask()
2907 if (retval < 0) in update_nodemask()
2910 if (!nodes_subset(trialcs->mems_allowed, in update_nodemask()
2912 retval = -EINVAL; in update_nodemask()
2917 if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { in update_nodemask()
2918 retval = 0; /* Too easy - nothing to do */ in update_nodemask()
2921 retval = validate_change(cs, trialcs); in update_nodemask()
2922 if (retval < 0) in update_nodemask()
2925 check_insane_mems_config(&trialcs->mems_allowed); in update_nodemask()
2928 cs->mems_allowed = trialcs->mems_allowed; in update_nodemask()
2931 /* use trialcs->mems_allowed as a temp variable */ in update_nodemask()
2932 update_nodemasks_hier(cs, &trialcs->mems_allowed); in update_nodemask()
2948 static int update_relax_domain_level(struct cpuset *cs, s64 val) in update_relax_domain_level() argument
2951 if (val < -1 || val >= sched_domain_level_max) in update_relax_domain_level()
2952 return -EINVAL; in update_relax_domain_level()
2955 if (val != cs->relax_domain_level) { in update_relax_domain_level()
2956 cs->relax_domain_level = val; in update_relax_domain_level()
2957 if (!cpumask_empty(cs->cpus_allowed) && in update_relax_domain_level()
2958 is_sched_load_balance(cs)) in update_relax_domain_level()
2962 return 0; in update_relax_domain_level()
2966 * update_tasks_flags - update the spread flags of tasks in the cpuset.
2967 * @cs: the cpuset in which each task's spread flags need to be changed
2969 * Iterate through each task of @cs updating its spread flags. As this
2973 static void update_tasks_flags(struct cpuset *cs) in update_tasks_flags() argument
2978 css_task_iter_start(&cs->css, 0, &it); in update_tasks_flags()
2980 cpuset_update_task_spread_flags(cs, task); in update_tasks_flags()
2985 * update_flag - read a 0 or a 1 in a file and update associated flag
2987 * cs: the cpuset to update
2993 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, in update_flag() argument
3001 trialcs = alloc_trial_cpuset(cs); in update_flag()
3003 return -ENOMEM; in update_flag()
3006 set_bit(bit, &trialcs->flags); in update_flag()
3008 clear_bit(bit, &trialcs->flags); in update_flag()
3010 err = validate_change(cs, trialcs); in update_flag()
3011 if (err < 0) in update_flag()
3014 balance_flag_changed = (is_sched_load_balance(cs) != in update_flag()
3017 spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) in update_flag()
3018 || (is_spread_page(cs) != is_spread_page(trialcs))); in update_flag()
3021 cs->flags = trialcs->flags; in update_flag()
3024 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) in update_flag()
3028 update_tasks_flags(cs); in update_flag()
3035 * update_prstate - update partition_root_state
3036 * @cs: the cpuset to update
3038 * Return: 0 if successful, != 0 if error
3042 static int update_prstate(struct cpuset *cs, int new_prs) in update_prstate() argument
3044 int err = PERR_NONE, old_prs = cs->partition_root_state; in update_prstate()
3045 struct cpuset *parent = parent_cs(cs); in update_prstate()
3050 return 0; in update_prstate()
3059 return -ENOMEM; in update_prstate()
3065 if ((new_prs > 0) && cpumask_empty(cs->exclusive_cpus)) { in update_prstate()
3067 cpumask_and(cs->effective_xcpus, in update_prstate()
3068 cs->cpus_allowed, parent->effective_xcpus); in update_prstate()
3072 err = update_partition_exclusive(cs, new_prs); in update_prstate()
3083 if (cpumask_empty(cs->cpus_allowed)) { in update_prstate()
3088 err = update_parent_effective_cpumask(cs, cmd, NULL, &tmpmask); in update_prstate()
3093 if (err && remote_partition_enable(cs, new_prs, &tmpmask)) in update_prstate()
3094 err = 0; in update_prstate()
3105 if (is_remote_partition(cs)) in update_prstate()
3106 remote_partition_disable(cs, &tmpmask); in update_prstate()
3108 update_parent_effective_cpumask(cs, partcmd_disable, in update_prstate()
3122 new_prs = -new_prs; in update_prstate()
3123 update_partition_exclusive(cs, new_prs); in update_prstate()
3127 cs->partition_root_state = new_prs; in update_prstate()
3128 WRITE_ONCE(cs->prs_err, err); in update_prstate()
3129 if (!is_partition_valid(cs)) in update_prstate()
3130 reset_partition_data(cs); in update_prstate()
3132 partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus); in update_prstate()
3137 update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0); in update_prstate()
3140 update_partition_sd_lb(cs, old_prs); in update_prstate()
3142 notify_partition_change(cs, old_prs); in update_prstate()
3144 return 0; in update_prstate()
3148 * Frequency meter - How fast is some event occurring?
3152 * fmeter_init() - initialize a frequency meter.
3153 * fmeter_markevent() - called each time the event happens.
3154 * fmeter_getrate() - returns the recent rate of such events.
3155 * fmeter_update() - internal routine used to update fmeter.
3162 * The filter is single-pole low-pass recursive (IIR). The time unit
3163 * is 1 second. Arithmetic is done using 32-bit integers scaled to
3167 * has a half-life of 10 seconds, meaning that if the events quit
3192 #define FM_COEF 933 /* coefficient for half-life of 10 secs */
3200 fmp->cnt = 0; in fmeter_init()
3201 fmp->val = 0; in fmeter_init()
3202 fmp->time = 0; in fmeter_init()
3203 spin_lock_init(&fmp->lock); in fmeter_init()
3206 /* Internal meter update - process cnt events and update value */
3213 ticks = now - fmp->time; in fmeter_update()
3215 if (ticks == 0) in fmeter_update()
3219 while (ticks-- > 0) in fmeter_update()
3220 fmp->val = (FM_COEF * fmp->val) / FM_SCALE; in fmeter_update()
3221 fmp->time = now; in fmeter_update()
3223 fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE; in fmeter_update()
3224 fmp->cnt = 0; in fmeter_update()
3230 spin_lock(&fmp->lock); in fmeter_markevent()
3232 fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE); in fmeter_markevent()
3233 spin_unlock(&fmp->lock); in fmeter_markevent()
3241 spin_lock(&fmp->lock); in fmeter_getrate()
3243 val = fmp->val; in fmeter_getrate()
3244 spin_unlock(&fmp->lock); in fmeter_getrate()
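The frequency meter above is a fixed-point single-pole IIR low-pass filter: each elapsed second decays the stored value by FM_COEF/FM_SCALE, then the events accumulated since the last update are mixed in, giving the ~10 second half-life stated in the comment. A standalone sketch of that arithmetic; FM_COEF 933 and the "each event adds FM_SCALE to cnt" behavior come from the excerpt, while FM_SCALE 1000 and the omission of the FM_MAXTICKS/FM_MAXCNT clamps are assumptions of this illustration:

    #include <stdio.h>
    #include <time.h>

    #define FM_COEF  933            /* ~10 second half-life (from the excerpt) */
    #define FM_SCALE 1000           /* assumed fixed-point scaling factor */

    struct fmeter { int cnt; int val; time_t time; };

    static void fmeter_update(struct fmeter *fmp, time_t now)
    {
        long ticks = now - fmp->time;

        if (ticks == 0)
            return;
        while (ticks-- > 0)         /* decay once per elapsed second */
            fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
        fmp->time = now;
        fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
        fmp->cnt = 0;
    }

    int main(void)
    {
        struct fmeter fm = { .cnt = 0, .val = 0, .time = 0 };
        time_t t;

        /* 10 events per second for 60 seconds (markevent adds FM_SCALE per event) */
        for (t = 1; t <= 60; t++) {
            fm.cnt += 10 * FM_SCALE;
            fmeter_update(&fm, t);
        }
        /* approaches 10 * FM_SCALE, i.e. roughly 10 events/s */
        printf("steady rate (scaled by %d): %d\n", FM_SCALE, fm.val);

        /* 10 idle seconds: value is roughly halved (the 10 s half-life) */
        fmeter_update(&fm, 70);
        printf("rate after 10 idle seconds: %d\n", fm.val);
        return 0;
    }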
3256 static int cpuset_can_attach_check(struct cpuset *cs) in cpuset_can_attach_check() argument
3258 if (cpumask_empty(cs->effective_cpus) || in cpuset_can_attach_check()
3259 (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))) in cpuset_can_attach_check()
3260 return -ENOSPC; in cpuset_can_attach_check()
3261 return 0; in cpuset_can_attach_check()
3264 static void reset_migrate_dl_data(struct cpuset *cs) in reset_migrate_dl_data() argument
3266 cs->nr_migrate_dl_tasks = 0; in reset_migrate_dl_data()
3267 cs->sum_migrate_dl_bw = 0; in reset_migrate_dl_data()
3274 struct cpuset *cs, *oldcs; in cpuset_can_attach() local
3282 cs = css_cs(css); in cpuset_can_attach()
3287 ret = cpuset_can_attach_check(cs); in cpuset_can_attach()
3291 cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); in cpuset_can_attach()
3292 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_can_attach()
3312 cs->nr_migrate_dl_tasks++; in cpuset_can_attach()
3313 cs->sum_migrate_dl_bw += task->dl.dl_bw; in cpuset_can_attach()
3317 if (!cs->nr_migrate_dl_tasks) in cpuset_can_attach()
3320 if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { in cpuset_can_attach()
3321 int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); in cpuset_can_attach()
3324 reset_migrate_dl_data(cs); in cpuset_can_attach()
3325 ret = -EINVAL; in cpuset_can_attach()
3329 ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); in cpuset_can_attach()
3331 reset_migrate_dl_data(cs); in cpuset_can_attach()
3341 cs->attach_in_progress++; in cpuset_can_attach()
3350 struct cpuset *cs; in cpuset_cancel_attach() local
3353 cs = css_cs(css); in cpuset_cancel_attach()
3356 cs->attach_in_progress--; in cpuset_cancel_attach()
3357 if (!cs->attach_in_progress) in cpuset_cancel_attach()
3360 if (cs->nr_migrate_dl_tasks) { in cpuset_cancel_attach()
3361 int cpu = cpumask_any(cs->effective_cpus); in cpuset_cancel_attach()
3363 dl_bw_free(cpu, cs->sum_migrate_dl_bw); in cpuset_cancel_attach()
3364 reset_migrate_dl_data(cs); in cpuset_cancel_attach()
3378 static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) in cpuset_attach_task() argument
3382 if (cs != &top_cpuset) in cpuset_attach_task()
3394 cpuset_update_task_spread_flags(cs, task); in cpuset_attach_task()
3402 struct cpuset *cs; in cpuset_attach() local
3407 cs = css_cs(css); in cpuset_attach()
3411 cpus_updated = !cpumask_equal(cs->effective_cpus, in cpuset_attach()
3412 oldcs->effective_cpus); in cpuset_attach()
3413 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_attach()
3423 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
3427 guarantee_online_mems(cs, &cpuset_attach_nodemask_to); in cpuset_attach()
3430 cpuset_attach_task(cs, task); in cpuset_attach()
3438 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
3439 if (!is_memory_migrate(cs) && !mems_updated) in cpuset_attach()
3456 if (is_memory_migrate(cs)) in cpuset_attach()
3457 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, in cpuset_attach()
3465 cs->old_mems_allowed = cpuset_attach_nodemask_to; in cpuset_attach()
3467 if (cs->nr_migrate_dl_tasks) { in cpuset_attach()
3468 cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; in cpuset_attach()
3469 oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; in cpuset_attach()
3470 reset_migrate_dl_data(cs); in cpuset_attach()
3473 cs->attach_in_progress--; in cpuset_attach()
3474 if (!cs->attach_in_progress) in cpuset_attach()
3507 struct cpuset *cs = css_cs(css); in cpuset_write_u64() local
3508 cpuset_filetype_t type = cft->private; in cpuset_write_u64()
3509 int retval = 0; in cpuset_write_u64()
3513 if (!is_cpuset_online(cs)) { in cpuset_write_u64()
3514 retval = -ENODEV; in cpuset_write_u64()
3520 retval = update_flag(CS_CPU_EXCLUSIVE, cs, val); in cpuset_write_u64()
3523 retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); in cpuset_write_u64()
3526 retval = update_flag(CS_MEM_HARDWALL, cs, val); in cpuset_write_u64()
3529 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); in cpuset_write_u64()
3532 retval = update_flag(CS_MEMORY_MIGRATE, cs, val); in cpuset_write_u64()
3538 retval = update_flag(CS_SPREAD_PAGE, cs, val); in cpuset_write_u64()
3541 retval = update_flag(CS_SPREAD_SLAB, cs, val); in cpuset_write_u64()
3544 retval = -EINVAL; in cpuset_write_u64()
3556 struct cpuset *cs = css_cs(css); in cpuset_write_s64() local
3557 cpuset_filetype_t type = cft->private; in cpuset_write_s64()
3558 int retval = -ENODEV; in cpuset_write_s64()
3562 if (!is_cpuset_online(cs)) in cpuset_write_s64()
3567 retval = update_relax_domain_level(cs, val); in cpuset_write_s64()
3570 retval = -EINVAL; in cpuset_write_s64()
3585 struct cpuset *cs = css_cs(of_css(of)); in cpuset_write_resmask() local
3587 int retval = -ENODEV; in cpuset_write_resmask()
3592 * CPU or memory hotunplug may leave @cs w/o any execution in cpuset_write_resmask()
3597 * As writes to "cpus" or "mems" may restore @cs's execution in cpuset_write_resmask()
3606 * protection is okay as we check whether @cs is online after in cpuset_write_resmask()
3610 css_get(&cs->css); in cpuset_write_resmask()
3611 kernfs_break_active_protection(of->kn); in cpuset_write_resmask()
3616 if (!is_cpuset_online(cs)) in cpuset_write_resmask()
3619 trialcs = alloc_trial_cpuset(cs); in cpuset_write_resmask()
3621 retval = -ENOMEM; in cpuset_write_resmask()
3625 switch (of_cft(of)->private) { in cpuset_write_resmask()
3627 retval = update_cpumask(cs, trialcs, buf); in cpuset_write_resmask()
3630 retval = update_exclusive_cpumask(cs, trialcs, buf); in cpuset_write_resmask()
3633 retval = update_nodemask(cs, trialcs, buf); in cpuset_write_resmask()
3636 retval = -EINVAL; in cpuset_write_resmask()
3644 kernfs_unbreak_active_protection(of->kn); in cpuset_write_resmask()
3645 css_put(&cs->css); in cpuset_write_resmask()
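cpuset_write_resmask() backs the list-format files ("cpus", "mems" and, on recent kernels, "cpus.exclusive"); the written string is parsed in the update_*mask() helpers using the usual cpulist/nodelist syntax. A sketch that assigns a CPU list, assuming a cgroup v2 group at an illustrative path:

/*
 * Illustrative sketch only: set a cpuset's CPU list.  The path is an
 * assumption; the string is handled by update_cpumask() above.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/cgroup/demo/cpuset.cpus", "w");	/* assumed path */

	if (!f) {
		perror("cpuset.cpus");
		return 1;
	}
	fprintf(f, "0-3,8\n");		/* standard cpulist format */
	return fclose(f) ? 1 : 0;
}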
3660 struct cpuset *cs = css_cs(seq_css(sf)); in cpuset_common_seq_show() local
3661 cpuset_filetype_t type = seq_cft(sf)->private; in cpuset_common_seq_show()
3662 int ret = 0; in cpuset_common_seq_show()
3668 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); in cpuset_common_seq_show()
3671 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); in cpuset_common_seq_show()
3674 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); in cpuset_common_seq_show()
3677 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); in cpuset_common_seq_show()
3680 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->exclusive_cpus)); in cpuset_common_seq_show()
3683 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_xcpus)); in cpuset_common_seq_show()
3692 ret = -EINVAL; in cpuset_common_seq_show()
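The "%*pbl" conversions in cpuset_common_seq_show() are the kernel's bitmap-list printf extension; cpumask_pr_args()/nodemask_pr_args() expand to the width/pointer pair it expects, so a mask with bits 0-3 and 8 set is rendered as "0-3,8". A minimal in-kernel sketch of the same idiom (illustrative, not part of this file):

/* Illustrative in-kernel sketch of the %*pbl bitmap-list format. */
#include <linux/cpumask.h>
#include <linux/printk.h>

static void demo_print_mask(const struct cpumask *mask)
{
	/* Prints e.g. "cpus: 0-3,8" for a mask with those bits set. */
	pr_info("cpus: %*pbl\n", cpumask_pr_args(mask));
}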
3701 struct cpuset *cs = css_cs(css); in cpuset_read_u64() local
3702 cpuset_filetype_t type = cft->private; in cpuset_read_u64()
3705 return is_cpu_exclusive(cs); in cpuset_read_u64()
3707 return is_mem_exclusive(cs); in cpuset_read_u64()
3709 return is_mem_hardwall(cs); in cpuset_read_u64()
3711 return is_sched_load_balance(cs); in cpuset_read_u64()
3713 return is_memory_migrate(cs); in cpuset_read_u64()
3717 return fmeter_getrate(&cs->fmeter); in cpuset_read_u64()
3719 return is_spread_page(cs); in cpuset_read_u64()
3721 return is_spread_slab(cs); in cpuset_read_u64()
3727 return 0; in cpuset_read_u64()
3732 struct cpuset *cs = css_cs(css); in cpuset_read_s64() local
3733 cpuset_filetype_t type = cft->private; in cpuset_read_s64()
3736 return cs->relax_domain_level; in cpuset_read_s64()
3742 return 0; in cpuset_read_s64()
3747 struct cpuset *cs = css_cs(seq_css(seq)); in sched_partition_show() local
3750 switch (cs->partition_root_state) { in sched_partition_show()
3766 err = perr_strings[READ_ONCE(cs->prs_err)]; in sched_partition_show()
3773 return 0; in sched_partition_show()
3779 struct cpuset *cs = css_cs(of_css(of)); in sched_partition_write() local
3781 int retval = -ENODEV; in sched_partition_write()
3795 return -EINVAL; in sched_partition_write()
3797 css_get(&cs->css); in sched_partition_write()
3800 if (!is_cpuset_online(cs)) in sched_partition_write()
3803 retval = update_prstate(cs, val); in sched_partition_write()
3807 css_put(&cs->css); in sched_partition_write()
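sched_partition_show()/sched_partition_write() implement the cgroup v2 "cpuset.cpus.partition" file. Writes accept "member", "root" or "isolated"; reads report the current state and, for an invalid partition, append a reason string taken from perr_strings[]. A sketch that requests a partition root and reads the result back (paths are assumptions):

/*
 * Illustrative sketch only: request a partition root and read the
 * state back; an invalid root reads as e.g. "root invalid (...)".
 */
#include <stdio.h>

int main(void)
{
	const char *part = "/sys/fs/cgroup/demo/cpuset.cpus.partition";	/* assumed */
	char state[128];
	FILE *f = fopen(part, "w");

	if (!f) {
		perror(part);
		return 1;
	}
	fputs("root\n", f);
	if (fclose(f)) {	/* write errors commonly surface on close */
		perror("enable partition");
		return 1;
	}

	f = fopen(part, "r");
	if (f && fgets(state, sizeof(state), f))
		printf("partition state: %s", state);
	if (f)
		fclose(f);
	return 0;
}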
3996 * cpuset_css_alloc - Allocate a cpuset css
3999 * Return: cpuset css on success, ERR_PTR(-ENOMEM) on failure.
4001 * Allocate and initialize a new cpuset css, for non-NULL @parent_css, return
4007 struct cpuset *cs; in cpuset_css_alloc() local
4012 cs = kzalloc(sizeof(*cs), GFP_KERNEL); in cpuset_css_alloc()
4013 if (!cs) in cpuset_css_alloc()
4014 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
4016 if (alloc_cpumasks(cs, NULL)) { in cpuset_css_alloc()
4017 kfree(cs); in cpuset_css_alloc()
4018 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
4021 __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_alloc()
4022 nodes_clear(cs->mems_allowed); in cpuset_css_alloc()
4023 nodes_clear(cs->effective_mems); in cpuset_css_alloc()
4024 fmeter_init(&cs->fmeter); in cpuset_css_alloc()
4025 cs->relax_domain_level = -1; in cpuset_css_alloc()
4026 INIT_LIST_HEAD(&cs->remote_sibling); in cpuset_css_alloc()
4030 __set_bit(CS_MEMORY_MIGRATE, &cs->flags); in cpuset_css_alloc()
4032 return &cs->css; in cpuset_css_alloc()
4037 struct cpuset *cs = css_cs(css); in cpuset_css_online() local
4038 struct cpuset *parent = parent_cs(cs); in cpuset_css_online()
4043 return 0; in cpuset_css_online()
4048 set_bit(CS_ONLINE, &cs->flags); in cpuset_css_online()
4050 set_bit(CS_SPREAD_PAGE, &cs->flags); in cpuset_css_online()
4052 set_bit(CS_SPREAD_SLAB, &cs->flags); in cpuset_css_online()
4058 cpumask_copy(cs->effective_cpus, parent->effective_cpus); in cpuset_css_online()
4059 cs->effective_mems = parent->effective_mems; in cpuset_css_online()
4060 cs->use_parent_ecpus = true; in cpuset_css_online()
4061 parent->child_ecpus_count++; in cpuset_css_online()
4066 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_online()
4074 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_online()
4078 if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) in cpuset_css_online()
4084 * historical reasons - the flag may be specified during mount. in cpuset_css_online()
4087 * refuse to clone the configuration - thereby refusing the task to in cpuset_css_online()
4091 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive in cpuset_css_online()
4104 cs->mems_allowed = parent->mems_allowed; in cpuset_css_online()
4105 cs->effective_mems = parent->mems_allowed; in cpuset_css_online()
4106 cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); in cpuset_css_online()
4107 cpumask_copy(cs->effective_cpus, parent->cpus_allowed); in cpuset_css_online()
4112 return 0; in cpuset_css_online()
4128 struct cpuset *cs = css_cs(css); in cpuset_css_offline() local
4133 if (is_partition_valid(cs)) in cpuset_css_offline()
4134 update_prstate(cs, 0); in cpuset_css_offline()
4137 is_sched_load_balance(cs)) in cpuset_css_offline()
4138 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); in cpuset_css_offline()
4140 if (cs->use_parent_ecpus) { in cpuset_css_offline()
4141 struct cpuset *parent = parent_cs(cs); in cpuset_css_offline()
4143 cs->use_parent_ecpus = false; in cpuset_css_offline()
4144 parent->child_ecpus_count--; in cpuset_css_offline()
4148 clear_bit(CS_ONLINE, &cs->flags); in cpuset_css_offline()
4156 struct cpuset *cs = css_cs(css); in cpuset_css_free() local
4158 free_cpuset(cs); in cpuset_css_free()
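cpuset_css_alloc(), cpuset_css_online(), cpuset_css_offline() and cpuset_css_free() are the css lifecycle callbacks; together with the attach and fork hooks seen earlier they are registered with the cgroup core through a struct cgroup_subsys. A rough sketch of that wiring follows; the member names come from the cgroup core, but treat the exact set installed by this file as an assumption that varies across kernel versions:

/*
 * Rough sketch of how the callbacks above plug into the cgroup core.
 * Not the literal definition from this file; the exact member set is
 * an assumption.
 */
static struct cgroup_subsys demo_cpuset_subsys_sketch = {
	.css_alloc	= cpuset_css_alloc,
	.css_online	= cpuset_css_online,
	.css_offline	= cpuset_css_offline,
	.css_free	= cpuset_css_free,
	.can_attach	= cpuset_can_attach,
	.cancel_attach	= cpuset_cancel_attach,
	.attach		= cpuset_attach,
	.can_fork	= cpuset_can_fork,
	.cancel_fork	= cpuset_cancel_fork,
	.fork		= cpuset_fork,
	.early_init	= true,
};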
4186 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_can_fork() local
4191 same_cs = (cs == task_cs(current)); in cpuset_can_fork()
4195 return 0; in cpuset_can_fork()
4201 ret = cpuset_can_attach_check(cs); in cpuset_can_fork()
4217 cs->attach_in_progress++; in cpuset_can_fork()
4225 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_cancel_fork() local
4229 same_cs = (cs == task_cs(current)); in cpuset_cancel_fork()
4236 cs->attach_in_progress--; in cpuset_cancel_fork()
4237 if (!cs->attach_in_progress) in cpuset_cancel_fork()
4249 struct cpuset *cs; in cpuset_fork() local
4253 cs = task_cs(task); in cpuset_fork()
4254 same_cs = (cs == task_cs(current)); in cpuset_fork()
4258 if (cs == &top_cpuset) in cpuset_fork()
4261 set_cpus_allowed_ptr(task, current->cpus_ptr); in cpuset_fork()
4262 task->mems_allowed = current->mems_allowed; in cpuset_fork()
4268 guarantee_online_mems(cs, &cpuset_attach_nodemask_to); in cpuset_fork()
4269 cpuset_attach_task(cs, task); in cpuset_fork()
4271 cs->attach_in_progress--; in cpuset_fork()
4272 if (!cs->attach_in_progress) in cpuset_fork()
4298 * cpuset_init - initialize cpusets at system boot
4321 top_cpuset.relax_domain_level = -1; in cpuset_init()
4326 return 0; in cpuset_init()
4334 * cpuset to its next-highest non-empty parent.
4336 static void remove_tasks_in_empty_cpuset(struct cpuset *cs) in remove_tasks_in_empty_cpuset() argument
4341 * Find its next-highest non-empty parent, (top cpuset in remove_tasks_in_empty_cpuset()
4344 parent = parent_cs(cs); in remove_tasks_in_empty_cpuset()
4345 while (cpumask_empty(parent->cpus_allowed) || in remove_tasks_in_empty_cpuset()
4346 nodes_empty(parent->mems_allowed)) in remove_tasks_in_empty_cpuset()
4349 if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { in remove_tasks_in_empty_cpuset()
4351 pr_cont_cgroup_name(cs->css.cgroup); in remove_tasks_in_empty_cpuset()
4357 hotplug_update_tasks_legacy(struct cpuset *cs, in hotplug_update_tasks_legacy() argument
4364 cpumask_copy(cs->cpus_allowed, new_cpus); in hotplug_update_tasks_legacy()
4365 cpumask_copy(cs->effective_cpus, new_cpus); in hotplug_update_tasks_legacy()
4366 cs->mems_allowed = *new_mems; in hotplug_update_tasks_legacy()
4367 cs->effective_mems = *new_mems; in hotplug_update_tasks_legacy()
4374 if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) in hotplug_update_tasks_legacy()
4375 update_tasks_cpumask(cs, new_cpus); in hotplug_update_tasks_legacy()
4376 if (mems_updated && !nodes_empty(cs->mems_allowed)) in hotplug_update_tasks_legacy()
4377 update_tasks_nodemask(cs); in hotplug_update_tasks_legacy()
4379 is_empty = cpumask_empty(cs->cpus_allowed) || in hotplug_update_tasks_legacy()
4380 nodes_empty(cs->mems_allowed); in hotplug_update_tasks_legacy()
4389 remove_tasks_in_empty_cpuset(cs); in hotplug_update_tasks_legacy()
4395 hotplug_update_tasks(struct cpuset *cs, in hotplug_update_tasks() argument
4400 if (cpumask_empty(new_cpus) && !is_partition_valid(cs)) in hotplug_update_tasks()
4401 cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); in hotplug_update_tasks()
4403 *new_mems = parent_cs(cs)->effective_mems; in hotplug_update_tasks()
4406 cpumask_copy(cs->effective_cpus, new_cpus); in hotplug_update_tasks()
4407 cs->effective_mems = *new_mems; in hotplug_update_tasks()
4411 update_tasks_cpumask(cs, new_cpus); in hotplug_update_tasks()
4413 update_tasks_nodemask(cs); in hotplug_update_tasks()
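On the default hierarchy, hotplug_update_tasks() keeps a cpuset usable by falling back to the parent's effective masks when its own CPUs go away, whereas the legacy path above empties the cpuset and may move its tasks to an ancestor. The difference is visible from user space; a sketch assuming a v2 group at an illustrative path and root privileges (the effective value is refreshed asynchronously by the hotplug handler, so it may lag briefly):

/*
 * Illustrative sketch only: offline CPU 1 and show the cpuset's
 * effective CPU list afterwards.  Paths are assumptions; needs root.
 */
#include <stdio.h>

static void show(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (f && fgets(line, sizeof(line), f))
		printf("%s: %s", path, line);
	if (f)
		fclose(f);
}

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/cpu1/online", "w");

	if (!f) {
		perror("cpu1/online");
		return 1;
	}
	fputs("0\n", f);
	fclose(f);

	show("/sys/fs/cgroup/demo/cpuset.cpus.effective");	/* assumed group */
	return 0;
}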
4433 int retries = 0; in cpuset_hotplug_cpus_read_trylock()
4448 * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
4449 * @cs: cpuset in interest
4452 * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
4453 * offline, update @cs accordingly. If @cs ends up with no CPU or memory,
4456 static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) in cpuset_hotplug_update_tasks() argument
4463 int partcmd = -1; in cpuset_hotplug_update_tasks()
4466 wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); in cpuset_hotplug_update_tasks()
4474 if (cs->attach_in_progress) { in cpuset_hotplug_update_tasks()
4479 parent = parent_cs(cs); in cpuset_hotplug_update_tasks()
4480 compute_effective_cpumask(&new_cpus, cs, parent); in cpuset_hotplug_update_tasks()
4481 nodes_and(new_mems, cs->mems_allowed, parent->effective_mems); in cpuset_hotplug_update_tasks()
4483 if (!tmp || !cs->partition_root_state) in cpuset_hotplug_update_tasks()
4490 remote = is_remote_partition(cs); in cpuset_hotplug_update_tasks()
4491 if (remote || (is_partition_valid(cs) && is_partition_valid(parent))) in cpuset_hotplug_update_tasks()
4492 compute_partition_effective_cpumask(cs, &new_cpus); in cpuset_hotplug_update_tasks()
4495 partition_is_populated(cs, NULL) && in cpuset_hotplug_update_tasks()
4497 remote_partition_disable(cs, tmp); in cpuset_hotplug_update_tasks()
4498 compute_effective_cpumask(&new_cpus, cs, parent); in cpuset_hotplug_update_tasks()
4511 if (is_local_partition(cs) && (!is_partition_valid(parent) || in cpuset_hotplug_update_tasks()
4512 tasks_nocpu_error(parent, cs, &new_cpus))) in cpuset_hotplug_update_tasks()
4518 else if (is_partition_valid(parent) && is_partition_invalid(cs)) in cpuset_hotplug_update_tasks()
4527 if (partcmd >= 0) { in cpuset_hotplug_update_tasks()
4531 update_parent_effective_cpumask(cs, partcmd, NULL, tmp); in cpuset_hotplug_update_tasks()
4533 if ((partcmd == partcmd_invalidate) || is_partition_valid(cs)) { in cpuset_hotplug_update_tasks()
4534 compute_partition_effective_cpumask(cs, &new_cpus); in cpuset_hotplug_update_tasks()
4540 cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); in cpuset_hotplug_update_tasks()
4541 mems_updated = !nodes_equal(new_mems, cs->effective_mems); in cpuset_hotplug_update_tasks()
4549 hotplug_update_tasks(cs, &new_cpus, &new_mems, in cpuset_hotplug_update_tasks()
4552 hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, in cpuset_hotplug_update_tasks()
4560 * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
4569 * Non-root cpusets are only affected by offlining. If any CPUs or memory
4622 top_cpuset.nr_subparts = 0; in cpuset_hotplug_workfn()
4648 struct cpuset *cs; in cpuset_hotplug_workfn() local
4652 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { in cpuset_hotplug_workfn()
4653 if (cs == &top_cpuset || !css_tryget_online(&cs->css)) in cpuset_hotplug_workfn()
4657 cpuset_hotplug_update_tasks(cs, ptmp); in cpuset_hotplug_workfn()
4660 css_put(&cs->css); in cpuset_hotplug_workfn()
4702 * cpuset_init_smp - initialize cpus_allowed
4720 cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); in cpuset_init_smp()
4725 * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
4726 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
4730 * attached to the specified @tsk. Guaranteed to return some non-empty
4738 struct cpuset *cs; in cpuset_cpus_allowed() local
4743 cs = task_cs(tsk); in cpuset_cpus_allowed()
4744 if (cs != &top_cpuset) in cpuset_cpus_allowed()
4751 if ((cs == &top_cpuset) || cpumask_empty(pmask)) { in cpuset_cpus_allowed()
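cpuset_cpus_allowed() fills a caller-supplied mask and, per the comment above, guarantees a non-empty result. A minimal in-kernel usage sketch (illustrative; demo_report_allowed() is a made-up helper):

/*
 * Illustrative in-kernel sketch: query a task's cpuset-allowed CPUs
 * into a caller-allocated mask.
 */
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/printk.h>
#include <linux/sched.h>

static int demo_report_allowed(struct task_struct *tsk)
{
	cpumask_var_t mask;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpuset_cpus_allowed(tsk, mask);
	pr_info("pid %d may use CPUs %*pbl\n",
		task_pid_nr(tsk), cpumask_pr_args(mask));

	free_cpumask_var(mask);
	return 0;
}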
4768 * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
4772 * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
4773 * mode however, this value is the same as task_cs(tsk)->effective_cpus,
4788 cs_mask = task_cs(tsk)->cpus_allowed; in cpuset_cpus_allowed_fallback()
4796 * We own tsk->cpus_allowed, nobody can change it under us. in cpuset_cpus_allowed_fallback()
4798 * But we used cs && cs->cpus_allowed lockless and thus can in cpuset_cpus_allowed_fallback()
4800 * the wrong tsk->cpus_allowed. However, both cases imply the in cpuset_cpus_allowed_fallback()
4801 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() in cpuset_cpus_allowed_fallback()
4805 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporarily in cpuset_cpus_allowed_fallback()
4817 nodes_setall(current->mems_allowed); in cpuset_init_current_mems_allowed()
4821 * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
4822 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
4825 * attached to the specified @tsk. Guaranteed to return some non-empty
4845 * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
4848 * Are any of the nodes in the nodemask allowed in current->mems_allowed?
4852 return nodes_intersects(*nodemask, current->mems_allowed); in cpuset_nodemask_valid_mems_allowed()
4856 * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
4861 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) in nearest_hardwall_ancestor() argument
4863 while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs)) in nearest_hardwall_ancestor()
4864 cs = parent_cs(cs); in nearest_hardwall_ancestor()
4865 return cs; in nearest_hardwall_ancestor()
4869 * cpuset_node_allowed - Can we allocate on a memory node?
4902 * in_interrupt - any node ok (current task context irrelevant)
4903 * GFP_ATOMIC - any node ok
4904 * tsk_is_oom_victim - any node ok
4905 * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
4906 * GFP_USER - only nodes in the current task's mems_allowed ok.
4910 struct cpuset *cs; /* current cpuset ancestors */ in cpuset_node_allowed() local
4916 if (node_isset(node, current->mems_allowed)) in cpuset_node_allowed()
4927 if (current->flags & PF_EXITING) /* Let dying task have memory */ in cpuset_node_allowed()
4934 cs = nearest_hardwall_ancestor(task_cs(current)); in cpuset_node_allowed()
4935 allowed = node_isset(node, cs->mems_allowed); in cpuset_node_allowed()
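Per the rules above, GFP_USER allocations are confined to current->mems_allowed, while GFP_KERNEL allocations may additionally use any node of the nearest hardwalled ancestor found by nearest_hardwall_ancestor(). A hedged sketch of the typical caller-side pattern (illustrative; demo_pick_node() is made up):

/*
 * Illustrative sketch: skip nodes the calling task's cpuset does not
 * permit for this allocation.  The check itself is the
 * cpuset_node_allowed() defined above.
 */
#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/nodemask.h>
#include <linux/numa.h>

static int demo_pick_node(gfp_t gfp_mask)
{
	int nid;

	for_each_online_node(nid) {
		if (!cpuset_node_allowed(nid, gfp_mask))
			continue;	/* disallowed by the rules above */
		return nid;
	}
	return NUMA_NO_NODE;
}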
4943 * cpuset_spread_node() - On which node to begin search for a page
4959 * only set nodes in task->mems_allowed that are online. So it
4970 return *rotor = next_node_in(*rotor, current->mems_allowed); in cpuset_spread_node()
4974 * cpuset_mem_spread_node() - On which node to begin search for a file page
4978 if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) in cpuset_mem_spread_node()
4979 current->cpuset_mem_spread_rotor = in cpuset_mem_spread_node()
4980 node_random(&current->mems_allowed); in cpuset_mem_spread_node()
4982 return cpuset_spread_node(&current->cpuset_mem_spread_rotor); in cpuset_mem_spread_node()
4986 * cpuset_slab_spread_node() - On which node to begin search for a slab page
4990 if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE) in cpuset_slab_spread_node()
4991 current->cpuset_slab_spread_rotor = in cpuset_slab_spread_node()
4992 node_random(&current->mems_allowed); in cpuset_slab_spread_node()
4994 return cpuset_spread_node(&current->cpuset_slab_spread_rotor); in cpuset_slab_spread_node()
4999 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
5012 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); in cpuset_mems_allowed_intersects()
5016 * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
5027 cgrp = task_cs(current)->css.cgroup; in cpuset_print_current_mems_allowed()
5031 nodemask_pr_args(&current->mems_allowed)); in cpuset_print_current_mems_allowed()
5045 * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
5056 * Display to user space in the per-cpuset read-only file
5065 fmeter_markevent(&task_cs(current)->fmeter); in __cpuset_memory_pressure_bump()
5072 * - Print task's cpuset path into seq_file.
5073 * - Used for /proc/<pid>/cpuset.
5074 * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
5075 * doesn't really matter if tsk->cpuset changes after we read it,
5086 retval = -ENOMEM; in proc_cpuset_show()
5092 retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, in proc_cpuset_show()
5093 current->nsproxy->cgroup_ns); in proc_cpuset_show()
5095 if (retval == -E2BIG) in proc_cpuset_show()
5096 retval = -ENAMETOOLONG; in proc_cpuset_show()
5097 if (retval < 0) in proc_cpuset_show()
5101 retval = 0; in proc_cpuset_show()
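proc_cpuset_show() backs /proc/<pid>/cpuset, printing the task's cpuset path relative to the caller's cgroup namespace. A trivial reader sketch:

/*
 * Illustrative sketch only: print the current task's cpuset path, as
 * produced by proc_cpuset_show() above.
 */
#include <stdio.h>

int main(void)
{
	char path[4096];
	FILE *f = fopen("/proc/self/cpuset", "r");

	if (!f) {
		perror("/proc/self/cpuset");
		return 1;
	}
	if (fgets(path, sizeof(path), f))
		fputs(path, stdout);	/* e.g. "/" or "/demo" */
	fclose(f);
	return 0;
}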
5113 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()
5115 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()