
4  *  Processor and Memory placement constraints for sets of tasks.
7 * Copyright (C) 2004-2007 Silicon Graphics, Inc.
11 * sysfs is Copyright (c) 2001-3 Patrick Mochel
13 * 2003-10-10 Written by Simon Derr.
14 * 2003-10-22 Updates by Stephen Hemminger.
15 * 2004 May-July Rework by Paul Jackson.
17 * 2008 Rework of the scheduler domains and CPU hotplug handling
20 * This file is subject to the terms and conditions of the GNU General Public
24 #include "cpuset-internal.h"
49 * node binding, add this key to provide a quick low-cost judgment
61 [PERR_CPUSEMPTY] = "cpuset.cpus and cpuset.cpus.exclusive are empty",
70 * the remote_partition_*() and remote_cpus_update() helpers.
73 * Exclusive CPUs distributed out to local or remote sub-partitions of
95 * - update_partition_sd_lb()
96 * - update_cpumasks_hier()
97 * - cpuset_update_flag()
98 * - cpuset_hotplug_update_tasks()
99 * - cpuset_handle_hotplug()
103 * Note that update_relax_domain_level() in cpuset-v1.c can still call
111 * 0 - member (not a partition root)
112 * 1 - partition root
113 * 2 - partition root without load balancing (isolated)
114 * -1 - invalid partition root
115 * -2 - invalid isolated partition root
117 * There are 2 types of partitions - local or remote. Local partitions are
131 #define PRS_INVALID_ROOT -1
132 #define PRS_INVALID_ISOLATED -2
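
A standalone illustration of the sign-flip encoding described above (values mirror the listing; toy userspace code, not part of the kernel):

	#include <assert.h>
	#include <stdbool.h>

	#define PRS_MEMBER		 0	/* not a partition root */
	#define PRS_ROOT		 1	/* partition root */
	#define PRS_ISOLATED		 2	/* partition root, no load balancing */
	#define PRS_INVALID_ROOT	-1
	#define PRS_INVALID_ISOLATED	-2

	static bool is_prs_invalid(int prs_state)
	{
		return prs_state < 0;
	}

	int main(void)
	{
		int prs = PRS_ISOLATED;

		/* Invalidation just negates a valid (positive) state ... */
		prs = -prs;
		assert(prs == PRS_INVALID_ISOLATED && is_prs_invalid(prs));

		/* ... so negating again recovers the original flavour. */
		prs = -prs;
		assert(prs == PRS_ISOLATED && !is_prs_invalid(prs));
		return 0;
	}
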
134 static inline bool is_prs_invalid(int prs_state) in is_prs_invalid()
152 cs->nr_deadline_tasks++; in inc_dl_tasks_cs()
159 cs->nr_deadline_tasks--; in dec_dl_tasks_cs()
162 static inline int is_partition_valid(const struct cpuset *cs) in is_partition_valid()
164 return cs->partition_root_state > 0; in is_partition_valid()
167 static inline int is_partition_invalid(const struct cpuset *cs) in is_partition_invalid()
169 return cs->partition_root_state < 0; in is_partition_invalid()
177 if (cs->partition_root_state > 0) in make_partition_invalid()
178 cs->partition_root_state = -cs->partition_root_state; in make_partition_invalid()
184 static inline void notify_partition_change(struct cpuset *cs, int old_prs) in notify_partition_change()
186 if (old_prs == cs->partition_root_state) in notify_partition_change()
188 cgroup_file_notify(&cs->partition_file); in notify_partition_change()
192 WRITE_ONCE(cs->prs_err, PERR_NONE); in notify_partition_change()
199 .relax_domain_level = -1,
204 * There are two global locks guarding cpuset structures - cpuset_mutex and
208 * paths that rely on priority inheritance (e.g. scheduler - on RT) for
213 * also acquire callback_lock and be able to modify cpusets. It can perform
225 * If a task is only holding callback_lock, then it has read-only
228 * Now, the task_struct fields mems_allowed and mempolicy may be changed
233 * small pieces of code, such as when reading out possibly multi-word
234 * cpumasks and nodemasks.
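
The division of labour described above (a sleeping mutex serializing writers, a spinlock guarding the short windows in which the masks are read or rewritten) can be sketched with userspace primitives; the names and the single mask variable are illustrative, not the kernel's:

	#include <pthread.h>

	static pthread_mutex_t cpuset_mutex = PTHREAD_MUTEX_INITIALIZER;
	static pthread_spinlock_t callback_lock;

	static unsigned long effective_cpus;	/* stand-in for a cpumask */

	static void locks_init(void)
	{
		pthread_spin_init(&callback_lock, PTHREAD_PROCESS_PRIVATE);
	}

	static void writer_update_mask(unsigned long new_mask)
	{
		pthread_mutex_lock(&cpuset_mutex);	/* long, sleepable section */
		/* ... validate the change, allocate temporaries, etc. ... */
		pthread_spin_lock(&callback_lock);	/* short commit window */
		effective_cpus = new_mask;
		pthread_spin_unlock(&callback_lock);
		pthread_mutex_unlock(&cpuset_mutex);
	}

	static unsigned long reader_snapshot_mask(void)
	{
		unsigned long snap;

		pthread_spin_lock(&callback_lock);	/* read-only access */
		snap = effective_cpus;
		pthread_spin_unlock(&callback_lock);
		return snap;
	}

	int main(void)
	{
		locks_init();
		writer_update_mask(0xf0);
		return reader_snapshot_mask() == 0xf0 ? 0 : 1;
	}
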
277 * decrease cs->attach_in_progress.
278 * wake_up cpuset_attach_wq if cs->attach_in_progress==0.
284 cs->attach_in_progress--; in dec_attach_in_progress_locked()
285 if (!cs->attach_in_progress) in dec_attach_in_progress_locked()
303 * Cgroup v2 behavior is used on the "cpus" and "mems" control files when
306 * With v2 behavior, "cpus" and "mems" are always what the users have
307 * requested and won't be changed by hotplug events. Only the effective
313 (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); in is_in_v2_mode()
317 * partition_is_populated - check if partition has tasks
323 * be non-NULL when this cpuset is going to become a partition itself.
331 if (cs->css.cgroup->nr_populated_csets) in partition_is_populated()
333 if (!excluded_child && !cs->nr_subparts) in partition_is_populated()
334 return cgroup_is_populated(cs->css.cgroup); in partition_is_populated()
342 if (cgroup_is_populated(child->css.cgroup)) { in partition_is_populated()
353 * are online and are capable of running the task. If none are found,
357 * One way or another, we guarantee to return some non-empty subset
374 while (!cpumask_intersects(cs->effective_cpus, pmask)) in guarantee_online_cpus()
377 cpumask_and(pmask, pmask, cs->effective_cpus); in guarantee_online_cpus()
387 * One way or another, we guarantee to return some non-empty subset
394 while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) in guarantee_online_mems()
396 nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); in guarantee_online_mems()
400 * alloc_cpumasks - allocate three cpumasks for cpuset
403 * Return: 0 if successful, -ENOMEM otherwise.
405 * Only one of the two input arguments should be non-NULL.
407 static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) in alloc_cpumasks()
412 pmask1 = &cs->cpus_allowed; in alloc_cpumasks()
413 pmask2 = &cs->effective_cpus; in alloc_cpumasks()
414 pmask3 = &cs->effective_xcpus; in alloc_cpumasks()
415 pmask4 = &cs->exclusive_cpus; in alloc_cpumasks()
417 pmask1 = &tmp->new_cpus; in alloc_cpumasks()
418 pmask2 = &tmp->addmask; in alloc_cpumasks()
419 pmask3 = &tmp->delmask; in alloc_cpumasks()
424 return -ENOMEM; in alloc_cpumasks()
444 return -ENOMEM; in alloc_cpumasks()
448 * free_cpumasks - free cpumasks in a tmpmasks structure
455 free_cpumask_var(cs->cpus_allowed); in free_cpumasks()
456 free_cpumask_var(cs->effective_cpus); in free_cpumasks()
457 free_cpumask_var(cs->effective_xcpus); in free_cpumasks()
458 free_cpumask_var(cs->exclusive_cpus); in free_cpumasks()
461 free_cpumask_var(tmp->new_cpus); in free_cpumasks()
462 free_cpumask_var(tmp->addmask); in free_cpumasks()
463 free_cpumask_var(tmp->delmask); in free_cpumasks()
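
Only the -ENOMEM returns of alloc_cpumasks() survive in the listing above; the usual shape of such a helper, with goto-based unwinding so a partially completed allocation never leaks, is sketched below. The labels and parameter layout are illustrative, not the kernel's exact code (assumes <linux/cpumask.h> and <linux/gfp.h>):

	/* Illustrative only: allocate up to four masks, unwinding on failure. */
	static inline int alloc_cpumasks_sketch(cpumask_var_t *pmask1,
						cpumask_var_t *pmask2,
						cpumask_var_t *pmask3,
						cpumask_var_t *pmask4)
	{
		if (!zalloc_cpumask_var(pmask1, GFP_KERNEL))
			return -ENOMEM;
		if (!zalloc_cpumask_var(pmask2, GFP_KERNEL))
			goto free_one;
		if (!zalloc_cpumask_var(pmask3, GFP_KERNEL))
			goto free_two;
		if (pmask4 && !zalloc_cpumask_var(pmask4, GFP_KERNEL))
			goto free_three;
		return 0;

	free_three:
		free_cpumask_var(*pmask3);
	free_two:
		free_cpumask_var(*pmask2);
	free_one:
		free_cpumask_var(*pmask1);
		return -ENOMEM;
	}
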
468 * alloc_trial_cpuset - allocate a trial cpuset
484 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); in alloc_trial_cpuset()
485 cpumask_copy(trial->effective_cpus, cs->effective_cpus); in alloc_trial_cpuset()
486 cpumask_copy(trial->effective_xcpus, cs->effective_xcpus); in alloc_trial_cpuset()
487 cpumask_copy(trial->exclusive_cpus, cs->exclusive_cpus); in alloc_trial_cpuset()
492 * free_cpuset - free the cpuset
504 return cpumask_empty(cs->exclusive_cpus) ? cs->cpus_allowed in user_xcpus()
505 : cs->exclusive_cpus; in user_xcpus()
510 return cpumask_empty(cs->cpus_allowed) && in xcpus_empty()
511 cpumask_empty(cs->exclusive_cpus); in xcpus_empty()
515 * cpusets_are_exclusive() - check if two cpusets are exclusive
530 * validate_change() - Used to validate that any proposed cpuset change
533 * If we replaced the flag and mask values of the current cpuset
534 * (cur) with those values in the trial cpuset (trial), would
535 * our various subset and exclusive rules still be valid? Presumes
538 * 'cur' is the address of an actual, in-use cpuset. Operations
544 * or flags changed to new, trial values.
546 * Return 0 if valid, -errno if not.
549 static int validate_change(struct cpuset *cur, struct cpuset *trial) in validate_change()
553 int ret = 0; in validate_change()
569 * Cpusets with tasks - existing or newly being attached - can't in validate_change()
572 ret = -ENOSPC; in validate_change()
573 if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { in validate_change()
574 if (!cpumask_empty(cur->cpus_allowed) && in validate_change()
575 cpumask_empty(trial->cpus_allowed)) in validate_change()
577 if (!nodes_empty(cur->mems_allowed) && in validate_change()
578 nodes_empty(trial->mems_allowed)) in validate_change()
591 * for non-isolated partition root. At this point, the target in validate_change()
599 ret = -EBUSY; in validate_change()
601 !cpuset_cpumask_can_shrink(cur->effective_cpus, user_xcpus(trial))) in validate_change()
608 ret = -EINVAL; in validate_change()
615 txset = !cpumask_empty(trial->exclusive_cpus); in validate_change()
616 cxset = !cpumask_empty(c->exclusive_cpus); in validate_change()
631 xcpus = trial->exclusive_cpus; in validate_change()
632 acpus = c->cpus_allowed; in validate_change()
634 xcpus = c->exclusive_cpus; in validate_change()
635 acpus = trial->cpus_allowed; in validate_change()
641 nodes_intersects(trial->mems_allowed, c->mems_allowed)) in validate_change()
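
Taken together with alloc_trial_cpuset()/free_cpuset() above, a writer follows a copy-validate-commit pattern: build a trial copy, apply the requested change to it, run validate_change() against the hierarchy, and commit the copy under callback_lock only if validation passed. A heavily abbreviated sketch of that flow (cpuset_mutex assumed already held by the caller; partition handling, hierarchy propagation and error reporting omitted):

	static int change_cpus_sketch(struct cpuset *cs, const char *buf)
	{
		struct cpuset *trialcs;
		int retval;

		trialcs = alloc_trial_cpuset(cs);
		if (!trialcs)
			return -ENOMEM;

		retval = cpulist_parse(buf, trialcs->cpus_allowed);
		if (retval)
			goto out_free;

		retval = validate_change(cs, trialcs);
		if (retval)
			goto out_free;

		spin_lock_irq(&callback_lock);
		cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
		spin_unlock_irq(&callback_lock);

	out_free:
		free_cpuset(trialcs);
		return retval;
	}
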
656 static int cpusets_overlap(struct cpuset *a, struct cpuset *b) in cpusets_overlap()
658 return cpumask_intersects(a->effective_cpus, b->effective_cpus); in cpusets_overlap()
664 if (dattr->relax_domain_level < c->relax_domain_level) in update_domain_attr()
665 dattr->relax_domain_level = c->relax_domain_level; in update_domain_attr()
678 if (cpumask_empty(cp->cpus_allowed)) { in update_domain_attr_tree()
690 static inline int nr_cpusets(void) in nr_cpusets()
692 /* jump label reference count + the top-level cpuset */ in nr_cpusets()
700 * A 'partial partition' is a set of non-overlapping subsets whose
707 * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
718 * cp - cpuset pointer, used (together with pos_css) to perform a
719 * top-down scan of all cpusets. For our purposes, rebuilding
722 * csa - (for CpuSet Array) Array of pointers to all the cpusets
729 * doms - Conversion of 'csa' to an array of cpumasks, for passing to
737 * balanced cpusets (using the array of cpuset pointers in csa[])
739 * and merging them using a union-find algorithm.
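
The merge step described above can be shown in isolation: every load-balanced set starts as its own domain, any two sets whose CPU masks intersect are unioned, and the surviving union-find roots are the scheduler domains. A toy userspace illustration (the kernel uses its own uf_node/uf_find/uf_union helpers on struct cpuset):

	#include <stdio.h>

	#define NSETS 4

	static int parent[NSETS];

	static int uf_find(int x)
	{
		while (parent[x] != x) {
			parent[x] = parent[parent[x]];	/* path halving */
			x = parent[x];
		}
		return x;
	}

	static void uf_union(int a, int b)
	{
		parent[uf_find(a)] = uf_find(b);
	}

	int main(void)
	{
		/* Toy "effective_cpus" masks: sets 0/1 overlap, sets 2/3 overlap. */
		unsigned long cpus[NSETS] = { 0x03, 0x06, 0x30, 0x20 };
		int i, j, ndoms = 0;

		for (i = 0; i < NSETS; i++)
			parent[i] = i;

		for (i = 0; i < NSETS; i++)
			for (j = i + 1; j < NSETS; j++)
				if (cpus[i] & cpus[j])	/* masks intersect */
					uf_union(i, j);

		for (i = 0; i < NSETS; i++)
			if (uf_find(i) == i)		/* one root per domain */
				ndoms++;

		printf("%d sched domains\n", ndoms);	/* prints 2 */
		return 0;
	}
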
746 static int generate_sched_domains(cpumask_var_t **domains, in generate_sched_domains()
749 struct cpuset *cp; /* top-down scan of cpusets */ in generate_sched_domains()
750 struct cpuset **csa; /* array of all cpuset ptrs */ in generate_sched_domains()
751 int csn; /* how many cpuset ptrs in csa so far */ in generate_sched_domains()
752 int i, j; /* indices for partition finding loops */ in generate_sched_domains()
755 int ndoms = 0; /* number of sched domains in result */ in generate_sched_domains()
756 int nslot; /* next empty doms[] struct cpumask slot */ in generate_sched_domains()
760 int nslot_update; in generate_sched_domains()
802 * Continue traversing beyond @cp iff @cp has some CPUs and in generate_sched_domains()
805 * parent's cpus, so just skip them, and then we call in generate_sched_domains()
809 if (!cpumask_empty(cp->cpus_allowed) && in generate_sched_domains()
811 cpumask_intersects(cp->cpus_allowed, in generate_sched_domains()
816 !cpumask_empty(cp->effective_cpus)) in generate_sched_domains()
825 * Only valid partition roots that are not isolated and with in generate_sched_domains()
 826 * non-empty effective_cpus will be saved into csa[]. in generate_sched_domains()
828 if ((cp->partition_root_state == PRS_ROOT) && in generate_sched_domains()
829 !cpumask_empty(cp->effective_cpus)) in generate_sched_domains()
833 * Skip @cp's subtree if not a partition root and has no in generate_sched_domains()
836 if (!is_partition_valid(cp) && cpumask_empty(cp->exclusive_cpus)) in generate_sched_domains()
849 uf_node_init(&csa[i]->node); in generate_sched_domains()
860 uf_union(&csa[i]->node, &csa[j]->node); in generate_sched_domains()
867 if (uf_find(&csa[i]->node) == &csa[i]->node) in generate_sched_domains()
873 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. in generate_sched_domains()
888 * to SD_ATTR_INIT. Also non-isolating partition root CPUs are a in generate_sched_domains()
898 cpumask_and(doms[i], csa[i]->effective_cpus, in generate_sched_domains()
901 cpumask_copy(doms[i], csa[i]->effective_cpus); in generate_sched_domains()
911 if (uf_find(&csa[j]->node) == &csa[i]->node) { in generate_sched_domains()
920 cpumask_or(dp, dp, csa[j]->effective_cpus); in generate_sched_domains()
951 if (cs->nr_deadline_tasks == 0) in dl_update_tasks_root_domain()
954 css_task_iter_start(&cs->css, 0, &it); in dl_update_tasks_root_domain()
966 int cpu; in dl_rebuild_rd_accounting()
984 if (cpumask_empty(cs->effective_cpus)) { in dl_rebuild_rd_accounting()
989 css_get(&cs->css); in dl_rebuild_rd_accounting()
996 css_put(&cs->css); in dl_rebuild_rd_accounting()
1004 * If the flag 'sched_load_balance' of any cpuset with non-empty
1006 * which has that flag enabled, or if any cpuset with a non-empty
1018 int ndoms; in rebuild_sched_domains_locked()
1049 if (!cpumask_subset(cs->effective_cpus, in rebuild_sched_domains_locked()
1058 /* Generate domain masks and attrs */ in rebuild_sched_domains_locked()
1092 * cpuset_update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
1094 * @new_cpus: the temp variable for the new effective_cpus mask
1113 css_task_iter_start(&cs->css, 0, &it); in cpuset_update_tasks_cpumask()
1123 if (task->flags & PF_NO_SETAFFINITY) in cpuset_update_tasks_cpumask()
1127 cpumask_and(new_cpus, possible_mask, cs->effective_cpus); in cpuset_update_tasks_cpumask()
1135 * compute_effective_cpumask - Compute the effective cpumask of the cpuset
1136 * @new_cpus: the temp variable for the new effective_cpus mask
1145 cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus); in compute_effective_cpumask()
1167 static int update_partition_exclusive_flag(struct cpuset *cs, int new_prs) in update_partition_exclusive_flag()
1182 * Update partition load balance flag and/or rebuild sched domain
1188 static void update_partition_sd_lb(struct cpuset *cs, int old_prs) in update_partition_sd_lb()
1190 int new_prs = cs->partition_root_state; in update_partition_sd_lb()
1206 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1208 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1216 * tasks_nocpu_error - Return true if tasks will have no effective_cpus
1224 return (cpumask_subset(parent->effective_cpus, xcpus) && in tasks_nocpu_error()
1239 cs->nr_subparts = 0; in reset_partition_data()
1240 if (cpumask_empty(cs->exclusive_cpus)) { in reset_partition_data()
1241 cpumask_clear(cs->effective_xcpus); in reset_partition_data()
1243 clear_bit(CS_CPU_EXCLUSIVE, &cs->flags); in reset_partition_data()
1245 if (!cpumask_and(cs->effective_cpus, parent->effective_cpus, cs->cpus_allowed)) in reset_partition_data()
1246 cpumask_copy(cs->effective_cpus, parent->effective_cpus); in reset_partition_data()
1250 * isolated_cpus_update - Update the isolated_cpus mask
1255 static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus) in isolated_cpus_update()
1265 * partition_xcpus_add - Add new exclusive CPUs to partition
1273 static bool partition_xcpus_add(int new_prs, struct cpuset *parent, in partition_xcpus_add()
1287 isolcpus_updated = (new_prs != parent->partition_root_state); in partition_xcpus_add()
1289 isolated_cpus_update(parent->partition_root_state, new_prs, in partition_xcpus_add()
1292 cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); in partition_xcpus_add()
1297 * partition_xcpus_del - Remove exclusive CPUs from partition
1305 static bool partition_xcpus_del(int old_prs, struct cpuset *parent, in partition_xcpus_del()
1318 isolcpus_updated = (old_prs != parent->partition_root_state); in partition_xcpus_del()
1320 isolated_cpus_update(old_prs, parent->partition_root_state, in partition_xcpus_del()
1324 cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); in partition_xcpus_del()
1330 int ret; in update_unbound_workqueue_cpumask()
1342 * cpuset_cpu_is_isolated - Check if the given CPU is isolated
1346 bool cpuset_cpu_is_isolated(int cpu) in cpuset_cpu_is_isolated()
1353 * compute_effective_exclusive_cpumask - compute effective exclusive CPUs
1360 * scan the sibling cpusets and exclude their exclusive_cpus or effective_xcpus
1361 * as well. The provision of real_cs means that a cpumask is being changed and
1364 static int compute_effective_exclusive_cpumask(struct cpuset *cs, in compute_effective_exclusive_cpumask()
1371 int retval = 0; in compute_effective_exclusive_cpumask()
1374 xcpus = cs->effective_xcpus; in compute_effective_exclusive_cpumask()
1376 cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus); in compute_effective_exclusive_cpumask()
1379 if (!cpumask_empty(cs->exclusive_cpus)) in compute_effective_exclusive_cpumask()
1393 if (!cpumask_empty(sibling->exclusive_cpus) && in compute_effective_exclusive_cpumask()
1394 cpumask_intersects(xcpus, sibling->exclusive_cpus)) { in compute_effective_exclusive_cpumask()
1395 cpumask_andnot(xcpus, xcpus, sibling->exclusive_cpus); in compute_effective_exclusive_cpumask()
1399 if (!cpumask_empty(sibling->effective_xcpus) && in compute_effective_exclusive_cpumask()
1400 cpumask_intersects(xcpus, sibling->effective_xcpus)) { in compute_effective_exclusive_cpumask()
1401 cpumask_andnot(xcpus, xcpus, sibling->effective_xcpus); in compute_effective_exclusive_cpumask()
1411 return !list_empty(&cs->remote_sibling); in is_remote_partition()
1420 * remote_partition_enable - Enable current cpuset as a remote partition root
1429 static int remote_partition_enable(struct cpuset *cs, int new_prs, in remote_partition_enable()
1442 * partitions and it can't use up all the root's effective_cpus. in remote_partition_enable()
1448 compute_effective_exclusive_cpumask(cs, tmp->new_cpus, NULL); in remote_partition_enable()
1449 if (cpumask_empty(tmp->new_cpus) || in remote_partition_enable()
1450 cpumask_intersects(tmp->new_cpus, subpartitions_cpus) || in remote_partition_enable()
1451 cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus)) in remote_partition_enable()
1455 isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); in remote_partition_enable()
1456 list_add(&cs->remote_sibling, &remote_children); in remote_partition_enable()
1457 cpumask_copy(cs->effective_xcpus, tmp->new_cpus); in remote_partition_enable()
1461 cs->prs_err = 0; in remote_partition_enable()
1466 cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_partition_enable()
1472 * remote_partition_disable - Remove current cpuset from remote partition list
1485 WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); in remote_partition_disable()
1488 list_del_init(&cs->remote_sibling); in remote_partition_disable()
1489 isolcpus_updated = partition_xcpus_del(cs->partition_root_state, in remote_partition_disable()
1490 NULL, cs->effective_xcpus); in remote_partition_disable()
1491 if (cs->prs_err) in remote_partition_disable()
1492 cs->partition_root_state = -cs->partition_root_state; in remote_partition_disable()
1494 cs->partition_root_state = PRS_MEMBER; in remote_partition_disable()
1506 cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_partition_disable()
1511 * remote_cpus_update - cpus_exclusive change of remote partition
1513 * @xcpus: the new exclusive_cpus mask, if non-NULL
1517 * top_cpuset and subpartitions_cpus will be updated or partition can be
1524 int prs = cs->partition_root_state; in remote_cpus_update()
1525 int isolcpus_updated = 0; in remote_cpus_update()
1530 WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); in remote_cpus_update()
1533 cs->prs_err = PERR_CPUSEMPTY; in remote_cpus_update()
1537 adding = cpumask_andnot(tmp->addmask, excpus, cs->effective_xcpus); in remote_cpus_update()
1538 deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, excpus); in remote_cpus_update()
1542 * not allocated to other partitions and there are effective_cpus in remote_cpus_update()
1547 cs->prs_err = PERR_ACCESS; in remote_cpus_update()
1548 else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) || in remote_cpus_update()
1549 cpumask_subset(top_cpuset.effective_cpus, tmp->addmask)) in remote_cpus_update()
1550 cs->prs_err = PERR_NOCPUS; in remote_cpus_update()
1551 if (cs->prs_err) in remote_cpus_update()
1557 isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); in remote_cpus_update()
1559 isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); in remote_cpus_update()
1561 * Need to update effective_xcpus and exclusive_cpus now as in remote_cpus_update()
1564 cpumask_copy(cs->effective_xcpus, excpus); in remote_cpus_update()
1566 cpumask_copy(cs->exclusive_cpus, xcpus); in remote_cpus_update()
1575 cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_cpus_update()
1584 * prstate_housekeeping_conflict - check for partition & housekeeping conflicts
1592 static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus) in prstate_housekeeping_conflict()
1604 * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
1608 * @tmp: Temporary addmask and delmask
1611 * For partcmd_enable*, the cpuset is being transformed from a non-partition
1618 * root back to a non-partition root. Any CPUs in effective_xcpus will be
1630 * The partcmd_enable* and partcmd_disable commands are used by
1631 * update_prstate(). An error code may be returned and the caller will check
1635 * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used
1637 * check for error and so partition_root_state and prs_err will be updated
1640 static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, in update_parent_effective_cpumask()
1645 int adding; /* Adding cpus to parent's effective_cpus */ in update_parent_effective_cpumask()
1646 int deleting; /* Deleting cpus from parent's effective_cpus */ in update_parent_effective_cpumask()
1647 int old_prs, new_prs; in update_parent_effective_cpumask()
1648 int part_error = PERR_NONE; /* Partition error? */ in update_parent_effective_cpumask()
1649 int subparts_delta = 0; in update_parent_effective_cpumask()
1650 int isolcpus_updated = 0; in update_parent_effective_cpumask()
1658 * new_prs will only be changed for the partcmd_update and in update_parent_effective_cpumask()
1662 old_prs = new_prs = cs->partition_root_state; in update_parent_effective_cpumask()
1672 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1673 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1675 new_prs = -old_prs; in update_parent_effective_cpumask()
1676 subparts_delta--; in update_parent_effective_cpumask()
1701 xcpus = tmp->new_cpus; in update_parent_effective_cpumask()
1703 WARN_ON_ONCE(!cpumask_empty(cs->exclusive_cpus)); in update_parent_effective_cpumask()
1722 deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1729 * (and maybe removed from subpartitions_cpus/isolated_cpus) in update_parent_effective_cpumask()
1734 cpumask_copy(tmp->addmask, cs->effective_xcpus); in update_parent_effective_cpumask()
1736 subparts_delta--; in update_parent_effective_cpumask()
1758 * & parent->effective_xcpus in update_parent_effective_cpumask()
1760 * & parent->effective_xcpus in update_parent_effective_cpumask()
1763 * delmask = newmask & parent->effective_xcpus in update_parent_effective_cpumask()
1767 deleting = cpumask_and(tmp->delmask, in update_parent_effective_cpumask()
1768 newmask, parent->effective_xcpus); in update_parent_effective_cpumask()
1770 cpumask_andnot(tmp->addmask, xcpus, newmask); in update_parent_effective_cpumask()
1771 adding = cpumask_and(tmp->addmask, tmp->addmask, in update_parent_effective_cpumask()
1772 parent->effective_xcpus); in update_parent_effective_cpumask()
1774 cpumask_andnot(tmp->delmask, newmask, xcpus); in update_parent_effective_cpumask()
1775 deleting = cpumask_and(tmp->delmask, tmp->delmask, in update_parent_effective_cpumask()
1776 parent->effective_xcpus); in update_parent_effective_cpumask()
1780 * become empty and there are tasks in the parent. in update_parent_effective_cpumask()
1783 !cpumask_intersects(tmp->addmask, cpu_active_mask))) { in update_parent_effective_cpumask()
1786 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1787 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1793 * delmask = effective_xcpus & parent->effective_cpus in update_parent_effective_cpumask()
1802 * A partition error happens when parent has tasks and all in update_parent_effective_cpumask()
1809 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1810 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1812 cpumask_subset(xcpus, parent->effective_xcpus)) { in update_parent_effective_cpumask()
1832 deleting = cpumask_and(tmp->delmask, in update_parent_effective_cpumask()
1833 xcpus, parent->effective_cpus); in update_parent_effective_cpumask()
1841 WRITE_ONCE(cs->prs_err, part_error); in update_parent_effective_cpumask()
1845 * Check for possible transition between valid and invalid in update_parent_effective_cpumask()
1848 switch (cs->partition_root_state) { in update_parent_effective_cpumask()
1852 new_prs = -old_prs; in update_parent_effective_cpumask()
1853 subparts_delta--; in update_parent_effective_cpumask()
1859 new_prs = -old_prs; in update_parent_effective_cpumask()
1872 * validate_change() has already been successfully called and in update_parent_effective_cpumask()
1876 int err = update_partition_exclusive_flag(cs, new_prs); in update_parent_effective_cpumask()
1886 * Newly added CPUs will be removed from effective_cpus and in update_parent_effective_cpumask()
1891 cs->partition_root_state = new_prs; in update_parent_effective_cpumask()
1893 cs->nr_subparts = 0; in update_parent_effective_cpumask()
1897 * and vice versa. in update_parent_effective_cpumask()
1901 tmp->addmask); in update_parent_effective_cpumask()
1904 tmp->delmask); in update_parent_effective_cpumask()
1907 parent->nr_subparts += subparts_delta; in update_parent_effective_cpumask()
1908 WARN_ON_ONCE(parent->nr_subparts < 0); in update_parent_effective_cpumask()
1917 cpuset_update_tasks_cpumask(parent, tmp->addmask); in update_parent_effective_cpumask()
1923 * cpuset_handle_hotplug(). Update the load balance flag and in update_parent_effective_cpumask()
1934 * compute_partition_effective_cpumask - compute effective_cpus for partition
1939 * of child partition roots and excluding their effective_xcpus.
1943 * or update_cpumasks_hier() where parent and children are modified
1945 * and the child's effective_cpus will be updated in later iterations.
1961 * 2) All the effective_cpus will be used up and cp in compute_partition_effective_cpumask()
1977 child->prs_err = 0; in compute_partition_effective_cpumask()
1978 if (!cpumask_subset(child->effective_xcpus, in compute_partition_effective_cpumask()
1979 cs->effective_xcpus)) in compute_partition_effective_cpumask()
1980 child->prs_err = PERR_INVCPUS; in compute_partition_effective_cpumask()
1982 cpumask_subset(new_ecpus, child->effective_xcpus)) in compute_partition_effective_cpumask()
1983 child->prs_err = PERR_NOCPUS; in compute_partition_effective_cpumask()
1985 if (child->prs_err) { in compute_partition_effective_cpumask()
1986 int old_prs = child->partition_root_state; in compute_partition_effective_cpumask()
1993 cs->nr_subparts--; in compute_partition_effective_cpumask()
1994 child->nr_subparts = 0; in compute_partition_effective_cpumask()
2000 child->effective_xcpus); in compute_partition_effective_cpumask()
2006 * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
2012 * and all its descendants need to be updated.
2024 int old_prs, new_prs; in update_cpumasks_hier()
2032 old_prs = new_prs = cp->partition_root_state; in update_cpumasks_hier()
2039 * remote_cpus_update() will reuse tmp->new_cpus only after in update_cpumasks_hier()
2043 compute_effective_exclusive_cpumask(cp, tmp->new_cpus, NULL); in update_cpumasks_hier()
2044 if (cpumask_equal(cp->effective_xcpus, tmp->new_cpus)) { in update_cpumasks_hier()
2049 remote_cpus_update(cp, NULL, tmp->new_cpus, tmp); in update_cpumasks_hier()
2053 new_prs = cp->partition_root_state; in update_cpumasks_hier()
2058 compute_partition_effective_cpumask(cp, tmp->new_cpus); in update_cpumasks_hier()
2060 compute_effective_cpumask(tmp->new_cpus, cp, parent); in update_cpumasks_hier()
2070 if (is_partition_valid(cp) && cpumask_empty(tmp->new_cpus)) { in update_cpumasks_hier()
2081 if (is_in_v2_mode() && !remote && cpumask_empty(tmp->new_cpus)) in update_cpumasks_hier()
2082 cpumask_copy(tmp->new_cpus, parent->effective_cpus); in update_cpumasks_hier()
2088 * 3) force flag not set, and in update_cpumasks_hier()
2091 if (!cp->partition_root_state && !force && in update_cpumasks_hier()
2092 cpumask_equal(tmp->new_cpus, cp->effective_cpus) && in update_cpumasks_hier()
2107 switch (parent->partition_root_state) { in update_cpumasks_hier()
2120 new_prs = -cp->partition_root_state; in update_cpumasks_hier()
2121 WRITE_ONCE(cp->prs_err, in update_cpumasks_hier()
2128 if (!css_tryget_online(&cp->css)) in update_cpumasks_hier()
2138 new_prs = cp->partition_root_state; in update_cpumasks_hier()
2142 cpumask_copy(cp->effective_cpus, tmp->new_cpus); in update_cpumasks_hier()
2143 cp->partition_root_state = new_prs; in update_cpumasks_hier()
2144 if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) in update_cpumasks_hier()
2151 if ((new_prs > 0) && cpumask_empty(cp->exclusive_cpus)) in update_cpumasks_hier()
2152 cpumask_and(cp->effective_xcpus, in update_cpumasks_hier()
2153 cp->cpus_allowed, parent->effective_xcpus); in update_cpumasks_hier()
2161 !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); in update_cpumasks_hier()
2163 cpuset_update_tasks_cpumask(cp, cp->effective_cpus); in update_cpumasks_hier()
2168 * and their load balance states differ. in update_cpumasks_hier()
2173 set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
2175 clear_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
2179 * On legacy hierarchy, if the effective cpumask of any non- in update_cpumasks_hier()
2184 if (!cpumask_empty(cp->cpus_allowed) && in update_cpumasks_hier()
2190 css_put(&cp->css); in update_cpumasks_hier()
2199 * update_sibling_cpumasks - Update siblings cpumasks
2213 * Check all its siblings and call update_cpumasks_hier() in update_sibling_cpumasks()
2229 compute_effective_cpumask(tmp->new_cpus, sibling, in update_sibling_cpumasks()
2231 if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus)) in update_sibling_cpumasks()
2241 if (!css_tryget_online(&sibling->css)) in update_sibling_cpumasks()
2247 css_put(&sibling->css); in update_sibling_cpumasks()
2253 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
2258 static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, in update_cpumask()
2261 int retval; in update_cpumask()
2266 int old_prs = cs->partition_root_state; in update_cpumask()
2268 /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ in update_cpumask()
2270 return -EACCES; in update_cpumask()
2279 cpumask_clear(trialcs->cpus_allowed); in update_cpumask()
2280 if (cpumask_empty(trialcs->exclusive_cpus)) in update_cpumask()
2281 cpumask_clear(trialcs->effective_xcpus); in update_cpumask()
2283 retval = cpulist_parse(buf, trialcs->cpus_allowed); in update_cpumask()
2287 if (!cpumask_subset(trialcs->cpus_allowed, in update_cpumask()
2289 return -EINVAL; in update_cpumask()
2292 * When exclusive_cpus isn't explicitly set, it is constrained in update_cpumask()
2293 * by cpus_allowed and parent's effective_xcpus. Otherwise, in update_cpumask()
2294 * trialcs->effective_xcpus is used as a temporary cpumask in update_cpumask()
2297 trialcs->partition_root_state = PRS_MEMBER; in update_cpumask()
2298 if (!cpumask_empty(trialcs->exclusive_cpus) || is_partition_valid(cs)) in update_cpumask()
2303 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) in update_cpumask()
2307 return -ENOMEM; in update_cpumask()
2311 cpumask_empty(trialcs->effective_xcpus)) { in update_cpumask()
2313 cs->prs_err = PERR_INVCPUS; in update_cpumask()
2314 } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { in update_cpumask()
2316 cs->prs_err = PERR_HKEEPING; in update_cpumask()
2317 } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { in update_cpumask()
2319 cs->prs_err = PERR_NOCPUS; in update_cpumask()
2327 force = !cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus); in update_cpumask()
2331 if ((retval == -EINVAL) && cpuset_v2()) { in update_cpumask()
2336 * The -EINVAL error code indicates that partition sibling in update_cpumask()
2348 cpumask_intersects(xcpus, cp->effective_xcpus)) { in update_cpumask()
2363 struct cpumask *xcpus = trialcs->effective_xcpus; in update_cpumask()
2366 xcpus = trialcs->cpus_allowed; in update_cpumask()
2382 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); in update_cpumask()
2383 cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); in update_cpumask()
2391 /* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */ in update_cpumask()
2392 if (cs->partition_root_state) in update_cpumask()
2400 * update_exclusive_cpumask - update the exclusive_cpus mask of a cpuset
2407 static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, in update_exclusive_cpumask()
2410 int retval; in update_exclusive_cpumask()
2415 int old_prs = cs->partition_root_state; in update_exclusive_cpumask()
2418 cpumask_clear(trialcs->exclusive_cpus); in update_exclusive_cpumask()
2419 cpumask_clear(trialcs->effective_xcpus); in update_exclusive_cpumask()
2421 retval = cpulist_parse(buf, trialcs->exclusive_cpus); in update_exclusive_cpumask()
2427 if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) in update_exclusive_cpumask()
2431 trialcs->partition_root_state = PRS_MEMBER; in update_exclusive_cpumask()
2437 return -EINVAL; in update_exclusive_cpumask()
2444 force = !cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus); in update_exclusive_cpumask()
2451 return -ENOMEM; in update_exclusive_cpumask()
2454 if (cpumask_empty(trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2456 cs->prs_err = PERR_INVCPUS; in update_exclusive_cpumask()
2457 } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2459 cs->prs_err = PERR_HKEEPING; in update_exclusive_cpumask()
2460 } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2462 cs->prs_err = PERR_NOCPUS; in update_exclusive_cpumask()
2469 remote_cpus_update(cs, trialcs->exclusive_cpus, in update_exclusive_cpumask()
2470 trialcs->effective_xcpus, &tmp); in update_exclusive_cpumask()
2476 trialcs->effective_xcpus, &tmp); in update_exclusive_cpumask()
2480 cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus); in update_exclusive_cpumask()
2481 cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); in update_exclusive_cpumask()
2494 /* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */ in update_exclusive_cpumask()
2495 if (cs->partition_root_state) in update_exclusive_cpumask()
2506 * performed in the queued order and can be waited for by flushing
2523 do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); in cpuset_migrate_mm_workfn()
2524 mmput(mwork->mm); in cpuset_migrate_mm_workfn()
2540 mwork->mm = mm; in cpuset_migrate_mm()
2541 mwork->from = *from; in cpuset_migrate_mm()
2542 mwork->to = *to; in cpuset_migrate_mm()
2543 INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); in cpuset_migrate_mm()
2544 queue_work(cpuset_migrate_mm_wq, &mwork->work); in cpuset_migrate_mm()
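
Because each migration is queued on cpuset_migrate_mm_wq, a caller that must observe completion of the queued migrations only needs to flush that workqueue, which is what the post-attach path does:

	/* Wait for all cpuset_migrate_mm() work queued so far to complete. */
	flush_workqueue(cpuset_migrate_mm_wq);
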
2556 * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
2560 * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
2561 * and rebind an eventual tasks' mempolicy. If the task is allocating in
2563 * a seqlock check and retry before OOM or allocation failure.
2571 write_seqcount_begin(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
2573 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); in cpuset_change_task_nodemask()
2575 tsk->mems_allowed = *newmems; in cpuset_change_task_nodemask()
2577 write_seqcount_end(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
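
The write side is shown above; the allocator-side check-and-retry mentioned in the comment pairs with it through the usual seqcount read loop. A minimal sketch of that reader (the kernel's read_mems_allowed_begin()/read_mems_allowed_retry() helpers wrap the same loop for current):

	static nodemask_t snapshot_mems_allowed(struct task_struct *tsk)
	{
		nodemask_t mems;
		unsigned int seq;

		do {
			seq = read_seqcount_begin(&tsk->mems_allowed_seq);
			mems = tsk->mems_allowed;
		} while (read_seqcount_retry(&tsk->mems_allowed_seq, seq));

		return mems;
	}
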
2586 * cpuset_update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
2605 * take while holding tasklist_lock. Forks can happen - the in cpuset_update_tasks_nodemask()
2607 * and rebind their vma mempolicies too. Because we still hold in cpuset_update_tasks_nodemask()
2609 * will be contending for the global variable cpuset_being_rebound. in cpuset_update_tasks_nodemask()
2613 css_task_iter_start(&cs->css, 0, &it); in cpuset_update_tasks_nodemask()
2626 mpol_rebind_mm(mm, &cs->mems_allowed); in cpuset_update_tasks_nodemask()
2628 cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); in cpuset_update_tasks_nodemask()
2636 * cs->old_mems_allowed. in cpuset_update_tasks_nodemask()
2638 cs->old_mems_allowed = newmems; in cpuset_update_tasks_nodemask()
2645 * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
2647 * @new_mems: a temp variable for calculating new effective_mems
2650 * and all its descendants need to be updated.
2665 nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); in update_nodemasks_hier()
2672 *new_mems = parent->effective_mems; in update_nodemasks_hier()
2675 if (nodes_equal(*new_mems, cp->effective_mems)) { in update_nodemasks_hier()
2680 if (!css_tryget_online(&cp->css)) in update_nodemasks_hier()
2685 cp->effective_mems = *new_mems; in update_nodemasks_hier()
2689 !nodes_equal(cp->mems_allowed, cp->effective_mems)); in update_nodemasks_hier()
2694 css_put(&cp->css); in update_nodemasks_hier()
2702 * cpusets mems_allowed, and for each task in the cpuset,
2703 * update mems_allowed and rebind task's mempolicy and any vma
2704 * mempolicies and if the cpuset is marked 'memory_migrate',
 2709 * lock each such task's mm->mmap_lock, scan its vmas and rebind in update_nodemask()
2712 static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, in update_nodemask()
2715 int retval; in update_nodemask()
2719 * it's read-only in update_nodemask()
2722 retval = -EACCES; in update_nodemask()
2733 nodes_clear(trialcs->mems_allowed); in update_nodemask()
2735 retval = nodelist_parse(buf, trialcs->mems_allowed); in update_nodemask()
2739 if (!nodes_subset(trialcs->mems_allowed, in update_nodemask()
2741 retval = -EINVAL; in update_nodemask()
2746 if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { in update_nodemask()
2747 retval = 0; /* Too easy - nothing to do */ in update_nodemask()
2754 check_insane_mems_config(&trialcs->mems_allowed); in update_nodemask()
2757 cs->mems_allowed = trialcs->mems_allowed; in update_nodemask()
2760 /* use trialcs->mems_allowed as a temp variable */ in update_nodemask()
2761 update_nodemasks_hier(cs, &trialcs->mems_allowed); in update_nodemask()
2778 * cpuset_update_flag - read a 0 or a 1 in a file and update associated flag
2786 int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs, in cpuset_update_flag()
2787 int turning_on) in cpuset_update_flag()
2790 int balance_flag_changed; in cpuset_update_flag()
2791 int spread_flag_changed; in cpuset_update_flag()
2792 int err; in cpuset_update_flag()
2796 return -ENOMEM; in cpuset_update_flag()
2799 set_bit(bit, &trialcs->flags); in cpuset_update_flag()
2801 clear_bit(bit, &trialcs->flags); in cpuset_update_flag()
2814 cs->flags = trialcs->flags; in cpuset_update_flag()
2817 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) { in cpuset_update_flag()
2832 * update_prstate - update partition_root_state
2839 static int update_prstate(struct cpuset *cs, int new_prs) in update_prstate()
2841 int err = PERR_NONE, old_prs = cs->partition_root_state; in update_prstate()
2856 return -ENOMEM; in update_prstate()
2864 * cpus_allowed and exclusive_cpus cannot be both empty. in update_prstate()
2879 cpumask_intersects(cs->exclusive_cpus, subpartitions_cpus)) { in update_prstate()
2924 new_prs = -new_prs; in update_prstate()
2929 cs->partition_root_state = new_prs; in update_prstate()
2930 WRITE_ONCE(cs->prs_err, err); in update_prstate()
2934 isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus); in update_prstate()
2943 && cpumask_empty(cs->effective_xcpus)); in update_prstate()
2945 /* Update sched domains and load balance flag */ in update_prstate()
2959 * For v1, cpus_allowed and mems_allowed can't be empty.
2963 static int cpuset_can_attach_check(struct cpuset *cs) in cpuset_can_attach_check()
2965 if (cpumask_empty(cs->effective_cpus) || in cpuset_can_attach_check()
2966 (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))) in cpuset_can_attach_check()
2967 return -ENOSPC; in cpuset_can_attach_check()
2973 cs->nr_migrate_dl_tasks = 0; in reset_migrate_dl_data()
2974 cs->sum_migrate_dl_bw = 0; in reset_migrate_dl_data()
2978 static int cpuset_can_attach(struct cgroup_taskset *tset) in cpuset_can_attach()
2984 int ret; in cpuset_can_attach()
2998 cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); in cpuset_can_attach()
2999 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_can_attach()
3018 cs->nr_migrate_dl_tasks++; in cpuset_can_attach()
3019 cs->sum_migrate_dl_bw += task->dl.dl_bw; in cpuset_can_attach()
3023 if (!cs->nr_migrate_dl_tasks) in cpuset_can_attach()
3026 if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { in cpuset_can_attach()
3027 int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); in cpuset_can_attach()
3031 ret = -EINVAL; in cpuset_can_attach()
3035 ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); in cpuset_can_attach()
3047 cs->attach_in_progress++; in cpuset_can_attach()
3064 if (cs->nr_migrate_dl_tasks) { in cpuset_cancel_attach()
3065 int cpu = cpumask_any(cs->effective_cpus); in cpuset_cancel_attach()
3067 dl_bw_free(cpu, cs->sum_migrate_dl_bw); in cpuset_cancel_attach()
3076 * but we can't allocate it dynamically there. Define it global and
3115 cpus_updated = !cpumask_equal(cs->effective_cpus, in cpuset_attach()
3116 oldcs->effective_cpus); in cpuset_attach()
3117 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_attach()
3122 * in effective cpus and mems. In that case, we can optimize out in cpuset_attach()
3123 * by skipping the task iteration and update. in cpuset_attach()
3126 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
3136 * Change mm for all threadgroup leaders. This is expensive and may in cpuset_attach()
3137 * sleep and should be moved outside migration path proper. Skip it in cpuset_attach()
3138 * if there is no change in effective_mems and CS_MEMORY_MIGRATE is in cpuset_attach()
3141 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
3155 * @mems_allowed has been updated and is empty, so in cpuset_attach()
3160 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, in cpuset_attach()
3168 cs->old_mems_allowed = cpuset_attach_nodemask_to; in cpuset_attach()
3170 if (cs->nr_migrate_dl_tasks) { in cpuset_attach()
3171 cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; in cpuset_attach()
3172 oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; in cpuset_attach()
3189 int retval = -ENODEV; in cpuset_write_resmask()
3199 retval = -ENOMEM; in cpuset_write_resmask()
3203 switch (of_cft(of)->private) { in cpuset_write_resmask()
3214 retval = -EINVAL; in cpuset_write_resmask()
3232 * used, list of ranges of sequential numbers, is variable length,
3233 * and since these maps can change value dynamically, one could read
3236 int cpuset_common_seq_show(struct seq_file *sf, void *v) in cpuset_common_seq_show()
3239 cpuset_filetype_t type = seq_cft(sf)->private; in cpuset_common_seq_show()
3240 int ret = 0; in cpuset_common_seq_show()
3246 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); in cpuset_common_seq_show()
3249 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); in cpuset_common_seq_show()
3252 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); in cpuset_common_seq_show()
3255 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); in cpuset_common_seq_show()
3258 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->exclusive_cpus)); in cpuset_common_seq_show()
3261 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_xcpus)); in cpuset_common_seq_show()
3270 ret = -EINVAL; in cpuset_common_seq_show()
3277 static int cpuset_partition_show(struct seq_file *seq, void *v) in cpuset_partition_show()
3282 switch (cs->partition_root_state) { in cpuset_partition_show()
3298 err = perr_strings[READ_ONCE(cs->prs_err)]; in cpuset_partition_show()
3312 int val; in cpuset_partition_write()
3313 int retval = -ENODEV; in cpuset_partition_write()
3324 return -EINVAL; in cpuset_partition_write()
3326 css_get(&cs->css); in cpuset_partition_write()
3333 css_put(&cs->css); in cpuset_partition_write()
3339 * expanded later on by migrating more features and control files from v1.
3416 * cpuset_css_alloc - Allocate a cpuset css
3419 * Return: cpuset css on success, -ENOMEM on failure.
3421 * Allocate and initialize a new cpuset css, for non-NULL @parent_css, return
3434 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
3438 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
3441 __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_alloc()
3442 fmeter_init(&cs->fmeter); in cpuset_css_alloc()
3443 cs->relax_domain_level = -1; in cpuset_css_alloc()
3444 INIT_LIST_HEAD(&cs->remote_sibling); in cpuset_css_alloc()
3448 __set_bit(CS_MEMORY_MIGRATE, &cs->flags); in cpuset_css_alloc()
3450 return &cs->css; in cpuset_css_alloc()
3453 static int cpuset_css_online(struct cgroup_subsys_state *css) in cpuset_css_online()
3466 set_bit(CS_ONLINE, &cs->flags); in cpuset_css_online()
3468 set_bit(CS_SPREAD_PAGE, &cs->flags); in cpuset_css_online()
3470 set_bit(CS_SPREAD_SLAB, &cs->flags); in cpuset_css_online()
3475 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_online()
3481 cpumask_copy(cs->effective_cpus, parent->effective_cpus); in cpuset_css_online()
3482 cs->effective_mems = parent->effective_mems; in cpuset_css_online()
3486 if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) in cpuset_css_online()
3492 * historical reasons - the flag may be specified during mount. in cpuset_css_online()
3495 * refuse to clone the configuration - thereby refusing the task to in cpuset_css_online()
3496 * be entered, and as a result refusing the sys_unshare() or in cpuset_css_online()
3499 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive in cpuset_css_online()
3500 * (and likewise for mems) to the new cgroup. in cpuset_css_online()
3512 cs->mems_allowed = parent->mems_allowed; in cpuset_css_online()
3513 cs->effective_mems = parent->mems_allowed; in cpuset_css_online()
3514 cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); in cpuset_css_online()
3515 cpumask_copy(cs->effective_cpus, parent->cpus_allowed); in cpuset_css_online()
3545 clear_bit(CS_ONLINE, &cs->flags); in cpuset_css_offline()
3597 static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) in cpuset_can_fork()
3599 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_can_fork()
3601 int ret; in cpuset_can_fork()
3630 cs->attach_in_progress++; in cpuset_can_fork()
3638 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_cancel_fork()
3654 * state from the parent and before it sits on the cgroup's task list.
3670 set_cpus_allowed_ptr(task, current->cpus_ptr); in cpuset_fork()
3671 task->mems_allowed = current->mems_allowed; in cpuset_fork()
3707 * cpuset_init - initialize cpusets at system boot
3712 int __init cpuset_init(void) in cpuset_init()
3750 cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); in hotplug_update_tasks()
3752 *new_mems = parent_cs(cs)->effective_mems; in hotplug_update_tasks()
3755 cpumask_copy(cs->effective_cpus, new_cpus); in hotplug_update_tasks()
3756 cs->effective_mems = *new_mems; in hotplug_update_tasks()
3771 * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
3775 * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
3786 int partcmd = -1; in cpuset_hotplug_update_tasks()
3789 wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); in cpuset_hotplug_update_tasks()
3797 if (cs->attach_in_progress) { in cpuset_hotplug_update_tasks()
3804 nodes_and(new_mems, cs->mems_allowed, parent->effective_mems); in cpuset_hotplug_update_tasks()
3806 if (!tmp || !cs->partition_root_state) in cpuset_hotplug_update_tasks()
3819 cs->prs_err = PERR_HOTPLUG; in cpuset_hotplug_update_tasks()
3851 cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); in cpuset_hotplug_update_tasks()
3852 mems_updated = !nodes_equal(new_mems, cs->effective_mems); in cpuset_hotplug_update_tasks()
3871 * cpuset_handle_hotplug - handle CPU/memory hot{,un}plug for a cpuset
3874 * changed and updates cpuset accordingly. The top_cpuset is always
3875 * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
3879 * Non-root cpusets are only affected by offlining. If any CPUs or memory
3902 /* fetch the available cpus/mems and find out which changed how */ in cpuset_handle_hotplug()
3960 if (cs == &top_cpuset || !css_tryget_online(&cs->css)) in cpuset_handle_hotplug()
3967 css_put(&cs->css); in cpuset_handle_hotplug()
3994 static int cpuset_track_online_nodes(struct notifier_block *self, in cpuset_track_online_nodes()
4002 * cpuset_init_smp - initialize cpus_allowed
 4009 * cpus_allowed/mems_allowed set to v2 values in the initial in cpuset_init_smp()
4010 * cpuset_bind() call will be reset to v1 values in another in cpuset_init_smp()
4025 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
4026 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
4027 * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
4030 * attached to the specified @tsk. Guaranteed to return some non-empty
4068 * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
4072 * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
4073 * mode however, this value is the same as task_cs(tsk)->effective_cpus,
4075 * This is the absolute last resort for the scheduler and it is only used if
4088 cs_mask = task_cs(tsk)->cpus_allowed; in cpuset_cpus_allowed_fallback()
4096 * We own tsk->cpus_allowed, nobody can change it under us. in cpuset_cpus_allowed_fallback()
4098 * But we used cs && cs->cpus_allowed lockless and thus can in cpuset_cpus_allowed_fallback()
4099 * race with cgroup_attach_task() or update_cpumask() and get in cpuset_cpus_allowed_fallback()
4100 * the wrong tsk->cpus_allowed. However, both cases imply the in cpuset_cpus_allowed_fallback()
4101 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() in cpuset_cpus_allowed_fallback()
4105 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary in cpuset_cpus_allowed_fallback()
4109 * select_fallback_rq() will fix things ups and set cpu_possible_mask in cpuset_cpus_allowed_fallback()
4117 nodes_setall(current->mems_allowed); in cpuset_init_current_mems_allowed()
4121 * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset.
4122 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
4125 * attached to the specified @tsk. Guaranteed to return some non-empty
4145 * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
4148 * Are any of the nodes in the nodemask allowed in current->mems_allowed?
4150 int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) in cpuset_nodemask_valid_mems_allowed()
4152 return nodes_intersects(*nodemask, current->mems_allowed); in cpuset_nodemask_valid_mems_allowed()
4156 * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
4169 * cpuset_node_allowed - Can we allocate on a memory node?
4174 * current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this
 4180 * and do not allow allocations outside the current task's cpuset
4187 * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
4199 * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
4200 * in alloc_flags. That logic and the checks below have the combined
4202 * in_interrupt - any node ok (current task context irrelevant)
4203 * GFP_ATOMIC - any node ok
4204 * tsk_is_oom_victim - any node ok
4205 * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
 4206 * GFP_USER - only nodes in the current task's mems_allowed ok.
4208 bool cpuset_node_allowed(int node, gfp_t gfp_mask) in cpuset_node_allowed()
4216 if (node_isset(node, current->mems_allowed)) in cpuset_node_allowed()
4227 if (current->flags & PF_EXITING) /* Let dying task have memory */ in cpuset_node_allowed()
4230 /* Not hardwall and node outside mems_allowed: scan up cpusets */ in cpuset_node_allowed()
4235 allowed = node_isset(node, cs->mems_allowed); in cpuset_node_allowed()
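
A hypothetical caller of the policy above, shown only to make the decision order concrete (this is not the page allocator's actual loop):

	static int first_allowed_node(gfp_t gfp_mask)
	{
		int nid;

		/* Skip memory nodes the current task's cpuset forbids for this gfp_mask. */
		for_each_node_state(nid, N_MEMORY) {
			if (cpuset_node_allowed(nid, gfp_mask))
				return nid;	/* allocate from this node */
		}
		return NUMA_NO_NODE;
	}
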
4243 * cpuset_spread_node() - On which node to begin search for a page
4248 * and if the memory allocation used cpuset_mem_spread_node()
4251 * system buffers and inode caches, then instead of starting on the
4256 * because "it can't happen", and even if it did, it would be ok.
4259 * only set nodes in task->mems_allowed that are online. So it
4268 static int cpuset_spread_node(int *rotor) in cpuset_spread_node()
4270 return *rotor = next_node_in(*rotor, current->mems_allowed); in cpuset_spread_node()
4274 * cpuset_mem_spread_node() - On which node to begin search for a file page
4276 int cpuset_mem_spread_node(void) in cpuset_mem_spread_node()
4278 if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) in cpuset_mem_spread_node()
4279 current->cpuset_mem_spread_rotor = in cpuset_mem_spread_node()
4280 node_random(&current->mems_allowed); in cpuset_mem_spread_node()
4282 return cpuset_spread_node(&current->cpuset_mem_spread_rotor); in cpuset_mem_spread_node()
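
A hypothetical call site for the rotor, illustrating how the returned node id would be used (not taken from an in-tree user):

	static struct page *alloc_spread_page(gfp_t gfp_mask)
	{
		/* Pick the next node in current->mems_allowed, allocate there. */
		int nid = cpuset_mem_spread_node();

		return alloc_pages_node(nid, gfp_mask, 0);
	}
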
4286 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
4296 int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, in cpuset_mems_allowed_intersects()
4299 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); in cpuset_mems_allowed_intersects()
4303 * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
4305 * Description: Prints current's name, cpuset name, and cached copy of its
4314 cgrp = task_cs(current)->css.cgroup; in cpuset_print_current_mems_allowed()
4318 nodemask_pr_args(&current->mems_allowed)); in cpuset_print_current_mems_allowed()
4327 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()
4329 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()