15dc9cf83SThomas Weißschuh // SPDX-License-Identifier: GPL-2.0
25dc9cf83SThomas Weißschuh /*
35dc9cf83SThomas Weißschuh * Author: Andrei Vagin <avagin@openvz.org>
45dc9cf83SThomas Weißschuh * Author: Dmitry Safonov <dima@arista.com>
55dc9cf83SThomas Weißschuh */
65dc9cf83SThomas Weißschuh
75dc9cf83SThomas Weißschuh #include <linux/cleanup.h>
85dc9cf83SThomas Weißschuh #include <linux/mm.h>
95dc9cf83SThomas Weißschuh #include <linux/time_namespace.h>
105dc9cf83SThomas Weißschuh #include <linux/time.h>
115dc9cf83SThomas Weißschuh #include <linux/vdso_datastore.h>
125dc9cf83SThomas Weißschuh
135dc9cf83SThomas Weißschuh #include <vdso/clocksource.h>
145dc9cf83SThomas Weißschuh #include <vdso/datapage.h>
155dc9cf83SThomas Weißschuh
165dc9cf83SThomas Weißschuh #include "namespace_internal.h"
175dc9cf83SThomas Weißschuh
offset_from_ts(struct timespec64 off)185dc9cf83SThomas Weißschuh static struct timens_offset offset_from_ts(struct timespec64 off)
195dc9cf83SThomas Weißschuh {
205dc9cf83SThomas Weißschuh struct timens_offset ret;
215dc9cf83SThomas Weißschuh
225dc9cf83SThomas Weißschuh ret.sec = off.tv_sec;
235dc9cf83SThomas Weißschuh ret.nsec = off.tv_nsec;
245dc9cf83SThomas Weißschuh
255dc9cf83SThomas Weißschuh return ret;
265dc9cf83SThomas Weißschuh }
275dc9cf83SThomas Weißschuh
285dc9cf83SThomas Weißschuh /*
295dc9cf83SThomas Weißschuh * A time namespace VVAR page has the same layout as the VVAR page which
305dc9cf83SThomas Weißschuh * contains the system wide VDSO data.
315dc9cf83SThomas Weißschuh *
325dc9cf83SThomas Weißschuh * For a normal task the VVAR pages are installed in the normal ordering:
335dc9cf83SThomas Weißschuh * VVAR
345dc9cf83SThomas Weißschuh * PVCLOCK
355dc9cf83SThomas Weißschuh * HVCLOCK
365dc9cf83SThomas Weißschuh * TIMENS <- Not really required
375dc9cf83SThomas Weißschuh *
385dc9cf83SThomas Weißschuh * Now for a timens task the pages are installed in the following order:
395dc9cf83SThomas Weißschuh * TIMENS
405dc9cf83SThomas Weißschuh * PVCLOCK
415dc9cf83SThomas Weißschuh * HVCLOCK
425dc9cf83SThomas Weißschuh * VVAR
435dc9cf83SThomas Weißschuh *
445dc9cf83SThomas Weißschuh * The check for vdso_clock->clock_mode is in the unlikely path of
455dc9cf83SThomas Weißschuh * the seq begin magic. So for the non-timens case most of the time
465dc9cf83SThomas Weißschuh * 'seq' is even, so the branch is not taken.
475dc9cf83SThomas Weißschuh *
485dc9cf83SThomas Weißschuh * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
495dc9cf83SThomas Weißschuh * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
505dc9cf83SThomas Weißschuh * update to finish and for 'seq' to become even anyway.
515dc9cf83SThomas Weißschuh *
525dc9cf83SThomas Weißschuh * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
535dc9cf83SThomas Weißschuh * enforces the time namespace handling path.
545dc9cf83SThomas Weißschuh */
timens_setup_vdso_clock_data(struct vdso_clock * vc,struct time_namespace * ns)555dc9cf83SThomas Weißschuh static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
565dc9cf83SThomas Weißschuh struct time_namespace *ns)
575dc9cf83SThomas Weißschuh {
585dc9cf83SThomas Weißschuh struct timens_offset *offset = vc->offset;
595dc9cf83SThomas Weißschuh struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
605dc9cf83SThomas Weißschuh struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
615dc9cf83SThomas Weißschuh
625dc9cf83SThomas Weißschuh vc->seq = 1;
635dc9cf83SThomas Weißschuh vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
645dc9cf83SThomas Weißschuh offset[CLOCK_MONOTONIC] = monotonic;
655dc9cf83SThomas Weißschuh offset[CLOCK_MONOTONIC_RAW] = monotonic;
665dc9cf83SThomas Weißschuh offset[CLOCK_MONOTONIC_COARSE] = monotonic;
675dc9cf83SThomas Weißschuh offset[CLOCK_BOOTTIME] = boottime;
685dc9cf83SThomas Weißschuh offset[CLOCK_BOOTTIME_ALARM] = boottime;
695dc9cf83SThomas Weißschuh }
705dc9cf83SThomas Weißschuh
find_timens_vvar_page(struct vm_area_struct * vma)715dc9cf83SThomas Weißschuh struct page *find_timens_vvar_page(struct vm_area_struct *vma)
725dc9cf83SThomas Weißschuh {
735dc9cf83SThomas Weißschuh if (likely(vma->vm_mm == current->mm))
745dc9cf83SThomas Weißschuh return current->nsproxy->time_ns->vvar_page;
755dc9cf83SThomas Weißschuh
765dc9cf83SThomas Weißschuh /*
775dc9cf83SThomas Weißschuh * VM_PFNMAP | VM_IO protect .fault() handler from being called
785dc9cf83SThomas Weißschuh * through interfaces like /proc/$pid/mem or
795dc9cf83SThomas Weißschuh * process_vm_{readv,writev}() as long as there's no .access()
805dc9cf83SThomas Weißschuh * in special_mapping_vmops().
815dc9cf83SThomas Weißschuh * For more details check_vma_flags() and __access_remote_vm()
825dc9cf83SThomas Weißschuh */
835dc9cf83SThomas Weißschuh
845dc9cf83SThomas Weißschuh WARN(1, "vvar_page accessed remotely");
855dc9cf83SThomas Weißschuh
865dc9cf83SThomas Weißschuh return NULL;
875dc9cf83SThomas Weißschuh }
885dc9cf83SThomas Weißschuh
timens_set_vvar_page(struct task_struct * task,struct time_namespace * ns)895dc9cf83SThomas Weißschuh static void timens_set_vvar_page(struct task_struct *task,
905dc9cf83SThomas Weißschuh struct time_namespace *ns)
915dc9cf83SThomas Weißschuh {
925dc9cf83SThomas Weißschuh struct vdso_time_data *vdata;
935dc9cf83SThomas Weißschuh struct vdso_clock *vc;
945dc9cf83SThomas Weißschuh unsigned int i;
955dc9cf83SThomas Weißschuh
965dc9cf83SThomas Weißschuh if (ns == &init_time_ns)
975dc9cf83SThomas Weißschuh return;
985dc9cf83SThomas Weißschuh
995dc9cf83SThomas Weißschuh /* Fast-path, taken by every task in namespace except the first. */
1005dc9cf83SThomas Weißschuh if (likely(ns->frozen_offsets))
1015dc9cf83SThomas Weißschuh return;
1025dc9cf83SThomas Weißschuh
1035dc9cf83SThomas Weißschuh guard(mutex)(&timens_offset_lock);
1045dc9cf83SThomas Weißschuh /* Nothing to-do: vvar_page has been already initialized. */
1055dc9cf83SThomas Weißschuh if (ns->frozen_offsets)
1065dc9cf83SThomas Weißschuh return;
1075dc9cf83SThomas Weißschuh
1085dc9cf83SThomas Weißschuh ns->frozen_offsets = true;
1095dc9cf83SThomas Weißschuh vdata = page_address(ns->vvar_page);
1105dc9cf83SThomas Weißschuh vc = vdata->clock_data;
1115dc9cf83SThomas Weißschuh
1125dc9cf83SThomas Weißschuh for (i = 0; i < CS_BASES; i++)
1135dc9cf83SThomas Weißschuh timens_setup_vdso_clock_data(&vc[i], ns);
1145dc9cf83SThomas Weißschuh
1155dc9cf83SThomas Weißschuh if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
1165dc9cf83SThomas Weißschuh for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
1175dc9cf83SThomas Weißschuh timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
1185dc9cf83SThomas Weißschuh }
1195dc9cf83SThomas Weißschuh }
1205dc9cf83SThomas Weißschuh
1215dc9cf83SThomas Weißschuh /*
1225dc9cf83SThomas Weißschuh * The vvar page layout depends on whether a task belongs to the root or
1235dc9cf83SThomas Weißschuh * non-root time namespace. Whenever a task changes its namespace, the VVAR
1245dc9cf83SThomas Weißschuh * page tables are cleared and then they will be re-faulted with a
1255dc9cf83SThomas Weißschuh * corresponding layout.
1265dc9cf83SThomas Weißschuh * See also the comment near timens_setup_vdso_clock_data() for details.
1275dc9cf83SThomas Weißschuh */
vdso_join_timens(struct task_struct * task,struct time_namespace * ns)1285dc9cf83SThomas Weißschuh static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
1295dc9cf83SThomas Weißschuh {
1305dc9cf83SThomas Weißschuh struct mm_struct *mm = task->mm;
1315dc9cf83SThomas Weißschuh struct vm_area_struct *vma;
1325dc9cf83SThomas Weißschuh VMA_ITERATOR(vmi, mm, 0);
1335dc9cf83SThomas Weißschuh
1345dc9cf83SThomas Weißschuh guard(mmap_read_lock)(mm);
1355dc9cf83SThomas Weißschuh for_each_vma(vmi, vma) {
1365dc9cf83SThomas Weißschuh if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
137*334fbe73SLinus Torvalds zap_vma(vma);
1385dc9cf83SThomas Weißschuh }
1395dc9cf83SThomas Weißschuh return 0;
1405dc9cf83SThomas Weißschuh }
1415dc9cf83SThomas Weißschuh
/*
 * Apply time namespace @ns to the vDSO data of @tsk: first make sure the
 * namespace's vvar page is set up, then zap the task's vvar mappings.
 * The return value of vdso_join_timens() is not checked.
 */
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
	/* Set up the page before the mappings pointing at it are zapped. */
	timens_set_vvar_page(tsk, ns);

	vdso_join_timens(tsk, ns);
}
1471b6c8928SThomas Weißschuh
timens_vdso_alloc_vvar_page(struct time_namespace * ns)1481b6c8928SThomas Weißschuh int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
1491b6c8928SThomas Weißschuh {
1501b6c8928SThomas Weißschuh ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1511b6c8928SThomas Weißschuh if (!ns->vvar_page)
1521b6c8928SThomas Weißschuh return -ENOMEM;
1531b6c8928SThomas Weißschuh
1541b6c8928SThomas Weißschuh return 0;
1551b6c8928SThomas Weißschuh }
1561b6c8928SThomas Weißschuh
timens_vdso_free_vvar_page(struct time_namespace * ns)1571b6c8928SThomas Weißschuh void timens_vdso_free_vvar_page(struct time_namespace *ns)
1581b6c8928SThomas Weißschuh {
1591b6c8928SThomas Weißschuh __free_page(ns->vvar_page);
1601b6c8928SThomas Weißschuh }
161