1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Author: Andrei Vagin <avagin@openvz.org>
4 * Author: Dmitry Safonov <dima@arista.com>
5 */
6
7 #include <linux/cleanup.h>
8 #include <linux/mm.h>
9 #include <linux/time_namespace.h>
10 #include <linux/time.h>
11 #include <linux/vdso_datastore.h>
12
13 #include <vdso/clocksource.h>
14 #include <vdso/datapage.h>
15
16 #include "namespace_internal.h"
17
offset_from_ts(struct timespec64 off)18 static struct timens_offset offset_from_ts(struct timespec64 off)
19 {
20 struct timens_offset ret;
21
22 ret.sec = off.tv_sec;
23 ret.nsec = off.tv_nsec;
24
25 return ret;
26 }
27
28 /*
29 * A time namespace VVAR page has the same layout as the VVAR page which
30 * contains the system wide VDSO data.
31 *
32 * For a normal task the VVAR pages are installed in the normal ordering:
33 * VVAR
34 * PVCLOCK
35 * HVCLOCK
36 * TIMENS <- Not really required
37 *
38 * Now for a timens task the pages are installed in the following order:
39 * TIMENS
40 * PVCLOCK
41 * HVCLOCK
42 * VVAR
43 *
44 * The check for vdso_clock->clock_mode is in the unlikely path of
45 * the seq begin magic. So for the non-timens case most of the time
46 * 'seq' is even, so the branch is not taken.
47 *
48 * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
49 * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
50 * update to finish and for 'seq' to become even anyway.
51 *
52 * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
53 * enforces the time namespace handling path.
54 */
timens_setup_vdso_clock_data(struct vdso_clock * vc,struct time_namespace * ns)55 static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
56 struct time_namespace *ns)
57 {
58 struct timens_offset *offset = vc->offset;
59 struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
60 struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
61
62 vc->seq = 1;
63 vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
64 offset[CLOCK_MONOTONIC] = monotonic;
65 offset[CLOCK_MONOTONIC_RAW] = monotonic;
66 offset[CLOCK_MONOTONIC_COARSE] = monotonic;
67 offset[CLOCK_BOOTTIME] = boottime;
68 offset[CLOCK_BOOTTIME_ALARM] = boottime;
69 }
70
find_timens_vvar_page(struct vm_area_struct * vma)71 struct page *find_timens_vvar_page(struct vm_area_struct *vma)
72 {
73 if (likely(vma->vm_mm == current->mm))
74 return current->nsproxy->time_ns->vvar_page;
75
76 /*
77 * VM_PFNMAP | VM_IO protect .fault() handler from being called
78 * through interfaces like /proc/$pid/mem or
79 * process_vm_{readv,writev}() as long as there's no .access()
80 * in special_mapping_vmops().
81 * For more details check_vma_flags() and __access_remote_vm()
82 */
83
84 WARN(1, "vvar_page accessed remotely");
85
86 return NULL;
87 }
88
timens_set_vvar_page(struct task_struct * task,struct time_namespace * ns)89 static void timens_set_vvar_page(struct task_struct *task,
90 struct time_namespace *ns)
91 {
92 struct vdso_time_data *vdata;
93 struct vdso_clock *vc;
94 unsigned int i;
95
96 if (ns == &init_time_ns)
97 return;
98
99 /* Fast-path, taken by every task in namespace except the first. */
100 if (likely(ns->frozen_offsets))
101 return;
102
103 guard(mutex)(&timens_offset_lock);
104 /* Nothing to-do: vvar_page has been already initialized. */
105 if (ns->frozen_offsets)
106 return;
107
108 ns->frozen_offsets = true;
109 vdata = page_address(ns->vvar_page);
110 vc = vdata->clock_data;
111
112 for (i = 0; i < CS_BASES; i++)
113 timens_setup_vdso_clock_data(&vc[i], ns);
114
115 if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
116 for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
117 timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
118 }
119 }
120
121 /*
122 * The vvar page layout depends on whether a task belongs to the root or
123 * non-root time namespace. Whenever a task changes its namespace, the VVAR
124 * page tables are cleared and then they will be re-faulted with a
125 * corresponding layout.
126 * See also the comment near timens_setup_vdso_clock_data() for details.
127 */
vdso_join_timens(struct task_struct * task,struct time_namespace * ns)128 static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
129 {
130 struct mm_struct *mm = task->mm;
131 struct vm_area_struct *vma;
132 VMA_ITERATOR(vmi, mm, 0);
133
134 guard(mmap_read_lock)(mm);
135 for_each_vma(vmi, vma) {
136 if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
137 zap_vma(vma);
138 }
139 return 0;
140 }
141
/*
 * Apply time namespace @ns to @tsk on the VDSO side: first make sure the
 * namespace's VVAR page is set up, then zap the task's VVAR mappings.
 */
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
	/* The page contents must be in place before the mappings are zapped. */
	timens_set_vvar_page(tsk, ns);

	/* The return value is always 0, so it is not checked. */
	vdso_join_timens(tsk, ns);
}
147
timens_vdso_alloc_vvar_page(struct time_namespace * ns)148 int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
149 {
150 ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
151 if (!ns->vvar_page)
152 return -ENOMEM;
153
154 return 0;
155 }
156
timens_vdso_free_vvar_page(struct time_namespace * ns)157 void timens_vdso_free_vvar_page(struct time_namespace *ns)
158 {
159 __free_page(ns->vvar_page);
160 }
161