xref: /linux/kernel/time/namespace_vdso.c (revision 334fbe734e687404f346eba7d5d96ed2b44d35ab)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Author: Andrei Vagin <avagin@openvz.org>
4  * Author: Dmitry Safonov <dima@arista.com>
5  */
6 
7 #include <linux/cleanup.h>
8 #include <linux/mm.h>
9 #include <linux/time_namespace.h>
10 #include <linux/time.h>
11 #include <linux/vdso_datastore.h>
12 
13 #include <vdso/clocksource.h>
14 #include <vdso/datapage.h>
15 
16 #include "namespace_internal.h"
17 
offset_from_ts(struct timespec64 off)18 static struct timens_offset offset_from_ts(struct timespec64 off)
19 {
20 	struct timens_offset ret;
21 
22 	ret.sec = off.tv_sec;
23 	ret.nsec = off.tv_nsec;
24 
25 	return ret;
26 }
27 
28 /*
29  * A time namespace VVAR page has the same layout as the VVAR page which
30  * contains the system wide VDSO data.
31  *
32  * For a normal task the VVAR pages are installed in the normal ordering:
33  *     VVAR
34  *     PVCLOCK
35  *     HVCLOCK
36  *     TIMENS   <- Not really required
37  *
38  * Now for a timens task the pages are installed in the following order:
39  *     TIMENS
40  *     PVCLOCK
41  *     HVCLOCK
42  *     VVAR
43  *
44  * The check for vdso_clock->clock_mode is in the unlikely path of
45  * the seq begin magic. So for the non-timens case most of the time
46  * 'seq' is even, so the branch is not taken.
47  *
48  * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
49  * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
50  * update to finish and for 'seq' to become even anyway.
51  *
52  * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
53  * enforces the time namespace handling path.
54  */
timens_setup_vdso_clock_data(struct vdso_clock * vc,struct time_namespace * ns)55 static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
56 					 struct time_namespace *ns)
57 {
58 	struct timens_offset *offset = vc->offset;
59 	struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
60 	struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
61 
62 	vc->seq				= 1;
63 	vc->clock_mode			= VDSO_CLOCKMODE_TIMENS;
64 	offset[CLOCK_MONOTONIC]		= monotonic;
65 	offset[CLOCK_MONOTONIC_RAW]	= monotonic;
66 	offset[CLOCK_MONOTONIC_COARSE]	= monotonic;
67 	offset[CLOCK_BOOTTIME]		= boottime;
68 	offset[CLOCK_BOOTTIME_ALARM]	= boottime;
69 }
70 
find_timens_vvar_page(struct vm_area_struct * vma)71 struct page *find_timens_vvar_page(struct vm_area_struct *vma)
72 {
73 	if (likely(vma->vm_mm == current->mm))
74 		return current->nsproxy->time_ns->vvar_page;
75 
76 	/*
77 	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
78 	 * through interfaces like /proc/$pid/mem or
79 	 * process_vm_{readv,writev}() as long as there's no .access()
80 	 * in special_mapping_vmops().
81 	 * For more details check_vma_flags() and __access_remote_vm()
82 	 */
83 
84 	WARN(1, "vvar_page accessed remotely");
85 
86 	return NULL;
87 }
88 
timens_set_vvar_page(struct task_struct * task,struct time_namespace * ns)89 static void timens_set_vvar_page(struct task_struct *task,
90 				struct time_namespace *ns)
91 {
92 	struct vdso_time_data *vdata;
93 	struct vdso_clock *vc;
94 	unsigned int i;
95 
96 	if (ns == &init_time_ns)
97 		return;
98 
99 	/* Fast-path, taken by every task in namespace except the first. */
100 	if (likely(ns->frozen_offsets))
101 		return;
102 
103 	guard(mutex)(&timens_offset_lock);
104 	/* Nothing to-do: vvar_page has been already initialized. */
105 	if (ns->frozen_offsets)
106 		return;
107 
108 	ns->frozen_offsets = true;
109 	vdata = page_address(ns->vvar_page);
110 	vc = vdata->clock_data;
111 
112 	for (i = 0; i < CS_BASES; i++)
113 		timens_setup_vdso_clock_data(&vc[i], ns);
114 
115 	if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
116 		for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
117 			timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
118 	}
119 }
120 
121 /*
122  * The vvar page layout depends on whether a task belongs to the root or
123  * non-root time namespace. Whenever a task changes its namespace, the VVAR
124  * page tables are cleared and then they will be re-faulted with a
125  * corresponding layout.
126  * See also the comment near timens_setup_vdso_clock_data() for details.
127  */
vdso_join_timens(struct task_struct * task,struct time_namespace * ns)128 static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
129 {
130 	struct mm_struct *mm = task->mm;
131 	struct vm_area_struct *vma;
132 	VMA_ITERATOR(vmi, mm, 0);
133 
134 	guard(mmap_read_lock)(mm);
135 	for_each_vma(vmi, vma) {
136 		if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
137 			zap_vma(vma);
138 	}
139 	return 0;
140 }
141 
/*
 * Commit @tsk to time namespace @ns.
 *
 * First makes sure the namespace VVAR page is populated, then zaps the
 * task's VVAR mappings. The return value of vdso_join_timens() is not
 * checked here.
 */
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
	/* Populate the namespace page before the mappings are zapped. */
	timens_set_vvar_page(tsk, ns);

	vdso_join_timens(tsk, ns);
}
147 
timens_vdso_alloc_vvar_page(struct time_namespace * ns)148 int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
149 {
150 	ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
151 	if (!ns->vvar_page)
152 		return -ENOMEM;
153 
154 	return 0;
155 }
156 
timens_vdso_free_vvar_page(struct time_namespace * ns)157 void timens_vdso_free_vvar_page(struct time_namespace *ns)
158 {
159 	__free_page(ns->vvar_page);
160 }
161