1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Generic userspace implementations of gettimeofday() and similar.
4  */
5 #include <vdso/datapage.h>
6 #include <vdso/helpers.h>
7 
8 /* Bring in default accessors */
9 #include <vdso/vsyscall.h>
10 
11 #ifndef vdso_calc_ns
12 
/*
 * Mask applied to the cycle delta in vdso_calc_ns(). Architectures that
 * define VDSO_DELTA_NOMASK get an all-ones mask, everybody else uses the
 * mask stored in the clock data. The argument is parenthesized so that
 * any pointer expression can be passed safely.
 */
#ifdef VDSO_DELTA_NOMASK
# define VDSO_DELTA_MASK(vd)	ULLONG_MAX
#else
# define VDSO_DELTA_MASK(vd)	((vd)->mask)
#endif
18 
19 #ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
vdso_delta_ok(const struct vdso_clock * vc,u64 delta)20 static __always_inline bool vdso_delta_ok(const struct vdso_clock *vc, u64 delta)
21 {
22 	return delta < vc->max_cycles;
23 }
24 #else
vdso_delta_ok(const struct vdso_clock * vc,u64 delta)25 static __always_inline bool vdso_delta_ok(const struct vdso_clock *vc, u64 delta)
26 {
27 	return true;
28 }
29 #endif
30 
31 #ifndef vdso_shift_ns
vdso_shift_ns(u64 ns,u32 shift)32 static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift)
33 {
34 	return ns >> shift;
35 }
36 #endif
37 
38 /*
39  * Default implementation which works for all sane clocksources. That
40  * obviously excludes x86/TSC.
41  */
vdso_calc_ns(const struct vdso_clock * vc,u64 cycles,u64 base)42 static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base)
43 {
44 	u64 delta = (cycles - vc->cycle_last) & VDSO_DELTA_MASK(vc);
45 
46 	if (likely(vdso_delta_ok(vc, delta)))
47 		return vdso_shift_ns((delta * vc->mult) + base, vc->shift);
48 
49 	return mul_u64_u32_add_u64_shr(delta, vc->mult, base, vc->shift);
50 }
51 #endif /* vdso_calc_ns */
52 
53 #ifndef __arch_vdso_hres_capable
/*
 * Default answer when nothing else has defined
 * __arch_vdso_hres_capable: the high resolution path in do_hres() stays
 * enabled.
 */
static inline bool __arch_vdso_hres_capable(void)
{
	return true;
}
58 #endif
59 
60 #ifndef vdso_clocksource_ok
vdso_clocksource_ok(const struct vdso_clock * vc)61 static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
62 {
63 	return vc->clock_mode != VDSO_CLOCKMODE_NONE;
64 }
65 #endif
66 
67 #ifndef vdso_cycles_ok
vdso_cycles_ok(u64 cycles)68 static inline bool vdso_cycles_ok(u64 cycles)
69 {
70 	return true;
71 }
72 #endif
73 
74 #ifdef CONFIG_TIME_NS
75 
76 #ifdef CONFIG_GENERIC_VDSO_DATA_STORE
77 static __always_inline
__arch_get_vdso_u_timens_data(const struct vdso_time_data * vd)78 const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd)
79 {
80 	return (void *)vd + PAGE_SIZE;
81 }
82 #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
83 
/*
 * High resolution read for a task that do_hres() found to be using
 * VDSO_CLOCKMODE_TIMENS.
 *
 * @vdns: time data as seen by the task; only used to locate the data
 *        returned by __arch_get_vdso_u_timens_data()
 * @vcns: clock from the task's view; supplies the per-clock offset
 * @clk:  clock id, already validated by the caller
 * @ts:   result
 *
 * Returns 0 on success, -1 if the clocksource or the counter value is
 * not usable, in which case the callers fall back to the syscall.
 */
static __always_inline
int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
		   clockid_t clk, struct __kernel_timespec *ts)
{
	const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns);
	const struct timens_offset *offs = &vcns->offset[clk];
	const struct vdso_clock *vc = vd->clock_data;
	const struct vdso_timestamp *vdso_ts;
	u64 cycles, ns;
	u32 seq;
	s64 sec;

	/* CLOCK_MONOTONIC_RAW has its own clock entry, all others share one */
	if (clk != CLOCK_MONOTONIC_RAW)
		vc = &vc[CS_HRES_COARSE];
	else
		vc = &vc[CS_RAW];
	vdso_ts = &vc->basetime[clk];

	/* Retry until a consistent snapshot of the clock data was read */
	do {
		seq = vdso_read_begin(vc);

		if (unlikely(!vdso_clocksource_ok(vc)))
			return -1;

		cycles = __arch_get_hw_counter(vc->clock_mode, vd);
		if (unlikely(!vdso_cycles_ok(cycles)))
			return -1;
		ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec);
		sec = vdso_ts->sec;
	} while (unlikely(vdso_read_retry(vc, seq)));

	/* Add the namespace offset */
	sec += offs->sec;
	ns += offs->nsec;

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return 0;
}
128 #else
129 static __always_inline
__arch_get_vdso_u_timens_data(const struct vdso_time_data * vd)130 const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd)
131 {
132 	return NULL;
133 }
134 
135 static __always_inline
do_hres_timens(const struct vdso_time_data * vdns,const struct vdso_clock * vcns,clockid_t clk,struct __kernel_timespec * ts)136 int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
137 		   clockid_t clk, struct __kernel_timespec *ts)
138 {
139 	return -EINVAL;
140 }
141 #endif
142 
/*
 * Read a high resolution clock.
 *
 * @vd:  time data, handed to __arch_get_hw_counter() and to the time
 *       namespace path
 * @vc:  clock to read
 * @clk: clock id used to index vc->basetime; validated by the caller
 * @ts:  result
 *
 * Returns 0 on success and -1 when the high resolution parts are
 * compiled out, the clocksource is not usable or the counter value is
 * rejected. For time namespace tasks the result of do_hres_timens() is
 * returned.
 */
static __always_inline
int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
	    clockid_t clk, struct __kernel_timespec *ts)
{
	const struct vdso_timestamp *vdso_ts = &vc->basetime[clk];
	u64 cycles, sec, ns;
	u32 seq;

	/* Allows to compile the high resolution parts out */
	if (!__arch_vdso_hres_capable())
		return -1;

	do {
		/*
		 * Open coded function vdso_read_begin() to handle
		 * VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a
		 * special VVAR page installed which has vc->seq set to 1 and
		 * vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time
		 * namespace affected tasks this does not affect performance
		 * because if vc->seq is odd, i.e. a concurrent update is in
		 * progress the extra check for vc->clock_mode is just a few
		 * extra instructions while spin waiting for vc->seq to become
		 * even again.
		 */
		while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) {
			if (IS_ENABLED(CONFIG_TIME_NS) &&
			    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
				return do_hres_timens(vd, vc, clk, ts);
			cpu_relax();
		}
		/* Read barrier between the sequence read and the data reads */
		smp_rmb();

		if (unlikely(!vdso_clocksource_ok(vc)))
			return -1;

		cycles = __arch_get_hw_counter(vc->clock_mode, vd);
		if (unlikely(!vdso_cycles_ok(cycles)))
			return -1;
		ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec);
		sec = vdso_ts->sec;
	} while (unlikely(vdso_read_retry(vc, seq)));

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return 0;
}
194 
195 #ifdef CONFIG_TIME_NS
196 static __always_inline
do_coarse_timens(const struct vdso_time_data * vdns,const struct vdso_clock * vcns,clockid_t clk,struct __kernel_timespec * ts)197 int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
198 		     clockid_t clk, struct __kernel_timespec *ts)
199 {
200 	const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns);
201 	const struct timens_offset *offs = &vcns->offset[clk];
202 	const struct vdso_clock *vc = vd->clock_data;
203 	const struct vdso_timestamp *vdso_ts;
204 	u64 nsec;
205 	s64 sec;
206 	s32 seq;
207 
208 	vdso_ts = &vc->basetime[clk];
209 
210 	do {
211 		seq = vdso_read_begin(vc);
212 		sec = vdso_ts->sec;
213 		nsec = vdso_ts->nsec;
214 	} while (unlikely(vdso_read_retry(vc, seq)));
215 
216 	/* Add the namespace offset */
217 	sec += offs->sec;
218 	nsec += offs->nsec;
219 
220 	/*
221 	 * Do this outside the loop: a race inside the loop could result
222 	 * in __iter_div_u64_rem() being extremely slow.
223 	 */
224 	ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
225 	ts->tv_nsec = nsec;
226 	return 0;
227 }
228 #else
229 static __always_inline
do_coarse_timens(const struct vdso_time_data * vdns,const struct vdso_clock * vcns,clockid_t clk,struct __kernel_timespec * ts)230 int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
231 		     clockid_t clk, struct __kernel_timespec *ts)
232 {
233 	return -1;
234 }
235 #endif
236 
/*
 * Read a coarse clock: copy the stored base time of @clk into @ts
 * without reading the hardware counter.
 *
 * @vd:  time data, only handed to the time namespace path
 * @vc:  clock to read
 * @clk: clock id used to index vc->basetime; validated by the caller
 * @ts:  result
 *
 * Returns 0, or the result of do_coarse_timens() for time namespace
 * tasks.
 */
static __always_inline
int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
	      clockid_t clk, struct __kernel_timespec *ts)
{
	const struct vdso_timestamp *vdso_ts = &vc->basetime[clk];
	u32 seq;

	do {
		/*
		 * Open coded function vdso_read_begin() to handle
		 * VDSO_CLOCKMODE_TIMENS. See comment in do_hres().
		 */
		while ((seq = READ_ONCE(vc->seq)) & 1) {
			if (IS_ENABLED(CONFIG_TIME_NS) &&
			    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
				return do_coarse_timens(vd, vc, clk, ts);
			cpu_relax();
		}
		/* Read barrier between the sequence read and the data reads */
		smp_rmb();

		/* Written on every pass; only the final pass is kept */
		ts->tv_sec = vdso_ts->sec;
		ts->tv_nsec = vdso_ts->nsec;
	} while (unlikely(vdso_read_retry(vc, seq)));

	return 0;
}
263 
264 static __always_inline int
__cvdso_clock_gettime_common(const struct vdso_time_data * vd,clockid_t clock,struct __kernel_timespec * ts)265 __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
266 			     struct __kernel_timespec *ts)
267 {
268 	const struct vdso_clock *vc = vd->clock_data;
269 	u32 msk;
270 
271 	/* Check for negative values or invalid clocks */
272 	if (unlikely((u32) clock >= MAX_CLOCKS))
273 		return -1;
274 
275 	/*
276 	 * Convert the clockid to a bitmask and use it to check which
277 	 * clocks are handled in the VDSO directly.
278 	 */
279 	msk = 1U << clock;
280 	if (likely(msk & VDSO_HRES))
281 		vc = &vc[CS_HRES_COARSE];
282 	else if (msk & VDSO_COARSE)
283 		return do_coarse(vd, &vc[CS_HRES_COARSE], clock, ts);
284 	else if (msk & VDSO_RAW)
285 		vc = &vc[CS_RAW];
286 	else
287 		return -1;
288 
289 	return do_hres(vd, vc, clock, ts);
290 }
291 
292 static __maybe_unused int
__cvdso_clock_gettime_data(const struct vdso_time_data * vd,clockid_t clock,struct __kernel_timespec * ts)293 __cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock,
294 			   struct __kernel_timespec *ts)
295 {
296 	int ret = __cvdso_clock_gettime_common(vd, clock, ts);
297 
298 	if (unlikely(ret))
299 		return clock_gettime_fallback(clock, ts);
300 	return 0;
301 }
302 
303 static __maybe_unused int
__cvdso_clock_gettime(clockid_t clock,struct __kernel_timespec * ts)304 __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
305 {
306 	return __cvdso_clock_gettime_data(__arch_get_vdso_u_time_data(), clock, ts);
307 }
308 
309 #ifdef BUILD_VDSO32
310 static __maybe_unused int
__cvdso_clock_gettime32_data(const struct vdso_time_data * vd,clockid_t clock,struct old_timespec32 * res)311 __cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock,
312 			     struct old_timespec32 *res)
313 {
314 	struct __kernel_timespec ts;
315 	int ret;
316 
317 	ret = __cvdso_clock_gettime_common(vd, clock, &ts);
318 
319 	if (unlikely(ret))
320 		return clock_gettime32_fallback(clock, res);
321 
322 	/* For ret == 0 */
323 	res->tv_sec = ts.tv_sec;
324 	res->tv_nsec = ts.tv_nsec;
325 
326 	return ret;
327 }
328 
329 static __maybe_unused int
__cvdso_clock_gettime32(clockid_t clock,struct old_timespec32 * res)330 __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
331 {
332 	return __cvdso_clock_gettime32_data(__arch_get_vdso_u_time_data(), clock, res);
333 }
334 #endif /* BUILD_VDSO32 */
335 
336 static __maybe_unused int
__cvdso_gettimeofday_data(const struct vdso_time_data * vd,struct __kernel_old_timeval * tv,struct timezone * tz)337 __cvdso_gettimeofday_data(const struct vdso_time_data *vd,
338 			  struct __kernel_old_timeval *tv, struct timezone *tz)
339 {
340 	const struct vdso_clock *vc = vd->clock_data;
341 
342 	if (likely(tv != NULL)) {
343 		struct __kernel_timespec ts;
344 
345 		if (do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
346 			return gettimeofday_fallback(tv, tz);
347 
348 		tv->tv_sec = ts.tv_sec;
349 		tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
350 	}
351 
352 	if (unlikely(tz != NULL)) {
353 		if (IS_ENABLED(CONFIG_TIME_NS) &&
354 		    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
355 			vd = __arch_get_vdso_u_timens_data(vd);
356 
357 		tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
358 		tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
359 	}
360 
361 	return 0;
362 }
363 
364 static __maybe_unused int
__cvdso_gettimeofday(struct __kernel_old_timeval * tv,struct timezone * tz)365 __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
366 {
367 	return __cvdso_gettimeofday_data(__arch_get_vdso_u_time_data(), tv, tz);
368 }
369 
370 #ifdef VDSO_HAS_TIME
371 static __maybe_unused __kernel_old_time_t
__cvdso_time_data(const struct vdso_time_data * vd,__kernel_old_time_t * time)372 __cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time)
373 {
374 	const struct vdso_clock *vc = vd->clock_data;
375 	__kernel_old_time_t t;
376 
377 	if (IS_ENABLED(CONFIG_TIME_NS) &&
378 	    vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
379 		vd = __arch_get_vdso_u_timens_data(vd);
380 		vc = vd->clock_data;
381 	}
382 
383 	t = READ_ONCE(vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
384 
385 	if (time)
386 		*time = t;
387 
388 	return t;
389 }
390 
__cvdso_time(__kernel_old_time_t * time)391 static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
392 {
393 	return __cvdso_time_data(__arch_get_vdso_u_time_data(), time);
394 }
395 #endif /* VDSO_HAS_TIME */
396 
397 #ifdef VDSO_HAS_CLOCK_GETRES
398 static __maybe_unused
__cvdso_clock_getres_common(const struct vdso_time_data * vd,clockid_t clock,struct __kernel_timespec * res)399 int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock,
400 				struct __kernel_timespec *res)
401 {
402 	const struct vdso_clock *vc = vd->clock_data;
403 	u32 msk;
404 	u64 ns;
405 
406 	/* Check for negative values or invalid clocks */
407 	if (unlikely((u32) clock >= MAX_CLOCKS))
408 		return -1;
409 
410 	if (IS_ENABLED(CONFIG_TIME_NS) &&
411 	    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
412 		vd = __arch_get_vdso_u_timens_data(vd);
413 
414 	/*
415 	 * Convert the clockid to a bitmask and use it to check which
416 	 * clocks are handled in the VDSO directly.
417 	 */
418 	msk = 1U << clock;
419 	if (msk & (VDSO_HRES | VDSO_RAW)) {
420 		/*
421 		 * Preserves the behaviour of posix_get_hrtimer_res().
422 		 */
423 		ns = READ_ONCE(vd->hrtimer_res);
424 	} else if (msk & VDSO_COARSE) {
425 		/*
426 		 * Preserves the behaviour of posix_get_coarse_res().
427 		 */
428 		ns = LOW_RES_NSEC;
429 	} else {
430 		return -1;
431 	}
432 
433 	if (likely(res)) {
434 		res->tv_sec = 0;
435 		res->tv_nsec = ns;
436 	}
437 	return 0;
438 }
439 
440 static __maybe_unused
__cvdso_clock_getres_data(const struct vdso_time_data * vd,clockid_t clock,struct __kernel_timespec * res)441 int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock,
442 			      struct __kernel_timespec *res)
443 {
444 	int ret = __cvdso_clock_getres_common(vd, clock, res);
445 
446 	if (unlikely(ret))
447 		return clock_getres_fallback(clock, res);
448 	return 0;
449 }
450 
451 static __maybe_unused
__cvdso_clock_getres(clockid_t clock,struct __kernel_timespec * res)452 int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
453 {
454 	return __cvdso_clock_getres_data(__arch_get_vdso_u_time_data(), clock, res);
455 }
456 
457 #ifdef BUILD_VDSO32
458 static __maybe_unused int
__cvdso_clock_getres_time32_data(const struct vdso_time_data * vd,clockid_t clock,struct old_timespec32 * res)459 __cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock,
460 				 struct old_timespec32 *res)
461 {
462 	struct __kernel_timespec ts;
463 	int ret;
464 
465 	ret = __cvdso_clock_getres_common(vd, clock, &ts);
466 
467 	if (unlikely(ret))
468 		return clock_getres32_fallback(clock, res);
469 
470 	if (likely(res)) {
471 		res->tv_sec = ts.tv_sec;
472 		res->tv_nsec = ts.tv_nsec;
473 	}
474 	return ret;
475 }
476 
477 static __maybe_unused int
__cvdso_clock_getres_time32(clockid_t clock,struct old_timespec32 * res)478 __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
479 {
480 	return __cvdso_clock_getres_time32_data(__arch_get_vdso_u_time_data(),
481 						clock, res);
482 }
483 #endif /* BUILD_VDSO32 */
484 #endif /* VDSO_HAS_CLOCK_GETRES */
485