1b215e283SDavide Libenzi /* 2b215e283SDavide Libenzi * fs/timerfd.c 3b215e283SDavide Libenzi * 4b215e283SDavide Libenzi * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> 5b215e283SDavide Libenzi * 6b215e283SDavide Libenzi * 7b215e283SDavide Libenzi * Thanks to Thomas Gleixner for code reviews and useful comments. 8b215e283SDavide Libenzi * 9b215e283SDavide Libenzi */ 10b215e283SDavide Libenzi 11b215e283SDavide Libenzi #include <linux/file.h> 12b215e283SDavide Libenzi #include <linux/poll.h> 13b215e283SDavide Libenzi #include <linux/init.h> 14b215e283SDavide Libenzi #include <linux/fs.h> 15b215e283SDavide Libenzi #include <linux/sched.h> 16b215e283SDavide Libenzi #include <linux/kernel.h> 17*5a0e3ad6STejun Heo #include <linux/slab.h> 18b215e283SDavide Libenzi #include <linux/list.h> 19b215e283SDavide Libenzi #include <linux/spinlock.h> 20b215e283SDavide Libenzi #include <linux/time.h> 21b215e283SDavide Libenzi #include <linux/hrtimer.h> 22b215e283SDavide Libenzi #include <linux/anon_inodes.h> 23b215e283SDavide Libenzi #include <linux/timerfd.h> 2445cc2b96SAdrian Bunk #include <linux/syscalls.h> 25b215e283SDavide Libenzi 26b215e283SDavide Libenzi struct timerfd_ctx { 27b215e283SDavide Libenzi struct hrtimer tmr; 28b215e283SDavide Libenzi ktime_t tintv; 29b215e283SDavide Libenzi wait_queue_head_t wqh; 304d672e7aSDavide Libenzi u64 ticks; 31b215e283SDavide Libenzi int expired; 324d672e7aSDavide Libenzi int clockid; 33b215e283SDavide Libenzi }; 34b215e283SDavide Libenzi 35b215e283SDavide Libenzi /* 36b215e283SDavide Libenzi * This gets called when the timer event triggers. We set the "expired" 37b215e283SDavide Libenzi * flag, but we do not re-arm the timer (in case it's necessary, 384d672e7aSDavide Libenzi * tintv.tv64 != 0) until the timer is accessed. 39b215e283SDavide Libenzi */ 40b215e283SDavide Libenzi static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) 41b215e283SDavide Libenzi { 42b215e283SDavide Libenzi struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); 43b215e283SDavide Libenzi unsigned long flags; 44b215e283SDavide Libenzi 4518963c01SDavide Libenzi spin_lock_irqsave(&ctx->wqh.lock, flags); 46b215e283SDavide Libenzi ctx->expired = 1; 474d672e7aSDavide Libenzi ctx->ticks++; 48b215e283SDavide Libenzi wake_up_locked(&ctx->wqh); 4918963c01SDavide Libenzi spin_unlock_irqrestore(&ctx->wqh.lock, flags); 50b215e283SDavide Libenzi 51b215e283SDavide Libenzi return HRTIMER_NORESTART; 52b215e283SDavide Libenzi } 53b215e283SDavide Libenzi 544d672e7aSDavide Libenzi static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 554d672e7aSDavide Libenzi { 5676369470SArjan van de Ven ktime_t remaining; 574d672e7aSDavide Libenzi 5876369470SArjan van de Ven remaining = hrtimer_expires_remaining(&ctx->tmr); 594d672e7aSDavide Libenzi return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 604d672e7aSDavide Libenzi } 614d672e7aSDavide Libenzi 624d672e7aSDavide Libenzi static void timerfd_setup(struct timerfd_ctx *ctx, int flags, 63b215e283SDavide Libenzi const struct itimerspec *ktmr) 64b215e283SDavide Libenzi { 65b215e283SDavide Libenzi enum hrtimer_mode htmode; 66b215e283SDavide Libenzi ktime_t texp; 67b215e283SDavide Libenzi 68b215e283SDavide Libenzi htmode = (flags & TFD_TIMER_ABSTIME) ? 69b215e283SDavide Libenzi HRTIMER_MODE_ABS: HRTIMER_MODE_REL; 70b215e283SDavide Libenzi 71b215e283SDavide Libenzi texp = timespec_to_ktime(ktmr->it_value); 72b215e283SDavide Libenzi ctx->expired = 0; 734d672e7aSDavide Libenzi ctx->ticks = 0; 74b215e283SDavide Libenzi ctx->tintv = timespec_to_ktime(ktmr->it_interval); 754d672e7aSDavide Libenzi hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 7676369470SArjan van de Ven hrtimer_set_expires(&ctx->tmr, texp); 77b215e283SDavide Libenzi ctx->tmr.function = timerfd_tmrproc; 78b215e283SDavide Libenzi if (texp.tv64 != 0) 79b215e283SDavide Libenzi hrtimer_start(&ctx->tmr, texp, htmode); 80b215e283SDavide Libenzi } 81b215e283SDavide Libenzi 82b215e283SDavide Libenzi static int timerfd_release(struct inode *inode, struct file *file) 83b215e283SDavide Libenzi { 84b215e283SDavide Libenzi struct timerfd_ctx *ctx = file->private_data; 85b215e283SDavide Libenzi 86b215e283SDavide Libenzi hrtimer_cancel(&ctx->tmr); 87b215e283SDavide Libenzi kfree(ctx); 88b215e283SDavide Libenzi return 0; 89b215e283SDavide Libenzi } 90b215e283SDavide Libenzi 91b215e283SDavide Libenzi static unsigned int timerfd_poll(struct file *file, poll_table *wait) 92b215e283SDavide Libenzi { 93b215e283SDavide Libenzi struct timerfd_ctx *ctx = file->private_data; 94b215e283SDavide Libenzi unsigned int events = 0; 95b215e283SDavide Libenzi unsigned long flags; 96b215e283SDavide Libenzi 97b215e283SDavide Libenzi poll_wait(file, &ctx->wqh, wait); 98b215e283SDavide Libenzi 9918963c01SDavide Libenzi spin_lock_irqsave(&ctx->wqh.lock, flags); 1004d672e7aSDavide Libenzi if (ctx->ticks) 101b215e283SDavide Libenzi events |= POLLIN; 10218963c01SDavide Libenzi spin_unlock_irqrestore(&ctx->wqh.lock, flags); 103b215e283SDavide Libenzi 104b215e283SDavide Libenzi return events; 105b215e283SDavide Libenzi } 106b215e283SDavide Libenzi 107b215e283SDavide Libenzi static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, 108b215e283SDavide Libenzi loff_t *ppos) 109b215e283SDavide Libenzi { 110b215e283SDavide Libenzi struct timerfd_ctx *ctx = file->private_data; 111b215e283SDavide Libenzi ssize_t res; 11209828402SDavide Libenzi u64 ticks = 0; 113b215e283SDavide Libenzi DECLARE_WAITQUEUE(wait, current); 114b215e283SDavide Libenzi 115b215e283SDavide Libenzi if (count < sizeof(ticks)) 116b215e283SDavide Libenzi return -EINVAL; 11718963c01SDavide Libenzi spin_lock_irq(&ctx->wqh.lock); 118b215e283SDavide Libenzi res = -EAGAIN; 1194d672e7aSDavide Libenzi if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) { 120b215e283SDavide Libenzi __add_wait_queue(&ctx->wqh, &wait); 121b215e283SDavide Libenzi for (res = 0;;) { 122b215e283SDavide Libenzi set_current_state(TASK_INTERRUPTIBLE); 1234d672e7aSDavide Libenzi if (ctx->ticks) { 124b215e283SDavide Libenzi res = 0; 125b215e283SDavide Libenzi break; 126b215e283SDavide Libenzi } 127b215e283SDavide Libenzi if (signal_pending(current)) { 128b215e283SDavide Libenzi res = -ERESTARTSYS; 129b215e283SDavide Libenzi break; 130b215e283SDavide Libenzi } 13118963c01SDavide Libenzi spin_unlock_irq(&ctx->wqh.lock); 132b215e283SDavide Libenzi schedule(); 13318963c01SDavide Libenzi spin_lock_irq(&ctx->wqh.lock); 134b215e283SDavide Libenzi } 135b215e283SDavide Libenzi __remove_wait_queue(&ctx->wqh, &wait); 136b215e283SDavide Libenzi __set_current_state(TASK_RUNNING); 137b215e283SDavide Libenzi } 1384d672e7aSDavide Libenzi if (ctx->ticks) { 1394d672e7aSDavide Libenzi ticks = ctx->ticks; 1404d672e7aSDavide Libenzi if (ctx->expired && ctx->tintv.tv64) { 141b215e283SDavide Libenzi /* 142b215e283SDavide Libenzi * If tintv.tv64 != 0, this is a periodic timer that 143b215e283SDavide Libenzi * needs to be re-armed. We avoid doing it in the timer 144b215e283SDavide Libenzi * callback to avoid DoS attacks specifying a very 145b215e283SDavide Libenzi * short timer period. 146b215e283SDavide Libenzi */ 1474d672e7aSDavide Libenzi ticks += hrtimer_forward_now(&ctx->tmr, 1484d672e7aSDavide Libenzi ctx->tintv) - 1; 149b215e283SDavide Libenzi hrtimer_restart(&ctx->tmr); 1504d672e7aSDavide Libenzi } 1514d672e7aSDavide Libenzi ctx->expired = 0; 1524d672e7aSDavide Libenzi ctx->ticks = 0; 153b215e283SDavide Libenzi } 15418963c01SDavide Libenzi spin_unlock_irq(&ctx->wqh.lock); 155b215e283SDavide Libenzi if (ticks) 15609828402SDavide Libenzi res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); 157b215e283SDavide Libenzi return res; 158b215e283SDavide Libenzi } 159b215e283SDavide Libenzi 160b215e283SDavide Libenzi static const struct file_operations timerfd_fops = { 161b215e283SDavide Libenzi .release = timerfd_release, 162b215e283SDavide Libenzi .poll = timerfd_poll, 163b215e283SDavide Libenzi .read = timerfd_read, 164b215e283SDavide Libenzi }; 165b215e283SDavide Libenzi 1664d672e7aSDavide Libenzi static struct file *timerfd_fget(int fd) 167b215e283SDavide Libenzi { 1684d672e7aSDavide Libenzi struct file *file; 1694d672e7aSDavide Libenzi 1704d672e7aSDavide Libenzi file = fget(fd); 1714d672e7aSDavide Libenzi if (!file) 1724d672e7aSDavide Libenzi return ERR_PTR(-EBADF); 1734d672e7aSDavide Libenzi if (file->f_op != &timerfd_fops) { 1744d672e7aSDavide Libenzi fput(file); 1754d672e7aSDavide Libenzi return ERR_PTR(-EINVAL); 1764d672e7aSDavide Libenzi } 1774d672e7aSDavide Libenzi 1784d672e7aSDavide Libenzi return file; 1794d672e7aSDavide Libenzi } 1804d672e7aSDavide Libenzi 181836f92adSHeiko Carstens SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) 1824d672e7aSDavide Libenzi { 1832030a42cSAl Viro int ufd; 184b215e283SDavide Libenzi struct timerfd_ctx *ctx; 185b215e283SDavide Libenzi 186e38b36f3SUlrich Drepper /* Check the TFD_* constants for consistency. */ 187e38b36f3SUlrich Drepper BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); 188e38b36f3SUlrich Drepper BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); 189e38b36f3SUlrich Drepper 190610d18f4SDavide Libenzi if ((flags & ~TFD_CREATE_FLAGS) || 191610d18f4SDavide Libenzi (clockid != CLOCK_MONOTONIC && 192610d18f4SDavide Libenzi clockid != CLOCK_REALTIME)) 193b215e283SDavide Libenzi return -EINVAL; 194b215e283SDavide Libenzi 1954d672e7aSDavide Libenzi ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 196b215e283SDavide Libenzi if (!ctx) 197b215e283SDavide Libenzi return -ENOMEM; 198b215e283SDavide Libenzi 199b215e283SDavide Libenzi init_waitqueue_head(&ctx->wqh); 2004d672e7aSDavide Libenzi ctx->clockid = clockid; 2014d672e7aSDavide Libenzi hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 202b215e283SDavide Libenzi 20311fcb6c1SUlrich Drepper ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 204628ff7c1SRoland Dreier O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 2052030a42cSAl Viro if (ufd < 0) 2064d672e7aSDavide Libenzi kfree(ctx); 2074d672e7aSDavide Libenzi 2084d672e7aSDavide Libenzi return ufd; 2094d672e7aSDavide Libenzi } 2104d672e7aSDavide Libenzi 211836f92adSHeiko Carstens SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, 212836f92adSHeiko Carstens const struct itimerspec __user *, utmr, 213836f92adSHeiko Carstens struct itimerspec __user *, otmr) 2144d672e7aSDavide Libenzi { 2154d672e7aSDavide Libenzi struct file *file; 2164d672e7aSDavide Libenzi struct timerfd_ctx *ctx; 2174d672e7aSDavide Libenzi struct itimerspec ktmr, kotmr; 2184d672e7aSDavide Libenzi 2194d672e7aSDavide Libenzi if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) 2204d672e7aSDavide Libenzi return -EFAULT; 2214d672e7aSDavide Libenzi 222610d18f4SDavide Libenzi if ((flags & ~TFD_SETTIME_FLAGS) || 223610d18f4SDavide Libenzi !timespec_valid(&ktmr.it_value) || 2244d672e7aSDavide Libenzi !timespec_valid(&ktmr.it_interval)) 2254d672e7aSDavide Libenzi return -EINVAL; 2264d672e7aSDavide Libenzi 2274d672e7aSDavide Libenzi file = timerfd_fget(ufd); 2284d672e7aSDavide Libenzi if (IS_ERR(file)) 2294d672e7aSDavide Libenzi return PTR_ERR(file); 2304d672e7aSDavide Libenzi ctx = file->private_data; 2314d672e7aSDavide Libenzi 232b215e283SDavide Libenzi /* 233b215e283SDavide Libenzi * We need to stop the existing timer before reprogramming 234b215e283SDavide Libenzi * it to the new values. 235b215e283SDavide Libenzi */ 236b215e283SDavide Libenzi for (;;) { 23718963c01SDavide Libenzi spin_lock_irq(&ctx->wqh.lock); 238b215e283SDavide Libenzi if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) 239b215e283SDavide Libenzi break; 24018963c01SDavide Libenzi spin_unlock_irq(&ctx->wqh.lock); 241b215e283SDavide Libenzi cpu_relax(); 242b215e283SDavide Libenzi } 2434d672e7aSDavide Libenzi 2444d672e7aSDavide Libenzi /* 2454d672e7aSDavide Libenzi * If the timer is expired and it's periodic, we need to advance it 2464d672e7aSDavide Libenzi * because the caller may want to know the previous expiration time. 2474d672e7aSDavide Libenzi * We do not update "ticks" and "expired" since the timer will be 2484d672e7aSDavide Libenzi * re-programmed again in the following timerfd_setup() call. 2494d672e7aSDavide Libenzi */ 2504d672e7aSDavide Libenzi if (ctx->expired && ctx->tintv.tv64) 2514d672e7aSDavide Libenzi hrtimer_forward_now(&ctx->tmr, ctx->tintv); 2524d672e7aSDavide Libenzi 2534d672e7aSDavide Libenzi kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 2544d672e7aSDavide Libenzi kotmr.it_interval = ktime_to_timespec(ctx->tintv); 2554d672e7aSDavide Libenzi 256b215e283SDavide Libenzi /* 257b215e283SDavide Libenzi * Re-program the timer to the new value ... 258b215e283SDavide Libenzi */ 2594d672e7aSDavide Libenzi timerfd_setup(ctx, flags, &ktmr); 260b215e283SDavide Libenzi 26118963c01SDavide Libenzi spin_unlock_irq(&ctx->wqh.lock); 262b215e283SDavide Libenzi fput(file); 2634d672e7aSDavide Libenzi if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 2644d672e7aSDavide Libenzi return -EFAULT; 2654d672e7aSDavide Libenzi 2664d672e7aSDavide Libenzi return 0; 267b215e283SDavide Libenzi } 268b215e283SDavide Libenzi 269d4e82042SHeiko Carstens SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 2704d672e7aSDavide Libenzi { 2714d672e7aSDavide Libenzi struct file *file; 2724d672e7aSDavide Libenzi struct timerfd_ctx *ctx; 2734d672e7aSDavide Libenzi struct itimerspec kotmr; 274b215e283SDavide Libenzi 2754d672e7aSDavide Libenzi file = timerfd_fget(ufd); 2764d672e7aSDavide Libenzi if (IS_ERR(file)) 2774d672e7aSDavide Libenzi return PTR_ERR(file); 2784d672e7aSDavide Libenzi ctx = file->private_data; 2794d672e7aSDavide Libenzi 2804d672e7aSDavide Libenzi spin_lock_irq(&ctx->wqh.lock); 2814d672e7aSDavide Libenzi if (ctx->expired && ctx->tintv.tv64) { 2824d672e7aSDavide Libenzi ctx->expired = 0; 2834d672e7aSDavide Libenzi ctx->ticks += 2844d672e7aSDavide Libenzi hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; 2854d672e7aSDavide Libenzi hrtimer_restart(&ctx->tmr); 2864d672e7aSDavide Libenzi } 2874d672e7aSDavide Libenzi kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 2884d672e7aSDavide Libenzi kotmr.it_interval = ktime_to_timespec(ctx->tintv); 2894d672e7aSDavide Libenzi spin_unlock_irq(&ctx->wqh.lock); 2904d672e7aSDavide Libenzi fput(file); 2914d672e7aSDavide Libenzi 2924d672e7aSDavide Libenzi return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 293b215e283SDavide Libenzi } 294b215e283SDavide Libenzi 295