1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/systm.h>
29 #include <sys/event.h>
30 #include <sys/eventfd.h>
31 #include <sys/errno.h>
32 #include <sys/fcntl.h>
33 #include <sys/file.h>
34 #include <sys/filedesc.h>
35 #include <sys/filio.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/poll.h>
42 #include <sys/proc.h>
43 #include <sys/refcount.h>
44 #include <sys/selinfo.h>
45 #include <sys/stat.h>
46 #include <sys/uio.h>
47 #include <sys/user.h>
48
49 #include <security/audit/audit.h>
50
51 _Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
52 _Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");
53
54 MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
55
56 static fo_rdwr_t eventfd_read;
57 static fo_rdwr_t eventfd_write;
58 static fo_ioctl_t eventfd_ioctl;
59 static fo_poll_t eventfd_poll;
60 static fo_kqfilter_t eventfd_kqfilter;
61 static fo_stat_t eventfd_stat;
62 static fo_close_t eventfd_close;
63 static fo_fill_kinfo_t eventfd_fill_kinfo;
64
65 static const struct fileops eventfdops = {
66 .fo_read = eventfd_read,
67 .fo_write = eventfd_write,
68 .fo_truncate = invfo_truncate,
69 .fo_ioctl = eventfd_ioctl,
70 .fo_poll = eventfd_poll,
71 .fo_kqfilter = eventfd_kqfilter,
72 .fo_stat = eventfd_stat,
73 .fo_close = eventfd_close,
74 .fo_chmod = invfo_chmod,
75 .fo_chown = invfo_chown,
76 .fo_sendfile = invfo_sendfile,
77 .fo_fill_kinfo = eventfd_fill_kinfo,
78 .fo_cmp = file_kcmp_generic,
79 .fo_flags = DFLAG_PASSABLE
80 };
81
/* kqueue filter callbacks referenced by the filterops tables below. */
static int	filt_eventfdread(struct knote *kn, long hint);
static int	filt_eventfdwrite(struct knote *kn, long hint);
static void	filt_eventfddetach(struct knote *kn);
85
86 static const struct filterops eventfd_rfiltops = {
87 .f_isfd = 1,
88 .f_detach = filt_eventfddetach,
89 .f_event = filt_eventfdread,
90 .f_copy = knote_triv_copy,
91 };
92
93
94 static const struct filterops eventfd_wfiltops = {
95 .f_isfd = 1,
96 .f_detach = filt_eventfddetach,
97 .f_event = filt_eventfdwrite,
98 .f_copy = knote_triv_copy,
99 };
100
101 struct eventfd {
102 eventfd_t efd_count;
103 uint32_t efd_flags;
104 struct selinfo efd_sel;
105 struct mtx efd_lock;
106 unsigned int efd_refcount;
107 };
108
109 int
eventfd_create_file(struct thread * td,struct file * fp,uint32_t initval,int flags)110 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
111 int flags)
112 {
113 struct eventfd *efd;
114 int fflags;
115
116 AUDIT_ARG_FFLAGS(flags);
117 AUDIT_ARG_VALUE(initval);
118
119 efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
120 efd->efd_flags = flags;
121 efd->efd_count = initval;
122 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
123 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
124 refcount_init(&efd->efd_refcount, 1);
125
126 fflags = FREAD | FWRITE;
127 if ((flags & EFD_NONBLOCK) != 0)
128 fflags |= FNONBLOCK;
129 finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
130
131 return (0);
132 }
133
134 struct eventfd *
eventfd_get(struct file * fp)135 eventfd_get(struct file *fp)
136 {
137 struct eventfd *efd;
138
139 if (fp->f_data == NULL || fp->f_ops != &eventfdops)
140 return (NULL);
141
142 efd = fp->f_data;
143 refcount_acquire(&efd->efd_refcount);
144
145 return (efd);
146 }
147
148 void
eventfd_put(struct eventfd * efd)149 eventfd_put(struct eventfd *efd)
150 {
151 if (!refcount_release(&efd->efd_refcount))
152 return;
153
154 seldrain(&efd->efd_sel);
155 knlist_destroy(&efd->efd_sel.si_note);
156 mtx_destroy(&efd->efd_lock);
157 free(efd, M_EVENTFD);
158 }
159
160 static void
eventfd_wakeup(struct eventfd * efd)161 eventfd_wakeup(struct eventfd *efd)
162 {
163 KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
164 selwakeup(&efd->efd_sel);
165 wakeup(&efd->efd_count);
166 }
167
168 void
eventfd_signal(struct eventfd * efd)169 eventfd_signal(struct eventfd *efd)
170 {
171 mtx_lock(&efd->efd_lock);
172
173 if (efd->efd_count < UINT64_MAX)
174 efd->efd_count++;
175
176 eventfd_wakeup(efd);
177
178 mtx_unlock(&efd->efd_lock);
179 }
180
181 static int
eventfd_close(struct file * fp,struct thread * td)182 eventfd_close(struct file *fp, struct thread *td)
183 {
184 struct eventfd *efd;
185
186 efd = fp->f_data;
187 eventfd_put(efd);
188 return (0);
189 }
190
191 static int
eventfd_read(struct file * fp,struct uio * uio,struct ucred * active_cred,int flags,struct thread * td)192 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
193 int flags, struct thread *td)
194 {
195 struct eventfd *efd;
196 eventfd_t count;
197 int error;
198
199 if (uio->uio_resid < sizeof(eventfd_t))
200 return (EINVAL);
201
202 error = 0;
203 efd = fp->f_data;
204 mtx_lock(&efd->efd_lock);
205 while (error == 0 && efd->efd_count == 0) {
206 if ((fp->f_flag & FNONBLOCK) != 0) {
207 mtx_unlock(&efd->efd_lock);
208 return (EAGAIN);
209 }
210 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
211 "efdrd", 0);
212 }
213 if (error == 0) {
214 MPASS(efd->efd_count > 0);
215 if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
216 count = 1;
217 --efd->efd_count;
218 } else {
219 count = efd->efd_count;
220 efd->efd_count = 0;
221 }
222 KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
223 selwakeup(&efd->efd_sel);
224 wakeup(&efd->efd_count);
225 mtx_unlock(&efd->efd_lock);
226 error = uiomove(&count, sizeof(eventfd_t), uio);
227 } else
228 mtx_unlock(&efd->efd_lock);
229
230 return (error);
231 }
232
233 static int
eventfd_write(struct file * fp,struct uio * uio,struct ucred * active_cred,int flags,struct thread * td)234 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
235 int flags, struct thread *td)
236 {
237 struct eventfd *efd;
238 eventfd_t count;
239 int error;
240
241 if (uio->uio_resid < sizeof(eventfd_t))
242 return (EINVAL);
243
244 error = uiomove(&count, sizeof(eventfd_t), uio);
245 if (error != 0)
246 return (error);
247 if (count == UINT64_MAX)
248 return (EINVAL);
249
250 efd = fp->f_data;
251 mtx_lock(&efd->efd_lock);
252 retry:
253 if (UINT64_MAX - efd->efd_count <= count) {
254 if ((fp->f_flag & FNONBLOCK) != 0) {
255 mtx_unlock(&efd->efd_lock);
256 /* Do not not return the number of bytes written */
257 uio->uio_resid += sizeof(eventfd_t);
258 return (EAGAIN);
259 }
260 error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
261 PCATCH, "efdwr", 0);
262 if (error == 0)
263 goto retry;
264 }
265 if (error == 0) {
266 MPASS(UINT64_MAX - efd->efd_count > count);
267 efd->efd_count += count;
268 eventfd_wakeup(efd);
269 }
270 mtx_unlock(&efd->efd_lock);
271
272 return (error);
273 }
274
275 static int
eventfd_poll(struct file * fp,int events,struct ucred * active_cred,struct thread * td)276 eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
277 struct thread *td)
278 {
279 struct eventfd *efd;
280 int revents;
281
282 efd = fp->f_data;
283 revents = 0;
284 mtx_lock(&efd->efd_lock);
285 if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
286 revents |= events & (POLLIN | POLLRDNORM);
287 if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
288 efd->efd_count)
289 revents |= events & (POLLOUT | POLLWRNORM);
290 if (revents == 0)
291 selrecord(td, &efd->efd_sel);
292 mtx_unlock(&efd->efd_lock);
293
294 return (revents);
295 }
296
297 static int
eventfd_kqfilter(struct file * fp,struct knote * kn)298 eventfd_kqfilter(struct file *fp, struct knote *kn)
299 {
300 struct eventfd *efd = fp->f_data;
301
302 mtx_lock(&efd->efd_lock);
303 switch (kn->kn_filter) {
304 case EVFILT_READ:
305 kn->kn_fop = &eventfd_rfiltops;
306 break;
307 case EVFILT_WRITE:
308 kn->kn_fop = &eventfd_wfiltops;
309 break;
310 default:
311 mtx_unlock(&efd->efd_lock);
312 return (EINVAL);
313 }
314
315 kn->kn_hook = efd;
316 knlist_add(&efd->efd_sel.si_note, kn, 1);
317 mtx_unlock(&efd->efd_lock);
318
319 return (0);
320 }
321
322 static void
filt_eventfddetach(struct knote * kn)323 filt_eventfddetach(struct knote *kn)
324 {
325 struct eventfd *efd = kn->kn_hook;
326
327 mtx_lock(&efd->efd_lock);
328 knlist_remove(&efd->efd_sel.si_note, kn, 1);
329 mtx_unlock(&efd->efd_lock);
330 }
331
332 static int
filt_eventfdread(struct knote * kn,long hint)333 filt_eventfdread(struct knote *kn, long hint)
334 {
335 struct eventfd *efd = kn->kn_hook;
336 int ret;
337
338 mtx_assert(&efd->efd_lock, MA_OWNED);
339 kn->kn_data = (int64_t)efd->efd_count;
340 ret = efd->efd_count > 0;
341
342 return (ret);
343 }
344
345 static int
filt_eventfdwrite(struct knote * kn,long hint)346 filt_eventfdwrite(struct knote *kn, long hint)
347 {
348 struct eventfd *efd = kn->kn_hook;
349 int ret;
350
351 mtx_assert(&efd->efd_lock, MA_OWNED);
352 kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
353 ret = UINT64_MAX - 1 > efd->efd_count;
354
355 return (ret);
356 }
357
358 static int
eventfd_ioctl(struct file * fp,u_long cmd,void * data,struct ucred * active_cred,struct thread * td)359 eventfd_ioctl(struct file *fp, u_long cmd, void *data,
360 struct ucred *active_cred, struct thread *td)
361 {
362 switch (cmd) {
363 case FIONBIO:
364 case FIOASYNC:
365 return (0);
366 }
367
368 return (ENOTTY);
369 }
370
371 static int
eventfd_stat(struct file * fp,struct stat * st,struct ucred * active_cred)372 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
373 {
374 bzero((void *)st, sizeof *st);
375 st->st_mode = S_IFIFO;
376 return (0);
377 }
378
379 static int
eventfd_fill_kinfo(struct file * fp,struct kinfo_file * kif,struct filedesc * fdp)380 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
381 {
382 struct eventfd *efd = fp->f_data;
383
384 kif->kf_type = KF_TYPE_EVENTFD;
385 mtx_lock(&efd->efd_lock);
386 kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
387 kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
388 kif->kf_un.kf_eventfd.kf_eventfd_addr = (uintptr_t)efd;
389 mtx_unlock(&efd->efd_lock);
390 return (0);
391 }
392