1 | /* |
2 | * Copyright (c) 2000-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | * |
28 | */ |
29 | /*- |
30 | * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> |
31 | * All rights reserved. |
32 | * |
33 | * Redistribution and use in source and binary forms, with or without |
34 | * modification, are permitted provided that the following conditions |
35 | * are met: |
36 | * 1. Redistributions of source code must retain the above copyright |
37 | * notice, this list of conditions and the following disclaimer. |
38 | * 2. Redistributions in binary form must reproduce the above copyright |
39 | * notice, this list of conditions and the following disclaimer in the |
40 | * documentation and/or other materials provided with the distribution. |
41 | * |
42 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
43 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
44 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
45 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
46 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
47 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
48 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
49 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
50 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
51 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
52 | * SUCH DAMAGE. |
53 | */ |
54 | /* |
55 | * @(#)kern_event.c 1.0 (3/31/2000) |
56 | */ |
57 | #include <stdint.h> |
58 | #include <machine/atomic.h> |
59 | |
60 | #include <sys/param.h> |
61 | #include <sys/systm.h> |
62 | #include <sys/filedesc.h> |
63 | #include <sys/kernel.h> |
64 | #include <sys/proc_internal.h> |
65 | #include <sys/kauth.h> |
66 | #include <sys/malloc.h> |
67 | #include <sys/unistd.h> |
68 | #include <sys/file_internal.h> |
69 | #include <sys/fcntl.h> |
70 | #include <sys/select.h> |
71 | #include <sys/queue.h> |
72 | #include <sys/event.h> |
73 | #include <sys/eventvar.h> |
74 | #include <sys/protosw.h> |
75 | #include <sys/socket.h> |
76 | #include <sys/socketvar.h> |
77 | #include <sys/stat.h> |
78 | #include <sys/sysctl.h> |
79 | #include <sys/uio.h> |
80 | #include <sys/sysproto.h> |
81 | #include <sys/user.h> |
82 | #include <sys/vnode_internal.h> |
83 | #include <string.h> |
84 | #include <sys/proc_info.h> |
85 | #include <sys/codesign.h> |
86 | #include <sys/pthread_shims.h> |
87 | #include <sys/kdebug.h> |
88 | #include <sys/reason.h> |
89 | #include <os/reason_private.h> |
90 | #include <pexpert/pexpert.h> |
91 | |
92 | #include <kern/locks.h> |
93 | #include <kern/clock.h> |
94 | #include <kern/cpu_data.h> |
95 | #include <kern/policy_internal.h> |
96 | #include <kern/thread_call.h> |
97 | #include <kern/sched_prim.h> |
98 | #include <kern/waitq.h> |
99 | #include <kern/zalloc.h> |
100 | #include <kern/kalloc.h> |
101 | #include <kern/assert.h> |
102 | #include <kern/ast.h> |
103 | #include <kern/thread.h> |
104 | #include <kern/kcdata.h> |
105 | |
106 | #include <pthread/priority_private.h> |
107 | #include <pthread/workqueue_syscalls.h> |
108 | #include <pthread/workqueue_internal.h> |
109 | #include <libkern/libkern.h> |
110 | #include <libkern/OSAtomic.h> |
111 | |
112 | #include "net/net_str_id.h" |
113 | |
114 | #include <mach/task.h> |
115 | #include <libkern/section_keywords.h> |
116 | |
117 | #if CONFIG_MEMORYSTATUS |
118 | #include <sys/kern_memorystatus.h> |
119 | #endif |
120 | |
121 | extern thread_t port_name_to_thread(mach_port_name_t port_name); /* osfmk/kern/ipc_tt.h */ |
122 | extern mach_port_name_t ipc_entry_name_mask(mach_port_name_t name); /* osfmk/ipc/ipc_entry.h */ |
123 | |
124 | #define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code)) |
125 | |
MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
127 | |
128 | #define KQ_EVENT NO_EVENT64 |
129 | |
130 | static int kqueue_read(struct fileproc *fp, struct uio *uio, |
131 | int flags, vfs_context_t ctx); |
132 | static int kqueue_write(struct fileproc *fp, struct uio *uio, |
133 | int flags, vfs_context_t ctx); |
134 | static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data, |
135 | vfs_context_t ctx); |
136 | static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id, |
137 | vfs_context_t ctx); |
138 | static int kqueue_close(struct fileglob *fg, vfs_context_t ctx); |
139 | static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, |
140 | struct kevent_internal_s *kev, vfs_context_t ctx); |
141 | static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx); |
142 | |
143 | static const struct fileops kqueueops = { |
144 | .fo_type = DTYPE_KQUEUE, |
145 | .fo_read = kqueue_read, |
146 | .fo_write = kqueue_write, |
147 | .fo_ioctl = kqueue_ioctl, |
148 | .fo_select = kqueue_select, |
149 | .fo_close = kqueue_close, |
150 | .fo_kqfilter = kqueue_kqfilter, |
151 | .fo_drain = kqueue_drain, |
152 | }; |
153 | |
154 | static void kevent_put_kq(struct proc *p, kqueue_id_t id, struct fileproc *fp, struct kqueue *kq); |
155 | static int kevent_internal(struct proc *p, |
156 | kqueue_id_t id, kqueue_id_t *id_out, |
157 | user_addr_t changelist, int nchanges, |
158 | user_addr_t eventlist, int nevents, |
159 | user_addr_t data_out, uint64_t data_available, |
160 | unsigned int flags, user_addr_t utimeout, |
161 | kqueue_continue_t continuation, |
162 | int32_t *retval); |
163 | static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp, |
164 | struct proc *p, unsigned int flags); |
165 | static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp, |
166 | struct proc *p, unsigned int flags); |
167 | char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n); |
168 | |
169 | static int kevent_register_wait_prepare(struct knote *kn, struct kevent_internal_s *kev); |
170 | static void kevent_register_wait_block(struct turnstile *ts, thread_t handoff_thread, |
171 | struct knote_lock_ctx *knlc, thread_continue_t cont, |
172 | struct _kevent_register *cont_args) __dead2; |
173 | static void kevent_register_wait_return(struct _kevent_register *cont_args) __dead2; |
174 | static void kevent_register_wait_cleanup(struct knote *kn); |
175 | static inline void kqueue_release_last(struct proc *p, kqueue_t kqu); |
176 | static void kqueue_interrupt(struct kqueue *kq); |
177 | static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp, |
178 | void *data); |
179 | static void kevent_continue(struct kqueue *kq, void *data, int error); |
180 | static void kqueue_scan_continue(void *contp, wait_result_t wait_result); |
181 | static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *callback_data, |
182 | struct filt_process_s *process_data, int *countp); |
183 | static int kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index); |
184 | |
185 | static struct kqtailq *kqueue_get_suppressed_queue(kqueue_t kq, struct knote *kn); |
186 | static void kqueue_threadreq_initiate(struct kqueue *kq, struct kqrequest *kqr, kq_index_t qos, int flags); |
187 | |
188 | static void kqworkq_update_override(struct kqworkq *kqwq, struct knote *kn, kq_index_t qos); |
189 | static void kqworkq_unbind(proc_t p, struct kqrequest *kqr); |
190 | static thread_qos_t kqworkq_unbind_locked(struct kqworkq *kqwq, struct kqrequest *kqr, thread_t thread); |
191 | static struct kqrequest *kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index); |
192 | |
193 | static void kqworkloop_update_override(struct kqworkloop *kqwl, kq_index_t override_index); |
194 | static void kqworkloop_unbind(proc_t p, struct kqworkloop *kwql); |
195 | static thread_qos_t kqworkloop_unbind_locked(struct kqworkloop *kwql, thread_t thread); |
196 | static kq_index_t kqworkloop_owner_override(struct kqworkloop *kqwl); |
197 | enum { |
198 | KQWL_UTQ_NONE, |
199 | /* |
200 | * The wakeup qos is the qos of QUEUED knotes. |
201 | * |
202 | * This QoS is accounted for with the events override in the |
203 | * kqr_override_index field. It is raised each time a new knote is queued at |
	 * a given QoS. The kqr_wakeup_indexes field is a superset of the non-empty
205 | * knote buckets and is recomputed after each event delivery. |
206 | */ |
207 | KQWL_UTQ_UPDATE_WAKEUP_QOS, |
208 | KQWL_UTQ_UPDATE_STAYACTIVE_QOS, |
209 | KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, |
210 | KQWL_UTQ_UNBINDING, /* attempt to rebind */ |
211 | KQWL_UTQ_PARKING, |
212 | /* |
213 | * The wakeup override is for suppressed knotes that have fired again at |
214 | * a higher QoS than the one for which they are suppressed already. |
215 | * This override is cleared when the knote suppressed list becomes empty. |
216 | */ |
217 | KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE, |
218 | KQWL_UTQ_RESET_WAKEUP_OVERRIDE, |
219 | /* |
220 | * The QoS is the maximum QoS of an event enqueued on this workloop in |
221 | * userland. It is copied from the only EVFILT_WORKLOOP knote with |
222 | * a NOTE_WL_THREAD_REQUEST bit set allowed on this workloop. If there is no |
223 | * such knote, this QoS is 0. |
224 | */ |
225 | KQWL_UTQ_SET_QOS_INDEX, |
226 | KQWL_UTQ_REDRIVE_EVENTS, |
227 | }; |
228 | static void kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos); |
229 | static void kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index); |
230 | static int kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags); |
231 | |
232 | static int knote_process(struct knote *kn, kevent_callback_t callback, void *callback_data, |
233 | struct filt_process_s *process_data); |
234 | |
235 | static int kq_add_knote(struct kqueue *kq, struct knote *kn, |
236 | struct knote_lock_ctx *knlc, struct proc *p); |
237 | static struct knote *kq_find_knote_and_kq_lock(struct kqueue *kq, struct kevent_internal_s *kev, bool is_fd, struct proc *p); |
238 | |
239 | static void knote_drop(struct kqueue *kq, struct knote *kn, struct knote_lock_ctx *knlc); |
240 | static struct knote *knote_alloc(void); |
241 | static void knote_free(struct knote *kn); |
242 | |
243 | static void knote_activate(struct knote *kn); |
244 | static void knote_deactivate(struct knote *kn); |
245 | |
246 | static void knote_enable(struct knote *kn); |
247 | static void knote_disable(struct knote *kn); |
248 | |
249 | static int knote_enqueue(struct knote *kn); |
250 | static void knote_dequeue(struct knote *kn); |
251 | |
252 | static void knote_suppress(struct knote *kn); |
253 | static void knote_unsuppress(struct knote *kn); |
254 | static void knote_wakeup(struct knote *kn); |
255 | |
256 | static bool knote_should_apply_qos_override(struct kqueue *kq, struct knote *kn, |
257 | int result, thread_qos_t *qos_out); |
258 | static void knote_apply_qos_override(struct knote *kn, kq_index_t qos_index); |
259 | static void knote_adjust_qos(struct kqueue *kq, struct knote *kn, int result); |
260 | static void knote_reset_priority(struct knote *kn, pthread_priority_t pp); |
261 | static kq_index_t knote_get_qos_override_index(struct knote *kn); |
262 | static void knote_set_qos_overcommit(struct knote *kn); |
263 | |
264 | static zone_t knote_zone; |
265 | static zone_t kqfile_zone; |
266 | static zone_t kqworkq_zone; |
267 | static zone_t kqworkloop_zone; |
268 | #if DEVELOPMENT || DEBUG |
269 | #define KEVENT_PANIC_ON_WORKLOOP_OWNERSHIP_LEAK (1U << 0) |
270 | #define KEVENT_PANIC_ON_NON_ENQUEUED_PROCESS (1U << 1) |
271 | #define KEVENT_PANIC_BOOT_ARG_INITIALIZED (1U << 31) |
272 | |
273 | #define KEVENT_PANIC_DEFAULT_VALUE (0) |
274 | static uint32_t |
275 | kevent_debug_flags(void) |
276 | { |
277 | static uint32_t flags = KEVENT_PANIC_DEFAULT_VALUE; |
278 | |
279 | if ((flags & KEVENT_PANIC_BOOT_ARG_INITIALIZED) == 0) { |
280 | uint32_t value = 0; |
		if (!PE_parse_boot_argn("kevent_debug", &value, sizeof(value))) {
282 | value = KEVENT_PANIC_DEFAULT_VALUE; |
283 | } |
284 | value |= KEVENT_PANIC_BOOT_ARG_INITIALIZED; |
285 | os_atomic_store(&flags, value, relaxed); |
286 | } |
287 | return flags; |
288 | } |
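
/*
 * Illustrative usage (an assumption derived from the bit definitions above,
 * not a documented interface): the "kevent_debug" boot-arg selects which of
 * the panics above are enabled, e.g.
 *
 *     nvram boot-args="kevent_debug=0x1"    # panic on workloop ownership leaks
 *     nvram boot-args="kevent_debug=0x3"    # also panic on non-enqueued process
 *
 * The parsed value is latched once via KEVENT_PANIC_BOOT_ARG_INITIALIZED, so
 * changing the boot-arg requires a reboot.
 */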
289 | #endif |
290 | |
291 | #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) |
292 | |
293 | /* placeholder for not-yet-implemented filters */ |
294 | static int filt_badattach(struct knote *kn, struct kevent_internal_s *kev); |
295 | static int filt_badevent(struct knote *kn, long hint); |
296 | SECURITY_READ_ONLY_EARLY(static struct filterops) bad_filtops = { |
297 | .f_attach = filt_badattach, |
298 | }; |
299 | |
300 | #if CONFIG_MEMORYSTATUS |
301 | extern const struct filterops memorystatus_filtops; |
302 | #endif /* CONFIG_MEMORYSTATUS */ |
303 | extern const struct filterops fs_filtops; |
304 | extern const struct filterops sig_filtops; |
305 | extern const struct filterops machport_filtops; |
306 | extern const struct filterops pipe_rfiltops; |
307 | extern const struct filterops pipe_wfiltops; |
308 | extern const struct filterops ptsd_kqops; |
309 | extern const struct filterops ptmx_kqops; |
310 | extern const struct filterops soread_filtops; |
311 | extern const struct filterops sowrite_filtops; |
312 | extern const struct filterops sock_filtops; |
313 | extern const struct filterops soexcept_filtops; |
314 | extern const struct filterops spec_filtops; |
315 | extern const struct filterops bpfread_filtops; |
316 | extern const struct filterops necp_fd_rfiltops; |
317 | extern const struct filterops fsevent_filtops; |
318 | extern const struct filterops vnode_filtops; |
319 | extern const struct filterops tty_filtops; |
320 | |
321 | const static struct filterops file_filtops; |
322 | const static struct filterops kqread_filtops; |
323 | const static struct filterops proc_filtops; |
324 | const static struct filterops timer_filtops; |
325 | const static struct filterops user_filtops; |
326 | const static struct filterops workloop_filtops; |
327 | |
328 | /* |
329 | * |
330 | * Rules for adding new filters to the system: |
331 | * Public filters: |
332 | * - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value) |
333 | * in the exported section of the header |
334 | * - Update the EVFILT_SYSCOUNT value to reflect the new addition |
335 | * - Add a filterops to the sysfilt_ops array. Public filters should be added at the end |
336 | * of the Public Filters section in the array. |
337 | * Private filters: |
338 | * - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value) |
339 | * in the XNU_KERNEL_PRIVATE section of the header |
340 | * - Update the EVFILTID_MAX value to reflect the new addition |
341 | * - Add a filterops to the sysfilt_ops. Private filters should be added at the end of |
342 | * the Private filters section of the array. |
343 | */ |
344 | SECURITY_READ_ONLY_EARLY(static struct filterops *) sysfilt_ops[EVFILTID_MAX] = { |
345 | /* Public Filters */ |
346 | [~EVFILT_READ] = &file_filtops, |
347 | [~EVFILT_WRITE] = &file_filtops, |
348 | [~EVFILT_AIO] = &bad_filtops, |
349 | [~EVFILT_VNODE] = &file_filtops, |
350 | [~EVFILT_PROC] = &proc_filtops, |
351 | [~EVFILT_SIGNAL] = &sig_filtops, |
352 | [~EVFILT_TIMER] = &timer_filtops, |
353 | [~EVFILT_MACHPORT] = &machport_filtops, |
354 | [~EVFILT_FS] = &fs_filtops, |
355 | [~EVFILT_USER] = &user_filtops, |
	[~EVFILT_UNUSED_11] = &bad_filtops,
357 | [~EVFILT_VM] = &bad_filtops, |
358 | [~EVFILT_SOCK] = &file_filtops, |
359 | #if CONFIG_MEMORYSTATUS |
360 | [~EVFILT_MEMORYSTATUS] = &memorystatus_filtops, |
361 | #else |
362 | [~EVFILT_MEMORYSTATUS] = &bad_filtops, |
363 | #endif |
364 | [~EVFILT_EXCEPT] = &file_filtops, |
365 | [~EVFILT_WORKLOOP] = &workloop_filtops, |
366 | |
367 | /* Private filters */ |
368 | [EVFILTID_KQREAD] = &kqread_filtops, |
369 | [EVFILTID_PIPE_R] = &pipe_rfiltops, |
370 | [EVFILTID_PIPE_W] = &pipe_wfiltops, |
371 | [EVFILTID_PTSD] = &ptsd_kqops, |
372 | [EVFILTID_SOREAD] = &soread_filtops, |
373 | [EVFILTID_SOWRITE] = &sowrite_filtops, |
374 | [EVFILTID_SCK] = &sock_filtops, |
375 | [EVFILTID_SOEXCEPT] = &soexcept_filtops, |
376 | [EVFILTID_SPEC] = &spec_filtops, |
377 | [EVFILTID_BPFREAD] = &bpfread_filtops, |
378 | [EVFILTID_NECP_FD] = &necp_fd_rfiltops, |
379 | [EVFILTID_FSEVENT] = &fsevent_filtops, |
380 | [EVFILTID_VN] = &vnode_filtops, |
381 | [EVFILTID_TTY] = &tty_filtops, |
382 | [EVFILTID_PTMX] = &ptmx_kqops, |
383 | }; |
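
/*
 * Illustrative sketch of the rules above, using a hypothetical filter named
 * "foo" (EVFILTID_FOO and foo_filtops do not exist; they only stand in for
 * whatever the new filter would be called):
 *
 *     // bsd/sys/event.h, XNU_KERNEL_PRIVATE section: add EVFILTID_FOO just
 *     // before EVFILTID_MAX (which grows by one)
 *
 *     // end of the Private filters section of sysfilt_ops above
 *     [EVFILTID_FOO]            = &foo_filtops,
 *
 * A public filter instead gets a new negative EVFILT_ value in the exported
 * section of bsd/sys/event.h, an EVFILT_SYSCOUNT bump, and an entry indexed
 * with the [~EVFILT_xxx] convention used above.
 */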
384 | |
385 | /* waitq prepost callback */ |
386 | void waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos); |
387 | |
388 | static inline struct kqworkloop * |
389 | kqr_kqworkloop(struct kqrequest *kqr) |
390 | { |
391 | if (kqr->kqr_state & KQR_WORKLOOP) { |
392 | return __container_of(kqr, struct kqworkloop, kqwl_request); |
393 | } |
394 | return NULL; |
395 | } |
396 | |
397 | static inline kqueue_t |
398 | kqr_kqueue(proc_t p, struct kqrequest *kqr) |
399 | { |
400 | kqueue_t kqu; |
401 | if (kqr->kqr_state & KQR_WORKLOOP) { |
402 | kqu.kqwl = kqr_kqworkloop(kqr); |
403 | } else { |
404 | kqu.kqwq = (struct kqworkq *)p->p_fd->fd_wqkqueue; |
405 | assert(kqr >= kqu.kqwq->kqwq_request && |
406 | kqr < kqu.kqwq->kqwq_request + KQWQ_NBUCKETS); |
407 | } |
408 | return kqu; |
409 | } |
410 | |
411 | static inline boolean_t |
412 | is_workqueue_thread(thread_t thread) |
413 | { |
414 | return (thread_get_tag(thread) & THREAD_TAG_WORKQUEUE); |
415 | } |
416 | |
417 | /* |
418 | * kqueue/note lock implementations |
419 | * |
420 | * The kqueue lock guards the kq state, the state of its queues, |
421 | * and the kqueue-aware status and locks of individual knotes. |
422 | * |
423 | * The kqueue workq lock is used to protect state guarding the |
424 | * interaction of the kqueue with the workq. This state cannot |
425 | * be guarded by the kq lock - as it needs to be taken when we |
426 | * already have the waitq set lock held (during the waitq hook |
427 | * callback). It might be better to use the waitq lock itself |
 * for this, but the IRQ requirements make that difficult.
429 | * |
430 | * Knote flags, filter flags, and associated data are protected |
431 | * by the underlying object lock - and are only ever looked at |
432 | * by calling the filter to get a [consistent] snapshot of that |
433 | * data. |
434 | */ |
435 | static lck_grp_attr_t *kq_lck_grp_attr; |
436 | static lck_grp_t *kq_lck_grp; |
437 | static lck_attr_t *kq_lck_attr; |
438 | |
439 | static inline void |
440 | kqlock(kqueue_t kqu) |
441 | { |
442 | lck_spin_lock(&kqu.kq->kq_lock); |
443 | } |
444 | |
445 | static inline void |
446 | kqlock_held(__assert_only kqueue_t kqu) |
447 | { |
448 | LCK_SPIN_ASSERT(&kqu.kq->kq_lock, LCK_ASSERT_OWNED); |
449 | } |
450 | |
451 | static inline void |
452 | kqunlock(kqueue_t kqu) |
453 | { |
454 | lck_spin_unlock(&kqu.kq->kq_lock); |
455 | } |
456 | |
457 | static inline void |
458 | kq_req_lock(kqueue_t kqu) |
459 | { |
460 | assert(kqu.kq->kq_state & (KQ_WORKLOOP | KQ_WORKQ)); |
461 | lck_spin_lock(&kqu.kq->kq_reqlock); |
462 | } |
463 | |
464 | static inline void |
465 | kq_req_unlock(kqueue_t kqu) |
466 | { |
467 | assert(kqu.kq->kq_state & (KQ_WORKLOOP | KQ_WORKQ)); |
468 | lck_spin_unlock(&kqu.kq->kq_reqlock); |
469 | } |
470 | |
471 | static inline void |
472 | kq_req_held(__assert_only kqueue_t kqu) |
473 | { |
474 | assert(kqu.kq->kq_state & (KQ_WORKLOOP | KQ_WORKQ)); |
475 | LCK_SPIN_ASSERT(&kqu.kq->kq_reqlock, LCK_ASSERT_OWNED); |
476 | } |
477 | |
478 | static inline void |
479 | knhash_lock(proc_t p) |
480 | { |
481 | lck_mtx_lock(&p->p_fd->fd_knhashlock); |
482 | } |
483 | |
484 | static inline void |
485 | knhash_unlock(proc_t p) |
486 | { |
487 | lck_mtx_unlock(&p->p_fd->fd_knhashlock); |
488 | } |
489 | |
490 | #pragma mark knote locks |
491 | |
492 | /* |
493 | * Enum used by the knote_lock_* functions. |
494 | * |
495 | * KNOTE_KQ_LOCK_ALWAYS |
496 | * The function will always return with the kq lock held. |
497 | * |
 * KNOTE_KQ_LOCK_ON_SUCCESS
 * The function will return with the kq lock held if it was successful
 * (knote_lock() is the only function that can fail).
 *
 * KNOTE_KQ_LOCK_ON_FAILURE
 * The function will return with the kq lock held if it was unsuccessful
 * (knote_lock() is the only function that can fail).
 *
 * KNOTE_KQ_UNLOCK
 * The function returns with the kq unlocked.
508 | */ |
509 | #define KNOTE_KQ_LOCK_ALWAYS 0x0 |
510 | #define KNOTE_KQ_LOCK_ON_SUCCESS 0x1 |
511 | #define KNOTE_KQ_LOCK_ON_FAILURE 0x2 |
512 | #define KNOTE_KQ_UNLOCK 0x3 |
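
/*
 * Typical calling pattern (a sketch mirroring how this file uses these
 * primitives; kn and kq are assumed to be an attached knote and its kqueue):
 *
 *     struct knote_lock_ctx knlc;
 *
 *     kqlock(kq);
 *     if (!knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ON_SUCCESS)) {
 *         // the knote was dropped concurrently; with LOCK_ON_SUCCESS the
 *         // kq lock is NOT held on this (failure) path
 *         return;
 *     }
 *     // kq lock and knote lock both held: serialized against other
 *     // f_touch/f_process callers
 *     ...
 *     knote_unlock(kq, kn, &knlc, KNOTE_KQ_UNLOCK);   // returns kq unlocked
 */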
513 | |
514 | #if DEBUG || DEVELOPMENT |
515 | __attribute__((noinline, not_tail_called, disable_tail_calls)) |
516 | void knote_lock_ctx_chk(struct knote_lock_ctx *knlc) |
517 | { |
518 | /* evil hackery to make sure no one forgets to unlock */ |
519 | assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED); |
520 | } |
521 | #endif |
522 | |
523 | static struct knote_lock_ctx * |
524 | knote_lock_ctx_find(struct kqueue *kq, struct knote *kn) |
525 | { |
526 | struct knote_lock_ctx *ctx; |
527 | LIST_FOREACH(ctx, &kq->kq_knlocks, knlc_le) { |
528 | if (ctx->knlc_knote == kn) return ctx; |
529 | } |
	panic("knote lock context not found: %p", kn);
531 | __builtin_trap(); |
532 | } |
533 | |
534 | /* slowpath of knote_lock() */ |
535 | __attribute__((noinline)) |
536 | static bool __result_use_check |
537 | knote_lock_slow(struct kqueue *kq, struct knote *kn, |
538 | struct knote_lock_ctx *knlc, int kqlocking) |
539 | { |
540 | kqlock_held(kq); |
541 | |
542 | struct knote_lock_ctx *owner_lc = knote_lock_ctx_find(kq, kn); |
543 | thread_t owner_thread = owner_lc->knlc_thread; |
544 | |
545 | #if DEBUG || DEVELOPMENT |
546 | knlc->knlc_state = KNOTE_LOCK_CTX_WAITING; |
547 | #endif |
548 | |
549 | thread_reference(owner_thread); |
550 | TAILQ_INSERT_TAIL(&owner_lc->knlc_head, knlc, knlc_tqe); |
551 | assert_wait(&kn->kn_status, THREAD_UNINT | THREAD_WAIT_NOREPORT); |
552 | kqunlock(kq); |
553 | |
554 | if (thread_handoff_deallocate(owner_thread) == THREAD_RESTART) { |
555 | if (kqlocking == KNOTE_KQ_LOCK_ALWAYS || |
556 | kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) { |
557 | kqlock(kq); |
558 | } |
559 | #if DEBUG || DEVELOPMENT |
560 | assert(knlc->knlc_state == KNOTE_LOCK_CTX_WAITING); |
561 | knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED; |
562 | #endif |
563 | return false; |
564 | } |
565 | #if DEBUG || DEVELOPMENT |
566 | assert(knlc->knlc_state == KNOTE_LOCK_CTX_LOCKED); |
567 | #endif |
568 | if (kqlocking == KNOTE_KQ_LOCK_ALWAYS || |
569 | kqlocking == KNOTE_KQ_LOCK_ON_SUCCESS) { |
570 | kqlock(kq); |
571 | } |
572 | return true; |
573 | } |
574 | |
575 | /* |
576 | * Attempts to take the "knote" lock. |
577 | * |
578 | * Called with the kqueue lock held. |
579 | * |
580 | * Returns true if the knote lock is acquired, false if it has been dropped |
581 | */ |
582 | static bool __result_use_check |
583 | knote_lock(struct kqueue *kq, struct knote *kn, struct knote_lock_ctx *knlc, |
584 | int kqlocking) |
585 | { |
586 | kqlock_held(kq); |
587 | |
588 | #if DEBUG || DEVELOPMENT |
589 | assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED); |
590 | #endif |
591 | knlc->knlc_knote = kn; |
592 | knlc->knlc_thread = current_thread(); |
593 | TAILQ_INIT(&knlc->knlc_head); |
594 | |
595 | if (__improbable(kn->kn_status & KN_LOCKED)) { |
596 | return knote_lock_slow(kq, kn, knlc, kqlocking); |
597 | } |
598 | |
599 | /* |
	 * When a knote is about to be dropped, the knote lock is taken before
601 | * KN_DROPPING is set, and then the knote will be removed from any |
602 | * hash table that references it before the lock is canceled. |
603 | */ |
604 | assert((kn->kn_status & KN_DROPPING) == 0); |
605 | LIST_INSERT_HEAD(&kq->kq_knlocks, knlc, knlc_le); |
606 | kn->kn_status |= KN_LOCKED; |
607 | #if DEBUG || DEVELOPMENT |
608 | knlc->knlc_state = KNOTE_LOCK_CTX_LOCKED; |
609 | #endif |
610 | |
611 | if (kqlocking == KNOTE_KQ_UNLOCK || |
612 | kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) { |
613 | kqunlock(kq); |
614 | } |
615 | return true; |
616 | } |
617 | |
618 | /* |
619 | * Unlocks a knote successfully locked with knote_lock(). |
620 | * |
621 | * Called with the kqueue lock held. |
622 | * |
623 | * Returns with the kqueue lock held according to KNOTE_KQ_* flags |
624 | */ |
625 | static void |
626 | knote_unlock(struct kqueue *kq, struct knote *kn, |
627 | struct knote_lock_ctx *knlc, int flags) |
628 | { |
629 | kqlock_held(kq); |
630 | |
631 | assert(knlc->knlc_knote == kn); |
632 | assert(kn->kn_status & KN_LOCKED); |
633 | #if DEBUG || DEVELOPMENT |
634 | assert(knlc->knlc_state == KNOTE_LOCK_CTX_LOCKED); |
635 | #endif |
636 | |
637 | struct knote_lock_ctx *next_owner_lc = TAILQ_FIRST(&knlc->knlc_head); |
638 | |
639 | LIST_REMOVE(knlc, knlc_le); |
640 | |
641 | if (next_owner_lc) { |
642 | assert(next_owner_lc->knlc_knote == kn); |
643 | TAILQ_REMOVE(&knlc->knlc_head, next_owner_lc, knlc_tqe); |
644 | |
645 | assert(TAILQ_EMPTY(&next_owner_lc->knlc_head)); |
646 | TAILQ_CONCAT(&next_owner_lc->knlc_head, &knlc->knlc_head, knlc_tqe); |
647 | LIST_INSERT_HEAD(&kq->kq_knlocks, next_owner_lc, knlc_le); |
648 | #if DEBUG || DEVELOPMENT |
649 | next_owner_lc->knlc_state = KNOTE_LOCK_CTX_LOCKED; |
650 | #endif |
651 | } else { |
652 | kn->kn_status &= ~KN_LOCKED; |
653 | } |
654 | if (kn->kn_inuse == 0) { |
655 | /* |
656 | * No f_event() in flight anymore, we can leave QoS "Merge" mode |
657 | * |
658 | * See knote_should_apply_qos_override() |
659 | */ |
660 | kn->kn_status &= ~KN_MERGE_QOS; |
661 | } |
662 | if (flags & KNOTE_KQ_UNLOCK) { |
663 | kqunlock(kq); |
664 | } |
665 | if (next_owner_lc) { |
666 | thread_wakeup_thread(&kn->kn_status, next_owner_lc->knlc_thread); |
667 | } |
668 | #if DEBUG || DEVELOPMENT |
669 | knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED; |
670 | #endif |
671 | } |
672 | |
673 | /* |
 * Aborts all waiters for a knote lock, and unlocks the knote.
675 | * |
676 | * Called with the kqueue lock held. |
677 | * |
678 | * Returns with the kqueue lock held according to KNOTE_KQ_* flags |
679 | */ |
680 | static void |
681 | knote_unlock_cancel(struct kqueue *kq, struct knote *kn, |
682 | struct knote_lock_ctx *knlc, int kqlocking) |
683 | { |
684 | kqlock_held(kq); |
685 | |
686 | assert(knlc->knlc_knote == kn); |
687 | assert(kn->kn_status & KN_LOCKED); |
688 | assert(kn->kn_status & KN_DROPPING); |
689 | |
690 | LIST_REMOVE(knlc, knlc_le); |
691 | kn->kn_status &= ~KN_LOCKED; |
692 | |
693 | if (kqlocking == KNOTE_KQ_UNLOCK || |
694 | kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) { |
695 | kqunlock(kq); |
696 | } |
697 | if (!TAILQ_EMPTY(&knlc->knlc_head)) { |
698 | thread_wakeup_with_result(&kn->kn_status, THREAD_RESTART); |
699 | } |
700 | #if DEBUG || DEVELOPMENT |
701 | knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED; |
702 | #endif |
703 | } |
704 | |
705 | /* |
706 | * Call the f_event hook of a given filter. |
707 | * |
708 | * Takes a use count to protect against concurrent drops. |
709 | */ |
710 | static void |
711 | knote_call_filter_event(struct kqueue *kq, struct knote *kn, long hint) |
712 | { |
713 | int result, dropping = 0; |
714 | |
715 | kqlock_held(kq); |
716 | |
717 | if (kn->kn_status & (KN_DROPPING | KN_VANISHED)) |
718 | return; |
719 | |
720 | kn->kn_inuse++; |
721 | kqunlock(kq); |
722 | result = filter_call(knote_fops(kn), f_event(kn, hint)); |
723 | kqlock(kq); |
724 | |
725 | dropping = (kn->kn_status & KN_DROPPING); |
726 | |
727 | if (!dropping && (result & FILTER_ACTIVE)) { |
728 | if (result & FILTER_ADJUST_EVENT_QOS_BIT) |
729 | knote_adjust_qos(kq, kn, result); |
730 | knote_activate(kn); |
731 | } |
732 | |
733 | if (--kn->kn_inuse == 0) { |
734 | if ((kn->kn_status & KN_LOCKED) == 0) { |
735 | /* |
736 | * We're the last f_event() call and there's no other f_* call in |
737 | * flight, we can leave QoS "Merge" mode. |
738 | * |
739 | * See knote_should_apply_qos_override() |
740 | */ |
741 | kn->kn_status &= ~KN_MERGE_QOS; |
742 | } |
743 | if (dropping) { |
744 | waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, |
745 | CAST_EVENT64_T(&kn->kn_inuse), |
746 | THREAD_AWAKENED, WAITQ_ALL_PRIORITIES); |
747 | } |
748 | } |
749 | } |
750 | |
751 | /* |
752 | * Called by knote_drop() to wait for the last f_event() caller to be done. |
753 | * |
754 | * - kq locked at entry |
755 | * - kq unlocked at exit |
756 | */ |
757 | static void |
758 | knote_wait_for_filter_events(struct kqueue *kq, struct knote *kn) |
759 | { |
760 | wait_result_t wr = THREAD_NOT_WAITING; |
761 | |
762 | kqlock_held(kq); |
763 | |
764 | assert(kn->kn_status & KN_DROPPING); |
765 | |
766 | if (kn->kn_inuse) { |
767 | wr = waitq_assert_wait64((struct waitq *)&kq->kq_wqs, |
768 | CAST_EVENT64_T(&kn->kn_inuse), |
769 | THREAD_UNINT | THREAD_WAIT_NOREPORT, TIMEOUT_WAIT_FOREVER); |
770 | } |
771 | kqunlock(kq); |
772 | if (wr == THREAD_WAITING) { |
773 | thread_block(THREAD_CONTINUE_NULL); |
774 | } |
775 | } |
776 | |
777 | #pragma mark file_filtops |
778 | |
779 | static int |
780 | filt_fileattach(struct knote *kn, struct kevent_internal_s *kev) |
781 | { |
782 | return fo_kqfilter(kn->kn_fp, kn, kev, vfs_context_current()); |
783 | } |
784 | |
785 | SECURITY_READ_ONLY_EARLY(static struct filterops) file_filtops = { |
786 | .f_isfd = 1, |
787 | .f_attach = filt_fileattach, |
788 | }; |
789 | |
790 | #pragma mark kqread_filtops |
791 | |
792 | #define f_flag f_fglob->fg_flag |
793 | #define f_ops f_fglob->fg_ops |
794 | #define f_data f_fglob->fg_data |
795 | |
796 | static void |
797 | filt_kqdetach(struct knote *kn) |
798 | { |
799 | struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data; |
800 | struct kqueue *kq = &kqf->kqf_kqueue; |
801 | |
802 | kqlock(kq); |
803 | KNOTE_DETACH(&kqf->kqf_sel.si_note, kn); |
804 | kqunlock(kq); |
805 | } |
806 | |
807 | static int |
808 | filt_kqueue(struct knote *kn, __unused long hint) |
809 | { |
810 | struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; |
811 | |
812 | return (kq->kq_count > 0); |
813 | } |
814 | |
815 | static int |
816 | filt_kqtouch(struct knote *kn, struct kevent_internal_s *kev) |
817 | { |
818 | #pragma unused(kev) |
819 | struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; |
820 | int res; |
821 | |
822 | kqlock(kq); |
823 | kn->kn_data = kq->kq_count; |
824 | res = (kn->kn_data > 0); |
825 | |
826 | kqunlock(kq); |
827 | |
828 | return res; |
829 | } |
830 | |
831 | static int |
832 | filt_kqprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev) |
833 | { |
834 | #pragma unused(data) |
835 | struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; |
836 | int res; |
837 | |
838 | kqlock(kq); |
839 | kn->kn_data = kq->kq_count; |
840 | res = (kn->kn_data > 0); |
841 | if (res) { |
842 | *kev = kn->kn_kevent; |
843 | if (kn->kn_flags & EV_CLEAR) |
844 | kn->kn_data = 0; |
845 | } |
846 | kqunlock(kq); |
847 | |
848 | return res; |
849 | } |
850 | |
851 | SECURITY_READ_ONLY_EARLY(static struct filterops) kqread_filtops = { |
852 | .f_isfd = 1, |
853 | .f_detach = filt_kqdetach, |
854 | .f_event = filt_kqueue, |
855 | .f_touch = filt_kqtouch, |
856 | .f_process = filt_kqprocess, |
857 | }; |
858 | |
859 | #pragma mark proc_filtops |
860 | |
861 | static int |
862 | filt_procattach(struct knote *kn, __unused struct kevent_internal_s *kev) |
863 | { |
864 | struct proc *p; |
865 | |
866 | assert(PID_MAX < NOTE_PDATAMASK); |
867 | |
868 | if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) { |
869 | knote_set_error(kn, ENOTSUP); |
870 | return 0; |
871 | } |
872 | |
873 | p = proc_find(kn->kn_id); |
874 | if (p == NULL) { |
875 | knote_set_error(kn, ESRCH); |
876 | return 0; |
877 | } |
878 | |
879 | const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS; |
880 | |
881 | if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits) |
882 | do { |
883 | pid_t selfpid = proc_selfpid(); |
884 | |
885 | if (p->p_ppid == selfpid) |
886 | break; /* parent => ok */ |
887 | |
888 | if ((p->p_lflag & P_LTRACED) != 0 && |
889 | (p->p_oppid == selfpid)) |
890 | break; /* parent-in-waiting => ok */ |
891 | |
892 | proc_rele(p); |
893 | knote_set_error(kn, EACCES); |
894 | return 0; |
895 | } while (0); |
896 | |
897 | proc_klist_lock(); |
898 | |
899 | kn->kn_ptr.p_proc = p; /* store the proc handle */ |
900 | |
901 | KNOTE_ATTACH(&p->p_klist, kn); |
902 | |
903 | proc_klist_unlock(); |
904 | |
905 | proc_rele(p); |
906 | |
907 | /* |
908 | * only captures edge-triggered events after this point |
909 | * so it can't already be fired. |
910 | */ |
911 | return (0); |
912 | } |
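
/*
 * Illustrative userspace registration for this filter (not part of this
 * file): watching a child for exit, including its exit status, which the
 * permission check above only allows for the (possibly tracing) parent:
 *
 *     #include <sys/event.h>
 *
 *     int kq = kqueue();
 *     struct kevent kev;
 *     EV_SET(&kev, child_pid, EVFILT_PROC, EV_ADD,
 *         NOTE_EXIT | NOTE_EXITSTATUS, 0, NULL);
 *     kevent(kq, &kev, 1, NULL, 0, NULL);    // register, don't wait
 */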
913 | |
914 | |
915 | /* |
916 | * The knote may be attached to a different process, which may exit, |
917 | * leaving nothing for the knote to be attached to. In that case, |
918 | * the pointer to the process will have already been nulled out. |
919 | */ |
920 | static void |
921 | filt_procdetach(struct knote *kn) |
922 | { |
923 | struct proc *p; |
924 | |
925 | proc_klist_lock(); |
926 | |
927 | p = kn->kn_ptr.p_proc; |
928 | if (p != PROC_NULL) { |
929 | kn->kn_ptr.p_proc = PROC_NULL; |
930 | KNOTE_DETACH(&p->p_klist, kn); |
931 | } |
932 | |
933 | proc_klist_unlock(); |
934 | } |
935 | |
936 | static int |
937 | filt_proc(struct knote *kn, long hint) |
938 | { |
939 | u_int event; |
940 | |
941 | /* ALWAYS CALLED WITH proc_klist_lock */ |
942 | |
943 | /* |
	 * Note: a lot of bits in hint may be obtained from the knote.
945 | * To free some of those bits, see <rdar://problem/12592988> Freeing up |
946 | * bits in hint for filt_proc |
947 | * |
948 | * mask off extra data |
949 | */ |
950 | event = (u_int)hint & NOTE_PCTRLMASK; |
951 | |
952 | /* |
953 | * termination lifecycle events can happen while a debugger |
954 | * has reparented a process, in which case notifications |
955 | * should be quashed except to the tracing parent. When |
956 | * the debugger reaps the child (either via wait4(2) or |
957 | * process exit), the child will be reparented to the original |
958 | * parent and these knotes re-fired. |
959 | */ |
960 | if (event & NOTE_EXIT) { |
961 | if ((kn->kn_ptr.p_proc->p_oppid != 0) |
962 | && (knote_get_kq(kn)->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) { |
963 | /* |
964 | * This knote is not for the current ptrace(2) parent, ignore. |
965 | */ |
966 | return 0; |
967 | } |
968 | } |
969 | |
970 | /* |
971 | * if the user is interested in this event, record it. |
972 | */ |
973 | if (kn->kn_sfflags & event) |
974 | kn->kn_fflags |= event; |
975 | |
976 | #pragma clang diagnostic push |
977 | #pragma clang diagnostic ignored "-Wdeprecated-declarations" |
978 | if ((event == NOTE_REAP) || ((event == NOTE_EXIT) && !(kn->kn_sfflags & NOTE_REAP))) { |
979 | kn->kn_flags |= (EV_EOF | EV_ONESHOT); |
980 | } |
981 | #pragma clang diagnostic pop |
982 | |
983 | |
984 | /* |
985 | * The kernel has a wrapper in place that returns the same data |
986 | * as is collected here, in kn_data. Any changes to how |
987 | * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected |
988 | * should also be reflected in the proc_pidnoteexit() wrapper. |
989 | */ |
990 | if (event == NOTE_EXIT) { |
991 | kn->kn_data = 0; |
992 | if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) { |
993 | kn->kn_fflags |= NOTE_EXITSTATUS; |
994 | kn->kn_data |= (hint & NOTE_PDATAMASK); |
995 | } |
996 | if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) { |
997 | kn->kn_fflags |= NOTE_EXIT_DETAIL; |
998 | if ((kn->kn_ptr.p_proc->p_lflag & |
999 | P_LTERM_DECRYPTFAIL) != 0) { |
1000 | kn->kn_data |= NOTE_EXIT_DECRYPTFAIL; |
1001 | } |
1002 | if ((kn->kn_ptr.p_proc->p_lflag & |
1003 | P_LTERM_JETSAM) != 0) { |
1004 | kn->kn_data |= NOTE_EXIT_MEMORY; |
1005 | switch (kn->kn_ptr.p_proc->p_lflag & P_JETSAM_MASK) { |
1006 | case P_JETSAM_VMPAGESHORTAGE: |
1007 | kn->kn_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE; |
1008 | break; |
1009 | case P_JETSAM_VMTHRASHING: |
1010 | kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING; |
1011 | break; |
1012 | case P_JETSAM_FCTHRASHING: |
1013 | kn->kn_data |= NOTE_EXIT_MEMORY_FCTHRASHING; |
1014 | break; |
1015 | case P_JETSAM_VNODE: |
1016 | kn->kn_data |= NOTE_EXIT_MEMORY_VNODE; |
1017 | break; |
1018 | case P_JETSAM_HIWAT: |
1019 | kn->kn_data |= NOTE_EXIT_MEMORY_HIWAT; |
1020 | break; |
1021 | case P_JETSAM_PID: |
1022 | kn->kn_data |= NOTE_EXIT_MEMORY_PID; |
1023 | break; |
1024 | case P_JETSAM_IDLEEXIT: |
1025 | kn->kn_data |= NOTE_EXIT_MEMORY_IDLE; |
1026 | break; |
1027 | } |
1028 | } |
1029 | if ((kn->kn_ptr.p_proc->p_csflags & |
1030 | CS_KILLED) != 0) { |
1031 | kn->kn_data |= NOTE_EXIT_CSERROR; |
1032 | } |
1033 | } |
1034 | } |
1035 | |
1036 | /* if we have any matching state, activate the knote */ |
1037 | return (kn->kn_fflags != 0); |
1038 | } |
1039 | |
1040 | static int |
1041 | filt_proctouch(struct knote *kn, struct kevent_internal_s *kev) |
1042 | { |
1043 | int res; |
1044 | |
1045 | proc_klist_lock(); |
1046 | |
	/* accept new filter flags and mask off output events no longer interesting */
1048 | kn->kn_sfflags = kev->fflags; |
1049 | |
1050 | /* restrict the current results to the (smaller?) set of new interest */ |
1051 | /* |
1052 | * For compatibility with previous implementations, we leave kn_fflags |
1053 | * as they were before. |
1054 | */ |
1055 | //kn->kn_fflags &= kn->kn_sfflags; |
1056 | |
1057 | res = (kn->kn_fflags != 0); |
1058 | |
1059 | proc_klist_unlock(); |
1060 | |
1061 | return res; |
1062 | } |
1063 | |
1064 | static int |
1065 | filt_procprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev) |
1066 | { |
1067 | #pragma unused(data) |
1068 | int res; |
1069 | |
1070 | proc_klist_lock(); |
1071 | res = (kn->kn_fflags != 0); |
1072 | if (res) { |
1073 | *kev = kn->kn_kevent; |
1074 | kn->kn_flags |= EV_CLEAR; /* automatically set */ |
1075 | kn->kn_fflags = 0; |
1076 | kn->kn_data = 0; |
1077 | } |
1078 | proc_klist_unlock(); |
1079 | return res; |
1080 | } |
1081 | |
1082 | SECURITY_READ_ONLY_EARLY(static struct filterops) proc_filtops = { |
1083 | .f_attach = filt_procattach, |
1084 | .f_detach = filt_procdetach, |
1085 | .f_event = filt_proc, |
1086 | .f_touch = filt_proctouch, |
1087 | .f_process = filt_procprocess, |
1088 | }; |
1089 | |
1090 | #pragma mark timer_filtops |
1091 | |
1092 | struct filt_timer_params { |
1093 | uint64_t deadline; /* deadline in abs/cont time |
1094 | (or 0 if NOTE_ABSOLUTE and deadline is in past) */ |
1095 | uint64_t leeway; /* leeway in abstime, or 0 if none */ |
1096 | uint64_t interval; /* interval in abstime or 0 if non-repeating timer */ |
1097 | }; |
1098 | |
1099 | /* |
1100 | * Values stored in the knote at rest (using Mach absolute time units) |
1101 | * |
1102 | * kn->kn_hook where the thread_call object is stored |
1103 | * kn->kn_ext[0] next deadline or 0 if immediate expiration |
1104 | * kn->kn_ext[1] leeway value |
1105 | * kn->kn_sdata interval timer: the interval |
1106 | * absolute/deadline timer: 0 |
1107 | * kn->kn_hookid timer state |
1108 | * |
1109 | * TIMER_IDLE: |
1110 | * The timer has either never been scheduled or been cancelled. |
1111 | * It is safe to schedule a new one in this state. |
1112 | * |
 * TIMER_ARMED:
 * The timer has been scheduled.
 *
 * TIMER_FIRED:
 * The timer has fired and an event needs to be delivered.
 * When in this state, the callout may still be running.
 *
 * TIMER_IMMEDIATE:
 * The timer has fired at registration time, and the callout was never
 * dispatched.
1123 | */ |
1124 | #define TIMER_IDLE 0x0 |
1125 | #define TIMER_ARMED 0x1 |
1126 | #define TIMER_FIRED 0x2 |
1127 | #define TIMER_IMMEDIATE 0x3 |
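
/*
 * Allowed transitions between these states, as implemented by the functions
 * below:
 *
 *     TIMER_IDLE      -> TIMER_ARMED        filt_timerarm()
 *     TIMER_ARMED     -> TIMER_FIRED        filt_timerexpire()
 *     TIMER_IDLE      -> TIMER_IMMEDIATE    filt_timerattach()/filt_timertouch()
 *                                           when the deadline has already passed
 *     TIMER_FIRED     -> TIMER_IDLE         filt_timerprocess()
 *     TIMER_IMMEDIATE -> TIMER_IDLE         filt_timerprocess()
 *     any state       -> TIMER_IDLE         filt_timercancel()
 */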
1128 | |
1129 | static void |
1130 | filt_timer_set_params(struct knote *kn, struct filt_timer_params *params) |
1131 | { |
1132 | kn->kn_ext[0] = params->deadline; |
1133 | kn->kn_ext[1] = params->leeway; |
1134 | kn->kn_sdata = params->interval; |
1135 | } |
1136 | |
1137 | /* |
1138 | * filt_timervalidate - process data from user |
1139 | * |
1140 | * Sets up the deadline, interval, and leeway from the provided user data |
1141 | * |
1142 | * Input: |
1143 | * kn_sdata timer deadline or interval time |
1144 | * kn_sfflags style of timer, unit of measurement |
1145 | * |
1146 | * Output: |
 * struct filt_timer_params to apply to the filter with
 * filt_timer_set_params when changes are ready to be committed.
1149 | * |
1150 | * Returns: |
1151 | * EINVAL Invalid user data parameters |
1152 | * ERANGE Various overflows with the parameters |
1153 | * |
1154 | * Called with timer filter lock held. |
1155 | */ |
1156 | static int |
1157 | filt_timervalidate(const struct kevent_internal_s *kev, |
1158 | struct filt_timer_params *params) |
1159 | { |
1160 | /* |
1161 | * There are 5 knobs that need to be chosen for a timer registration: |
1162 | * |
1163 | * A) Units of time (what is the time duration of the specified number) |
1164 | * Absolute and interval take: |
1165 | * NOTE_SECONDS, NOTE_USECONDS, NOTE_NSECONDS, NOTE_MACHTIME |
1166 | * Defaults to milliseconds if not specified |
1167 | * |
1168 | * B) Clock epoch (what is the zero point of the specified number) |
1169 | * For interval, there is none |
1170 | * For absolute, defaults to the gettimeofday/calendar epoch |
1171 | * With NOTE_MACHTIME, uses mach_absolute_time() |
1172 | * With NOTE_MACHTIME and NOTE_MACH_CONTINUOUS_TIME, uses mach_continuous_time() |
1173 | * |
1174 | * C) The knote's behavior on delivery |
1175 | * Interval timer causes the knote to arm for the next interval unless one-shot is set |
1176 | * Absolute is a forced one-shot timer which deletes on delivery |
1177 | * TODO: Add a way for absolute to be not forced one-shot |
1178 | * |
1179 | * D) Whether the time duration is relative to now or absolute |
1180 | * Interval fires at now + duration when it is set up |
1181 | * Absolute fires at now + difference between now walltime and passed in walltime |
1182 | * With NOTE_MACHTIME it fires at an absolute MAT or MCT. |
1183 | * |
1184 | * E) Whether the timer continues to tick across sleep |
1185 | * By default all three do not. |
1186 | * For interval and absolute, NOTE_MACH_CONTINUOUS_TIME causes them to tick across sleep |
1187 | * With NOTE_ABSOLUTE | NOTE_MACHTIME | NOTE_MACH_CONTINUOUS_TIME: |
1188 | * expires when mach_continuous_time() is > the passed in value. |
1189 | */ |
1190 | |
1191 | uint64_t multiplier; |
1192 | |
1193 | boolean_t use_abstime = FALSE; |
1194 | |
1195 | switch (kev->fflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS|NOTE_MACHTIME)) { |
1196 | case NOTE_SECONDS: |
1197 | multiplier = NSEC_PER_SEC; |
1198 | break; |
1199 | case NOTE_USECONDS: |
1200 | multiplier = NSEC_PER_USEC; |
1201 | break; |
1202 | case NOTE_NSECONDS: |
1203 | multiplier = 1; |
1204 | break; |
1205 | case NOTE_MACHTIME: |
1206 | multiplier = 0; |
1207 | use_abstime = TRUE; |
1208 | break; |
1209 | case 0: /* milliseconds (default) */ |
1210 | multiplier = NSEC_PER_SEC / 1000; |
1211 | break; |
1212 | default: |
1213 | return (EINVAL); |
1214 | } |
1215 | |
1216 | /* transform the leeway in kn_ext[1] to same time scale */ |
1217 | if (kev->fflags & NOTE_LEEWAY) { |
1218 | uint64_t leeway_abs; |
1219 | |
1220 | if (use_abstime) { |
1221 | leeway_abs = (uint64_t)kev->ext[1]; |
1222 | } else { |
1223 | uint64_t leeway_ns; |
1224 | if (os_mul_overflow((uint64_t)kev->ext[1], multiplier, &leeway_ns)) |
1225 | return (ERANGE); |
1226 | |
1227 | nanoseconds_to_absolutetime(leeway_ns, &leeway_abs); |
1228 | } |
1229 | |
1230 | params->leeway = leeway_abs; |
1231 | } else { |
1232 | params->leeway = 0; |
1233 | } |
1234 | |
1235 | if (kev->fflags & NOTE_ABSOLUTE) { |
1236 | uint64_t deadline_abs; |
1237 | |
1238 | if (use_abstime) { |
1239 | deadline_abs = (uint64_t)kev->data; |
1240 | } else { |
1241 | uint64_t calendar_deadline_ns; |
1242 | |
1243 | if (os_mul_overflow((uint64_t)kev->data, multiplier, &calendar_deadline_ns)) |
1244 | return (ERANGE); |
1245 | |
1246 | /* calendar_deadline_ns is in nanoseconds since the epoch */ |
1247 | |
1248 | clock_sec_t seconds; |
1249 | clock_nsec_t nanoseconds; |
1250 | |
1251 | /* |
1252 | * Note that the conversion through wall-time is only done once. |
1253 | * |
1254 | * If the relationship between MAT and gettimeofday changes, |
1255 | * the underlying timer does not update. |
1256 | * |
1257 | * TODO: build a wall-time denominated timer_call queue |
1258 | * and a flag to request DTRTing with wall-time timers |
1259 | */ |
1260 | clock_get_calendar_nanotime(&seconds, &nanoseconds); |
1261 | |
1262 | uint64_t calendar_now_ns = (uint64_t)seconds * NSEC_PER_SEC + nanoseconds; |
1263 | |
1264 | /* if deadline is in the future */ |
1265 | if (calendar_now_ns < calendar_deadline_ns) { |
1266 | uint64_t interval_ns = calendar_deadline_ns - calendar_now_ns; |
1267 | uint64_t interval_abs; |
1268 | |
1269 | nanoseconds_to_absolutetime(interval_ns, &interval_abs); |
1270 | |
1271 | /* |
1272 | * Note that the NOTE_MACH_CONTINUOUS_TIME flag here only |
1273 | * causes the timer to keep ticking across sleep, but |
1274 | * it does not change the calendar timebase. |
1275 | */ |
1276 | |
1277 | if (kev->fflags & NOTE_MACH_CONTINUOUS_TIME) |
1278 | clock_continuoustime_interval_to_deadline(interval_abs, |
1279 | &deadline_abs); |
1280 | else |
1281 | clock_absolutetime_interval_to_deadline(interval_abs, |
1282 | &deadline_abs); |
1283 | } else { |
1284 | deadline_abs = 0; /* cause immediate expiration */ |
1285 | } |
1286 | } |
1287 | |
1288 | params->deadline = deadline_abs; |
1289 | params->interval = 0; /* NOTE_ABSOLUTE is non-repeating */ |
1290 | } else if (kev->data < 0) { |
1291 | /* |
1292 | * Negative interval timers fire immediately, once. |
1293 | * |
1294 | * Ideally a negative interval would be an error, but certain clients |
		 * pass negative values by accident, and expect an event back.
1296 | * |
1297 | * In the old implementation the timer would repeat with no delay |
1298 | * N times until mach_absolute_time() + (N * interval) underflowed, |
1299 | * then it would wait ~forever by accidentally arming a timer for the far future. |
1300 | * |
1301 | * We now skip the power-wasting hot spin phase and go straight to the idle phase. |
1302 | */ |
1303 | |
1304 | params->deadline = 0; /* expire immediately */ |
1305 | params->interval = 0; /* non-repeating */ |
1306 | } else { |
1307 | uint64_t interval_abs = 0; |
1308 | |
1309 | if (use_abstime) { |
1310 | interval_abs = (uint64_t)kev->data; |
1311 | } else { |
1312 | uint64_t interval_ns; |
1313 | if (os_mul_overflow((uint64_t)kev->data, multiplier, &interval_ns)) |
1314 | return (ERANGE); |
1315 | |
1316 | nanoseconds_to_absolutetime(interval_ns, &interval_abs); |
1317 | } |
1318 | |
1319 | uint64_t deadline = 0; |
1320 | |
1321 | if (kev->fflags & NOTE_MACH_CONTINUOUS_TIME) |
1322 | clock_continuoustime_interval_to_deadline(interval_abs, &deadline); |
1323 | else |
1324 | clock_absolutetime_interval_to_deadline(interval_abs, &deadline); |
1325 | |
1326 | params->deadline = deadline; |
1327 | params->interval = interval_abs; |
1328 | } |
1329 | |
1330 | return (0); |
1331 | } |
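
/*
 * Illustrative userspace registrations exercising the knobs described above
 * (not part of this file):
 *
 *     // interval timer: fires every 5 seconds and rearms automatically
 *     EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, NOTE_SECONDS, 5, NULL);
 *
 *     // absolute one-shot: fires once mach_absolute_time() reaches 'deadline'
 *     EV_SET(&kev, 2, EVFILT_TIMER, EV_ADD,
 *         NOTE_ABSOLUTE | NOTE_MACHTIME, deadline, NULL);
 *
 * A leeway (NOTE_LEEWAY) is passed in ext[1], which requires the wider
 * kevent64()/kevent_qos() forms rather than plain EV_SET()/kevent().
 */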
1332 | |
1333 | /* |
1334 | * filt_timerexpire - the timer callout routine |
1335 | */ |
1336 | static void |
1337 | filt_timerexpire(void *knx, __unused void *spare) |
1338 | { |
1339 | struct knote *kn = knx; |
1340 | int v; |
1341 | |
1342 | if (os_atomic_cmpxchgv(&kn->kn_hookid, TIMER_ARMED, TIMER_FIRED, |
1343 | &v, relaxed)) { |
1344 | // our f_event always would say FILTER_ACTIVE, |
1345 | // so be leaner and just do it. |
1346 | struct kqueue *kq = knote_get_kq(kn); |
1347 | kqlock(kq); |
1348 | knote_activate(kn); |
1349 | kqunlock(kq); |
1350 | } else { |
1351 | /* |
		 * From TIMER_ARMED, the only allowed transitions are:
1353 | * - to TIMER_FIRED through the timer callout just above |
1354 | * - to TIMER_IDLE due to filt_timercancel() which will wait for the |
1355 | * timer callout (and any possible invocation of filt_timerexpire) to |
1356 | * have finished before the state is changed again. |
1357 | */ |
1358 | assert(v == TIMER_IDLE); |
1359 | } |
1360 | } |
1361 | |
1362 | static void |
1363 | filt_timercancel(struct knote *kn) |
1364 | { |
1365 | if (os_atomic_xchg(&kn->kn_hookid, TIMER_IDLE, relaxed) == TIMER_ARMED) { |
1366 | /* cancel the thread call and wait for any filt_timerexpire in flight */ |
1367 | thread_call_cancel_wait((thread_call_t)kn->kn_hook); |
1368 | } |
1369 | } |
1370 | |
1371 | /* |
 * Does this deadline need a timer armed for it, or has it expired?
1373 | */ |
1374 | static bool |
1375 | filt_timer_is_ready(struct knote *kn) |
1376 | { |
1377 | uint64_t now, deadline = kn->kn_ext[0]; |
1378 | |
1379 | if (deadline == 0) { |
1380 | return true; |
1381 | } |
1382 | |
1383 | if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME) { |
1384 | now = mach_continuous_time(); |
1385 | } else { |
1386 | now = mach_absolute_time(); |
1387 | } |
1388 | return deadline <= now; |
1389 | } |
1390 | |
1391 | /* |
1392 | * Arm a timer |
1393 | * |
1394 | * It is the responsibility of the caller to make sure the timer call |
1395 | * has completed or been cancelled properly prior to arming it. |
1396 | */ |
1397 | static void |
1398 | filt_timerarm(struct knote *kn) |
1399 | { |
1400 | uint64_t deadline = kn->kn_ext[0]; |
1401 | uint64_t leeway = kn->kn_ext[1]; |
1402 | |
1403 | int filter_flags = kn->kn_sfflags; |
1404 | unsigned int timer_flags = 0; |
1405 | |
1406 | assert(os_atomic_load(&kn->kn_hookid, relaxed) == TIMER_IDLE); |
1407 | |
1408 | if (filter_flags & NOTE_CRITICAL) |
1409 | timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL; |
1410 | else if (filter_flags & NOTE_BACKGROUND) |
1411 | timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND; |
1412 | else |
1413 | timer_flags |= THREAD_CALL_DELAY_USER_NORMAL; |
1414 | |
1415 | if (filter_flags & NOTE_LEEWAY) |
1416 | timer_flags |= THREAD_CALL_DELAY_LEEWAY; |
1417 | |
1418 | if (filter_flags & NOTE_MACH_CONTINUOUS_TIME) |
1419 | timer_flags |= THREAD_CALL_CONTINUOUS; |
1420 | |
1421 | os_atomic_store(&kn->kn_hookid, TIMER_ARMED, relaxed); |
1422 | thread_call_enter_delayed_with_leeway((thread_call_t)kn->kn_hook, NULL, |
1423 | deadline, leeway, timer_flags); |
1424 | } |
1425 | |
1426 | /* |
1427 | * Allocate a thread call for the knote's lifetime, and kick off the timer. |
1428 | */ |
1429 | static int |
1430 | filt_timerattach(struct knote *kn, struct kevent_internal_s *kev) |
1431 | { |
1432 | thread_call_t callout; |
1433 | struct filt_timer_params params; |
1434 | int error; |
1435 | |
1436 | if ((error = filt_timervalidate(kev, ¶ms)) != 0) { |
1437 | knote_set_error(kn, error); |
1438 | return 0; |
1439 | } |
1440 | |
1441 | callout = thread_call_allocate_with_options(filt_timerexpire, |
1442 | (thread_call_param_t)kn, THREAD_CALL_PRIORITY_HIGH, |
1443 | THREAD_CALL_OPTIONS_ONCE); |
1444 | |
1445 | if (NULL == callout) { |
1446 | knote_set_error(kn, ENOMEM); |
1447 | return 0; |
1448 | } |
1449 | |
1450 | filt_timer_set_params(kn, ¶ms); |
1451 | kn->kn_hook = callout; |
1452 | kn->kn_flags |= EV_CLEAR; |
1453 | os_atomic_store(&kn->kn_hookid, TIMER_IDLE, relaxed); |
1454 | |
1455 | /* NOTE_ABSOLUTE implies EV_ONESHOT */ |
1456 | if (kn->kn_sfflags & NOTE_ABSOLUTE) |
1457 | kn->kn_flags |= EV_ONESHOT; |
1458 | |
1459 | if (filt_timer_is_ready(kn)) { |
1460 | os_atomic_store(&kn->kn_hookid, TIMER_IMMEDIATE, relaxed); |
1461 | return FILTER_ACTIVE; |
1462 | } else { |
1463 | filt_timerarm(kn); |
1464 | return 0; |
1465 | } |
1466 | } |
1467 | |
1468 | /* |
1469 | * Shut down the timer if it's running, and free the callout. |
1470 | */ |
1471 | static void |
1472 | filt_timerdetach(struct knote *kn) |
1473 | { |
1474 | __assert_only boolean_t freed; |
1475 | |
1476 | /* |
1477 | * Unconditionally cancel to make sure there can't be any filt_timerexpire() |
1478 | * running anymore. |
1479 | */ |
1480 | thread_call_cancel_wait((thread_call_t)kn->kn_hook); |
1481 | freed = thread_call_free((thread_call_t)kn->kn_hook); |
1482 | assert(freed); |
1483 | } |
1484 | |
1485 | /* |
1486 | * filt_timertouch - update timer knote with new user input |
1487 | * |
1488 | * Cancel and restart the timer based on new user data. When |
1489 | * the user picks up a knote, clear the count of how many timer |
1490 | * pops have gone off (in kn_data). |
1491 | */ |
1492 | static int |
1493 | filt_timertouch(struct knote *kn, struct kevent_internal_s *kev) |
1494 | { |
1495 | struct filt_timer_params params; |
1496 | uint32_t changed_flags = (kn->kn_sfflags ^ kev->fflags); |
1497 | int error; |
1498 | |
1499 | if (changed_flags & NOTE_ABSOLUTE) { |
1500 | kev->flags |= EV_ERROR; |
1501 | kev->data = EINVAL; |
1502 | return 0; |
1503 | } |
1504 | |
1505 | if ((error = filt_timervalidate(kev, ¶ms)) != 0) { |
1506 | kev->flags |= EV_ERROR; |
1507 | kev->data = error; |
1508 | return 0; |
1509 | } |
1510 | |
1511 | /* capture the new values used to compute deadline */ |
1512 | filt_timercancel(kn); |
1513 | filt_timer_set_params(kn, ¶ms); |
1514 | kn->kn_sfflags = kev->fflags; |
1515 | |
1516 | if (filt_timer_is_ready(kn)) { |
1517 | os_atomic_store(&kn->kn_hookid, TIMER_IMMEDIATE, relaxed); |
1518 | return FILTER_ACTIVE | FILTER_UPDATE_REQ_QOS; |
1519 | } else { |
1520 | filt_timerarm(kn); |
1521 | return FILTER_UPDATE_REQ_QOS; |
1522 | } |
1523 | } |
1524 | |
1525 | /* |
1526 | * filt_timerprocess - query state of knote and snapshot event data |
1527 | * |
1528 | * Determine if the timer has fired in the past, snapshot the state |
1529 | * of the kevent for returning to user-space, and clear pending event |
1530 | * counters for the next time. |
1531 | */ |
1532 | static int |
1533 | filt_timerprocess( |
1534 | struct knote *kn, |
1535 | __unused struct filt_process_s *data, |
1536 | struct kevent_internal_s *kev) |
1537 | { |
1538 | /* |
1539 | * filt_timerprocess is serialized with any filter routine except for |
1540 | * filt_timerexpire which atomically does a TIMER_ARMED -> TIMER_FIRED |
1541 | * transition, and on success, activates the knote. |
1542 | * |
1543 | * Hence, we don't need atomic modifications of the state, only to peek at |
1544 | * whether we see any of the "FIRED" state, and if we do, it is safe to |
1545 | * do simple state machine transitions. |
1546 | */ |
1547 | switch (os_atomic_load(&kn->kn_hookid, relaxed)) { |
1548 | case TIMER_IDLE: |
1549 | case TIMER_ARMED: |
1550 | /* |
1551 | * This can happen if a touch resets a timer that had fired |
1552 | * without being processed |
1553 | */ |
1554 | return 0; |
1555 | } |
1556 | |
1557 | os_atomic_store(&kn->kn_hookid, TIMER_IDLE, relaxed); |
1558 | |
1559 | /* |
1560 | * Copy out the interesting kevent state, |
1561 | * but don't leak out the raw time calculations. |
1562 | * |
1563 | * TODO: potential enhancements - tell the user about: |
1564 | * - deadline to which this timer thought it was expiring |
1565 | * - return kn_sfflags in the fflags field so the client can know |
1566 | * under what flags the timer fired |
1567 | */ |
1568 | *kev = kn->kn_kevent; |
1569 | kev->ext[0] = 0; |
1570 | /* kev->ext[1] = 0; JMM - shouldn't we hide this too? */ |
1571 | |
1572 | if (kn->kn_sdata == 0) { |
1573 | kev->data = 1; |
1574 | } else { |
1575 | /* |
1576 | * This is a 'repeating' timer, so we have to emit |
1577 | * how many intervals expired between the arm |
1578 | * and the process. |
1579 | * |
1580 | * A very strange style of interface, because |
1581 | * this could easily be done in the client... |
1582 | */ |
1583 | |
1584 | uint64_t now; |
1585 | |
1586 | if (kn->kn_sfflags & NOTE_MACH_CONTINUOUS_TIME) |
1587 | now = mach_continuous_time(); |
1588 | else |
1589 | now = mach_absolute_time(); |
1590 | |
1591 | uint64_t first_deadline = kn->kn_ext[0]; |
1592 | uint64_t interval_abs = kn->kn_sdata; |
1593 | uint64_t orig_arm_time = first_deadline - interval_abs; |
1594 | |
1595 | assert(now > orig_arm_time); |
1596 | assert(now > first_deadline); |
1597 | |
1598 | uint64_t elapsed = now - orig_arm_time; |
1599 | |
1600 | uint64_t num_fired = elapsed / interval_abs; |
1601 | |
1602 | /* |
1603 | * To reach this code, we must have seen the timer pop |
1604 | * and be in repeating mode, so therefore it must have been |
1605 | * more than 'interval' time since the attach or last |
1606 | * successful touch. |
1607 | */ |
1608 | assert(num_fired > 0); |
1609 | |
1610 | /* report how many intervals have elapsed to the user */ |
1611 | kev->data = (int64_t)num_fired; |
1612 | |
1613 | /* We only need to re-arm the timer if it's not about to be destroyed */ |
1614 | if ((kn->kn_flags & EV_ONESHOT) == 0) { |
1615 | /* fire at the end of the next interval */ |
1616 | uint64_t new_deadline = first_deadline + num_fired * interval_abs; |
1617 | |
1618 | assert(new_deadline > now); |
1619 | |
1620 | kn->kn_ext[0] = new_deadline; |
1621 | |
1622 | /* |
1623 | * This can't shortcut setting up the thread call, because |
			 * knote_process deactivates EV_CLEAR knotes unconditionally.
1625 | */ |
1626 | filt_timerarm(kn); |
1627 | } |
1628 | } |
1629 | |
1630 | return FILTER_ACTIVE; |
1631 | } |
1632 | |
1633 | SECURITY_READ_ONLY_EARLY(static struct filterops) timer_filtops = { |
1634 | .f_extended_codes = true, |
1635 | .f_attach = filt_timerattach, |
1636 | .f_detach = filt_timerdetach, |
1637 | .f_event = filt_badevent, |
1638 | .f_touch = filt_timertouch, |
1639 | .f_process = filt_timerprocess, |
1640 | }; |
1641 | |
1642 | #pragma mark user_filtops |
1643 | |
1644 | static int |
1645 | filt_userattach(struct knote *kn, __unused struct kevent_internal_s *kev) |
1646 | { |
1647 | if (kn->kn_sfflags & NOTE_TRIGGER) { |
1648 | kn->kn_hookid = FILTER_ACTIVE; |
1649 | } else { |
1650 | kn->kn_hookid = 0; |
1651 | } |
1652 | return (kn->kn_hookid); |
1653 | } |
1654 | |
1655 | static void |
1656 | filt_userdetach(__unused struct knote *kn) |
1657 | { |
1658 | /* EVFILT_USER knotes are not attached to anything in the kernel */ |
1659 | } |
1660 | |
1661 | static int |
1662 | filt_usertouch(struct knote *kn, struct kevent_internal_s *kev) |
1663 | { |
1664 | uint32_t ffctrl; |
1665 | int fflags; |
1666 | |
1667 | ffctrl = kev->fflags & NOTE_FFCTRLMASK; |
1668 | fflags = kev->fflags & NOTE_FFLAGSMASK; |
1669 | switch (ffctrl) { |
1670 | case NOTE_FFNOP: |
1671 | break; |
1672 | case NOTE_FFAND: |
1673 | kn->kn_sfflags &= fflags; |
1674 | break; |
1675 | case NOTE_FFOR: |
1676 | kn->kn_sfflags |= fflags; |
1677 | break; |
1678 | case NOTE_FFCOPY: |
1679 | kn->kn_sfflags = fflags; |
1680 | break; |
1681 | } |
1682 | kn->kn_sdata = kev->data; |
1683 | |
1684 | if (kev->fflags & NOTE_TRIGGER) { |
1685 | kn->kn_hookid = FILTER_ACTIVE; |
1686 | } |
1687 | return (int)kn->kn_hookid; |
1688 | } |
1689 | |
1690 | static int |
1691 | filt_userprocess( |
1692 | struct knote *kn, |
1693 | __unused struct filt_process_s *data, |
1694 | struct kevent_internal_s *kev) |
1695 | { |
1696 | int result = (int)kn->kn_hookid; |
1697 | |
1698 | if (result) { |
1699 | *kev = kn->kn_kevent; |
1700 | kev->fflags = kn->kn_sfflags; |
1701 | kev->data = kn->kn_sdata; |
1702 | if (kn->kn_flags & EV_CLEAR) { |
1703 | kn->kn_hookid = 0; |
1704 | kn->kn_data = 0; |
1705 | kn->kn_fflags = 0; |
1706 | } |
1707 | } |
1708 | |
1709 | return result; |
1710 | } |
1711 | |
1712 | SECURITY_READ_ONLY_EARLY(static struct filterops) user_filtops = { |
1713 | .f_extended_codes = true, |
1714 | .f_attach = filt_userattach, |
1715 | .f_detach = filt_userdetach, |
1716 | .f_event = filt_badevent, |
1717 | .f_touch = filt_usertouch, |
1718 | .f_process = filt_userprocess, |
1719 | }; |
1720 | |
1721 | #pragma mark workloop_filtops |
1722 | |
1723 | static inline void |
1724 | filt_wllock(struct kqworkloop *kqwl) |
1725 | { |
1726 | lck_mtx_lock(&kqwl->kqwl_statelock); |
1727 | } |
1728 | |
1729 | static inline void |
1730 | filt_wlunlock(struct kqworkloop *kqwl) |
1731 | { |
1732 | lck_mtx_unlock(&kqwl->kqwl_statelock); |
1733 | } |
1734 | |
1735 | /* |
1736 | * Returns true when the interlock for the turnstile is the workqueue lock |
1737 | * |
 * When this is the case, all turnstile operations are delegated
1739 | * to the workqueue subsystem. |
1740 | * |
1741 | * This is required because kqueue_threadreq_bind_prepost only holds the |
1742 | * workqueue lock but needs to move the inheritor from the workloop turnstile |
1743 | * away from the creator thread, so that this now fulfilled request cannot be |
1744 | * picked anymore by other threads. |
1745 | */ |
1746 | static inline bool |
1747 | filt_wlturnstile_interlock_is_workq(struct kqworkloop *kqwl) |
1748 | { |
1749 | struct kqrequest *kqr = &kqwl->kqwl_request; |
1750 | return (kqr->kqr_state & KQR_THREQUESTED) && |
1751 | (kqr->kqr_thread == THREAD_NULL); |
1752 | } |
1753 | |
1754 | static void |
1755 | filt_wlupdate_inheritor(struct kqworkloop *kqwl, struct turnstile *ts, |
1756 | turnstile_update_flags_t flags) |
1757 | { |
1758 | turnstile_inheritor_t inheritor = TURNSTILE_INHERITOR_NULL; |
1759 | struct kqrequest *kqr = &kqwl->kqwl_request; |
1760 | |
1761 | /* |
1762 | * binding to the workq should always happen through |
1763 | * workq_kern_threadreq_update_inheritor() |
1764 | */ |
1765 | assert(!filt_wlturnstile_interlock_is_workq(kqwl)); |
1766 | |
1767 | if ((inheritor = kqwl->kqwl_owner)) { |
1768 | flags |= TURNSTILE_INHERITOR_THREAD; |
1769 | } else if ((inheritor = kqr->kqr_thread)) { |
1770 | flags |= TURNSTILE_INHERITOR_THREAD; |
1771 | } |
1772 | |
1773 | turnstile_update_inheritor(ts, inheritor, flags); |
1774 | } |
1775 | |
1776 | #define FILT_WLATTACH 0 |
1777 | #define FILT_WLTOUCH 1 |
1778 | #define FILT_WLDROP 2 |
1779 | |
1780 | __result_use_check |
1781 | static int |
1782 | filt_wlupdate(struct kqworkloop *kqwl, struct knote *kn, |
1783 | struct kevent_internal_s *kev, kq_index_t qos_index, int op) |
1784 | { |
1785 | user_addr_t uaddr = CAST_USER_ADDR_T(kev->ext[EV_EXTIDX_WL_ADDR]); |
1786 | struct kqrequest *kqr = &kqwl->kqwl_request; |
	thread_t cur_owner, new_owner, extra_thread_ref = THREAD_NULL;
1788 | kq_index_t cur_owner_override = THREAD_QOS_UNSPECIFIED; |
1789 | int action = KQWL_UTQ_NONE, error = 0; |
1790 | bool needs_wake = false, needs_wllock = false; |
1791 | uint64_t kdata = kev->ext[EV_EXTIDX_WL_VALUE]; |
1792 | uint64_t mask = kev->ext[EV_EXTIDX_WL_MASK]; |
1793 | uint64_t udata = 0; |
1794 | |
1795 | if (kev->fflags & (NOTE_WL_END_OWNERSHIP | NOTE_WL_DISCOVER_OWNER)) { |
1796 | /* |
1797 | * If we're maybe going to change the kqwl_owner, |
1798 | * then we need to hold the filt_wllock(). |
1799 | */ |
1800 | needs_wllock = true; |
1801 | } else if (kqr->kqr_thread == current_thread()) { |
1802 | /* |
1803 | * <rdar://problem/41531764> Servicer updates need to be serialized with |
1804 | * any ownership change too, as the kqr_thread value influences the |
1805 | * outcome of handling NOTE_WL_DISCOVER_OWNER. |
1806 | */ |
1807 | needs_wllock = true; |
1808 | } |
1809 | |
1810 | if (needs_wllock) { |
1811 | filt_wllock(kqwl); |
1812 | /* |
1813 | * The kqwl owner is set under both the req and filter lock, |
1814 | * meaning it's fine to look at it under any. |
1815 | */ |
1816 | new_owner = cur_owner = kqwl->kqwl_owner; |
1817 | } else { |
1818 | new_owner = cur_owner = THREAD_NULL; |
1819 | } |
1820 | |
1821 | /* |
1822 | * Phase 1: |
1823 | * |
1824 | * If asked, load the uint64 value at the user provided address and compare |
1825 | * it against the passed in mask and expected value. |
1826 | * |
1827 | * If NOTE_WL_DISCOVER_OWNER is specified, translate the loaded name as |
1828 | * a thread reference. |
1829 | * |
1830 | * If NOTE_WL_END_OWNERSHIP is specified and the currently known owner is |
1831 | * the current thread, then end ownership. |
1832 | * |
1833 | * Lastly decide whether we need to perform a QoS update. |
1834 | */ |
1835 | if (uaddr) { |
1836 | error = copyin_word(uaddr, &udata, sizeof(udata)); |
1837 | if (error) { |
1838 | goto out; |
1839 | } |
1840 | |
1841 | /* Update state as copied in. */ |
1842 | kev->ext[EV_EXTIDX_WL_VALUE] = udata; |
1843 | |
1844 | if ((udata & mask) != (kdata & mask)) { |
1845 | error = ESTALE; |
1846 | } else if (kev->fflags & NOTE_WL_DISCOVER_OWNER) { |
1847 | /* |
1848 | * Decipher the owner port name, and translate accordingly. |
1849 | * The low 2 bits were borrowed for other flags, so mask them off. |
1850 | * |
1851 | * Then attempt translation to a thread reference or fail. |
1852 | */ |
1853 | mach_port_name_t name = (mach_port_name_t)udata & ~0x3; |
1854 | if (name != MACH_PORT_NULL) { |
1855 | name = ipc_entry_name_mask(name); |
1856 | extra_thread_ref = port_name_to_thread(name); |
1857 | if (extra_thread_ref == THREAD_NULL) { |
1858 | error = EOWNERDEAD; |
1859 | goto out; |
1860 | } |
1861 | new_owner = extra_thread_ref; |
1862 | } |
1863 | } |
1864 | } |
1865 | |
1866 | if ((kev->fflags & NOTE_WL_END_OWNERSHIP) && new_owner == current_thread()) { |
1867 | new_owner = THREAD_NULL; |
1868 | } |
1869 | |
1870 | if (error == 0) { |
1871 | if ((kev->fflags & NOTE_WL_THREAD_REQUEST) && (kev->flags & EV_DELETE)) { |
1872 | action = KQWL_UTQ_SET_QOS_INDEX; |
1873 | } else if (qos_index && kqr->kqr_qos_index != qos_index) { |
1874 | action = KQWL_UTQ_SET_QOS_INDEX; |
1875 | } |
1876 | |
1877 | if (op == FILT_WLTOUCH) { |
1878 | /* |
			 * Save off any additional fflags/data we just accepted,
			 * but only keep the last round of "update" bits we acted on,
			 * which helps debugging a lot.
1882 | */ |
1883 | kn->kn_sfflags &= ~NOTE_WL_UPDATES_MASK; |
1884 | kn->kn_sfflags |= kev->fflags; |
1885 | kn->kn_sdata = kev->data; |
1886 | if (kev->fflags & NOTE_WL_SYNC_WAKE) { |
1887 | needs_wake = (kn->kn_hook != THREAD_NULL); |
1888 | } |
1889 | } else if (op == FILT_WLDROP) { |
1890 | if ((kn->kn_sfflags & (NOTE_WL_SYNC_WAIT | NOTE_WL_SYNC_WAKE)) == |
1891 | NOTE_WL_SYNC_WAIT) { |
1892 | /* |
1893 | * When deleting a SYNC_WAIT knote that hasn't been woken up |
1894 | * explicitly, issue a wake up. |
1895 | */ |
1896 | kn->kn_sfflags |= NOTE_WL_SYNC_WAKE; |
1897 | needs_wake = (kn->kn_hook != THREAD_NULL); |
1898 | } |
1899 | } |
1900 | } |
1901 | |
1902 | /* |
1903 | * Phase 2: |
1904 | * |
1905 | * Commit ownership and QoS changes if any, possibly wake up waiters |
1906 | */ |
1907 | |
1908 | if (cur_owner == new_owner && action == KQWL_UTQ_NONE && !needs_wake) { |
1909 | goto out; |
1910 | } |
1911 | |
1912 | kq_req_lock(kqwl); |
1913 | |
1914 | /* If already tracked as servicer, don't track as owner */ |
1915 | if (new_owner == kqr->kqr_thread) { |
1916 | new_owner = THREAD_NULL; |
1917 | } |
1918 | |
1919 | if (cur_owner != new_owner) { |
1920 | kqwl->kqwl_owner = new_owner; |
1921 | if (new_owner == extra_thread_ref) { |
1922 | /* we just transfered this ref to kqwl_owner */ |
1923 | extra_thread_ref = THREAD_NULL; |
1924 | } |
1925 | cur_owner_override = kqworkloop_owner_override(kqwl); |
1926 | |
1927 | if (cur_owner) { |
1928 | thread_ends_owning_workloop(cur_owner); |
1929 | } |
1930 | |
1931 | if (new_owner) { |
1932 | /* override it before we drop the old */ |
1933 | if (cur_owner_override != THREAD_QOS_UNSPECIFIED) { |
1934 | thread_add_ipc_override(new_owner, cur_owner_override); |
1935 | } |
1936 | thread_starts_owning_workloop(new_owner); |
1937 | if ((kqr->kqr_state & KQR_THREQUESTED) && !kqr->kqr_thread) { |
1938 | if (action == KQWL_UTQ_NONE) { |
1939 | action = KQWL_UTQ_REDRIVE_EVENTS; |
1940 | } |
1941 | } |
1942 | } else { |
1943 | if ((kqr->kqr_state & (KQR_THREQUESTED | KQR_WAKEUP)) == KQR_WAKEUP) { |
1944 | if (action == KQWL_UTQ_NONE) { |
1945 | action = KQWL_UTQ_REDRIVE_EVENTS; |
1946 | } |
1947 | } |
1948 | } |
1949 | } |
1950 | |
1951 | struct turnstile *ts = kqwl->kqwl_turnstile; |
1952 | bool wl_inheritor_updated = false; |
1953 | |
1954 | if (action != KQWL_UTQ_NONE) { |
1955 | kqworkloop_update_threads_qos(kqwl, action, qos_index); |
1956 | } |
1957 | |
1958 | if (cur_owner != new_owner && ts) { |
1959 | if (action == KQWL_UTQ_REDRIVE_EVENTS) { |
1960 | /* |
1961 | * Note that when action is KQWL_UTQ_REDRIVE_EVENTS, |
1962 | * the code went through workq_kern_threadreq_initiate() |
1963 | * and the workqueue has set the inheritor already |
1964 | */ |
1965 | assert(filt_wlturnstile_interlock_is_workq(kqwl)); |
1966 | } else if (filt_wlturnstile_interlock_is_workq(kqwl)) { |
1967 | workq_kern_threadreq_lock(kqwl->kqwl_p); |
1968 | workq_kern_threadreq_update_inheritor(kqwl->kqwl_p, kqr, new_owner, |
1969 | ts, TURNSTILE_IMMEDIATE_UPDATE); |
1970 | workq_kern_threadreq_unlock(kqwl->kqwl_p); |
1971 | if (!filt_wlturnstile_interlock_is_workq(kqwl)) { |
1972 | /* |
1973 | * If the workq is no longer the interlock, then |
1974 | * workq_kern_threadreq_update_inheritor() has finished a bind |
1975 | * and we need to fallback to the regular path. |
1976 | */ |
1977 | filt_wlupdate_inheritor(kqwl, ts, TURNSTILE_IMMEDIATE_UPDATE); |
1978 | } |
1979 | wl_inheritor_updated = true; |
1980 | } else { |
1981 | filt_wlupdate_inheritor(kqwl, ts, TURNSTILE_IMMEDIATE_UPDATE); |
1982 | wl_inheritor_updated = true; |
1983 | } |
1984 | |
1985 | /* |
1986 | * We need a turnstile reference because we are dropping the interlock |
1987 | * and the caller has not called turnstile_prepare. |
1988 | */ |
1989 | if (wl_inheritor_updated) { |
1990 | turnstile_reference(ts); |
1991 | } |
1992 | } |
1993 | |
1994 | if (needs_wake && ts) { |
1995 | waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T((event_t)kn), |
1996 | (thread_t)kn->kn_hook, THREAD_AWAKENED); |
1997 | } |
1998 | |
1999 | kq_req_unlock(kqwl); |
2000 | |
2001 | if (wl_inheritor_updated) { |
2002 | turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD); |
2003 | turnstile_deallocate(ts); |
2004 | } |
2005 | |
2006 | out: |
2007 | /* |
2008 | * Phase 3: |
2009 | * |
2010 | * Unlock and cleanup various lingering references and things. |
2011 | */ |
2012 | if (needs_wllock) { |
2013 | filt_wlunlock(kqwl); |
2014 | } |
2015 | |
2016 | #if CONFIG_WORKLOOP_DEBUG |
2017 | KQWL_HISTORY_WRITE_ENTRY(kqwl, { |
2018 | .updater = current_thread(), |
2019 | .servicer = kqr->kqr_thread, /* Note: racy */ |
2020 | .old_owner = cur_owner, |
2021 | .new_owner = new_owner, |
2022 | |
2023 | .kev_ident = kev->ident, |
2024 | .error = (int16_t)error, |
2025 | .kev_flags = kev->flags, |
2026 | .kev_fflags = kev->fflags, |
2027 | |
2028 | .kev_mask = mask, |
2029 | .kev_value = kdata, |
2030 | .in_value = udata, |
2031 | }); |
2032 | #endif // CONFIG_WORKLOOP_DEBUG |
2033 | |
2034 | if (cur_owner && new_owner != cur_owner) { |
2035 | if (cur_owner_override != THREAD_QOS_UNSPECIFIED) { |
2036 | thread_drop_ipc_override(cur_owner); |
2037 | } |
2038 | thread_deallocate(cur_owner); |
2039 | } |
2040 | |
2041 | if (extra_thread_ref) { |
2042 | thread_deallocate(extra_thread_ref); |
2043 | } |
2044 | return error; |
2045 | } |
2046 | |
2047 | /* |
 * Remembers the last update that came in from userspace, for debugging purposes.
2049 | * - fflags is mirrored from the userspace kevent |
2050 | * - ext[i, i != VALUE] is mirrored from the userspace kevent |
2051 | * - ext[VALUE] is set to what the kernel loaded atomically |
2052 | * - data is set to the error if any |
2053 | */ |
2054 | static inline void |
2055 | filt_wlremember_last_update(struct knote *kn, struct kevent_internal_s *kev, |
2056 | int error) |
2057 | { |
2058 | kn->kn_fflags = kev->fflags; |
2059 | kn->kn_data = error; |
2060 | memcpy(kn->kn_ext, kev->ext, sizeof(kev->ext)); |
2061 | } |
2062 | |
2063 | static int |
2064 | filt_wlattach(struct knote *kn, struct kevent_internal_s *kev) |
2065 | { |
2066 | struct kqueue *kq = knote_get_kq(kn); |
2067 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
2068 | int error = 0; |
2069 | kq_index_t qos_index = 0; |
2070 | |
2071 | if ((kq->kq_state & KQ_WORKLOOP) == 0) { |
2072 | error = ENOTSUP; |
2073 | goto out; |
2074 | } |
2075 | |
2076 | #if DEVELOPMENT || DEBUG |
2077 | if (kev->ident == 0 && kev->udata == 0 && kev->fflags == 0) { |
2078 | struct kqrequest *kqr = &kqwl->kqwl_request; |
2079 | |
2080 | kq_req_lock(kqwl); |
2081 | kev->fflags = 0; |
2082 | if (kqr->kqr_dsync_waiters) { |
2083 | kev->fflags |= NOTE_WL_SYNC_WAIT; |
2084 | } |
2085 | if (kqr->kqr_qos_index) { |
2086 | kev->fflags |= NOTE_WL_THREAD_REQUEST; |
2087 | } |
2088 | kev->ext[0] = thread_tid(kqwl->kqwl_owner); |
2089 | kev->ext[1] = thread_tid(kqwl->kqwl_request.kqr_thread); |
2090 | kev->ext[2] = thread_owned_workloops_count(current_thread()); |
2091 | kev->ext[3] = kn->kn_kevent.ext[3]; |
2092 | kq_req_unlock(kqwl); |
2093 | error = EBUSY; |
2094 | goto out; |
2095 | } |
2096 | #endif |
2097 | |
2098 | int command = (kn->kn_sfflags & NOTE_WL_COMMANDS_MASK); |
2099 | switch (command) { |
2100 | case NOTE_WL_THREAD_REQUEST: |
2101 | if (kn->kn_id != kqwl->kqwl_dynamicid) { |
2102 | error = EINVAL; |
2103 | goto out; |
2104 | } |
2105 | qos_index = _pthread_priority_thread_qos(kn->kn_qos); |
2106 | if (qos_index == THREAD_QOS_UNSPECIFIED) { |
2107 | error = ERANGE; |
2108 | goto out; |
2109 | } |
2110 | if (kqwl->kqwl_request.kqr_qos_index) { |
2111 | /* |
2112 | * There already is a thread request, and well, you're only allowed |
2113 | * one per workloop, so fail the attach. |
2114 | */ |
2115 | error = EALREADY; |
2116 | goto out; |
2117 | } |
2118 | break; |
2119 | case NOTE_WL_SYNC_WAIT: |
2120 | case NOTE_WL_SYNC_WAKE: |
2121 | if (kn->kn_id == kqwl->kqwl_dynamicid) { |
2122 | error = EINVAL; |
2123 | goto out; |
2124 | } |
2125 | if ((kn->kn_flags & EV_DISABLE) == 0) { |
2126 | error = EINVAL; |
2127 | goto out; |
2128 | } |
2129 | if (kn->kn_sfflags & NOTE_WL_END_OWNERSHIP) { |
2130 | error = EINVAL; |
2131 | goto out; |
2132 | } |
2133 | break; |
2134 | default: |
2135 | error = EINVAL; |
2136 | goto out; |
2137 | } |
2138 | |
2139 | error = filt_wlupdate(kqwl, kn, kev, qos_index, FILT_WLATTACH); |
2140 | |
2141 | out: |
2142 | if (error) { |
2143 | /* If userland wants ESTALE to be hidden, fail the attach anyway */ |
2144 | if (error == ESTALE && (kn->kn_sfflags & NOTE_WL_IGNORE_ESTALE)) { |
2145 | error = 0; |
2146 | } |
2147 | knote_set_error(kn, error); |
2148 | return 0; |
2149 | } |
2150 | if (command == NOTE_WL_SYNC_WAIT) { |
2151 | return kevent_register_wait_prepare(kn, kev); |
2152 | } |
2153 | /* Just attaching the thread request successfully will fire it */ |
2154 | if (command == NOTE_WL_THREAD_REQUEST) { |
2155 | /* |
2156 | * Thread Request knotes need an explicit touch to be active again, |
2157 | * so delivering an event needs to also consume it. |
2158 | */ |
2159 | kn->kn_flags |= EV_CLEAR; |
2160 | return FILTER_ACTIVE; |
2161 | } |
2162 | return 0; |
2163 | } |
2164 | |
2165 | static void __dead2 |
2166 | filt_wlwait_continue(void *parameter, wait_result_t wr) |
2167 | { |
2168 | struct _kevent_register *cont_args = parameter; |
2169 | struct kqworkloop *kqwl = (struct kqworkloop *)cont_args->kq; |
2170 | struct kqrequest *kqr = &kqwl->kqwl_request; |
2171 | |
2172 | kq_req_lock(kqwl); |
2173 | kqr->kqr_dsync_waiters--; |
2174 | if (filt_wlturnstile_interlock_is_workq(kqwl)) { |
2175 | workq_kern_threadreq_lock(kqwl->kqwl_p); |
2176 | turnstile_complete((uintptr_t)kqwl, &kqwl->kqwl_turnstile, NULL); |
2177 | workq_kern_threadreq_unlock(kqwl->kqwl_p); |
2178 | } else { |
2179 | turnstile_complete((uintptr_t)kqwl, &kqwl->kqwl_turnstile, NULL); |
2180 | } |
2181 | kq_req_unlock(kqwl); |
2182 | |
2183 | turnstile_cleanup(); |
2184 | |
2185 | if (wr == THREAD_INTERRUPTED) { |
2186 | cont_args->kev.flags |= EV_ERROR; |
2187 | cont_args->kev.data = EINTR; |
2188 | } else if (wr != THREAD_AWAKENED) { |
2189 | panic("Unexpected wait result: %d" , wr); |
2190 | } |
2191 | |
2192 | kevent_register_wait_return(cont_args); |
2193 | } |
2194 | |
2195 | /* |
 * Called with the workloop mutex held; most of the time this never returns,
 * as it calls filt_wlwait_continue through a continuation.
2198 | */ |
2199 | static void __dead2 |
2200 | filt_wlpost_register_wait(struct uthread *uth, struct knote_lock_ctx *knlc, |
2201 | struct _kevent_register *cont_args) |
2202 | { |
2203 | struct kqworkloop *kqwl = (struct kqworkloop *)cont_args->kq; |
2204 | struct kqrequest *kqr = &kqwl->kqwl_request; |
2205 | struct turnstile *ts; |
2206 | bool workq_locked = false; |
2207 | |
2208 | kq_req_lock(kqwl); |
2209 | |
2210 | kqr->kqr_dsync_waiters++; |
2211 | |
2212 | if (filt_wlturnstile_interlock_is_workq(kqwl)) { |
2213 | workq_kern_threadreq_lock(kqwl->kqwl_p); |
2214 | workq_locked = true; |
2215 | } |
2216 | |
2217 | ts = turnstile_prepare((uintptr_t)kqwl, &kqwl->kqwl_turnstile, |
2218 | TURNSTILE_NULL, TURNSTILE_WORKLOOPS); |
2219 | |
2220 | if (workq_locked) { |
2221 | workq_kern_threadreq_update_inheritor(kqwl->kqwl_p, |
2222 | &kqwl->kqwl_request, kqwl->kqwl_owner, ts, |
2223 | TURNSTILE_DELAYED_UPDATE); |
2224 | if (!filt_wlturnstile_interlock_is_workq(kqwl)) { |
2225 | /* |
2226 | * if the interlock is no longer the workqueue lock, |
2227 | * then we don't need to hold it anymore. |
2228 | */ |
2229 | workq_kern_threadreq_unlock(kqwl->kqwl_p); |
2230 | workq_locked = false; |
2231 | } |
2232 | } |
2233 | if (!workq_locked) { |
2234 | /* |
2235 | * If the interlock is the workloop's, then it's our responsibility to |
2236 | * call update_inheritor, so just do it. |
2237 | */ |
2238 | filt_wlupdate_inheritor(kqwl, ts, TURNSTILE_DELAYED_UPDATE); |
2239 | } |
2240 | |
2241 | thread_set_pending_block_hint(uth->uu_thread, kThreadWaitWorkloopSyncWait); |
2242 | waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cont_args->knote), |
2243 | THREAD_ABORTSAFE, TIMEOUT_WAIT_FOREVER); |
2244 | |
2245 | if (workq_locked) { |
2246 | workq_kern_threadreq_unlock(kqwl->kqwl_p); |
2247 | } |
2248 | |
2249 | thread_t thread = kqwl->kqwl_owner ?: kqr->kqr_thread; |
2250 | if (thread) { |
2251 | thread_reference(thread); |
2252 | } |
2253 | kq_req_unlock(kqwl); |
2254 | |
2255 | kevent_register_wait_block(ts, thread, knlc, filt_wlwait_continue, cont_args); |
2256 | } |
2257 | |
2258 | /* called in stackshot context to report the thread responsible for blocking this thread */ |
2259 | void |
2260 | kdp_workloop_sync_wait_find_owner(__assert_only thread_t thread, |
2261 | event64_t event, thread_waitinfo_t *waitinfo) |
2262 | { |
2263 | struct knote *kn = (struct knote *)event; |
	assert(kdp_is_in_zone(kn, "knote zone"));
2265 | |
2266 | assert(kn->kn_hook == thread); |
2267 | |
2268 | struct kqueue *kq = knote_get_kq(kn); |
	assert(kdp_is_in_zone(kq, "kqueue workloop zone"));
2270 | assert(kq->kq_state & KQ_WORKLOOP); |
2271 | |
2272 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
2273 | struct kqrequest *kqr = &kqwl->kqwl_request; |
2274 | |
2275 | thread_t kqwl_owner = kqwl->kqwl_owner; |
2276 | thread_t servicer = kqr->kqr_thread; |
2277 | |
2278 | if (kqwl_owner != THREAD_NULL) { |
		assert(kdp_is_in_zone(kqwl_owner, "threads"));
2280 | |
2281 | waitinfo->owner = thread_tid(kqwl->kqwl_owner); |
2282 | } else if (servicer != THREAD_NULL) { |
		assert(kdp_is_in_zone(servicer, "threads"));
2284 | |
2285 | waitinfo->owner = thread_tid(servicer); |
2286 | } else if (kqr->kqr_state & KQR_THREQUESTED) { |
2287 | waitinfo->owner = STACKSHOT_WAITOWNER_THREQUESTED; |
2288 | } else { |
2289 | waitinfo->owner = 0; |
2290 | } |
2291 | |
2292 | waitinfo->context = kqwl->kqwl_dynamicid; |
2293 | } |
2294 | |
2295 | static void |
2296 | filt_wldetach(__assert_only struct knote *kn) |
2297 | { |
2298 | assert(knote_get_kq(kn)->kq_state & KQ_WORKLOOP); |
2299 | if (kn->kn_hook) { |
2300 | kevent_register_wait_cleanup(kn); |
2301 | } |
2302 | } |
2303 | |
2304 | static int |
2305 | filt_wlvalidate_kev_flags(struct knote *kn, struct kevent_internal_s *kev, |
2306 | thread_qos_t *qos_index) |
2307 | { |
2308 | int new_commands = kev->fflags & NOTE_WL_COMMANDS_MASK; |
2309 | int sav_commands = kn->kn_sfflags & NOTE_WL_COMMANDS_MASK; |
2310 | |
2311 | if ((kev->fflags & NOTE_WL_DISCOVER_OWNER) && (kev->flags & EV_DELETE)) { |
2312 | return EINVAL; |
2313 | } |
2314 | if (kev->fflags & NOTE_WL_UPDATE_QOS) { |
2315 | if (kev->flags & EV_DELETE) { |
2316 | return EINVAL; |
2317 | } |
2318 | if (sav_commands != NOTE_WL_THREAD_REQUEST) { |
2319 | return EINVAL; |
2320 | } |
2321 | if (!(*qos_index = _pthread_priority_thread_qos(kev->qos))) { |
2322 | return ERANGE; |
2323 | } |
2324 | } |
2325 | |
2326 | switch (new_commands) { |
2327 | case NOTE_WL_THREAD_REQUEST: |
2328 | /* thread requests can only update themselves */ |
2329 | if (sav_commands != NOTE_WL_THREAD_REQUEST) |
2330 | return EINVAL; |
2331 | break; |
2332 | |
2333 | case NOTE_WL_SYNC_WAIT: |
2334 | if (kev->fflags & NOTE_WL_END_OWNERSHIP) |
2335 | return EINVAL; |
2336 | goto sync_checks; |
2337 | |
2338 | case NOTE_WL_SYNC_WAKE: |
2339 | sync_checks: |
2340 | if (!(sav_commands & (NOTE_WL_SYNC_WAIT | NOTE_WL_SYNC_WAKE))) |
2341 | return EINVAL; |
2342 | if ((kev->flags & (EV_ENABLE | EV_DELETE)) == EV_ENABLE) |
2343 | return EINVAL; |
2344 | break; |
2345 | |
2346 | default: |
2347 | return EINVAL; |
2348 | } |
2349 | return 0; |
2350 | } |
2351 | |
2352 | static int |
2353 | filt_wltouch(struct knote *kn, struct kevent_internal_s *kev) |
2354 | { |
2355 | struct kqworkloop *kqwl = (struct kqworkloop *)knote_get_kq(kn); |
2356 | thread_qos_t qos_index = THREAD_QOS_UNSPECIFIED; |
2357 | |
2358 | int error = filt_wlvalidate_kev_flags(kn, kev, &qos_index); |
2359 | if (error) { |
2360 | goto out; |
2361 | } |
2362 | |
2363 | error = filt_wlupdate(kqwl, kn, kev, qos_index, FILT_WLTOUCH); |
2364 | filt_wlremember_last_update(kn, kev, error); |
2365 | if (error) { |
2366 | goto out; |
2367 | } |
2368 | |
2369 | out: |
2370 | if (error) { |
2371 | if (error == ESTALE && (kev->fflags & NOTE_WL_IGNORE_ESTALE)) { |
2372 | /* If userland wants ESTALE to be hidden, do not activate */ |
2373 | return 0; |
2374 | } |
2375 | kev->flags |= EV_ERROR; |
2376 | kev->data = error; |
2377 | return 0; |
2378 | } |
2379 | int command = kev->fflags & NOTE_WL_COMMANDS_MASK; |
2380 | if (command == NOTE_WL_SYNC_WAIT && !(kn->kn_sfflags & NOTE_WL_SYNC_WAKE)) { |
2381 | return kevent_register_wait_prepare(kn, kev); |
2382 | } |
2383 | /* Just touching the thread request successfully will fire it */ |
2384 | if (command == NOTE_WL_THREAD_REQUEST) { |
2385 | if (kev->fflags & NOTE_WL_UPDATE_QOS) { |
2386 | return FILTER_ACTIVE | FILTER_UPDATE_REQ_QOS; |
2387 | } |
2388 | return FILTER_ACTIVE; |
2389 | } |
2390 | return 0; |
2391 | } |
2392 | |
2393 | static bool |
2394 | filt_wlallow_drop(struct knote *kn, struct kevent_internal_s *kev) |
2395 | { |
2396 | struct kqworkloop *kqwl = (struct kqworkloop *)knote_get_kq(kn); |
2397 | |
2398 | int error = filt_wlvalidate_kev_flags(kn, kev, NULL); |
2399 | if (error) { |
2400 | goto out; |
2401 | } |
2402 | |
2403 | error = filt_wlupdate(kqwl, kn, kev, 0, FILT_WLDROP); |
2404 | filt_wlremember_last_update(kn, kev, error); |
2405 | if (error) { |
2406 | goto out; |
2407 | } |
2408 | |
2409 | out: |
2410 | if (error) { |
2411 | if (error == ESTALE && (kev->fflags & NOTE_WL_IGNORE_ESTALE)) { |
2412 | return false; |
2413 | } |
2414 | kev->flags |= EV_ERROR; |
2415 | kev->data = error; |
2416 | return false; |
2417 | } |
2418 | return true; |
2419 | } |
2420 | |
2421 | static int |
2422 | filt_wlprocess( |
2423 | struct knote *kn, |
2424 | __unused struct filt_process_s *data, |
2425 | struct kevent_internal_s *kev) |
2426 | { |
2427 | struct kqworkloop *kqwl = (struct kqworkloop *)knote_get_kq(kn); |
2428 | int rc = 0; |
2429 | |
2430 | assert(kn->kn_sfflags & NOTE_WL_THREAD_REQUEST); |
2431 | |
2432 | filt_wllock(kqwl); |
2433 | |
2434 | if (kqwl->kqwl_owner) { |
2435 | /* |
		 * <rdar://problem/33584321> userspace can sometimes cause the thread
		 * request knote to be processed when events are delivered without
		 * triggering a drain session.
2439 | * |
2440 | * When that happens, the automatic deactivation due to process |
2441 | * would swallow the event, so we have to activate the knote again. |
2442 | */ |
2443 | kqlock(kqwl); |
2444 | knote_activate(kn); |
2445 | kqunlock(kqwl); |
2446 | } else { |
2447 | #if DEBUG || DEVELOPMENT |
2448 | if (kevent_debug_flags() & KEVENT_PANIC_ON_NON_ENQUEUED_PROCESS) { |
2449 | /* |
2450 | * see src/queue_internal.h in libdispatch |
2451 | */ |
2452 | #define DISPATCH_QUEUE_ENQUEUED 0x1ull |
2453 | user_addr_t addr = CAST_USER_ADDR_T(kn->kn_ext[EV_EXTIDX_WL_ADDR]); |
2454 | task_t t = current_task(); |
2455 | uint64_t val; |
2456 | if (addr && task_is_active(t) && !task_is_halting(t) && |
2457 | copyin_word(addr, &val, sizeof(val)) == 0 && |
2458 | val && (val & DISPATCH_QUEUE_ENQUEUED) == 0 && |
2459 | (val >> 48) != 0xdead && (val >> 48) != 0 && (val >> 48) != 0xffff) { |
2460 | panic("kevent: workloop %#016llx is not enqueued " |
2461 | "(kn:%p dq_state:%#016llx kev.dq_state:%#016llx)" , |
2462 | kn->kn_udata, kn, val, kn->kn_ext[EV_EXTIDX_WL_VALUE]); |
2463 | } |
2464 | } |
2465 | #endif |
2466 | *kev = kn->kn_kevent; |
2467 | kev->fflags = kn->kn_sfflags; |
2468 | kev->data = kn->kn_sdata; |
2469 | kev->qos = kn->kn_qos; |
2470 | rc |= FILTER_ACTIVE; |
2471 | } |
2472 | |
2473 | filt_wlunlock(kqwl); |
2474 | |
2475 | if (rc & FILTER_ACTIVE) { |
2476 | workq_thread_set_max_qos(kqwl->kqwl_p, &kqwl->kqwl_request); |
2477 | } |
2478 | return rc; |
2479 | } |
2480 | |
2481 | SECURITY_READ_ONLY_EARLY(static struct filterops) workloop_filtops = { |
2482 | .f_extended_codes = true, |
2483 | .f_attach = filt_wlattach, |
2484 | .f_detach = filt_wldetach, |
2485 | .f_event = filt_badevent, |
2486 | .f_touch = filt_wltouch, |
2487 | .f_process = filt_wlprocess, |
2488 | .f_allow_drop = filt_wlallow_drop, |
2489 | .f_post_register_wait = filt_wlpost_register_wait, |
2490 | }; |
2491 | |
2492 | #pragma mark kevent / knotes |
2493 | |
2494 | /* |
2495 | * JMM - placeholder for not-yet-implemented filters |
2496 | */ |
2497 | static int |
2498 | filt_badevent(struct knote *kn, long hint) |
2499 | { |
2500 | panic("%s[%d](%p, %ld)" , __func__, kn->kn_filter, kn, hint); |
2501 | return 0; |
2502 | } |
2503 | |
2504 | static int |
2505 | filt_badattach(__unused struct knote *kn, __unused struct kevent_internal_s *kev) |
2506 | { |
2507 | knote_set_error(kn, ENOTSUP); |
2508 | return 0; |
2509 | } |
2510 | |
2511 | struct kqueue * |
2512 | kqueue_alloc(struct proc *p, unsigned int flags) |
2513 | { |
2514 | struct filedesc *fdp = p->p_fd; |
2515 | struct kqueue *kq = NULL; |
2516 | int policy; |
2517 | void *hook = NULL; |
2518 | |
2519 | if (flags & KEVENT_FLAG_WORKQ) { |
2520 | struct kqworkq *kqwq; |
2521 | int i; |
2522 | |
2523 | kqwq = (struct kqworkq *)zalloc(kqworkq_zone); |
2524 | if (kqwq == NULL) |
2525 | return NULL; |
2526 | |
2527 | kq = &kqwq->kqwq_kqueue; |
2528 | bzero(kqwq, sizeof (struct kqworkq)); |
2529 | |
2530 | kqwq->kqwq_state = KQ_WORKQ; |
2531 | |
2532 | for (i = 0; i < KQWQ_NBUCKETS; i++) { |
2533 | TAILQ_INIT(&kqwq->kqwq_queue[i]); |
2534 | } |
2535 | for (i = 0; i < KQWQ_NBUCKETS; i++) { |
2536 | if (i != KQWQ_QOS_MANAGER) { |
2537 | /* |
				 * Because of how the bucketized system works, we mix overcommit
				 * sources with non-overcommit ones: each time we move a knote from
				 * one bucket to the next due to overrides, we'd have to track
				 * overcommitness, and it's really not worth it in the workloop-enabled
				 * world to track this faithfully.
2543 | * |
2544 | * Incidentally, this behaves like the original manager-based |
2545 | * kqwq where event delivery always happened (hence is |
2546 | * "overcommit") |
2547 | */ |
2548 | kqwq->kqwq_request[i].kqr_state |= KQR_THOVERCOMMIT; |
2549 | } |
2550 | kqwq->kqwq_request[i].kqr_qos_index = i; |
2551 | TAILQ_INIT(&kqwq->kqwq_request[i].kqr_suppressed); |
2552 | } |
2553 | |
2554 | policy = SYNC_POLICY_FIFO; |
2555 | hook = (void *)kqwq; |
2556 | } else if (flags & KEVENT_FLAG_WORKLOOP) { |
2557 | struct kqworkloop *kqwl; |
2558 | int i; |
2559 | |
2560 | kqwl = (struct kqworkloop *)zalloc(kqworkloop_zone); |
2561 | if (kqwl == NULL) |
2562 | return NULL; |
2563 | |
2564 | bzero(kqwl, sizeof (struct kqworkloop)); |
2565 | |
2566 | kqwl->kqwl_state = KQ_WORKLOOP | KQ_DYNAMIC; |
2567 | kqwl->kqwl_retains = 1; /* donate a retain to creator */ |
2568 | kqwl->kqwl_request.kqr_state = KQR_WORKLOOP; |
2569 | |
2570 | kq = &kqwl->kqwl_kqueue; |
2571 | for (i = 0; i < KQWL_NBUCKETS; i++) { |
2572 | TAILQ_INIT(&kqwl->kqwl_queue[i]); |
2573 | } |
2574 | TAILQ_INIT(&kqwl->kqwl_request.kqr_suppressed); |
2575 | |
2576 | lck_mtx_init(&kqwl->kqwl_statelock, kq_lck_grp, kq_lck_attr); |
2577 | |
2578 | policy = SYNC_POLICY_FIFO; |
2579 | hook = (void *)kqwl; |
2580 | } else { |
2581 | struct kqfile *kqf; |
2582 | |
2583 | kqf = (struct kqfile *)zalloc(kqfile_zone); |
2584 | if (kqf == NULL) |
2585 | return NULL; |
2586 | |
2587 | kq = &kqf->kqf_kqueue; |
2588 | bzero(kqf, sizeof (struct kqfile)); |
2589 | TAILQ_INIT(&kqf->kqf_queue); |
2590 | TAILQ_INIT(&kqf->kqf_suppressed); |
2591 | |
2592 | policy = SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST; |
2593 | } |
2594 | |
2595 | waitq_set_init(&kq->kq_wqs, policy, NULL, hook); |
2596 | lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr); |
2597 | lck_spin_init(&kq->kq_reqlock, kq_lck_grp, kq_lck_attr); |
2598 | kq->kq_p = p; |
2599 | |
2600 | if (fdp->fd_knlistsize < 0) { |
2601 | proc_fdlock(p); |
2602 | if (fdp->fd_knlistsize < 0) |
2603 | fdp->fd_knlistsize = 0; /* this process has had a kq */ |
2604 | proc_fdunlock(p); |
2605 | } |
2606 | |
2607 | return (kq); |
2608 | } |
2609 | |
2610 | /* |
2611 | * knotes_dealloc - detach all knotes for the process and drop them |
2612 | * |
2613 | * Called with proc_fdlock held. |
2614 | * Returns with it locked. |
2615 | * May drop it temporarily. |
2616 | * Process is in such a state that it will not try to allocate |
2617 | * any more knotes during this process (stopped for exit or exec). |
2618 | */ |
2619 | void |
2620 | knotes_dealloc(proc_t p) |
2621 | { |
2622 | struct filedesc *fdp = p->p_fd; |
2623 | struct kqueue *kq; |
2624 | struct knote *kn; |
2625 | struct klist *kn_hash = NULL; |
2626 | int i; |
2627 | |
2628 | /* Close all the fd-indexed knotes up front */ |
2629 | if (fdp->fd_knlistsize > 0) { |
2630 | for (i = 0; i < fdp->fd_knlistsize; i++) { |
2631 | while ((kn = SLIST_FIRST(&fdp->fd_knlist[i])) != NULL) { |
2632 | kq = knote_get_kq(kn); |
2633 | kqlock(kq); |
2634 | proc_fdunlock(p); |
2635 | knote_drop(kq, kn, NULL); |
2636 | proc_fdlock(p); |
2637 | } |
2638 | } |
2639 | /* free the table */ |
2640 | FREE(fdp->fd_knlist, M_KQUEUE); |
2641 | fdp->fd_knlist = NULL; |
2642 | } |
2643 | fdp->fd_knlistsize = -1; |
2644 | |
2645 | knhash_lock(p); |
2646 | proc_fdunlock(p); |
2647 | |
2648 | /* Clean out all the hashed knotes as well */ |
2649 | if (fdp->fd_knhashmask != 0) { |
2650 | for (i = 0; i <= (int)fdp->fd_knhashmask; i++) { |
2651 | while ((kn = SLIST_FIRST(&fdp->fd_knhash[i])) != NULL) { |
2652 | kq = knote_get_kq(kn); |
2653 | kqlock(kq); |
2654 | knhash_unlock(p); |
2655 | knote_drop(kq, kn, NULL); |
2656 | knhash_lock(p); |
2657 | } |
2658 | } |
2659 | kn_hash = fdp->fd_knhash; |
2660 | fdp->fd_knhashmask = 0; |
2661 | fdp->fd_knhash = NULL; |
2662 | } |
2663 | |
2664 | knhash_unlock(p); |
2665 | |
2666 | /* free the kn_hash table */ |
2667 | if (kn_hash) |
2668 | FREE(kn_hash, M_KQUEUE); |
2669 | |
2670 | proc_fdlock(p); |
2671 | } |
2672 | |
2673 | /* |
2674 | * kqworkloop_invalidate |
2675 | * |
2676 | * Invalidate ownership of a workloop. |
2677 | * |
2678 | * This is meant to be used so that any remnant of overrides and ownership |
2679 | * information is dropped before a kqworkloop can no longer be found in the |
2680 | * global hash table and have ghost workloop ownership left over. |
2681 | * |
2682 | * Possibly returns a thread to deallocate in a safe context. |
2683 | */ |
2684 | static thread_t |
2685 | kqworkloop_invalidate(struct kqworkloop *kqwl) |
2686 | { |
2687 | thread_t cur_owner = kqwl->kqwl_owner; |
2688 | |
2689 | assert(TAILQ_EMPTY(&kqwl->kqwl_request.kqr_suppressed)); |
2690 | if (cur_owner) { |
2691 | /* |
2692 | * If the kqueue had an owner that prevented the thread request to |
2693 | * go through, then no unbind happened, and we may have lingering |
2694 | * overrides to drop. |
2695 | */ |
2696 | if (kqworkloop_owner_override(kqwl) != THREAD_QOS_UNSPECIFIED) { |
2697 | thread_drop_ipc_override(cur_owner); |
2698 | } |
2699 | thread_ends_owning_workloop(cur_owner); |
2700 | kqwl->kqwl_owner = THREAD_NULL; |
2701 | } |
2702 | |
2703 | return cur_owner; |
2704 | } |
2705 | |
2706 | /* |
2707 | * kqueue_dealloc - detach all knotes from a kqueue and free it |
2708 | * |
2709 | * We walk each list looking for knotes referencing this |
 * kqueue. If we find one, we try to drop it. But
2711 | * if we fail to get a drop reference, that will wait |
2712 | * until it is dropped. So, we can just restart again |
2713 | * safe in the assumption that the list will eventually |
2714 | * not contain any more references to this kqueue (either |
2715 | * we dropped them all, or someone else did). |
2716 | * |
2717 | * Assumes no new events are being added to the kqueue. |
2718 | * Nothing locked on entry or exit. |
2719 | * |
 * Workloop kqueues can't get here unless all the knotes
2721 | * are already gone and all requested threads have come |
2722 | * and gone (cancelled or arrived). |
2723 | */ |
2724 | void |
2725 | kqueue_dealloc(struct kqueue *kq) |
2726 | { |
2727 | struct proc *p; |
2728 | struct filedesc *fdp; |
2729 | struct knote *kn; |
2730 | int i; |
2731 | |
2732 | if (kq == NULL) |
2733 | return; |
2734 | |
2735 | p = kq->kq_p; |
2736 | fdp = p->p_fd; |
2737 | |
2738 | /* |
2739 | * Workloops are refcounted by their knotes, so there's no point |
2740 | * spending a lot of time under these locks just to deallocate one. |
2741 | */ |
2742 | if ((kq->kq_state & KQ_WORKLOOP) == 0) { |
2743 | KNOTE_LOCK_CTX(knlc); |
2744 | |
2745 | proc_fdlock(p); |
2746 | for (i = 0; i < fdp->fd_knlistsize; i++) { |
2747 | kn = SLIST_FIRST(&fdp->fd_knlist[i]); |
2748 | while (kn != NULL) { |
2749 | if (kq == knote_get_kq(kn)) { |
2750 | kqlock(kq); |
2751 | proc_fdunlock(p); |
2752 | if (knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ON_SUCCESS)) { |
2753 | knote_drop(kq, kn, &knlc); |
2754 | } |
2755 | proc_fdlock(p); |
2756 | /* start over at beginning of list */ |
2757 | kn = SLIST_FIRST(&fdp->fd_knlist[i]); |
2758 | continue; |
2759 | } |
2760 | kn = SLIST_NEXT(kn, kn_link); |
2761 | } |
2762 | } |
2763 | |
2764 | knhash_lock(p); |
2765 | proc_fdunlock(p); |
2766 | |
2767 | if (fdp->fd_knhashmask != 0) { |
2768 | for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) { |
2769 | kn = SLIST_FIRST(&fdp->fd_knhash[i]); |
2770 | while (kn != NULL) { |
2771 | if (kq == knote_get_kq(kn)) { |
2772 | kqlock(kq); |
2773 | knhash_unlock(p); |
2774 | if (knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ON_SUCCESS)) { |
2775 | knote_drop(kq, kn, &knlc); |
2776 | } |
2777 | knhash_lock(p); |
2778 | /* start over at beginning of list */ |
2779 | kn = SLIST_FIRST(&fdp->fd_knhash[i]); |
2780 | continue; |
2781 | } |
2782 | kn = SLIST_NEXT(kn, kn_link); |
2783 | } |
2784 | } |
2785 | } |
2786 | knhash_unlock(p); |
2787 | } |
2788 | |
2789 | if (kq->kq_state & KQ_WORKLOOP) { |
2790 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
2791 | thread_t cur_owner = kqworkloop_invalidate(kqwl); |
2792 | |
2793 | if (cur_owner) thread_deallocate(cur_owner); |
2794 | |
2795 | if (kqwl->kqwl_request.kqr_state & KQR_ALLOCATED_TURNSTILE) { |
2796 | struct turnstile *ts; |
2797 | turnstile_complete((uintptr_t)kqwl, &kqwl->kqwl_turnstile, &ts); |
2798 | turnstile_cleanup(); |
2799 | turnstile_deallocate(ts); |
2800 | } else { |
2801 | assert(kqwl->kqwl_turnstile == NULL); |
2802 | } |
2803 | } |
2804 | |
2805 | /* |
	 * waitq_set_deinit() removes the KQ's waitq set from
2807 | * any select sets to which it may belong. |
2808 | */ |
2809 | waitq_set_deinit(&kq->kq_wqs); |
2810 | lck_spin_destroy(&kq->kq_lock, kq_lck_grp); |
2811 | lck_spin_destroy(&kq->kq_reqlock, kq_lck_grp); |
2812 | |
2813 | if (kq->kq_state & KQ_WORKQ) { |
2814 | zfree(kqworkq_zone, (struct kqworkq *)kq); |
2815 | } else if (kq->kq_state & KQ_WORKLOOP) { |
2816 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
2817 | |
2818 | assert(kqwl->kqwl_retains == 0); |
2819 | lck_mtx_destroy(&kqwl->kqwl_statelock, kq_lck_grp); |
2820 | zfree(kqworkloop_zone, kqwl); |
2821 | } else { |
2822 | zfree(kqfile_zone, (struct kqfile *)kq); |
2823 | } |
2824 | } |
2825 | |
2826 | static inline void |
2827 | kqueue_retain(struct kqueue *kq) |
2828 | { |
2829 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
2830 | uint32_t previous; |
2831 | |
2832 | if ((kq->kq_state & KQ_DYNAMIC) == 0) |
2833 | return; |
2834 | |
2835 | previous = OSIncrementAtomic(&kqwl->kqwl_retains); |
2836 | if (previous == KQ_WORKLOOP_RETAINS_MAX) |
2837 | panic("kq(%p) retain overflow" , kq); |
2838 | |
2839 | if (previous == 0) |
2840 | panic("kq(%p) resurrection" , kq); |
2841 | } |
2842 | |
2843 | #define KQUEUE_CANT_BE_LAST_REF 0 |
2844 | #define KQUEUE_MIGHT_BE_LAST_REF 1 |
2845 | |
2846 | static inline int |
2847 | kqueue_release(kqueue_t kqu, __assert_only int possibly_last) |
2848 | { |
2849 | if ((kqu.kq->kq_state & KQ_DYNAMIC) == 0) { |
2850 | return 0; |
2851 | } |
2852 | |
2853 | assert(kqu.kq->kq_state & KQ_WORKLOOP); /* for now */ |
2854 | uint32_t refs = OSDecrementAtomic(&kqu.kqwl->kqwl_retains); |
2855 | if (__improbable(refs == 0)) { |
2856 | panic("kq(%p) over-release" , kqu.kq); |
2857 | } |
2858 | if (refs == 1) { |
2859 | assert(possibly_last); |
2860 | } |
2861 | return refs == 1; |
2862 | } |
2863 | |
2864 | int |
2865 | kqueue_body(struct proc *p, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval) |
2866 | { |
2867 | struct kqueue *kq; |
2868 | struct fileproc *fp; |
2869 | int fd, error; |
2870 | |
2871 | error = falloc_withalloc(p, |
2872 | &fp, &fd, vfs_context_current(), fp_zalloc, cra); |
2873 | if (error) { |
2874 | return (error); |
2875 | } |
2876 | |
2877 | kq = kqueue_alloc(p, 0); |
2878 | if (kq == NULL) { |
2879 | fp_free(p, fd, fp); |
2880 | return (ENOMEM); |
2881 | } |
2882 | |
2883 | fp->f_flag = FREAD | FWRITE; |
2884 | fp->f_ops = &kqueueops; |
2885 | fp->f_data = kq; |
2886 | |
2887 | proc_fdlock(p); |
2888 | *fdflags(p, fd) |= UF_EXCLOSE; |
2889 | procfdtbl_releasefd(p, fd, NULL); |
2890 | fp_drop(p, fd, fp, 1); |
2891 | proc_fdunlock(p); |
2892 | |
2893 | *retval = fd; |
2894 | return (error); |
2895 | } |
2896 | |
2897 | int |
2898 | kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) |
2899 | { |
2900 | return (kqueue_body(p, fileproc_alloc_init, NULL, retval)); |
2901 | } |
2902 | |
2903 | static int |
2904 | kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp, struct proc *p, |
2905 | unsigned int flags) |
2906 | { |
2907 | int advance; |
2908 | int error; |
2909 | |
2910 | if (flags & KEVENT_FLAG_LEGACY32) { |
2911 | bzero(kevp, sizeof (*kevp)); |
2912 | |
2913 | if (IS_64BIT_PROCESS(p)) { |
2914 | struct user64_kevent kev64; |
2915 | |
2916 | advance = sizeof (kev64); |
2917 | error = copyin(*addrp, (caddr_t)&kev64, advance); |
2918 | if (error) |
2919 | return (error); |
2920 | kevp->ident = kev64.ident; |
2921 | kevp->filter = kev64.filter; |
2922 | kevp->flags = kev64.flags; |
2923 | kevp->udata = kev64.udata; |
2924 | kevp->fflags = kev64.fflags; |
2925 | kevp->data = kev64.data; |
2926 | } else { |
2927 | struct user32_kevent kev32; |
2928 | |
2929 | advance = sizeof (kev32); |
2930 | error = copyin(*addrp, (caddr_t)&kev32, advance); |
2931 | if (error) |
2932 | return (error); |
2933 | kevp->ident = (uintptr_t)kev32.ident; |
2934 | kevp->filter = kev32.filter; |
2935 | kevp->flags = kev32.flags; |
2936 | kevp->udata = CAST_USER_ADDR_T(kev32.udata); |
2937 | kevp->fflags = kev32.fflags; |
2938 | kevp->data = (intptr_t)kev32.data; |
2939 | } |
2940 | } else if (flags & KEVENT_FLAG_LEGACY64) { |
2941 | struct kevent64_s kev64; |
2942 | |
2943 | bzero(kevp, sizeof (*kevp)); |
2944 | |
2945 | advance = sizeof (struct kevent64_s); |
2946 | error = copyin(*addrp, (caddr_t)&kev64, advance); |
2947 | if (error) |
2948 | return(error); |
2949 | kevp->ident = kev64.ident; |
2950 | kevp->filter = kev64.filter; |
2951 | kevp->flags = kev64.flags; |
2952 | kevp->udata = kev64.udata; |
2953 | kevp->fflags = kev64.fflags; |
2954 | kevp->data = kev64.data; |
2955 | kevp->ext[0] = kev64.ext[0]; |
2956 | kevp->ext[1] = kev64.ext[1]; |
2957 | |
2958 | } else { |
2959 | struct kevent_qos_s kevqos; |
2960 | |
2961 | bzero(kevp, sizeof (*kevp)); |
2962 | |
2963 | advance = sizeof (struct kevent_qos_s); |
2964 | error = copyin(*addrp, (caddr_t)&kevqos, advance); |
2965 | if (error) |
2966 | return error; |
2967 | kevp->ident = kevqos.ident; |
2968 | kevp->filter = kevqos.filter; |
2969 | kevp->flags = kevqos.flags; |
2970 | kevp->qos = kevqos.qos; |
2971 | // kevp->xflags = kevqos.xflags; |
2972 | kevp->udata = kevqos.udata; |
2973 | kevp->fflags = kevqos.fflags; |
2974 | kevp->data = kevqos.data; |
2975 | kevp->ext[0] = kevqos.ext[0]; |
2976 | kevp->ext[1] = kevqos.ext[1]; |
2977 | kevp->ext[2] = kevqos.ext[2]; |
2978 | kevp->ext[3] = kevqos.ext[3]; |
2979 | } |
2980 | if (!error) |
2981 | *addrp += advance; |
2982 | return (error); |
2983 | } |
2984 | |
2985 | static int |
2986 | kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp, struct proc *p, |
2987 | unsigned int flags) |
2988 | { |
2989 | user_addr_t addr = *addrp; |
2990 | int advance; |
2991 | int error; |
2992 | |
2993 | /* |
	 * fully initialize the different output event structure
2995 | * types from the internal kevent (and some universal |
2996 | * defaults for fields not represented in the internal |
2997 | * form). |
2998 | */ |
2999 | if (flags & KEVENT_FLAG_LEGACY32) { |
3000 | assert((flags & KEVENT_FLAG_STACK_EVENTS) == 0); |
3001 | |
3002 | if (IS_64BIT_PROCESS(p)) { |
3003 | struct user64_kevent kev64; |
3004 | |
3005 | advance = sizeof (kev64); |
3006 | bzero(&kev64, advance); |
3007 | |
3008 | /* |
3009 | * deal with the special case of a user-supplied |
3010 | * value of (uintptr_t)-1. |
3011 | */ |
3012 | kev64.ident = (kevp->ident == (uintptr_t)-1) ? |
3013 | (uint64_t)-1LL : (uint64_t)kevp->ident; |
3014 | |
3015 | kev64.filter = kevp->filter; |
3016 | kev64.flags = kevp->flags; |
3017 | kev64.fflags = kevp->fflags; |
3018 | kev64.data = (int64_t) kevp->data; |
3019 | kev64.udata = kevp->udata; |
3020 | error = copyout((caddr_t)&kev64, addr, advance); |
3021 | } else { |
3022 | struct user32_kevent kev32; |
3023 | |
3024 | advance = sizeof (kev32); |
3025 | bzero(&kev32, advance); |
3026 | kev32.ident = (uint32_t)kevp->ident; |
3027 | kev32.filter = kevp->filter; |
3028 | kev32.flags = kevp->flags; |
3029 | kev32.fflags = kevp->fflags; |
3030 | kev32.data = (int32_t)kevp->data; |
3031 | kev32.udata = kevp->udata; |
3032 | error = copyout((caddr_t)&kev32, addr, advance); |
3033 | } |
3034 | } else if (flags & KEVENT_FLAG_LEGACY64) { |
3035 | struct kevent64_s kev64; |
3036 | |
3037 | advance = sizeof (struct kevent64_s); |
3038 | if (flags & KEVENT_FLAG_STACK_EVENTS) { |
3039 | addr -= advance; |
3040 | } |
3041 | bzero(&kev64, advance); |
3042 | kev64.ident = kevp->ident; |
3043 | kev64.filter = kevp->filter; |
3044 | kev64.flags = kevp->flags; |
3045 | kev64.fflags = kevp->fflags; |
3046 | kev64.data = (int64_t) kevp->data; |
3047 | kev64.udata = kevp->udata; |
3048 | kev64.ext[0] = kevp->ext[0]; |
3049 | kev64.ext[1] = kevp->ext[1]; |
3050 | error = copyout((caddr_t)&kev64, addr, advance); |
3051 | } else { |
3052 | struct kevent_qos_s kevqos; |
3053 | |
3054 | advance = sizeof (struct kevent_qos_s); |
3055 | if (flags & KEVENT_FLAG_STACK_EVENTS) { |
3056 | addr -= advance; |
3057 | } |
3058 | bzero(&kevqos, advance); |
3059 | kevqos.ident = kevp->ident; |
3060 | kevqos.filter = kevp->filter; |
3061 | kevqos.flags = kevp->flags; |
3062 | kevqos.qos = kevp->qos; |
3063 | kevqos.udata = kevp->udata; |
3064 | kevqos.fflags = kevp->fflags; |
3065 | kevqos.xflags = 0; |
3066 | kevqos.data = (int64_t) kevp->data; |
3067 | kevqos.ext[0] = kevp->ext[0]; |
3068 | kevqos.ext[1] = kevp->ext[1]; |
3069 | kevqos.ext[2] = kevp->ext[2]; |
3070 | kevqos.ext[3] = kevp->ext[3]; |
3071 | error = copyout((caddr_t)&kevqos, addr, advance); |
3072 | } |
3073 | if (!error) { |
3074 | if (flags & KEVENT_FLAG_STACK_EVENTS) |
3075 | *addrp = addr; |
3076 | else |
3077 | *addrp = addr + advance; |
3078 | } |
3079 | return (error); |
3080 | } |
3081 | |
3082 | static int |
3083 | kevent_get_data_size( |
3084 | struct proc *p, |
3085 | uint64_t data_available, |
3086 | unsigned int flags, |
3087 | user_size_t *residp) |
3088 | { |
3089 | user_size_t resid; |
3090 | int error = 0; |
3091 | |
3092 | if (data_available != USER_ADDR_NULL) { |
3093 | if (flags & KEVENT_FLAG_KERNEL) { |
3094 | resid = *(user_size_t *)(uintptr_t)data_available; |
3095 | } else if (IS_64BIT_PROCESS(p)) { |
3096 | user64_size_t usize; |
3097 | error = copyin((user_addr_t)data_available, &usize, sizeof(usize)); |
3098 | resid = (user_size_t)usize; |
3099 | } else { |
3100 | user32_size_t usize; |
3101 | error = copyin((user_addr_t)data_available, &usize, sizeof(usize)); |
3102 | resid = (user_size_t)usize; |
3103 | } |
3104 | if (error) |
3105 | return(error); |
3106 | } else { |
3107 | resid = 0; |
3108 | } |
3109 | *residp = resid; |
3110 | return 0; |
3111 | } |
3112 | |
3113 | static int |
3114 | kevent_put_data_size( |
3115 | struct proc *p, |
3116 | uint64_t data_available, |
3117 | unsigned int flags, |
3118 | user_size_t resid) |
3119 | { |
3120 | int error = 0; |
3121 | |
3122 | if (data_available) { |
3123 | if (flags & KEVENT_FLAG_KERNEL) { |
3124 | *(user_size_t *)(uintptr_t)data_available = resid; |
3125 | } else if (IS_64BIT_PROCESS(p)) { |
3126 | user64_size_t usize = (user64_size_t)resid; |
3127 | error = copyout(&usize, (user_addr_t)data_available, sizeof(usize)); |
3128 | } else { |
3129 | user32_size_t usize = (user32_size_t)resid; |
3130 | error = copyout(&usize, (user_addr_t)data_available, sizeof(usize)); |
3131 | } |
3132 | } |
3133 | return error; |
3134 | } |
3135 | |
3136 | /* |
3137 | * kevent_continue - continue a kevent syscall after blocking |
3138 | * |
3139 | * assume we inherit a use count on the kq fileglob. |
3140 | */ |
3141 | __attribute__((noreturn)) |
3142 | static void |
3143 | kevent_continue(__unused struct kqueue *kq, void *data, int error) |
3144 | { |
3145 | struct _kevent *cont_args; |
3146 | struct fileproc *fp; |
3147 | uint64_t data_available; |
3148 | user_size_t data_size; |
3149 | user_size_t data_resid; |
3150 | unsigned int flags; |
3151 | int32_t *retval; |
3152 | int noutputs; |
3153 | int fd; |
3154 | struct proc *p = current_proc(); |
3155 | |
3156 | cont_args = (struct _kevent *)data; |
3157 | data_available = cont_args->data_available; |
3158 | flags = cont_args->process_data.fp_flags; |
3159 | data_size = cont_args->process_data.fp_data_size; |
3160 | data_resid = cont_args->process_data.fp_data_resid; |
3161 | noutputs = cont_args->eventout; |
3162 | retval = cont_args->retval; |
3163 | fd = cont_args->fd; |
3164 | fp = cont_args->fp; |
3165 | |
3166 | kevent_put_kq(p, fd, fp, kq); |
3167 | |
3168 | /* don't abandon other output just because of residual copyout failures */ |
3169 | if (error == 0 && data_available && data_resid != data_size) { |
3170 | (void)kevent_put_data_size(p, data_available, flags, data_resid); |
3171 | } |
3172 | |
3173 | /* don't restart after signals... */ |
3174 | if (error == ERESTART) |
3175 | error = EINTR; |
3176 | else if (error == EWOULDBLOCK) |
3177 | error = 0; |
3178 | if (error == 0) |
3179 | *retval = noutputs; |
3180 | unix_syscall_return(error); |
3181 | } |
3182 | |
3183 | /* |
3184 | * kevent - [syscall] register and wait for kernel events |
3185 | * |
3186 | */ |
3187 | int |
3188 | kevent(struct proc *p, struct kevent_args *uap, int32_t *retval) |
3189 | { |
3190 | unsigned int flags = KEVENT_FLAG_LEGACY32; |
3191 | |
3192 | return kevent_internal(p, |
3193 | (kqueue_id_t)uap->fd, NULL, |
3194 | uap->changelist, uap->nchanges, |
3195 | uap->eventlist, uap->nevents, |
3196 | 0ULL, 0ULL, |
3197 | flags, |
3198 | uap->timeout, |
3199 | kevent_continue, |
3200 | retval); |
3201 | } |
3202 | |
3203 | int |
3204 | kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval) |
3205 | { |
3206 | unsigned int flags; |
3207 | |
3208 | /* restrict to user flags and set legacy64 */ |
3209 | flags = uap->flags & KEVENT_FLAG_USER; |
3210 | flags |= KEVENT_FLAG_LEGACY64; |
3211 | |
3212 | return kevent_internal(p, |
3213 | (kqueue_id_t)uap->fd, NULL, |
3214 | uap->changelist, uap->nchanges, |
3215 | uap->eventlist, uap->nevents, |
3216 | 0ULL, 0ULL, |
3217 | flags, |
3218 | uap->timeout, |
3219 | kevent_continue, |
3220 | retval); |
3221 | } |
3222 | |
3223 | int |
3224 | kevent_qos(struct proc *p, struct kevent_qos_args *uap, int32_t *retval) |
3225 | { |
3226 | /* restrict to user flags */ |
3227 | uap->flags &= KEVENT_FLAG_USER; |
3228 | |
3229 | return kevent_internal(p, |
3230 | (kqueue_id_t)uap->fd, NULL, |
3231 | uap->changelist, uap->nchanges, |
3232 | uap->eventlist, uap->nevents, |
3233 | uap->data_out, (uint64_t)uap->data_available, |
3234 | uap->flags, |
3235 | 0ULL, |
3236 | kevent_continue, |
3237 | retval); |
3238 | } |
3239 | |
3240 | int |
3241 | kevent_qos_internal(struct proc *p, int fd, |
3242 | user_addr_t changelist, int nchanges, |
3243 | user_addr_t eventlist, int nevents, |
3244 | user_addr_t data_out, user_size_t *data_available, |
3245 | unsigned int flags, |
3246 | int32_t *retval) |
3247 | { |
3248 | return kevent_internal(p, |
3249 | (kqueue_id_t)fd, NULL, |
3250 | changelist, nchanges, |
3251 | eventlist, nevents, |
3252 | data_out, (uint64_t)data_available, |
3253 | (flags | KEVENT_FLAG_KERNEL), |
3254 | 0ULL, |
3255 | NULL, |
3256 | retval); |
3257 | } |
3258 | |
3259 | int |
3260 | kevent_id(struct proc *p, struct kevent_id_args *uap, int32_t *retval) |
3261 | { |
3262 | /* restrict to user flags */ |
3263 | uap->flags &= KEVENT_FLAG_USER; |
3264 | |
3265 | return kevent_internal(p, |
3266 | (kqueue_id_t)uap->id, NULL, |
3267 | uap->changelist, uap->nchanges, |
3268 | uap->eventlist, uap->nevents, |
3269 | uap->data_out, (uint64_t)uap->data_available, |
3270 | (uap->flags | KEVENT_FLAG_DYNAMIC_KQUEUE), |
3271 | 0ULL, |
3272 | kevent_continue, |
3273 | retval); |
3274 | } |
3275 | |
3276 | int |
3277 | kevent_id_internal(struct proc *p, kqueue_id_t *id, |
3278 | user_addr_t changelist, int nchanges, |
3279 | user_addr_t eventlist, int nevents, |
3280 | user_addr_t data_out, user_size_t *data_available, |
3281 | unsigned int flags, |
3282 | int32_t *retval) |
3283 | { |
3284 | return kevent_internal(p, |
3285 | *id, id, |
3286 | changelist, nchanges, |
3287 | eventlist, nevents, |
3288 | data_out, (uint64_t)data_available, |
3289 | (flags | KEVENT_FLAG_KERNEL | KEVENT_FLAG_DYNAMIC_KQUEUE), |
3290 | 0ULL, |
3291 | NULL, |
3292 | retval); |
3293 | } |
3294 | |
3295 | static int |
3296 | kevent_get_timeout(struct proc *p, |
3297 | user_addr_t utimeout, |
3298 | unsigned int flags, |
3299 | struct timeval *atvp) |
3300 | { |
3301 | struct timeval atv; |
3302 | int error = 0; |
3303 | |
3304 | if (flags & KEVENT_FLAG_IMMEDIATE) { |
3305 | getmicrouptime(&atv); |
3306 | } else if (utimeout != USER_ADDR_NULL) { |
3307 | struct timeval rtv; |
3308 | if (flags & KEVENT_FLAG_KERNEL) { |
3309 | struct timespec *tsp = (struct timespec *)utimeout; |
3310 | TIMESPEC_TO_TIMEVAL(&rtv, tsp); |
3311 | } else if (IS_64BIT_PROCESS(p)) { |
3312 | struct user64_timespec ts; |
3313 | error = copyin(utimeout, &ts, sizeof(ts)); |
3314 | if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0) |
3315 | error = EINVAL; |
3316 | else |
3317 | TIMESPEC_TO_TIMEVAL(&rtv, &ts); |
3318 | } else { |
3319 | struct user32_timespec ts; |
3320 | error = copyin(utimeout, &ts, sizeof(ts)); |
3321 | TIMESPEC_TO_TIMEVAL(&rtv, &ts); |
3322 | } |
3323 | if (error) |
3324 | return (error); |
3325 | if (itimerfix(&rtv)) |
3326 | return (EINVAL); |
3327 | getmicrouptime(&atv); |
3328 | timevaladd(&atv, &rtv); |
3329 | } else { |
3330 | /* wait forever value */ |
3331 | atv.tv_sec = 0; |
3332 | atv.tv_usec = 0; |
3333 | } |
3334 | *atvp = atv; |
3335 | return 0; |
3336 | } |
3337 | |
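/*
 * kevent_set_kq_mode - pin a kqueue to a single kevent record format.
 *
 * The first call records whether the kq is used with legacy 32-bit,
 * legacy 64-bit, or QoS-style kevents; later calls with a mismatched
 * format fail with EINVAL.
 */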
3338 | static int |
3339 | kevent_set_kq_mode(struct kqueue *kq, unsigned int flags) |
3340 | { |
3341 | /* each kq should only be used for events of one type */ |
3342 | kqlock(kq); |
3343 | if (kq->kq_state & (KQ_KEV32 | KQ_KEV64 | KQ_KEV_QOS)) { |
3344 | if (flags & KEVENT_FLAG_LEGACY32) { |
3345 | if ((kq->kq_state & KQ_KEV32) == 0) { |
3346 | kqunlock(kq); |
3347 | return EINVAL; |
3348 | } |
3349 | } else if (kq->kq_state & KQ_KEV32) { |
3350 | kqunlock(kq); |
3351 | return EINVAL; |
3352 | } |
3353 | } else if (flags & KEVENT_FLAG_LEGACY32) { |
3354 | kq->kq_state |= KQ_KEV32; |
3355 | } else if (flags & KEVENT_FLAG_LEGACY64) { |
3356 | kq->kq_state |= KQ_KEV64; |
3357 | } else { |
3358 | kq->kq_state |= KQ_KEV_QOS; |
3359 | } |
3360 | kqunlock(kq); |
3361 | return 0; |
3362 | } |
3363 | |
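/* Bucket index for a dynamic kqueue id: fold the next byte in before masking */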
#define KQ_HASH(val, mask) (((val) ^ ((val) >> 8)) & (mask))
3365 | #define CONFIG_KQ_HASHSIZE CONFIG_KN_HASHSIZE |
3366 | |
3367 | static inline void |
3368 | kqhash_lock(proc_t p) |
3369 | { |
3370 | lck_mtx_lock_spin_always(&p->p_fd->fd_kqhashlock); |
3371 | } |
3372 | |
3373 | static inline void |
3374 | kqhash_lock_held(__assert_only proc_t p) |
3375 | { |
3376 | LCK_MTX_ASSERT(&p->p_fd->fd_kqhashlock, LCK_MTX_ASSERT_OWNED); |
3377 | } |
3378 | |
3379 | static inline void |
3380 | kqhash_unlock(proc_t p) |
3381 | { |
3382 | lck_mtx_unlock(&p->p_fd->fd_kqhashlock); |
3383 | } |
3384 | |
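/*
 * Lazily allocate the per-process hash of dynamic kqueues.  The kqhash lock
 * must be dropped around hashinit(), so the allocation may race; the loser
 * frees its table.
 */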
3385 | static void |
3386 | kqueue_hash_init_if_needed(proc_t p) |
3387 | { |
3388 | struct filedesc *fdp = p->p_fd; |
3389 | |
3390 | kqhash_lock_held(p); |
3391 | |
3392 | if (__improbable(fdp->fd_kqhash == NULL)) { |
3393 | struct kqlist *alloc_hash; |
3394 | u_long alloc_mask; |
3395 | |
3396 | kqhash_unlock(p); |
3397 | alloc_hash = hashinit(CONFIG_KQ_HASHSIZE, M_KQUEUE, &alloc_mask); |
3398 | kqhash_lock(p); |
3399 | |
3400 | /* See if we won the race */ |
3401 | if (fdp->fd_kqhashmask == 0) { |
3402 | fdp->fd_kqhash = alloc_hash; |
3403 | fdp->fd_kqhashmask = alloc_mask; |
3404 | } else { |
3405 | kqhash_unlock(p); |
3406 | FREE(alloc_hash, M_KQUEUE); |
3407 | kqhash_lock(p); |
3408 | } |
3409 | } |
3410 | } |
3411 | |
3412 | /* |
3413 | * Called with the kqhash_lock() held |
3414 | */ |
3415 | static void |
3416 | kqueue_hash_insert( |
3417 | struct proc *p, |
3418 | kqueue_id_t id, |
3419 | struct kqueue *kq) |
3420 | { |
3421 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
3422 | struct filedesc *fdp = p->p_fd; |
3423 | struct kqlist *list; |
3424 | |
3425 | /* should hold the kq hash lock */ |
3426 | kqhash_lock_held(p); |
3427 | |
3428 | if ((kq->kq_state & KQ_DYNAMIC) == 0) { |
3429 | assert(kq->kq_state & KQ_DYNAMIC); |
3430 | return; |
3431 | } |
3432 | |
3433 | /* only dynamically allocate workloop kqs for now */ |
3434 | assert(kq->kq_state & KQ_WORKLOOP); |
3435 | assert(fdp->fd_kqhash); |
3436 | |
3437 | kqwl->kqwl_dynamicid = id; |
3438 | |
3439 | list = &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)]; |
3440 | SLIST_INSERT_HEAD(list, kqwl, kqwl_hashlink); |
3441 | } |
3442 | |
3443 | /* Called with kqhash_lock held */ |
3444 | static void |
3445 | kqueue_hash_remove( |
3446 | struct proc *p, |
3447 | struct kqueue *kq) |
3448 | { |
3449 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
3450 | struct filedesc *fdp = p->p_fd; |
3451 | struct kqlist *list; |
3452 | |
3453 | /* should hold the kq hash lock */ |
3454 | kqhash_lock_held(p); |
3455 | |
3456 | if ((kq->kq_state & KQ_DYNAMIC) == 0) { |
3457 | assert(kq->kq_state & KQ_DYNAMIC); |
3458 | return; |
3459 | } |
3460 | assert(kq->kq_state & KQ_WORKLOOP); /* for now */ |
3461 | list = &fdp->fd_kqhash[KQ_HASH(kqwl->kqwl_dynamicid, fdp->fd_kqhashmask)]; |
3462 | SLIST_REMOVE(list, kqwl, kqworkloop, kqwl_hashlink); |
3463 | } |
3464 | |
3465 | /* Called with kqhash_lock held */ |
3466 | static struct kqueue * |
3467 | kqueue_hash_lookup(struct proc *p, kqueue_id_t id) |
3468 | { |
3469 | struct filedesc *fdp = p->p_fd; |
3470 | struct kqlist *list; |
3471 | struct kqworkloop *kqwl; |
3472 | |
3473 | /* should hold the kq hash lock */ |
3474 | kqhash_lock_held(p); |
3475 | |
3476 | if (fdp->fd_kqhashmask == 0) return NULL; |
3477 | |
3478 | list = &fdp->fd_kqhash[KQ_HASH(id, fdp->fd_kqhashmask)]; |
3479 | SLIST_FOREACH(kqwl, list, kqwl_hashlink) { |
3480 | if (kqwl->kqwl_dynamicid == id) { |
3481 | struct kqueue *kq = (struct kqueue *)kqwl; |
3482 | |
3483 | assert(kq->kq_state & KQ_DYNAMIC); |
3484 | assert(kq->kq_state & KQ_WORKLOOP); /* for now */ |
3485 | return kq; |
3486 | } |
3487 | } |
3488 | return NULL; |
3489 | } |
3490 | |
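/*
 * kqueue_release_last - drop a reference on a dynamic kqueue and, if it was
 * the last one, unhash, invalidate and deallocate the workloop.
 */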
3491 | static inline void |
3492 | kqueue_release_last(struct proc *p, kqueue_t kqu) |
3493 | { |
3494 | struct kqueue *kq = kqu.kq; |
3495 | if (kq->kq_state & KQ_DYNAMIC) { |
3496 | kqhash_lock(p); |
3497 | if (kqueue_release(kq, KQUEUE_MIGHT_BE_LAST_REF)) { |
3498 | thread_t cur_owner = kqworkloop_invalidate(kqu.kqwl); |
3499 | kqueue_hash_remove(p, kq); |
3500 | kqhash_unlock(p); |
3501 | if (cur_owner) thread_deallocate(cur_owner); |
3502 | kqueue_dealloc(kq); |
3503 | } else { |
3504 | kqhash_unlock(p); |
3505 | } |
3506 | } |
3507 | } |
3508 | |
3509 | /* |
3510 | * kqworkloops_dealloc - rebalance retains on kqworkloops created with |
3511 | * scheduling parameters |
3512 | * |
3513 | * Called with proc_fdlock held. |
3514 | * Returns with it locked. |
 * The process is in such a state that it will not try to allocate
 * any more knotes during this operation (it is stopped for exit or exec).
3517 | */ |
3518 | void |
3519 | kqworkloops_dealloc(proc_t p) |
3520 | { |
3521 | struct filedesc *fdp = p->p_fd; |
3522 | struct kqlist *list; |
3523 | struct kqworkloop *kqwl, *kqwln; |
3524 | struct kqlist tofree; |
3525 | int i; |
3526 | |
3527 | if (!(fdp->fd_flags & FD_WORKLOOP)) { |
3528 | return; |
3529 | } |
3530 | |
3531 | SLIST_INIT(&tofree); |
3532 | |
3533 | kqhash_lock(p); |
3534 | assert(fdp->fd_kqhashmask != 0); |
3535 | |
3536 | for (i = 0; i <= (int)fdp->fd_kqhashmask; i++) { |
3537 | list = &fdp->fd_kqhash[i]; |
3538 | SLIST_FOREACH_SAFE(kqwl, list, kqwl_hashlink, kqwln) { |
3539 | /* |
3540 | * kqworkloops that have scheduling parameters have an |
3541 | * implicit retain from kqueue_workloop_ctl that needs |
3542 | * to be balanced on process exit. |
3543 | */ |
3544 | assert(kqwl->kqwl_params); |
3545 | SLIST_REMOVE(list, kqwl, kqworkloop, kqwl_hashlink); |
3546 | SLIST_INSERT_HEAD(&tofree, kqwl, kqwl_hashlink); |
3547 | } |
3548 | } |
3549 | |
3550 | kqhash_unlock(p); |
3551 | |
3552 | SLIST_FOREACH_SAFE(kqwl, &tofree, kqwl_hashlink, kqwln) { |
3553 | struct kqueue *kq = (struct kqueue *)kqwl; |
3554 | __assert_only bool released; |
3555 | released = kqueue_release(kq, KQUEUE_MIGHT_BE_LAST_REF); |
3556 | assert(released); |
3557 | kqueue_dealloc(kq); |
3558 | } |
3559 | } |
3560 | |
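/*
 * Return the workloop kqueue this thread is currently bound to as a
 * servicer, or NULL if it is not bound.
 */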
3561 | static struct kqueue * |
3562 | kevent_get_bound_kqworkloop(thread_t thread) |
3563 | { |
3564 | struct uthread *ut = get_bsdthread_info(thread); |
3565 | struct kqrequest *kqr = ut->uu_kqr_bound; |
3566 | |
3567 | return kqr ? (struct kqueue *)kqr_kqworkloop(kqr) : NULL; |
3568 | } |
3569 | |
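/*
 * kevent_get_kq - resolve the kqueue a kevent call operates on.
 *
 * Dynamic (workloop) requests are resolved by id: first against the workloop
 * the calling thread is bound to, then through the per-process hash,
 * allocating a new workloop when permitted.  KEVENT_FLAG_WORKQ uses the
 * process-wide workq kqueue, and everything else treats 'id' as a file
 * descriptor.  On success a reference (or fd usecount) is held that the
 * caller must release with kevent_put_kq().
 */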
3570 | static int |
3571 | kevent_get_kq(struct proc *p, kqueue_id_t id, workq_threadreq_param_t *trp, |
3572 | unsigned int flags, struct fileproc **fpp, int *fdp, |
3573 | struct kqueue **kqp) |
3574 | { |
3575 | struct filedesc *descp = p->p_fd; |
3576 | struct fileproc *fp = NULL; |
3577 | struct kqueue *kq = NULL; |
3578 | int fd = 0; |
3579 | int error = 0; |
3580 | thread_t th = current_thread(); |
3581 | |
3582 | assert(!trp || (flags & KEVENT_FLAG_WORKLOOP)); |
3583 | |
	/* If the dynamic kqueue flag was passed, this can only be a workloop */
3585 | if (flags & KEVENT_FLAG_DYNAMIC_KQUEUE) { |
3586 | assert(flags & KEVENT_FLAG_WORKLOOP); |
3587 | assert(!trp || (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST)); |
3588 | kq = kevent_get_bound_kqworkloop(th); |
3589 | |
3590 | /* |
		 * When kevent_id_internal is called from within the
		 * kernel and the passed 'id' value is '-1', we look
		 * up the currently bound workloop kq.
3594 | */ |
3595 | if (id == (kqueue_id_t)-1 && |
3596 | (flags & KEVENT_FLAG_KERNEL) && |
3597 | (flags & KEVENT_FLAG_WORKLOOP)) { |
3598 | |
3599 | if (!is_workqueue_thread(th) || !kq) { |
3600 | return EINVAL; |
3601 | } |
3602 | |
3603 | kqueue_retain(kq); |
3604 | goto out; |
3605 | } |
3606 | |
3607 | if (id == 0 || id == (kqueue_id_t)-1) { |
3608 | return EINVAL; |
3609 | } |
3610 | |
3611 | /* try shortcut on kq lookup for bound threads */ |
3612 | if (kq != NULL && ((struct kqworkloop *)kq)->kqwl_dynamicid == id) { |
3613 | |
3614 | if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST) { |
3615 | return EEXIST; |
3616 | } |
3617 | |
3618 | /* retain a reference while working with this kq. */ |
3619 | assert(kq->kq_state & KQ_DYNAMIC); |
3620 | kqueue_retain(kq); |
3621 | goto out; |
3622 | } |
3623 | |
3624 | /* look for the kq on the hash table */ |
3625 | kqhash_lock(p); |
3626 | kq = kqueue_hash_lookup(p, id); |
3627 | if (kq == NULL) { |
3628 | kqhash_unlock(p); |
3629 | |
3630 | if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST) { |
3631 | return ENOENT; |
3632 | } |
3633 | |
3634 | struct kqueue *alloc_kq; |
3635 | alloc_kq = kqueue_alloc(p, flags); |
3636 | if (!alloc_kq) { |
3637 | return ENOMEM; |
3638 | } |
3639 | |
3640 | kqhash_lock(p); |
3641 | kqueue_hash_init_if_needed(p); |
3642 | kq = kqueue_hash_lookup(p, id); |
3643 | if (kq == NULL) { |
3644 | /* insert our new one */ |
3645 | kq = alloc_kq; |
3646 | if (trp) { |
3647 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
3648 | kqwl->kqwl_params = trp->trp_value; |
3649 | } |
3650 | kqueue_hash_insert(p, id, kq); |
3651 | kqhash_unlock(p); |
3652 | } else if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST) { |
3653 | /* lost race and caller wants an error */ |
3654 | kqhash_unlock(p); |
3655 | kqueue_release(alloc_kq, KQUEUE_MIGHT_BE_LAST_REF); |
3656 | kqueue_dealloc(alloc_kq); |
3657 | return EEXIST; |
3658 | } else { |
3659 | /* lost race, retain existing workloop */ |
3660 | kqueue_retain(kq); |
3661 | kqhash_unlock(p); |
3662 | kqueue_release(alloc_kq, KQUEUE_MIGHT_BE_LAST_REF); |
3663 | kqueue_dealloc(alloc_kq); |
3664 | } |
3665 | } else { |
3666 | |
3667 | if (flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST) { |
3668 | kqhash_unlock(p); |
3669 | return EEXIST; |
3670 | } |
3671 | |
3672 | /* retain a reference while working with this kq. */ |
3673 | assert(kq->kq_state & KQ_DYNAMIC); |
3674 | kqueue_retain(kq); |
3675 | kqhash_unlock(p); |
3676 | } |
3677 | |
3678 | } else if (flags & KEVENT_FLAG_WORKQ) { |
3679 | /* must already exist for bound threads. */ |
3680 | if (flags & KEVENT_FLAG_KERNEL) { |
3681 | assert(descp->fd_wqkqueue != NULL); |
3682 | } |
3683 | |
3684 | /* |
3685 | * use the private kq associated with the proc workq. |
3686 | * Just being a thread within the process (and not |
3687 | * being the exit/exec thread) is enough to hold a |
3688 | * reference on this special kq. |
3689 | */ |
3690 | kq = descp->fd_wqkqueue; |
3691 | if (kq == NULL) { |
3692 | struct kqueue *alloc_kq = kqueue_alloc(p, KEVENT_FLAG_WORKQ); |
3693 | if (alloc_kq == NULL) { |
3694 | return ENOMEM; |
3695 | } |
3696 | |
3697 | knhash_lock(p); |
3698 | if (descp->fd_wqkqueue == NULL) { |
3699 | kq = descp->fd_wqkqueue = alloc_kq; |
3700 | knhash_unlock(p); |
3701 | } else { |
3702 | knhash_unlock(p); |
3703 | kq = descp->fd_wqkqueue; |
3704 | kqueue_dealloc(alloc_kq); |
3705 | } |
3706 | } |
3707 | } else { |
3708 | /* get a usecount for the kq itself */ |
3709 | fd = (int)id; |
3710 | if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) |
3711 | return (error); |
3712 | } |
3713 | if ((error = kevent_set_kq_mode(kq, flags)) != 0) { |
3714 | /* drop the usecount */ |
3715 | if (fp != NULL) |
3716 | fp_drop(p, fd, fp, 0); |
3717 | return error; |
3718 | } |
3719 | |
3720 | out: |
3721 | *fpp = fp; |
3722 | *fdp = fd; |
3723 | *kqp = kq; |
3724 | |
3725 | return error; |
3726 | } |
3727 | |
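/* Undo the reference/usecount taken by kevent_get_kq() */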
3728 | static void |
3729 | kevent_put_kq( |
3730 | struct proc *p, |
3731 | kqueue_id_t id, |
3732 | struct fileproc *fp, |
3733 | struct kqueue *kq) |
3734 | { |
3735 | kqueue_release_last(p, kq); |
3736 | if (fp != NULL) { |
3737 | assert((kq->kq_state & KQ_WORKQ) == 0); |
3738 | fp_drop(p, (int)id, fp, 0); |
3739 | } |
3740 | } |
3741 | |
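/*
 * Best-effort copyin of the dispatch queue serial number published at
 * p_dispatchqueue_serialno_offset past the workloop id in user memory.
 * Returns 0 if the value cannot be read.
 */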
3742 | static uint64_t |
3743 | kevent_workloop_serial_no_copyin(proc_t p, uint64_t workloop_id) |
3744 | { |
3745 | uint64_t serial_no = 0; |
3746 | user_addr_t addr; |
3747 | int rc; |
3748 | |
3749 | if (workloop_id == 0 || p->p_dispatchqueue_serialno_offset == 0) { |
3750 | return 0; |
3751 | } |
3752 | addr = (user_addr_t)(workloop_id + p->p_dispatchqueue_serialno_offset); |
3753 | |
3754 | if (proc_is64bit(p)) { |
3755 | rc = copyin(addr, (caddr_t)&serial_no, sizeof(serial_no)); |
3756 | } else { |
3757 | uint32_t serial_no32 = 0; |
3758 | rc = copyin(addr, (caddr_t)&serial_no32, sizeof(serial_no32)); |
3759 | serial_no = serial_no32; |
3760 | } |
3761 | return rc == 0 ? serial_no : 0; |
3762 | } |
3763 | |
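/*
 * A thread is exiting while still owning a dynamic workloop.  Find the
 * leaked workloop id, attach it (and the dispatch queue serial number) to
 * an exit reason, and terminate the process - or just abort/panic on
 * DEVELOPMENT || DEBUG kernels.
 */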
3764 | int |
3765 | kevent_exit_on_workloop_ownership_leak(thread_t thread) |
3766 | { |
3767 | proc_t p = current_proc(); |
3768 | struct filedesc *fdp = p->p_fd; |
3769 | kqueue_id_t workloop_id = 0; |
3770 | os_reason_t reason = OS_REASON_NULL; |
3771 | mach_vm_address_t addr; |
3772 | uint32_t reason_size; |
3773 | |
3774 | kqhash_lock(p); |
3775 | if (fdp->fd_kqhashmask > 0) { |
3776 | for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) { |
3777 | struct kqworkloop *kqwl; |
3778 | |
3779 | SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) { |
3780 | struct kqueue *kq = &kqwl->kqwl_kqueue; |
3781 | if ((kq->kq_state & KQ_DYNAMIC) && kqwl->kqwl_owner == thread) { |
3782 | workloop_id = kqwl->kqwl_dynamicid; |
3783 | break; |
3784 | } |
3785 | } |
3786 | } |
3787 | } |
3788 | kqhash_unlock(p); |
3789 | |
3790 | reason = os_reason_create(OS_REASON_LIBSYSTEM, |
3791 | OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK); |
3792 | if (reason == OS_REASON_NULL) { |
3793 | goto out; |
3794 | } |
3795 | |
3796 | reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
3797 | reason_size = 2 * sizeof(uint64_t); |
3798 | reason_size = kcdata_estimate_required_buffer_size(2, reason_size); |
3799 | if (os_reason_alloc_buffer(reason, reason_size) != 0) { |
3800 | goto out; |
3801 | } |
3802 | |
3803 | if (workloop_id) { |
3804 | struct kcdata_descriptor *kcd = &reason->osr_kcd_descriptor; |
3805 | |
3806 | if (kcdata_get_memory_addr(kcd, EXIT_REASON_WORKLOOP_ID, |
3807 | sizeof(workloop_id), &addr) == KERN_SUCCESS) { |
3808 | kcdata_memcpy(kcd, addr, &workloop_id, sizeof(workloop_id)); |
3809 | } |
3810 | |
3811 | uint64_t serial_no = kevent_workloop_serial_no_copyin(p, workloop_id); |
3812 | if (serial_no && kcdata_get_memory_addr(kcd, EXIT_REASON_DISPATCH_QUEUE_NO, |
3813 | sizeof(serial_no), &addr) == KERN_SUCCESS) { |
3814 | kcdata_memcpy(kcd, addr, &serial_no, sizeof(serial_no)); |
3815 | } |
3816 | } |
3817 | out: |
3818 | #if DEVELOPMENT || DEBUG |
3819 | if (kevent_debug_flags() & KEVENT_PANIC_ON_WORKLOOP_OWNERSHIP_LEAK) { |
		panic("thread %p in task %p has leaked workloop 0x%016llx ownership",
		    thread, p->task, workloop_id);
3822 | } |
3823 | psignal_try_thread_with_reason(p, thread, SIGABRT, reason); |
3824 | return 0; |
3825 | #else |
3826 | return exit_with_reason(p, W_EXITCODE(0, SIGKILL), (int *)NULL, |
3827 | FALSE, FALSE, 0, reason); |
3828 | #endif |
3829 | } |
3830 | |
3831 | static inline boolean_t |
3832 | kevent_args_requesting_events(unsigned int flags, int nevents) |
3833 | { |
3834 | return (!(flags & KEVENT_FLAG_ERROR_EVENTS) && nevents > 0); |
3835 | } |
3836 | |
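/*
 * kevent_internal - common backend for all kevent variants.
 *
 * Validates the flag combination, converts the timeout to an absolute
 * deadline, resolves the target kqueue, registers each change from the
 * changelist (copying out per-change errors when requested), and finally
 * scans for pending events through kqueue_scan(), possibly blocking with
 * the supplied continuation.
 */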
3837 | static int |
3838 | kevent_internal(struct proc *p, |
3839 | kqueue_id_t id, kqueue_id_t *id_out, |
3840 | user_addr_t changelist, int nchanges, |
3841 | user_addr_t ueventlist, int nevents, |
3842 | user_addr_t data_out, uint64_t data_available, |
3843 | unsigned int flags, |
3844 | user_addr_t utimeout, |
3845 | kqueue_continue_t continuation, |
3846 | int32_t *retval) |
3847 | { |
3848 | uthread_t ut; |
3849 | struct kqueue *kq; |
3850 | struct fileproc *fp = NULL; |
3851 | int fd = 0; |
3852 | struct kevent_internal_s kev; |
3853 | int error, noutputs, register_rc; |
3854 | bool needs_end_processing = false; |
3855 | struct timeval atv; |
3856 | user_size_t data_size; |
3857 | user_size_t data_resid; |
3858 | thread_t thread = current_thread(); |
3859 | KNOTE_LOCK_CTX(knlc); |
3860 | |
3861 | /* Don't allow user-space threads to process output events from the workq kqs */ |
3862 | if (((flags & (KEVENT_FLAG_WORKQ | KEVENT_FLAG_KERNEL)) == KEVENT_FLAG_WORKQ) && |
3863 | kevent_args_requesting_events(flags, nevents)) |
3864 | return EINVAL; |
3865 | |
3866 | if (flags & KEVENT_FLAG_PARKING) { |
3867 | if (!kevent_args_requesting_events(flags, nevents) || id != (kqueue_id_t)-1) |
3868 | return EINVAL; |
3869 | } |
3870 | |
3871 | /* restrict dynamic kqueue allocation to workloops (for now) */ |
3872 | if ((flags & (KEVENT_FLAG_DYNAMIC_KQUEUE | KEVENT_FLAG_WORKLOOP)) == KEVENT_FLAG_DYNAMIC_KQUEUE) |
3873 | return EINVAL; |
3874 | |
3875 | if ((flags & (KEVENT_FLAG_WORKLOOP)) && (flags & (KEVENT_FLAG_WORKQ))) |
3876 | return EINVAL; |
3877 | |
3878 | if (flags & (KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST)) { |
3879 | |
3880 | /* allowed only on workloops when calling kevent_id from user-space */ |
3881 | if (!(flags & KEVENT_FLAG_WORKLOOP) || (flags & KEVENT_FLAG_KERNEL) || !(flags & KEVENT_FLAG_DYNAMIC_KQUEUE)) |
3882 | return EINVAL; |
3883 | } |
3884 | |
3885 | /* prepare to deal with stack-wise allocation of out events */ |
3886 | if (flags & KEVENT_FLAG_STACK_EVENTS) { |
3887 | int scale = ((flags & KEVENT_FLAG_LEGACY32) ? |
3888 | (IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) : |
3889 | sizeof(struct user32_kevent)) : |
3890 | ((flags & KEVENT_FLAG_LEGACY64) ? sizeof(struct kevent64_s) : |
3891 | sizeof(struct kevent_qos_s))); |
3892 | ueventlist += nevents * scale; |
3893 | } |
3894 | |
3895 | /* convert timeout to absolute - if we have one (and not immediate) */ |
3896 | error = kevent_get_timeout(p, utimeout, flags, &atv); |
3897 | if (error) |
3898 | return error; |
3899 | |
3900 | /* copyin initial value of data residual from data_available */ |
3901 | error = kevent_get_data_size(p, data_available, flags, &data_size); |
3902 | if (error) |
3903 | return error; |
3904 | |
3905 | /* get the kq we are going to be working on */ |
3906 | error = kevent_get_kq(p, id, NULL, flags, &fp, &fd, &kq); |
3907 | #if CONFIG_WORKLOOP_DEBUG |
3908 | ut = (uthread_t)get_bsdthread_info(thread); |
3909 | UU_KEVENT_HISTORY_WRITE_ENTRY(ut, { |
3910 | .uu_kqid = id, |
3911 | .uu_kq = error ? NULL : kq, |
3912 | .uu_error = error, |
3913 | .uu_nchanges = nchanges, |
3914 | .uu_nevents = nevents, |
3915 | .uu_flags = flags, |
3916 | }); |
3917 | #endif // CONFIG_WORKLOOP_DEBUG |
3918 | if (error) |
3919 | return error; |
3920 | |
3921 | /* only bound threads can receive events on workloops */ |
3922 | if (flags & KEVENT_FLAG_WORKLOOP) { |
3923 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
3924 | struct kqrequest *kqr = &kqwl->kqwl_request; |
3925 | |
3926 | assert(kq->kq_state & KQ_WORKLOOP); |
3927 | |
3928 | if (kevent_args_requesting_events(flags, nevents)) { |
3929 | if (kq != kevent_get_bound_kqworkloop(thread)) { |
3930 | error = EXDEV; |
3931 | goto out; |
3932 | } |
3933 | |
3934 | kq_req_lock(kqwl); |
3935 | /* |
			 * Disable the R2K notification while doing a register: if the
			 * caller wants events too, we don't want the AST to be set,
			 * because we will process these events soon.
3939 | */ |
3940 | kqr->kqr_state &= ~KQR_R2K_NOTIF_ARMED; |
3941 | needs_end_processing = true; |
3942 | kq_req_unlock(kq); |
3943 | } |
3944 | |
3945 | if (id_out) { |
3946 | *id_out = kqwl->kqwl_dynamicid; |
3947 | } |
3948 | |
3949 | } |
3950 | |
3951 | /* register all the change requests the user provided... */ |
3952 | noutputs = 0; |
3953 | while (nchanges > 0 && error == 0) { |
3954 | error = kevent_copyin(&changelist, &kev, p, flags); |
3955 | if (error) |
3956 | break; |
3957 | |
3958 | /* Make sure user doesn't pass in any system flags */ |
3959 | kev.flags &= ~EV_SYSFLAGS; |
3960 | |
3961 | register_rc = kevent_register(kq, &kev, &knlc); |
3962 | if (register_rc & FILTER_REGISTER_WAIT) { |
3963 | kqlock_held(kq); |
3964 | |
3965 | // f_post_register_wait is meant to call a continuation and not to |
3966 | // return, which is why we don't support FILTER_REGISTER_WAIT if |
3967 | // KEVENT_FLAG_ERROR_EVENTS is not passed, or if the event that |
3968 | // waits isn't the last. |
3969 | // |
3970 | // It is implementable, but not used by any userspace code at the |
3971 | // moment, so for now return ENOTSUP if someone tries to do it. |
3972 | if (nchanges == 1 && nevents >= 1 && (flags & KEVENT_FLAG_ERROR_EVENTS)) { |
3973 | struct _kevent_register *cont_args; |
3974 | /* store the continuation/completion data in the uthread */ |
3975 | ut = (uthread_t)get_bsdthread_info(thread); |
3976 | cont_args = &ut->uu_save.uus_kevent_register; |
3977 | cont_args->kev = kev; |
3978 | cont_args->kq = kq; |
3979 | cont_args->fp = fp; |
3980 | cont_args->fd = fd; |
3981 | cont_args->ueventlist = ueventlist; |
3982 | cont_args->flags = flags; |
3983 | cont_args->retval = retval; |
3984 | cont_args->eventcount = nevents; |
3985 | cont_args->eventout = noutputs; |
3986 | knote_fops(cont_args->knote)->f_post_register_wait(ut, &knlc, cont_args); |
				panic("f_post_register_wait returned (kev: %p)", &kev);
3988 | } |
3989 | |
3990 | kev.flags |= EV_ERROR; |
3991 | kev.data = ENOTSUP; |
3992 | knote_unlock(kq, knlc.knlc_knote, &knlc, KNOTE_KQ_UNLOCK); |
3993 | } |
3994 | |
3995 | // keep in sync with kevent_register_wait_return() |
3996 | if (nevents > 0 && (kev.flags & (EV_ERROR|EV_RECEIPT))) { |
3997 | if ((kev.flags & EV_ERROR) == 0) { |
3998 | kev.flags |= EV_ERROR; |
3999 | kev.data = 0; |
4000 | } |
4001 | error = kevent_copyout(&kev, &ueventlist, p, flags); |
4002 | if (error == 0) { |
4003 | nevents--; |
4004 | noutputs++; |
4005 | } |
4006 | } else if (kev.flags & EV_ERROR) { |
4007 | error = kev.data; |
4008 | } |
4009 | nchanges--; |
4010 | } |
4011 | |
4012 | /* short-circuit the scan if we only want error events */ |
4013 | if (flags & KEVENT_FLAG_ERROR_EVENTS) |
4014 | nevents = 0; |
4015 | |
4016 | /* process pending events */ |
4017 | if (nevents > 0 && noutputs == 0 && error == 0) { |
4018 | struct _kevent *cont_args; |
4019 | /* store the continuation/completion data in the uthread */ |
4020 | ut = (uthread_t)get_bsdthread_info(thread); |
4021 | cont_args = &ut->uu_save.uus_kevent; |
4022 | cont_args->fp = fp; |
4023 | cont_args->fd = fd; |
4024 | cont_args->retval = retval; |
4025 | cont_args->eventlist = ueventlist; |
4026 | cont_args->eventcount = nevents; |
4027 | cont_args->eventout = noutputs; |
4028 | cont_args->data_available = data_available; |
4029 | cont_args->process_data.fp_fd = (int)id; |
4030 | cont_args->process_data.fp_flags = flags; |
4031 | cont_args->process_data.fp_data_out = data_out; |
4032 | cont_args->process_data.fp_data_size = data_size; |
4033 | cont_args->process_data.fp_data_resid = data_size; |
4034 | |
4035 | /* |
4036 | * kqworkloop_end_processing() will happen at the end of kqueue_scan() |
4037 | */ |
4038 | needs_end_processing = false; |
4039 | |
4040 | error = kqueue_scan(kq, kevent_callback, |
4041 | continuation, cont_args, |
4042 | &cont_args->process_data, |
4043 | &atv, p); |
4044 | |
4045 | /* process remaining outputs */ |
4046 | noutputs = cont_args->eventout; |
4047 | data_resid = cont_args->process_data.fp_data_resid; |
4048 | |
4049 | /* copyout residual data size value (if it needs to be copied out) */ |
4050 | /* don't abandon other output just because of residual copyout failures */ |
4051 | if (error == 0 && data_available && data_resid != data_size) { |
4052 | (void)kevent_put_data_size(p, data_available, flags, data_resid); |
4053 | } |
4054 | } |
4055 | |
4056 | out: |
4057 | if (__improbable(needs_end_processing)) { |
4058 | /* |
		 * If we didn't go through kqworkloop_end_processing(),
4060 | * we need to do it here. |
4061 | */ |
4062 | kqlock(kq); |
4063 | kqworkloop_end_processing((struct kqworkloop *)kq, 0, 0); |
4064 | kqunlock(kq); |
4065 | } |
4066 | kevent_put_kq(p, id, fp, kq); |
4067 | |
4068 | /* don't restart after signals... */ |
4069 | if (error == ERESTART) |
4070 | error = EINTR; |
4071 | else if (error == EWOULDBLOCK) |
4072 | error = 0; |
4073 | if (error == 0) |
4074 | *retval = noutputs; |
4075 | return (error); |
4076 | } |
4077 | |
4078 | |
4079 | /* |
4080 | * kevent_callback - callback for each individual event |
4081 | * |
4082 | * called with nothing locked |
4083 | * caller holds a reference on the kqueue |
4084 | */ |
4085 | static int |
4086 | kevent_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp, |
4087 | void *data) |
4088 | { |
4089 | struct _kevent *cont_args; |
4090 | int error; |
4091 | |
4092 | cont_args = (struct _kevent *)data; |
4093 | assert(cont_args->eventout < cont_args->eventcount); |
4094 | |
4095 | /* |
4096 | * Copy out the appropriate amount of event data for this user. |
4097 | */ |
4098 | error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(), |
4099 | cont_args->process_data.fp_flags); |
4100 | |
4101 | /* |
4102 | * If there isn't space for additional events, return |
4103 | * a harmless error to stop the processing here |
4104 | */ |
4105 | if (error == 0 && ++cont_args->eventout == cont_args->eventcount) |
4106 | error = EWOULDBLOCK; |
4107 | return (error); |
4108 | } |
4109 | |
4110 | /* |
4111 | * kevent_description - format a description of a kevent for diagnostic output |
4112 | * |
4113 | * called with a 256-byte string buffer |
4114 | */ |
4115 | |
4116 | char * |
4117 | kevent_description(struct kevent_internal_s *kevp, char *s, size_t n) |
4118 | { |
4119 | snprintf(s, n, |
4120 | "kevent=" |
	    "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
4122 | kevp->ident, |
4123 | kevp->filter, |
4124 | kevp->flags, |
4125 | kevp->udata, |
4126 | kevp->fflags, |
4127 | kevp->data, |
4128 | kevp->ext[0], |
4129 | kevp->ext[1] ); |
4130 | |
4131 | return (s); |
4132 | } |
4133 | |
4134 | static int |
4135 | kevent_register_validate_priority(struct kqueue *kq, struct knote *kn, |
4136 | struct kevent_internal_s *kev) |
4137 | { |
4138 | /* We don't care about the priority of a disabled or deleted knote */ |
4139 | if (kev->flags & (EV_DISABLE | EV_DELETE)) { |
4140 | return 0; |
4141 | } |
4142 | |
4143 | if (kq->kq_state & KQ_WORKLOOP) { |
4144 | /* |
4145 | * Workloops need valid priorities with a QOS (excluding manager) for |
4146 | * any enabled knote. |
4147 | * |
		 * When the knote is pre-existing, just make sure it has a valid QoS,
		 * as kevent_register() will not use the incoming priority (filters
		 * that do use it are responsible for validating it again; see
		 * filt_wltouch).
4151 | * |
4152 | * If the knote is being made, validate the incoming priority. |
4153 | */ |
4154 | if (!_pthread_priority_thread_qos(kn ? kn->kn_qos : kev->qos)) { |
4155 | return ERANGE; |
4156 | } |
4157 | } |
4158 | |
4159 | return 0; |
4160 | } |
4161 | |
4162 | /* |
4163 | * Prepare a filter for waiting after register. |
4164 | * |
4165 | * The f_post_register_wait hook will be called later by kevent_register() |
4166 | * and should call kevent_register_wait_block() |
4167 | */ |
4168 | static int |
4169 | kevent_register_wait_prepare(struct knote *kn, struct kevent_internal_s *kev) |
4170 | { |
4171 | thread_t thread = current_thread(); |
4172 | struct uthread *uth = get_bsdthread_info(thread); |
4173 | |
4174 | assert(knote_fops(kn)->f_extended_codes); |
4175 | |
4176 | if (kn->kn_hook == NULL) { |
4177 | thread_reference(thread); |
4178 | kn->kn_hook = thread; |
4179 | } else if (kn->kn_hook != thread) { |
4180 | /* |
		 * kn_hook may be set from a previous aborted wait.
		 * However, it has to be from the same thread.
4183 | */ |
4184 | kev->flags |= EV_ERROR; |
4185 | kev->data = EXDEV; |
4186 | return 0; |
4187 | } |
4188 | |
4189 | uth->uu_save.uus_kevent_register.knote = kn; |
4190 | return FILTER_REGISTER_WAIT; |
4191 | } |
4192 | |
4193 | /* |
4194 | * Cleanup a kevent_register_wait_prepare() effect for threads that have been |
4195 | * aborted instead of properly woken up with thread_wakeup_thread(). |
4196 | */ |
4197 | static void |
4198 | kevent_register_wait_cleanup(struct knote *kn) |
4199 | { |
4200 | thread_t thread = kn->kn_hook; |
4201 | kn->kn_hook = NULL; |
4202 | thread_deallocate(thread); |
4203 | } |
4204 | |
4205 | /* |
4206 | * Must be called at the end of a f_post_register_wait call from a filter. |
4207 | */ |
4208 | static void |
4209 | kevent_register_wait_block(struct turnstile *ts, thread_t thread, |
4210 | struct knote_lock_ctx *knlc, thread_continue_t cont, |
4211 | struct _kevent_register *cont_args) |
4212 | { |
4213 | knote_unlock(cont_args->kq, cont_args->knote, knlc, KNOTE_KQ_UNLOCK); |
4214 | turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD); |
4215 | cont_args->handoff_thread = thread; |
4216 | thread_handoff_parameter(thread, cont, cont_args); |
4217 | } |
4218 | |
4219 | /* |
 * Called by filters using f_post_register_wait to return from their wait.
4221 | */ |
4222 | static void |
4223 | kevent_register_wait_return(struct _kevent_register *cont_args) |
4224 | { |
4225 | struct kqueue *kq = cont_args->kq; |
4226 | proc_t p = kq->kq_p; |
4227 | struct kevent_internal_s *kev = &cont_args->kev; |
4228 | int error = 0; |
4229 | |
4230 | if (cont_args->handoff_thread) { |
4231 | thread_deallocate(cont_args->handoff_thread); |
4232 | } |
4233 | |
4234 | if (kev->flags & (EV_ERROR|EV_RECEIPT)) { |
4235 | if ((kev->flags & EV_ERROR) == 0) { |
4236 | kev->flags |= EV_ERROR; |
4237 | kev->data = 0; |
4238 | } |
4239 | error = kevent_copyout(kev, &cont_args->ueventlist, p, cont_args->flags); |
4240 | if (error == 0) cont_args->eventout++; |
4241 | } |
4242 | |
4243 | kevent_put_kq(p, cont_args->fd, cont_args->fp, kq); |
4244 | if (error == 0) { |
4245 | *cont_args->retval = cont_args->eventout; |
4246 | } |
4247 | unix_syscall_return(error); |
4248 | } |
4249 | |
4250 | /* |
4251 | * kevent_register - add a new event to a kqueue |
4252 | * |
4253 | * Creates a mapping between the event source and |
4254 | * the kqueue via a knote data structure. |
4255 | * |
 * Because many/most of the event sources are file
 * descriptor related, the knote is linked off
 * the file descriptor table for quick access.
4259 | * |
4260 | * called with nothing locked |
4261 | * caller holds a reference on the kqueue |
4262 | */ |
4263 | |
4264 | int |
4265 | kevent_register(struct kqueue *kq, struct kevent_internal_s *kev, |
4266 | struct knote_lock_ctx *knlc) |
4267 | { |
4268 | struct proc *p = kq->kq_p; |
4269 | const struct filterops *fops; |
4270 | struct knote *kn = NULL; |
4271 | int result = 0, error = 0; |
4272 | unsigned short kev_flags = kev->flags; |
4273 | |
4274 | if (kev->filter < 0) { |
4275 | if (kev->filter + EVFILT_SYSCOUNT < 0) { |
4276 | error = EINVAL; |
4277 | goto out; |
4278 | } |
4279 | fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ |
4280 | } else { |
4281 | error = EINVAL; |
4282 | goto out; |
4283 | } |
4284 | |
4285 | /* restrict EV_VANISHED to adding udata-specific dispatch kevents */ |
4286 | if ((kev->flags & EV_VANISHED) && |
4287 | (kev->flags & (EV_ADD | EV_DISPATCH2)) != (EV_ADD | EV_DISPATCH2)) { |
4288 | error = EINVAL; |
4289 | goto out; |
4290 | } |
4291 | |
4292 | /* Simplify the flags - delete and disable overrule */ |
4293 | if (kev->flags & EV_DELETE) |
4294 | kev->flags &= ~EV_ADD; |
4295 | if (kev->flags & EV_DISABLE) |
4296 | kev->flags &= ~EV_ENABLE; |
4297 | |
4298 | if (kq->kq_state & KQ_WORKLOOP) { |
4299 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER), |
4300 | ((struct kqworkloop *)kq)->kqwl_dynamicid, |
4301 | kev->udata, kev->flags, kev->filter); |
4302 | } else if (kq->kq_state & KQ_WORKQ) { |
4303 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER), |
4304 | 0, kev->udata, kev->flags, kev->filter); |
4305 | } else { |
4306 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER), |
4307 | VM_KERNEL_UNSLIDE_OR_PERM(kq), |
4308 | kev->udata, kev->flags, kev->filter); |
4309 | } |
4310 | |
4311 | restart: |
4312 | /* find the matching knote from the fd tables/hashes */ |
4313 | kn = kq_find_knote_and_kq_lock(kq, kev, fops->f_isfd, p); |
4314 | error = kevent_register_validate_priority(kq, kn, kev); |
4315 | result = 0; |
4316 | if (error) { |
4317 | goto out; |
4318 | } |
4319 | |
4320 | if (kn == NULL && (kev->flags & EV_ADD) == 0) { |
4321 | /* |
4322 | * No knote found, EV_ADD wasn't specified |
4323 | */ |
4324 | |
4325 | if ((kev_flags & EV_ADD) && (kev_flags & EV_DELETE) && |
4326 | (kq->kq_state & KQ_WORKLOOP)) { |
4327 | /* |
4328 | * For workloops, understand EV_ADD|EV_DELETE as a "soft" delete |
4329 | * that doesn't care about ENOENT, so just pretend the deletion |
4330 | * happened. |
4331 | */ |
4332 | } else { |
4333 | error = ENOENT; |
4334 | } |
4335 | goto out; |
4336 | |
4337 | } else if (kn == NULL) { |
4338 | /* |
4339 | * No knote found, need to attach a new one (attach) |
4340 | */ |
4341 | |
4342 | struct fileproc *knote_fp = NULL; |
4343 | |
4344 | /* grab a file reference for the new knote */ |
4345 | if (fops->f_isfd) { |
4346 | if ((error = fp_lookup(p, kev->ident, &knote_fp, 0)) != 0) { |
4347 | goto out; |
4348 | } |
4349 | } |
4350 | |
4351 | kn = knote_alloc(); |
4352 | if (kn == NULL) { |
4353 | error = ENOMEM; |
4354 | if (knote_fp != NULL) |
4355 | fp_drop(p, kev->ident, knote_fp, 0); |
4356 | goto out; |
4357 | } |
4358 | |
4359 | kn->kn_fp = knote_fp; |
4360 | kn->kn_kq_packed = (intptr_t)(struct kqueue *)kq; |
4361 | kqueue_retain(kq); /* retain a kq ref */ |
4362 | kn->kn_filtid = ~kev->filter; |
4363 | kn->kn_status = KN_ATTACHING | KN_ATTACHED; |
4364 | |
4365 | /* was vanish support requested */ |
4366 | if (kev->flags & EV_VANISHED) { |
4367 | kev->flags &= ~EV_VANISHED; |
4368 | kn->kn_status |= KN_REQVANISH; |
4369 | } |
4370 | |
		/* snapshot matching/dispatching protocol flags into knote */
4372 | if (kev->flags & EV_DISPATCH) |
4373 | kn->kn_status |= KN_DISPATCH; |
4374 | if (kev->flags & EV_UDATA_SPECIFIC) |
4375 | kn->kn_status |= KN_UDATA_SPECIFIC; |
4376 | if (kev->flags & EV_DISABLE) |
4377 | kn->kn_status |= KN_DISABLED; |
4378 | |
4379 | /* |
4380 | * copy the kevent state into knote |
4381 | * protocol is that fflags and data |
4382 | * are saved off, and cleared before |
4383 | * calling the attach routine. |
4384 | */ |
4385 | kn->kn_kevent = *kev; |
4386 | kn->kn_sfflags = kev->fflags; |
4387 | kn->kn_sdata = kev->data; |
4388 | kn->kn_fflags = 0; |
4389 | kn->kn_data = 0; |
4390 | knote_reset_priority(kn, kev->qos); |
4391 | |
4392 | /* Add the knote for lookup thru the fd table */ |
4393 | error = kq_add_knote(kq, kn, knlc, p); |
4394 | if (error) { |
4395 | (void)kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF); |
4396 | knote_free(kn); |
4397 | if (knote_fp != NULL) |
4398 | fp_drop(p, kev->ident, knote_fp, 0); |
4399 | |
4400 | if (error == ERESTART) { |
4401 | goto restart; |
4402 | } |
4403 | goto out; |
4404 | } |
4405 | |
4406 | /* fp reference count now applies to knote */ |
4407 | |
4408 | /* |
4409 | * we can't use filter_call() because f_attach can change the filter ops |
4410 | * for a filter that supports f_extended_codes, so we need to reload |
4411 | * knote_fops() and not use `fops`. |
4412 | */ |
4413 | result = fops->f_attach(kn, kev); |
4414 | if (result && !knote_fops(kn)->f_extended_codes) { |
4415 | result = FILTER_ACTIVE; |
4416 | } |
4417 | |
4418 | kqlock(kq); |
4419 | |
4420 | if (kn->kn_flags & EV_ERROR) { |
4421 | /* |
4422 | * Failed to attach correctly, so drop. |
4423 | */ |
4424 | kn->kn_status &= ~(KN_ATTACHED | KN_ATTACHING); |
4425 | error = kn->kn_data; |
4426 | knote_drop(kq, kn, knlc); |
4427 | result = 0; |
4428 | goto out; |
4429 | } |
4430 | |
4431 | /* |
4432 | * end "attaching" phase - now just attached |
4433 | * |
		 * Mark the thread request overcommit, where appropriate.
		 *
		 * If the attach routine indicated that an
		 * event has already fired, activate the knote.
4438 | */ |
4439 | kn->kn_status &= ~KN_ATTACHING; |
4440 | knote_set_qos_overcommit(kn); |
4441 | |
4442 | if (result & FILTER_ACTIVE) { |
4443 | if (result & FILTER_ADJUST_EVENT_QOS_BIT) |
4444 | knote_adjust_qos(kq, kn, result); |
4445 | knote_activate(kn); |
4446 | } |
4447 | |
4448 | } else if (!knote_lock(kq, kn, knlc, KNOTE_KQ_LOCK_ON_SUCCESS)) { |
4449 | |
4450 | /* |
4451 | * The knote was dropped while we were waiting for the lock, |
4452 | * we need to re-evaluate entirely |
4453 | */ |
4454 | |
4455 | goto restart; |
4456 | |
4457 | } else if (kev->flags & EV_DELETE) { |
4458 | /* |
4459 | * Deletion of a knote (drop) |
4460 | * |
4461 | * If the filter wants to filter drop events, let it do so. |
4462 | * |
4463 | * defer-delete: when trying to delete a disabled EV_DISPATCH2 knote, |
4464 | * we must wait for the knote to be re-enabled (unless it is being |
4465 | * re-enabled atomically here). |
4466 | */ |
4467 | |
4468 | if (knote_fops(kn)->f_allow_drop) { |
4469 | bool drop; |
4470 | |
4471 | kqunlock(kq); |
4472 | drop = knote_fops(kn)->f_allow_drop(kn, kev); |
4473 | kqlock(kq); |
4474 | |
4475 | if (!drop) goto out_unlock; |
4476 | } |
4477 | |
4478 | if ((kev->flags & EV_ENABLE) == 0 && |
4479 | (kn->kn_status & (KN_DISPATCH2 | KN_DISABLED)) == |
4480 | (KN_DISPATCH2 | KN_DISABLED)) { |
4481 | kn->kn_status |= KN_DEFERDELETE; |
4482 | error = EINPROGRESS; |
4483 | goto out_unlock; |
4484 | } |
4485 | |
4486 | knote_drop(kq, kn, knlc); |
4487 | goto out; |
4488 | |
4489 | } else { |
4490 | /* |
4491 | * Regular update of a knote (touch) |
4492 | * |
4493 | * Call touch routine to notify filter of changes in filter values |
4494 | * (and to re-determine if any events are fired). |
4495 | * |
4496 | * If the knote is in defer-delete, avoid calling the filter touch |
4497 | * routine (it has delivered its last event already). |
4498 | * |
4499 | * If the touch routine had no failure, |
4500 | * apply the requested side effects to the knote. |
4501 | */ |
4502 | |
4503 | if (kn->kn_status & (KN_DEFERDELETE | KN_VANISHED)) { |
4504 | if (kev->flags & EV_ENABLE) { |
4505 | result = FILTER_ACTIVE; |
4506 | } |
4507 | } else { |
4508 | kqunlock(kq); |
4509 | result = filter_call(knote_fops(kn), f_touch(kn, kev)); |
4510 | kqlock(kq); |
4511 | } |
4512 | |
4513 | if (kev->flags & EV_ERROR) { |
4514 | result = 0; |
4515 | } else { |
4516 | /* accept new kevent state */ |
4517 | if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0) |
4518 | kn->kn_udata = kev->udata; |
4519 | if (kev->flags & EV_DISABLE) |
4520 | knote_disable(kn); |
4521 | if (result & (FILTER_UPDATE_REQ_QOS | FILTER_ADJUST_EVENT_QOS_BIT)) |
4522 | knote_dequeue(kn); |
4523 | if ((result & FILTER_UPDATE_REQ_QOS) && |
4524 | kev->qos && kev->qos != kn->kn_qos) { |
4525 | knote_reset_priority(kn, kev->qos); |
4526 | } |
4527 | if (result & FILTER_ACTIVE) { |
4528 | thread_qos_t qos; |
4529 | if (result & FILTER_ADJUST_EVENT_QOS_BIT) { |
4530 | if (knote_should_apply_qos_override(kq, kn, result, &qos)) { |
4531 | knote_apply_qos_override(kn, qos); |
4532 | } |
4533 | } |
4534 | knote_activate(kn); |
4535 | } |
4536 | if (result & (FILTER_UPDATE_REQ_QOS | FILTER_ADJUST_EVENT_QOS_BIT)) { |
4537 | if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) { |
4538 | knote_wakeup(kn); |
4539 | } |
4540 | } |
4541 | if (kev->flags & EV_ENABLE) |
4542 | knote_enable(kn); |
4543 | } |
4544 | } |
4545 | |
4546 | out_unlock: |
4547 | if ((result & FILTER_REGISTER_WAIT) == 0) { |
4548 | /* |
4549 | * When the filter asked for a post-register wait, |
4550 | * we leave the knote and kqueue locked for kevent_register() |
4551 | * to call the filter's f_post_register_wait hook. |
4552 | */ |
4553 | knote_unlock(kq, kn, knlc, KNOTE_KQ_UNLOCK); |
4554 | } |
4555 | |
4556 | out: |
4557 | /* output local errors through the kevent */ |
4558 | if (error) { |
4559 | kev->flags |= EV_ERROR; |
4560 | kev->data = error; |
4561 | } |
4562 | return result; |
4563 | } |
4564 | |
4565 | /* |
4566 | * knote_process - process a triggered event |
4567 | * |
4568 | * Validate that it is really still a triggered event |
4569 | * by calling the filter routines (if necessary). Hold |
4570 | * a use reference on the knote to avoid it being detached. |
4571 | * |
4572 | * If it is still considered triggered, we will have taken |
4573 | * a copy of the state under the filter lock. We use that |
4574 | * snapshot to dispatch the knote for future processing (or |
4575 | * not, if this was a lost event). |
4576 | * |
4577 | * Our caller assures us that nobody else can be processing |
4578 | * events from this knote during the whole operation. But |
4579 | * others can be touching or posting events to the knote |
4580 | * interspersed with our processing it. |
4581 | * |
4582 | * caller holds a reference on the kqueue. |
4583 | * kqueue locked on entry and exit - but may be dropped |
4584 | */ |
4585 | static int |
4586 | knote_process(struct knote *kn, |
4587 | kevent_callback_t callback, |
4588 | void *callback_data, |
4589 | struct filt_process_s *process_data) |
4590 | { |
4591 | struct kevent_internal_s kev; |
4592 | struct kqueue *kq = knote_get_kq(kn); |
4593 | KNOTE_LOCK_CTX(knlc); |
4594 | int result = FILTER_ACTIVE; |
4595 | int error = 0; |
4596 | bool drop = false; |
4597 | |
4598 | bzero(&kev, sizeof(kev)); |
4599 | |
4600 | /* |
4601 | * Must be active or stayactive |
4602 | * Must be queued and not disabled/suppressed |
4603 | */ |
4604 | assert(kn->kn_status & KN_QUEUED); |
4605 | assert(kn->kn_status & (KN_ACTIVE|KN_STAYACTIVE)); |
4606 | assert(!(kn->kn_status & (KN_DISABLED|KN_SUPPRESSED|KN_DROPPING))); |
4607 | |
4608 | if (kq->kq_state & KQ_WORKLOOP) { |
4609 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS), |
4610 | ((struct kqworkloop *)kq)->kqwl_dynamicid, |
4611 | kn->kn_udata, kn->kn_status | (kn->kn_id << 32), |
4612 | kn->kn_filtid); |
4613 | } else if (kq->kq_state & KQ_WORKQ) { |
4614 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS), |
4615 | 0, kn->kn_udata, kn->kn_status | (kn->kn_id << 32), |
4616 | kn->kn_filtid); |
4617 | } else { |
4618 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS), |
4619 | VM_KERNEL_UNSLIDE_OR_PERM(kq), kn->kn_udata, |
4620 | kn->kn_status | (kn->kn_id << 32), kn->kn_filtid); |
4621 | } |
4622 | |
4623 | if ((kn->kn_status & KN_DROPPING) || |
4624 | !knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ALWAYS)) { |
4625 | /* |
4626 | * When the knote is dropping or has dropped, |
4627 | * then there's nothing we want to process. |
4628 | */ |
4629 | return EJUSTRETURN; |
4630 | } |
4631 | |
4632 | /* |
4633 | * For deferred-drop or vanished events, we just create a fake |
4634 | * event to acknowledge end-of-life. Otherwise, we call the |
4635 | * filter's process routine to snapshot the kevent state under |
4636 | * the filter's locking protocol. |
4637 | * |
4638 | * suppress knotes to avoid returning the same event multiple times in |
4639 | * a single call. |
4640 | */ |
4641 | knote_suppress(kn); |
4642 | |
4643 | if (kn->kn_status & (KN_DEFERDELETE | KN_VANISHED)) { |
4644 | /* create fake event */ |
4645 | kev.filter = kn->kn_filter; |
4646 | kev.ident = kn->kn_id; |
4647 | kev.flags = (kn->kn_status & KN_DEFERDELETE) ? EV_DELETE : EV_VANISHED; |
4648 | kev.flags |= (EV_DISPATCH2 | EV_ONESHOT); |
4649 | kev.udata = kn->kn_udata; |
4650 | } else { |
4651 | /* deactivate - so new activations indicate a wakeup */ |
4652 | knote_deactivate(kn); |
4653 | |
4654 | kqunlock(kq); |
4655 | result = filter_call(knote_fops(kn), f_process(kn, process_data, &kev)); |
4656 | kqlock(kq); |
4657 | } |
4658 | |
4659 | /* |
4660 | * Determine how to dispatch the knote for future event handling. |
4661 | * not-fired: just return (do not callout, leave deactivated). |
4662 | * One-shot: If dispatch2, enter deferred-delete mode (unless this is |
 *              the deferred delete event delivery itself).  Otherwise,
4664 | * drop it. |
4665 | * Dispatch: don't clear state, just mark it disabled. |
4666 | * Cleared: just leave it deactivated. |
4667 | * Others: re-activate as there may be more events to handle. |
4668 | * This will not wake up more handlers right now, but |
4669 | * at the completion of handling events it may trigger |
4670 | * more handler threads (TODO: optimize based on more than |
4671 | * just this one event being detected by the filter). |
4672 | */ |
4673 | if ((result & FILTER_ACTIVE) == 0) { |
4674 | if ((kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE)) == 0) { |
4675 | /* |
4676 | * Stay active knotes should not be unsuppressed or we'd create an |
4677 | * infinite loop. |
4678 | * |
4679 | * Some knotes (like EVFILT_WORKLOOP) can be reactivated from |
4680 | * within f_process() but that doesn't necessarily make them |
4681 | * ready to process, so we should leave them be. |
4682 | * |
4683 | * For other knotes, since we will not return an event, |
4684 | * there's no point keeping the knote suppressed. |
4685 | */ |
4686 | knote_unsuppress(kn); |
4687 | } |
4688 | knote_unlock(kq, kn, &knlc, KNOTE_KQ_LOCK_ALWAYS); |
4689 | return EJUSTRETURN; |
4690 | } |
4691 | |
4692 | if (result & FILTER_ADJUST_EVENT_QOS_BIT) |
4693 | knote_adjust_qos(kq, kn, result); |
4694 | kev.qos = _pthread_priority_combine(kn->kn_qos, kn->kn_qos_override); |
4695 | |
4696 | if (kev.flags & EV_ONESHOT) { |
4697 | if ((kn->kn_status & (KN_DISPATCH2 | KN_DEFERDELETE)) == KN_DISPATCH2) { |
4698 | /* defer dropping non-delete oneshot dispatch2 events */ |
4699 | kn->kn_status |= KN_DEFERDELETE; |
4700 | knote_disable(kn); |
4701 | } else { |
4702 | drop = true; |
4703 | } |
4704 | } else if (kn->kn_status & KN_DISPATCH) { |
4705 | /* disable all dispatch knotes */ |
4706 | knote_disable(kn); |
4707 | } else if ((kev.flags & EV_CLEAR) == 0) { |
4708 | /* re-activate in case there are more events */ |
4709 | knote_activate(kn); |
4710 | } |
4711 | |
4712 | /* |
4713 | * callback to handle each event as we find it. |
4714 | * If we have to detach and drop the knote, do |
4715 | * it while we have the kq unlocked. |
4716 | */ |
4717 | if (drop) { |
4718 | knote_drop(kq, kn, &knlc); |
4719 | } else { |
4720 | knote_unlock(kq, kn, &knlc, KNOTE_KQ_UNLOCK); |
4721 | } |
4722 | |
4723 | if (kev.flags & EV_VANISHED) { |
4724 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED), |
4725 | kev.ident, kn->kn_udata, kn->kn_status | (kn->kn_id << 32), |
4726 | kn->kn_filtid); |
4727 | } |
4728 | |
4729 | error = (callback)(kq, &kev, callback_data); |
4730 | kqlock(kq); |
4731 | return error; |
4732 | } |
4733 | |
4734 | /* |
4735 | * Returns -1 if the kqueue was unbound and processing should not happen |
4736 | */ |
4737 | #define KQWQAE_BEGIN_PROCESSING 1 |
4738 | #define KQWQAE_END_PROCESSING 2 |
4739 | #define KQWQAE_UNBIND 3 |
4740 | static int |
4741 | kqworkq_acknowledge_events(struct kqworkq *kqwq, struct kqrequest *kqr, |
4742 | int kevent_flags, int kqwqae_op) |
4743 | { |
4744 | thread_qos_t old_override = THREAD_QOS_UNSPECIFIED; |
4745 | thread_t thread = kqr->kqr_thread; |
4746 | struct knote *kn; |
4747 | int rc = 0; |
4748 | bool seen_stayactive = false, unbind; |
4749 | |
4750 | kqlock_held(&kqwq->kqwq_kqueue); |
4751 | |
4752 | if (!TAILQ_EMPTY(&kqr->kqr_suppressed)) { |
4753 | /* |
4754 | * Return suppressed knotes to their original state. |
4755 | * For workq kqueues, suppressed ones that are still |
4756 | * truly active (not just forced into the queue) will |
4757 | * set flags we check below to see if anything got |
4758 | * woken up. |
4759 | */ |
4760 | while ((kn = TAILQ_FIRST(&kqr->kqr_suppressed)) != NULL) { |
4761 | assert(kn->kn_status & KN_SUPPRESSED); |
4762 | knote_unsuppress(kn); |
4763 | if (kn->kn_status & KN_STAYACTIVE) { |
4764 | seen_stayactive = true; |
4765 | } |
4766 | } |
4767 | } |
4768 | |
4769 | kq_req_lock(kqwq); |
4770 | |
4771 | #if DEBUG || DEVELOPMENT |
4772 | thread_t self = current_thread(); |
4773 | struct uthread *ut = get_bsdthread_info(self); |
4774 | |
4775 | assert(kqr->kqr_state & KQR_THREQUESTED); |
4776 | assert(kqr->kqr_thread == self); |
4777 | assert(ut->uu_kqr_bound == kqr); |
4778 | #endif // DEBUG || DEVELOPMENT |
4779 | |
4780 | if (kqwqae_op == KQWQAE_UNBIND) { |
4781 | unbind = true; |
4782 | } else if ((kevent_flags & KEVENT_FLAG_PARKING) == 0) { |
4783 | unbind = false; |
4784 | } else if (kqwqae_op == KQWQAE_BEGIN_PROCESSING && seen_stayactive) { |
4785 | /* |
4786 | * When we unsuppress stayactive knotes, for the kind that are hooked |
4787 | * through select, we need to process once before we can assert there's |
4788 | * no event pending. Hence we can't unbind during BEGIN PROCESSING. |
4789 | */ |
4790 | unbind = false; |
4791 | } else { |
4792 | unbind = ((kqr->kqr_state & KQR_WAKEUP) == 0); |
4793 | } |
4794 | if (unbind) { |
4795 | old_override = kqworkq_unbind_locked(kqwq, kqr, thread); |
4796 | rc = -1; |
4797 | /* |
4798 | * request a new thread if we didn't process the whole queue or real events |
4799 | * have happened (not just putting stay-active events back). |
4800 | */ |
4801 | if (kqr->kqr_state & KQR_WAKEUP) { |
4802 | kqueue_threadreq_initiate(&kqwq->kqwq_kqueue, kqr, |
4803 | kqr->kqr_qos_index, 0); |
4804 | } |
4805 | } |
4806 | |
4807 | if (rc == 0) { |
4808 | /* |
4809 | * Reset wakeup bit to notice events firing while we are processing, |
4810 | * as we cannot rely on the bucket queue emptiness because of stay |
4811 | * active knotes. |
4812 | */ |
4813 | kqr->kqr_state &= ~KQR_WAKEUP; |
4814 | } |
4815 | |
4816 | kq_req_unlock(kqwq); |
4817 | |
4818 | if (old_override) { |
4819 | thread_drop_ipc_override(thread); |
4820 | } |
4821 | |
4822 | return rc; |
4823 | } |
4824 | |
4825 | /* |
4826 | * Return 0 to indicate that processing should proceed, |
4827 | * -1 if there is nothing to process. |
4828 | * |
4829 | * Called with kqueue locked and returns the same way, |
4830 | * but may drop lock temporarily. |
4831 | */ |
4832 | static int |
4833 | kqworkq_begin_processing(struct kqworkq *kqwq, struct kqrequest *kqr, |
4834 | int kevent_flags) |
4835 | { |
4836 | int rc = 0; |
4837 | |
4838 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START, |
4839 | 0, kqr->kqr_qos_index); |
4840 | |
4841 | rc = kqworkq_acknowledge_events(kqwq, kqr, kevent_flags, |
4842 | KQWQAE_BEGIN_PROCESSING); |
4843 | |
4844 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END, |
4845 | thread_tid(kqr->kqr_thread), kqr->kqr_state); |
4846 | |
4847 | return rc; |
4848 | } |
4849 | |
4850 | static inline bool |
4851 | kqworkloop_is_processing_on_current_thread(struct kqworkloop *kqwl) |
4852 | { |
4853 | struct kqueue *kq = &kqwl->kqwl_kqueue; |
4854 | |
4855 | kqlock_held(kq); |
4856 | |
4857 | if (kq->kq_state & KQ_PROCESSING) { |
4858 | /* |
4859 | * KQ_PROCESSING is unset with the kqlock held, and the kqr thread is |
4860 | * never modified while KQ_PROCESSING is set, meaning that peeking at |
4861 | * its value is safe from this context. |
4862 | */ |
4863 | return kqwl->kqwl_request.kqr_thread == current_thread(); |
4864 | } |
4865 | return false; |
4866 | } |
4867 | |
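/*
 * Unsuppress the knotes that were processed, except for QoS-adjusting
 * EV_DISPATCH knotes that were auto-disabled: those stay suppressed so
 * their overrides keep pushing, and the highest such override is returned.
 */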
4868 | static thread_qos_t |
4869 | kqworkloop_acknowledge_events(struct kqworkloop *kqwl) |
4870 | { |
4871 | struct kqrequest *kqr = &kqwl->kqwl_request; |
4872 | kq_index_t qos = THREAD_QOS_UNSPECIFIED; |
4873 | struct knote *kn, *tmp; |
4874 | |
4875 | kqlock_held(&kqwl->kqwl_kqueue); |
4876 | |
4877 | TAILQ_FOREACH_SAFE(kn, &kqr->kqr_suppressed, kn_tqe, tmp) { |
4878 | /* |
4879 | * If a knote that can adjust QoS is disabled because of the automatic |
4880 | * behavior of EV_DISPATCH, the knotes should stay suppressed so that |
4881 | * further overrides keep pushing. |
4882 | */ |
4883 | if (knote_fops(kn)->f_adjusts_qos && (kn->kn_status & KN_DISABLED) && |
4884 | (kn->kn_status & (KN_STAYACTIVE | KN_DROPPING)) == 0 && |
4885 | (kn->kn_flags & (EV_DISPATCH | EV_DISABLE)) == EV_DISPATCH) { |
4886 | qos = MAX(qos, knote_get_qos_override_index(kn)); |
4887 | continue; |
4888 | } |
4889 | knote_unsuppress(kn); |
4890 | } |
4891 | |
4892 | return qos; |
4893 | } |
4894 | |
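/*
 * Return 0 to indicate that processing should proceed,
 * -1 if there is nothing to process (or the servicer was unbound).
 *
 * Called with kqueue locked and returns the same way,
 * but may drop lock temporarily.
 */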
4895 | static int |
4896 | kqworkloop_begin_processing(struct kqworkloop *kqwl, unsigned int kevent_flags) |
4897 | { |
4898 | struct kqrequest *kqr = &kqwl->kqwl_request; |
4899 | struct kqueue *kq = &kqwl->kqwl_kqueue; |
4900 | thread_qos_t old_override = THREAD_QOS_UNSPECIFIED, qos_override; |
4901 | thread_t thread = kqr->kqr_thread; |
4902 | int rc = 0, op = KQWL_UTQ_NONE; |
4903 | |
4904 | kqlock_held(kq); |
4905 | |
4906 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START, |
4907 | kqwl->kqwl_dynamicid, 0, 0); |
4908 | |
4909 | /* nobody else should still be processing */ |
4910 | assert((kq->kq_state & KQ_PROCESSING) == 0); |
4911 | |
4912 | kq->kq_state |= KQ_PROCESSING; |
4913 | |
4914 | if (!TAILQ_EMPTY(&kqr->kqr_suppressed)) { |
4915 | op = KQWL_UTQ_RESET_WAKEUP_OVERRIDE; |
4916 | } |
4917 | |
4918 | if (kevent_flags & KEVENT_FLAG_PARKING) { |
4919 | /* |
4920 | * When "parking" we want to process events and if no events are found |
4921 | * unbind. |
4922 | * |
		 * However, non-overcommit threads sometimes park even when they have
4924 | * more work so that the pool can narrow. For these, we need to unbind |
4925 | * early, so that calling kqworkloop_update_threads_qos() can ask the |
4926 | * workqueue subsystem whether the thread should park despite having |
4927 | * pending events. |
4928 | */ |
4929 | if (kqr->kqr_state & KQR_THOVERCOMMIT) { |
4930 | op = KQWL_UTQ_PARKING; |
4931 | } else { |
4932 | op = KQWL_UTQ_UNBINDING; |
4933 | } |
4934 | } |
4935 | if (op == KQWL_UTQ_NONE) { |
4936 | goto done; |
4937 | } |
4938 | |
4939 | qos_override = kqworkloop_acknowledge_events(kqwl); |
4940 | |
4941 | kq_req_lock(kqwl); |
4942 | |
4943 | if (op == KQWL_UTQ_UNBINDING) { |
4944 | old_override = kqworkloop_unbind_locked(kqwl, thread); |
4945 | (void)kqueue_release(kqwl, KQUEUE_CANT_BE_LAST_REF); |
4946 | } |
4947 | kqworkloop_update_threads_qos(kqwl, op, qos_override); |
4948 | if (op == KQWL_UTQ_PARKING) { |
4949 | if (!TAILQ_EMPTY(&kqwl->kqwl_queue[KQWL_BUCKET_STAYACTIVE])) { |
4950 | /* |
4951 | * We cannot trust KQR_WAKEUP when looking at stay active knotes. |
4952 | * We need to process once, and kqworkloop_end_processing will |
4953 | * handle the unbind. |
4954 | */ |
4955 | } else if ((kqr->kqr_state & KQR_WAKEUP) == 0 || kqwl->kqwl_owner) { |
4956 | old_override = kqworkloop_unbind_locked(kqwl, thread); |
4957 | (void)kqueue_release(kqwl, KQUEUE_CANT_BE_LAST_REF); |
4958 | rc = -1; |
4959 | } |
4960 | } else if (op == KQWL_UTQ_UNBINDING) { |
4961 | if (kqr->kqr_thread == thread) { |
4962 | /* |
4963 | * The thread request fired again, passed the admission check and |
4964 | * got bound to the current thread again. |
4965 | */ |
4966 | } else { |
4967 | rc = -1; |
4968 | } |
4969 | } |
4970 | |
4971 | if (rc == 0) { |
4972 | /* |
4973 | * Reset wakeup bit to notice stay active events firing while we are |
4974 | * processing, as we cannot rely on the stayactive bucket emptiness. |
4975 | */ |
4976 | kqr->kqr_wakeup_indexes &= ~KQWL_STAYACTIVE_FIRED_BIT; |
4977 | } else { |
4978 | kq->kq_state &= ~KQ_PROCESSING; |
4979 | } |
4980 | |
4981 | kq_req_unlock(kqwl); |
4982 | |
4983 | if (old_override) { |
4984 | thread_drop_ipc_override(thread); |
4985 | } |
4986 | |
4987 | done: |
4988 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END, |
4989 | kqwl->kqwl_dynamicid, 0, 0); |
4990 | |
4991 | return rc; |
4992 | } |
4993 | |
4994 | /* |
4995 | * Return 0 to indicate that processing should proceed, |
4996 | * -1 if there is nothing to process. |
4997 | * |
4998 | * Called with kqueue locked and returns the same way, |
4999 | * but may drop lock temporarily. |
5000 | * May block. |
5001 | */ |
5002 | static int |
5003 | kqfile_begin_processing(struct kqueue *kq) |
5004 | { |
5005 | struct kqtailq *suppressq; |
5006 | |
5007 | kqlock_held(kq); |
5008 | |
5009 | assert((kq->kq_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0); |
5010 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START, |
5011 | VM_KERNEL_UNSLIDE_OR_PERM(kq), 0); |
5012 | |
5013 | /* wait to become the exclusive processing thread */ |
5014 | for (;;) { |
5015 | if (kq->kq_state & KQ_DRAIN) { |
5016 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, |
5017 | VM_KERNEL_UNSLIDE_OR_PERM(kq), 2); |
5018 | return -1; |
5019 | } |
5020 | |
5021 | if ((kq->kq_state & KQ_PROCESSING) == 0) |
5022 | break; |
5023 | |
5024 | /* if someone else is processing the queue, wait */ |
5025 | kq->kq_state |= KQ_PROCWAIT; |
5026 | suppressq = kqueue_get_suppressed_queue(kq, NULL); |
5027 | waitq_assert_wait64((struct waitq *)&kq->kq_wqs, |
5028 | CAST_EVENT64_T(suppressq), THREAD_UNINT | THREAD_WAIT_NOREPORT, |
5029 | TIMEOUT_WAIT_FOREVER); |
5030 | |
5031 | kqunlock(kq); |
5032 | thread_block(THREAD_CONTINUE_NULL); |
5033 | kqlock(kq); |
5034 | } |
5035 | |
5036 | /* Nobody else processing */ |
5037 | |
5038 | /* clear pre-posts and KQ_WAKEUP now, in case we bail early */ |
5039 | waitq_set_clear_preposts(&kq->kq_wqs); |
5040 | kq->kq_state &= ~KQ_WAKEUP; |
5041 | |
5042 | /* anything left to process? */ |
5043 | if (kqueue_queue_empty(kq, QOS_INDEX_KQFILE)) { |
5044 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, |
5045 | VM_KERNEL_UNSLIDE_OR_PERM(kq), 1); |
5046 | return -1; |
5047 | } |
5048 | |
5049 | /* convert to processing mode */ |
5050 | kq->kq_state |= KQ_PROCESSING; |
5051 | |
5052 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, |
5053 | VM_KERNEL_UNSLIDE_OR_PERM(kq)); |
5054 | |
5055 | return 0; |
5056 | } |
5057 | |
5058 | /* |
5059 | * Try to end the processing, only called when a workq thread is attempting to |
5060 | * park (KEVENT_FLAG_PARKING is set). |
5061 | * |
* When returning -1, the kqworkq is set up again so that it is ready to be
5063 | * processed. |
5064 | */ |
5065 | static int |
5066 | kqworkq_end_processing(struct kqworkq *kqwq, struct kqrequest *kqr, |
5067 | int kevent_flags) |
5068 | { |
5069 | if (!kqueue_queue_empty(&kqwq->kqwq_kqueue, kqr->kqr_qos_index)) { |
5070 | /* remember we didn't process everything */ |
5071 | kq_req_lock(kqwq); |
5072 | kqr->kqr_state |= KQR_WAKEUP; |
5073 | kq_req_unlock(kqwq); |
5074 | } |
5075 | |
5076 | if (kevent_flags & KEVENT_FLAG_PARKING) { |
5077 | /* |
* if acknowledging events "succeeds", it means there are still events
* pending, which is a failure condition for end_processing.
5080 | */ |
5081 | int rc = kqworkq_acknowledge_events(kqwq, kqr, kevent_flags, |
5082 | KQWQAE_END_PROCESSING); |
5083 | if (rc == 0) { |
5084 | return -1; |
5085 | } |
5086 | } |
5087 | |
5088 | return 0; |
5089 | } |
5090 | |
5091 | /* |
5092 | * Try to end the processing, only called when a workq thread is attempting to |
5093 | * park (KEVENT_FLAG_PARKING is set). |
5094 | * |
* When returning -1, the kqworkloop is set up again so that it is ready to be
5096 | * processed (as if kqworkloop_begin_processing had just been called). |
5097 | * |
5098 | * If successful and KEVENT_FLAG_PARKING was set in the kevent_flags, |
5099 | * the kqworkloop is unbound from its servicer as a side effect. |
5100 | */ |
5101 | static int |
5102 | kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags) |
5103 | { |
5104 | struct kqueue *kq = &kqwl->kqwl_kqueue; |
5105 | struct kqrequest *kqr = &kqwl->kqwl_request; |
5106 | thread_qos_t old_override = THREAD_QOS_UNSPECIFIED, qos_override; |
5107 | thread_t thread = kqr->kqr_thread; |
5108 | int rc = 0; |
5109 | |
5110 | kqlock_held(kq); |
5111 | |
5112 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START, |
5113 | kqwl->kqwl_dynamicid, 0, 0); |
5114 | |
5115 | if (flags & KQ_PROCESSING) { |
5116 | assert(kq->kq_state & KQ_PROCESSING); |
5117 | |
5118 | /* |
5119 | * If we still have queued stayactive knotes, remember we didn't finish |
5120 | * processing all of them. This should be extremely rare and would |
* require having a lot of them registered and fired.
5122 | */ |
5123 | if (!TAILQ_EMPTY(&kqwl->kqwl_queue[KQWL_BUCKET_STAYACTIVE])) { |
5124 | kq_req_lock(kqwl); |
5125 | kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS, |
5126 | KQWL_BUCKET_STAYACTIVE); |
5127 | kq_req_unlock(kqwl); |
5128 | } |
5129 | |
5130 | /* |
5131 | * When KEVENT_FLAG_PARKING is set, we need to attempt an unbind while |
5132 | * still under the lock. |
5133 | * |
5134 | * So we do everything kqworkloop_unbind() would do, but because we're |
5135 | * inside kqueue_process(), if the workloop actually received events |
5136 | * while our locks were dropped, we have the opportunity to fail the end |
5137 | * processing and loop again. |
5138 | * |
* This avoids going through the process-wide workqueue lock, and hence
* scales better.
5141 | */ |
5142 | if (kevent_flags & KEVENT_FLAG_PARKING) { |
5143 | qos_override = kqworkloop_acknowledge_events(kqwl); |
5144 | } |
5145 | } |
5146 | |
5147 | kq_req_lock(kqwl); |
5148 | |
5149 | if (kevent_flags & KEVENT_FLAG_PARKING) { |
5150 | kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_PARKING, qos_override); |
5151 | if ((kqr->kqr_state & KQR_WAKEUP) && !kqwl->kqwl_owner) { |
5152 | /* |
* Reset the wakeup bit to notice stay-active events firing while we are
* processing, as we cannot rely on the stay-active bucket being empty.
5155 | */ |
5156 | kqr->kqr_wakeup_indexes &= ~KQWL_STAYACTIVE_FIRED_BIT; |
5157 | rc = -1; |
5158 | } else { |
5159 | old_override = kqworkloop_unbind_locked(kqwl, thread); |
5160 | (void)kqueue_release(kqwl, KQUEUE_CANT_BE_LAST_REF); |
5161 | kq->kq_state &= ~flags; |
5162 | } |
5163 | } else { |
5164 | kq->kq_state &= ~flags; |
5165 | kqr->kqr_state |= KQR_R2K_NOTIF_ARMED; |
5166 | kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RECOMPUTE_WAKEUP_QOS, 0); |
5167 | } |
5168 | |
5169 | kq_req_unlock(kqwl); |
5170 | |
5171 | if (old_override) { |
5172 | thread_drop_ipc_override(thread); |
5173 | } |
5174 | |
5175 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END, |
5176 | kqwl->kqwl_dynamicid, 0, 0); |
5177 | |
5178 | return rc; |
5179 | } |
5180 | |
5181 | /* |
5182 | * Called with kqueue lock held. |
5183 | */ |
5184 | static void |
5185 | kqfile_end_processing(struct kqueue *kq) |
5186 | { |
5187 | struct knote *kn; |
5188 | struct kqtailq *suppressq; |
5189 | int procwait; |
5190 | |
5191 | kqlock_held(kq); |
5192 | |
5193 | assert((kq->kq_state & (KQ_WORKQ|KQ_WORKLOOP)) == 0); |
5194 | |
5195 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END), |
5196 | VM_KERNEL_UNSLIDE_OR_PERM(kq), 0); |
5197 | |
5198 | /* |
5199 | * Return suppressed knotes to their original state. |
5200 | */ |
5201 | suppressq = kqueue_get_suppressed_queue(kq, NULL); |
5202 | while ((kn = TAILQ_FIRST(suppressq)) != NULL) { |
5203 | assert(kn->kn_status & KN_SUPPRESSED); |
5204 | knote_unsuppress(kn); |
5205 | } |
5206 | |
5207 | procwait = (kq->kq_state & KQ_PROCWAIT); |
5208 | kq->kq_state &= ~(KQ_PROCESSING | KQ_PROCWAIT); |
5209 | |
5210 | if (procwait) { |
5211 | /* first wake up any thread already waiting to process */ |
5212 | waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, |
5213 | CAST_EVENT64_T(suppressq), |
5214 | THREAD_AWAKENED, |
5215 | WAITQ_ALL_PRIORITIES); |
5216 | } |
5217 | } |
5218 | |
5219 | static int |
5220 | kqueue_workloop_ctl_internal(proc_t p, uintptr_t cmd, uint64_t __unused options, |
5221 | struct kqueue_workloop_params *params, int *retval) |
5222 | { |
5223 | int error = 0; |
5224 | int fd; |
5225 | struct fileproc *fp; |
5226 | struct kqueue *kq; |
5227 | struct kqworkloop *kqwl; |
5228 | struct filedesc *fdp = p->p_fd; |
5229 | workq_threadreq_param_t trp = { }; |
5230 | |
5231 | switch (cmd) { |
5232 | case KQ_WORKLOOP_CREATE: |
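/*
* Validate the requested scheduling parameters (priority within
* 1..MAXPRI_USER, a known scheduling policy, a 1..100 CPU percentage
* with a bounded refill period) before materializing the dynamic
* kqueue with those parameters attached.
*/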
5233 | if (!params->kqwlp_flags) { |
5234 | error = EINVAL; |
5235 | break; |
5236 | } |
5237 | |
5238 | if ((params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_PRI) && |
5239 | (params->kqwlp_sched_pri < 1 || |
5240 | params->kqwlp_sched_pri > 63 /* MAXPRI_USER */)) { |
5241 | error = EINVAL; |
5242 | break; |
5243 | } |
5244 | |
5245 | if ((params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_POL) && |
5246 | invalid_policy(params->kqwlp_sched_pol)) { |
5247 | error = EINVAL; |
5248 | break; |
5249 | } |
5250 | |
5251 | if ((params->kqwlp_flags & KQ_WORKLOOP_CREATE_CPU_PERCENT) && |
5252 | (params->kqwlp_cpu_percent <= 0 || |
5253 | params->kqwlp_cpu_percent > 100 || |
5254 | params->kqwlp_cpu_refillms <= 0 || |
5255 | params->kqwlp_cpu_refillms > 0x00ffffff)) { |
5256 | error = EINVAL; |
5257 | break; |
5258 | } |
5259 | |
5260 | if (params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_PRI) { |
5261 | trp.trp_flags |= TRP_PRIORITY; |
5262 | trp.trp_pri = params->kqwlp_sched_pri; |
5263 | } |
5264 | if (params->kqwlp_flags & KQ_WORKLOOP_CREATE_SCHED_POL) { |
5265 | trp.trp_flags |= TRP_POLICY; |
5266 | trp.trp_pol = params->kqwlp_sched_pol; |
5267 | } |
5268 | if (params->kqwlp_flags & KQ_WORKLOOP_CREATE_CPU_PERCENT) { |
5269 | trp.trp_flags |= TRP_CPUPERCENT; |
5270 | trp.trp_cpupercent = (uint8_t)params->kqwlp_cpu_percent; |
5271 | trp.trp_refillms = params->kqwlp_cpu_refillms; |
5272 | } |
5273 | |
5274 | error = kevent_get_kq(p, params->kqwlp_id, &trp, |
5275 | KEVENT_FLAG_DYNAMIC_KQUEUE | KEVENT_FLAG_WORKLOOP | |
KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST, &fp, &fd, &kq);
5277 | if (error) { |
5278 | break; |
5279 | } |
5280 | |
5281 | if (!(fdp->fd_flags & FD_WORKLOOP)) { |
/*
* FD_WORKLOOP indicates that this process has ever created a workloop
* via this syscall; the flag is only ever added to a process, never
* removed.
*/
5286 | proc_fdlock(p); |
5287 | fdp->fd_flags |= FD_WORKLOOP; |
5288 | proc_fdunlock(p); |
5289 | } |
5290 | break; |
5291 | case KQ_WORKLOOP_DESTROY: |
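/*
* Look up the existing dynamic kqueue and drop the reference that was
* held for its creation-time scheduling parameters, so the workloop
* can be deallocated once its last user goes away.
*/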
5292 | error = kevent_get_kq(p, params->kqwlp_id, NULL, |
5293 | KEVENT_FLAG_DYNAMIC_KQUEUE | KEVENT_FLAG_WORKLOOP | |
KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST, &fp, &fd, &kq);
5295 | if (error) { |
5296 | break; |
5297 | } |
5298 | kqlock(kq); |
5299 | kqwl = (struct kqworkloop *)kq; |
5300 | trp.trp_value = kqwl->kqwl_params; |
5301 | if (trp.trp_flags && !(trp.trp_flags & TRP_RELEASED)) { |
5302 | trp.trp_flags |= TRP_RELEASED; |
5303 | kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF); |
5304 | } else { |
5305 | error = EINVAL; |
5306 | } |
5307 | kqunlock(kq); |
5308 | kqueue_release_last(p, kq); |
5309 | break; |
5310 | } |
5311 | *retval = 0; |
5312 | return error; |
5313 | } |
5314 | |
5315 | int |
5316 | kqueue_workloop_ctl(proc_t p, struct kqueue_workloop_ctl_args *uap, int *retval) |
5317 | { |
5318 | struct kqueue_workloop_params params = { |
5319 | .kqwlp_id = 0, |
5320 | }; |
5321 | if (uap->sz < sizeof(params.kqwlp_version)) { |
5322 | return EINVAL; |
5323 | } |
5324 | |
5325 | size_t copyin_sz = MIN(sizeof(params), uap->sz); |
int rv = copyin(uap->addr, &params, copyin_sz);
5327 | if (rv) { |
5328 | return rv; |
5329 | } |
5330 | |
5331 | if (params.kqwlp_version != (int)uap->sz) { |
5332 | return EINVAL; |
5333 | } |
5334 | |
return kqueue_workloop_ctl_internal(p, uap->cmd, uap->options, &params,
5336 | retval); |
5337 | } |
5338 | |
5339 | /* |
5340 | * kqueue_process - process the triggered events in a kqueue |
5341 | * |
5342 | * Walk the queued knotes and validate that they are really still triggered |
5343 | * events by calling the filter routines (if necessary). |
5344 | * |
5345 | * For each event that is still considered triggered, invoke the callback |
5346 | * routine provided. |
5347 | * |
5348 | * caller holds a reference on the kqueue. |
5349 | * kqueue locked on entry and exit - but may be dropped |
5350 | * kqueue list locked (held for duration of call) |
5351 | */ |
5352 | static int |
5353 | kqueue_process(struct kqueue *kq, |
5354 | kevent_callback_t callback, |
5355 | void *callback_data, |
5356 | struct filt_process_s *process_data, |
5357 | int *countp) |
5358 | { |
5359 | struct uthread *ut = get_bsdthread_info(current_thread()); |
5360 | struct kqrequest *kqr = ut->uu_kqr_bound; |
5361 | struct knote *kn; |
5362 | unsigned int flags = process_data ? process_data->fp_flags : 0; |
5363 | int nevents = 0, error = 0, rc = 0; |
5364 | struct kqtailq *base_queue, *queue; |
5365 | kqueue_t kqu = { .kq = kq }; |
5366 | #if DEBUG || DEVELOPMENT |
5367 | int retries = 64; |
5368 | #endif |
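/*
* Claim the right to process events: workq and workloop kqueues require
* the calling thread to already be bound to a corresponding thread
* request (otherwise EJUSTRETURN), while plain kqueue files serialize
* processors through KQ_PROCESSING in kqfile_begin_processing().
*/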
5369 | |
5370 | if (kq->kq_state & KQ_WORKQ) { |
5371 | if (kqr == NULL || (kqr->kqr_state & KQR_WORKLOOP)) { |
5372 | return EJUSTRETURN; |
5373 | } |
5374 | rc = kqworkq_begin_processing(kqu.kqwq, kqr, flags); |
5375 | } else if (kq->kq_state & KQ_WORKLOOP) { |
5376 | if (ut->uu_kqr_bound != &kqu.kqwl->kqwl_request) { |
5377 | return EJUSTRETURN; |
5378 | } |
5379 | rc = kqworkloop_begin_processing(kqu.kqwl, flags); |
5380 | } else { |
5381 | rc = kqfile_begin_processing(kq); |
5382 | } |
5383 | |
5384 | if (rc == -1) { |
5385 | /* Nothing to process */ |
5386 | *countp = 0; |
5387 | return 0; |
5388 | } |
5389 | |
5390 | /* |
5391 | * loop through the enqueued knotes associated with this request, |
5392 | * processing each one. Each request may have several queues |
5393 | * of knotes to process (depending on the type of kqueue) so we |
5394 | * have to loop through all the queues as long as we have additional |
5395 | * space. |
5396 | */ |
5397 | |
5398 | process_again: |
5399 | if (kq->kq_state & KQ_WORKQ) { |
5400 | base_queue = queue = &kqu.kqwq->kqwq_queue[kqr->kqr_qos_index]; |
5401 | } else if (kq->kq_state & KQ_WORKLOOP) { |
5402 | base_queue = &kqu.kqwl->kqwl_queue[0]; |
5403 | queue = &kqu.kqwl->kqwl_queue[KQWL_NBUCKETS - 1]; |
5404 | } else { |
5405 | base_queue = queue = &kq->kq_queue[QOS_INDEX_KQFILE]; |
5406 | } |
5407 | |
5408 | do { |
5409 | while (error == 0 && (kn = TAILQ_FIRST(queue)) != NULL) { |
5410 | error = knote_process(kn, callback, callback_data, process_data); |
5411 | if (error == EJUSTRETURN) { |
5412 | error = 0; |
5413 | } else { |
5414 | nevents++; |
5415 | } |
5416 | /* error is EWOULDBLOCK when the out event array is full */ |
5417 | } |
5418 | |
5419 | if (error == EWOULDBLOCK) { |
5420 | /* break out if no more space for additional events */ |
5421 | error = 0; |
5422 | break; |
5423 | } |
5424 | } while (queue-- > base_queue); |
5425 | |
5426 | *countp = nevents; |
5427 | |
5428 | /* |
5429 | * If KEVENT_FLAG_PARKING is set, and no kevents have been returned, |
5430 | * we want to unbind the kqrequest from the thread. |
5431 | * |
5432 | * However, because the kq locks are dropped several times during process, |
5433 | * new knotes may have fired again, in which case, we want to fail the end |
5434 | * processing and process again, until it converges. |
5435 | * |
5436 | * If we returned events however, end processing never fails. |
5437 | */ |
5438 | if (error || nevents) flags &= ~KEVENT_FLAG_PARKING; |
5439 | if (kq->kq_state & KQ_WORKQ) { |
5440 | rc = kqworkq_end_processing(kqu.kqwq, kqr, flags); |
5441 | } else if (kq->kq_state & KQ_WORKLOOP) { |
5442 | rc = kqworkloop_end_processing(kqu.kqwl, KQ_PROCESSING, flags); |
5443 | } else { |
5444 | kqfile_end_processing(kq); |
5445 | rc = 0; |
5446 | } |
5447 | if (rc == -1) { |
5448 | assert(flags & KEVENT_FLAG_PARKING); |
5449 | #if DEBUG || DEVELOPMENT |
5450 | if (retries-- == 0) { |
panic("kevent: way too many knote_process retries, kq: %p (0x%02x)",
5452 | kq, kq->kq_state); |
5453 | } |
5454 | #endif |
5455 | goto process_again; |
5456 | } |
5457 | return error; |
5458 | } |
5459 | |
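/*
* kqueue_scan_continue - continuation for kqueue_scan()
*
* Re-runs kqueue_process() after the thread was woken (or converts the
* wait result into an error), then hands the outcome to the caller
* supplied continuation.
*/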
5460 | static void |
5461 | kqueue_scan_continue(void *data, wait_result_t wait_result) |
5462 | { |
5463 | thread_t self = current_thread(); |
5464 | uthread_t ut = (uthread_t)get_bsdthread_info(self); |
5465 | struct _kqueue_scan * cont_args = &ut->uu_save.uus_kqueue_scan; |
5466 | struct kqueue *kq = (struct kqueue *)data; |
5467 | struct filt_process_s *process_data = cont_args->process_data; |
5468 | int error; |
5469 | int count; |
5470 | |
5471 | /* convert the (previous) wait_result to a proper error */ |
5472 | switch (wait_result) { |
5473 | case THREAD_AWAKENED: { |
5474 | kqlock(kq); |
5475 | retry: |
5476 | error = kqueue_process(kq, cont_args->call, cont_args->data, |
5477 | process_data, &count); |
5478 | if (error == 0 && count == 0) { |
5479 | if (kq->kq_state & KQ_DRAIN) { |
5480 | kqunlock(kq); |
5481 | goto drain; |
5482 | } |
5483 | |
5484 | if (kq->kq_state & KQ_WAKEUP) |
5485 | goto retry; |
5486 | |
5487 | waitq_assert_wait64((struct waitq *)&kq->kq_wqs, |
5488 | KQ_EVENT, THREAD_ABORTSAFE, |
5489 | cont_args->deadline); |
5490 | kq->kq_state |= KQ_SLEEP; |
5491 | kqunlock(kq); |
5492 | thread_block_parameter(kqueue_scan_continue, kq); |
5493 | /* NOTREACHED */ |
5494 | } |
5495 | kqunlock(kq); |
5496 | } break; |
5497 | case THREAD_TIMED_OUT: |
5498 | error = EWOULDBLOCK; |
5499 | break; |
5500 | case THREAD_INTERRUPTED: |
5501 | error = EINTR; |
5502 | break; |
5503 | case THREAD_RESTART: |
5504 | drain: |
5505 | error = EBADF; |
5506 | break; |
5507 | default: |
panic("%s: - invalid wait_result (%d)", __func__,
5509 | wait_result); |
5510 | error = 0; |
5511 | } |
5512 | |
5513 | /* call the continuation with the results */ |
5514 | assert(cont_args->cont != NULL); |
5515 | (cont_args->cont)(kq, cont_args->data, error); |
5516 | } |
5517 | |
5518 | |
5519 | /* |
5520 | * kqueue_scan - scan and wait for events in a kqueue |
5521 | * |
5522 | * Process the triggered events in a kqueue. |
5523 | * |
* If there are no events triggered, arrange to wait for them. If the
* caller provided a continuation routine, the wait happens through
* kqueue_scan_continue and the result is delivered to that continuation
* instead of returning here.
5528 | * |
5529 | * The callback routine must be valid. |
5530 | * The caller must hold a use-count reference on the kq. |
5531 | */ |
5532 | int |
5533 | kqueue_scan(struct kqueue *kq, |
5534 | kevent_callback_t callback, |
5535 | kqueue_continue_t continuation, |
5536 | void *callback_data, |
5537 | struct filt_process_s *process_data, |
5538 | struct timeval *atvp, |
5539 | __unused struct proc *p) |
5540 | { |
5541 | thread_continue_t cont = THREAD_CONTINUE_NULL; |
5542 | unsigned int flags; |
5543 | uint64_t deadline; |
5544 | int error; |
5545 | int first; |
5546 | int fd; |
5547 | |
5548 | assert(callback != NULL); |
5549 | |
5550 | /* |
5551 | * Determine which QoS index we are servicing |
5552 | */ |
5553 | flags = (process_data) ? process_data->fp_flags : 0; |
5554 | fd = (process_data) ? process_data->fp_fd : -1; |
5555 | |
5556 | first = 1; |
5557 | for (;;) { |
5558 | wait_result_t wait_result; |
5559 | int count; |
5560 | |
5561 | /* |
5562 | * Make a pass through the kq to find events already |
5563 | * triggered. |
5564 | */ |
5565 | kqlock(kq); |
5566 | error = kqueue_process(kq, callback, callback_data, |
5567 | process_data, &count); |
5568 | if (error || count) |
5569 | break; /* lock still held */ |
5570 | |
5571 | /* looks like we have to consider blocking */ |
5572 | if (first) { |
5573 | first = 0; |
5574 | /* convert the timeout to a deadline once */ |
5575 | if (atvp->tv_sec || atvp->tv_usec) { |
5576 | uint64_t now; |
5577 | |
5578 | clock_get_uptime(&now); |
5579 | nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC + |
5580 | atvp->tv_usec * (long)NSEC_PER_USEC, |
5581 | &deadline); |
5582 | if (now >= deadline) { |
5583 | /* non-blocking call */ |
5584 | error = EWOULDBLOCK; |
5585 | break; /* lock still held */ |
5586 | } |
5587 | deadline -= now; |
5588 | clock_absolutetime_interval_to_deadline(deadline, &deadline); |
5589 | } else { |
5590 | deadline = 0; /* block forever */ |
5591 | } |
5592 | |
5593 | if (continuation) { |
5594 | uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); |
5595 | struct _kqueue_scan *cont_args = &ut->uu_save.uus_kqueue_scan; |
5596 | |
5597 | cont_args->call = callback; |
5598 | cont_args->cont = continuation; |
5599 | cont_args->deadline = deadline; |
5600 | cont_args->data = callback_data; |
5601 | cont_args->process_data = process_data; |
5602 | cont = kqueue_scan_continue; |
5603 | } |
5604 | } |
5605 | |
5606 | if (kq->kq_state & KQ_DRAIN) { |
5607 | kqunlock(kq); |
5608 | return EBADF; |
5609 | } |
5610 | |
5611 | /* If awakened during processing, try again */ |
5612 | if (kq->kq_state & KQ_WAKEUP) { |
5613 | kqunlock(kq); |
5614 | continue; |
5615 | } |
5616 | |
5617 | /* go ahead and wait */ |
5618 | waitq_assert_wait64_leeway((struct waitq *)&kq->kq_wqs, |
5619 | KQ_EVENT, THREAD_ABORTSAFE, |
5620 | TIMEOUT_URGENCY_USER_NORMAL, |
5621 | deadline, TIMEOUT_NO_LEEWAY); |
5622 | kq->kq_state |= KQ_SLEEP; |
5623 | kqunlock(kq); |
5624 | wait_result = thread_block_parameter(cont, kq); |
5625 | /* NOTREACHED if (continuation != NULL) */ |
5626 | |
5627 | switch (wait_result) { |
5628 | case THREAD_AWAKENED: |
5629 | continue; |
5630 | case THREAD_TIMED_OUT: |
5631 | return EWOULDBLOCK; |
5632 | case THREAD_INTERRUPTED: |
5633 | return EINTR; |
5634 | case THREAD_RESTART: |
5635 | return EBADF; |
5636 | default: |
panic("%s: - bad wait_result (%d)", __func__,
5638 | wait_result); |
5639 | error = 0; |
5640 | } |
5641 | } |
5642 | kqunlock(kq); |
5643 | return (error); |
5644 | } |
5645 | |
5646 | |
5647 | /* |
5648 | * XXX |
5649 | * This could be expanded to call kqueue_scan, if desired. |
5650 | */ |
5651 | /*ARGSUSED*/ |
5652 | static int |
5653 | kqueue_read(__unused struct fileproc *fp, |
5654 | __unused struct uio *uio, |
5655 | __unused int flags, |
5656 | __unused vfs_context_t ctx) |
5657 | { |
5658 | return (ENXIO); |
5659 | } |
5660 | |
5661 | /*ARGSUSED*/ |
5662 | static int |
5663 | kqueue_write(__unused struct fileproc *fp, |
5664 | __unused struct uio *uio, |
5665 | __unused int flags, |
5666 | __unused vfs_context_t ctx) |
5667 | { |
5668 | return (ENXIO); |
5669 | } |
5670 | |
5671 | /*ARGSUSED*/ |
5672 | static int |
5673 | kqueue_ioctl(__unused struct fileproc *fp, |
5674 | __unused u_long com, |
5675 | __unused caddr_t data, |
5676 | __unused vfs_context_t ctx) |
5677 | { |
5678 | return (ENOTTY); |
5679 | } |
5680 | |
5681 | /*ARGSUSED*/ |
5682 | static int |
5683 | kqueue_select(struct fileproc *fp, int which, void *wq_link_id, |
5684 | __unused vfs_context_t ctx) |
5685 | { |
5686 | struct kqueue *kq = (struct kqueue *)fp->f_data; |
5687 | struct kqtailq *queue; |
5688 | struct kqtailq *suppressq; |
5689 | struct knote *kn; |
5690 | int retnum = 0; |
5691 | |
5692 | if (which != FREAD) |
5693 | return (0); |
5694 | |
5695 | kqlock(kq); |
5696 | |
5697 | assert((kq->kq_state & KQ_WORKQ) == 0); |
5698 | |
5699 | /* |
5700 | * If this is the first pass, link the wait queue associated with the |
* kqueue onto the wait queue set for the select(). Normally we
5702 | * use selrecord() for this, but it uses the wait queue within the |
5703 | * selinfo structure and we need to use the main one for the kqueue to |
5704 | * catch events from KN_STAYQUEUED sources. So we do the linkage manually. |
5705 | * (The select() call will unlink them when it ends). |
5706 | */ |
5707 | if (wq_link_id != NULL) { |
5708 | thread_t cur_act = current_thread(); |
5709 | struct uthread * ut = get_bsdthread_info(cur_act); |
5710 | |
5711 | kq->kq_state |= KQ_SEL; |
5712 | waitq_link((struct waitq *)&kq->kq_wqs, ut->uu_wqset, |
5713 | WAITQ_SHOULD_LOCK, (uint64_t *)wq_link_id); |
5714 | |
5715 | /* always consume the reserved link object */ |
5716 | waitq_link_release(*(uint64_t *)wq_link_id); |
5717 | *(uint64_t *)wq_link_id = 0; |
5718 | |
5719 | /* |
5720 | * selprocess() is expecting that we send it back the waitq |
5721 | * that was just added to the thread's waitq set. In order |
5722 | * to not change the selrecord() API (which is exported to |
5723 | * kexts), we pass this value back through the |
5724 | * void *wq_link_id pointer we were passed. We need to use |
5725 | * memcpy here because the pointer may not be properly aligned |
5726 | * on 32-bit systems. |
5727 | */ |
5728 | void *wqptr = &kq->kq_wqs; |
5729 | memcpy(wq_link_id, (void *)&wqptr, sizeof(void *)); |
5730 | } |
5731 | |
5732 | if (kqfile_begin_processing(kq) == -1) { |
5733 | kqunlock(kq); |
5734 | return (0); |
5735 | } |
5736 | |
5737 | queue = &kq->kq_queue[QOS_INDEX_KQFILE]; |
5738 | if (!TAILQ_EMPTY(queue)) { |
5739 | /* |
5740 | * there is something queued - but it might be a |
5741 | * KN_STAYACTIVE knote, which may or may not have |
5742 | * any events pending. Otherwise, we have to walk |
5743 | * the list of knotes to see, and peek at the |
5744 | * (non-vanished) stay-active ones to be really sure. |
5745 | */ |
5746 | while ((kn = (struct knote *)TAILQ_FIRST(queue)) != NULL) { |
5747 | if (kn->kn_status & KN_ACTIVE) { |
5748 | retnum = 1; |
5749 | goto out; |
5750 | } |
5751 | assert(kn->kn_status & KN_STAYACTIVE); |
5752 | knote_suppress(kn); |
5753 | } |
5754 | |
5755 | /* |
5756 | * There were no regular events on the queue, so take |
5757 | * a deeper look at the stay-queued ones we suppressed. |
5758 | */ |
5759 | suppressq = kqueue_get_suppressed_queue(kq, NULL); |
5760 | while ((kn = (struct knote *)TAILQ_FIRST(suppressq)) != NULL) { |
5761 | KNOTE_LOCK_CTX(knlc); |
5762 | int result = 0; |
5763 | |
/* If it didn't vanish while suppressed - peek at it */
5765 | if ((kn->kn_status & KN_DROPPING) || !knote_lock(kq, kn, &knlc, |
5766 | KNOTE_KQ_LOCK_ON_FAILURE)) { |
5767 | continue; |
5768 | } |
5769 | |
5770 | result = filter_call(knote_fops(kn), f_peek(kn)); |
5771 | |
5772 | kqlock(kq); |
5773 | knote_unlock(kq, kn, &knlc, KNOTE_KQ_LOCK_ALWAYS); |
5774 | |
5775 | /* unsuppress it */ |
5776 | knote_unsuppress(kn); |
5777 | |
5778 | /* has data or it has to report a vanish */ |
5779 | if (result & FILTER_ACTIVE) { |
5780 | retnum = 1; |
5781 | goto out; |
5782 | } |
5783 | } |
5784 | } |
5785 | |
5786 | out: |
5787 | kqfile_end_processing(kq); |
5788 | kqunlock(kq); |
5789 | return (retnum); |
5790 | } |
5791 | |
5792 | /* |
5793 | * kqueue_close - |
5794 | */ |
5795 | /*ARGSUSED*/ |
5796 | static int |
5797 | kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx) |
5798 | { |
5799 | struct kqfile *kqf = (struct kqfile *)fg->fg_data; |
5800 | |
5801 | assert((kqf->kqf_state & KQ_WORKQ) == 0); |
5802 | kqueue_dealloc(&kqf->kqf_kqueue); |
5803 | fg->fg_data = NULL; |
5804 | return (0); |
5805 | } |
5806 | |
5807 | /* |
5808 | * Max depth of the nested kq path that can be created. |
* Note that this has to be less than the maximum value that fits in kq_level
5810 | * to avoid wrapping around and mislabeling the level. |
5811 | */ |
5812 | #define MAX_NESTED_KQ 1000 |
5813 | |
5814 | /*ARGSUSED*/ |
5815 | /* |
* The caller has taken a use-count reference on this kqueue and will donate it
5817 | * to the kqueue we are being added to. This keeps the kqueue from closing until |
5818 | * that relationship is torn down. |
5819 | */ |
5820 | static int |
5821 | kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, |
5822 | __unused struct kevent_internal_s *kev, __unused vfs_context_t ctx) |
5823 | { |
5824 | struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data; |
5825 | struct kqueue *kq = &kqf->kqf_kqueue; |
5826 | struct kqueue *parentkq = knote_get_kq(kn); |
5827 | uint16_t plevel = 0; |
5828 | |
5829 | assert((kqf->kqf_state & KQ_WORKQ) == 0); |
5830 | |
5831 | if (parentkq == kq || kn->kn_filter != EVFILT_READ) { |
5832 | knote_set_error(kn, EINVAL); |
5833 | return 0; |
5834 | } |
5835 | |
5836 | /* |
5837 | * We have to avoid creating a cycle when nesting kqueues |
5838 | * inside another. Rather than trying to walk the whole |
5839 | * potential DAG of nested kqueues, we just use a simple |
5840 | * ceiling protocol. When a kqueue is inserted into another, |
5841 | * we check that the (future) parent is not already nested |
* into another kqueue at a lower level than the potential
5843 | * child (because it could indicate a cycle). If that test |
5844 | * passes, we just mark the nesting levels accordingly. |
5845 | * |
5846 | * Only up to MAX_NESTED_KQ can be nested. |
5847 | */ |
5848 | |
5849 | kqlock(parentkq); |
5850 | if (parentkq->kq_level > 0 && |
5851 | parentkq->kq_level < kq->kq_level) |
5852 | { |
5853 | kqunlock(parentkq); |
5854 | knote_set_error(kn, EINVAL); |
5855 | return 0; |
5856 | } else { |
5857 | /* set parent level appropriately */ |
5858 | plevel = (parentkq->kq_level == 0)? 2: parentkq->kq_level; |
5859 | if (plevel < kq->kq_level + 1) { |
5860 | if (kq->kq_level + 1 > MAX_NESTED_KQ) { |
5861 | kqunlock(parentkq); |
5862 | knote_set_error(kn, EINVAL); |
5863 | return 0; |
5864 | } |
5865 | plevel = kq->kq_level + 1; |
5866 | } |
5867 | |
5868 | parentkq->kq_level = plevel; |
5869 | kqunlock(parentkq); |
5870 | |
5871 | kn->kn_filtid = EVFILTID_KQREAD; |
5872 | kqlock(kq); |
5873 | KNOTE_ATTACH(&kqf->kqf_sel.si_note, kn); |
5874 | /* indicate nesting in child, if needed */ |
5875 | if (kq->kq_level == 0) |
5876 | kq->kq_level = 1; |
5877 | |
5878 | int count = kq->kq_count; |
5879 | kqunlock(kq); |
5880 | return (count > 0); |
5881 | } |
5882 | } |
5883 | |
5884 | /* |
5885 | * kqueue_drain - called when kq is closed |
5886 | */ |
5887 | /*ARGSUSED*/ |
5888 | static int |
5889 | kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx) |
5890 | { |
5891 | struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data; |
5892 | |
5893 | assert((kq->kq_state & KQ_WORKQ) == 0); |
5894 | |
5895 | kqlock(kq); |
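/*
* Mark the kqueue as draining so event processing and kqueue_scan()
* bail out (EBADF), then interrupt any threads currently blocked on it
* so they notice the flag.
*/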
5896 | kq->kq_state |= KQ_DRAIN; |
5897 | kqueue_interrupt(kq); |
5898 | kqunlock(kq); |
5899 | return (0); |
5900 | } |
5901 | |
5902 | /*ARGSUSED*/ |
5903 | int |
5904 | kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p) |
5905 | { |
5906 | assert((kq->kq_state & KQ_WORKQ) == 0); |
5907 | |
5908 | kqlock(kq); |
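/*
* Report the number of queued events through st_size, and the size of
* the kevent structure flavor this kqueue uses through st_blksize.
*/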
5909 | if (isstat64 != 0) { |
5910 | struct stat64 *sb64 = (struct stat64 *)ub; |
5911 | |
5912 | bzero((void *)sb64, sizeof(*sb64)); |
5913 | sb64->st_size = kq->kq_count; |
5914 | if (kq->kq_state & KQ_KEV_QOS) |
5915 | sb64->st_blksize = sizeof(struct kevent_qos_s); |
5916 | else if (kq->kq_state & KQ_KEV64) |
5917 | sb64->st_blksize = sizeof(struct kevent64_s); |
5918 | else if (IS_64BIT_PROCESS(p)) |
5919 | sb64->st_blksize = sizeof(struct user64_kevent); |
5920 | else |
5921 | sb64->st_blksize = sizeof(struct user32_kevent); |
5922 | sb64->st_mode = S_IFIFO; |
5923 | } else { |
5924 | struct stat *sb = (struct stat *)ub; |
5925 | |
5926 | bzero((void *)sb, sizeof(*sb)); |
5927 | sb->st_size = kq->kq_count; |
5928 | if (kq->kq_state & KQ_KEV_QOS) |
5929 | sb->st_blksize = sizeof(struct kevent_qos_s); |
5930 | else if (kq->kq_state & KQ_KEV64) |
5931 | sb->st_blksize = sizeof(struct kevent64_s); |
5932 | else if (IS_64BIT_PROCESS(p)) |
5933 | sb->st_blksize = sizeof(struct user64_kevent); |
5934 | else |
5935 | sb->st_blksize = sizeof(struct user32_kevent); |
5936 | sb->st_mode = S_IFIFO; |
5937 | } |
5938 | kqunlock(kq); |
5939 | return (0); |
5940 | } |
5941 | |
5942 | /* |
5943 | * Interact with the pthread kext to request a servicing there at a specific QoS |
5944 | * level. |
5945 | * |
5946 | * - Caller holds the workq request lock |
5947 | * |
5948 | * - May be called with the kqueue's wait queue set locked, |
5949 | * so cannot do anything that could recurse on that. |
5950 | */ |
5951 | static void |
5952 | kqueue_threadreq_initiate(struct kqueue *kq, struct kqrequest *kqr, |
5953 | kq_index_t qos, int flags) |
5954 | { |
5955 | assert(kqr->kqr_state & KQR_WAKEUP); |
5956 | assert(kqr->kqr_thread == THREAD_NULL); |
5957 | assert((kqr->kqr_state & KQR_THREQUESTED) == 0); |
5958 | struct turnstile *ts = TURNSTILE_NULL; |
5959 | |
5960 | if (workq_is_exiting(kq->kq_p)) { |
5961 | return; |
5962 | } |
5963 | |
5964 | /* Add a thread request reference on the kqueue. */ |
5965 | kqueue_retain(kq); |
5966 | |
5967 | kq_req_held(kq); |
5968 | |
5969 | if (kq->kq_state & KQ_WORKLOOP) { |
5970 | __assert_only struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
5971 | |
5972 | assert(kqwl->kqwl_owner == THREAD_NULL); |
5973 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST), |
5974 | kqwl->kqwl_dynamicid, 0, qos, kqr->kqr_state); |
5975 | ts = kqwl->kqwl_turnstile; |
5976 | } else { |
5977 | assert(kq->kq_state & KQ_WORKQ); |
5978 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST), |
5979 | -1, 0, qos, kqr->kqr_state); |
5980 | } |
5981 | |
5982 | kqr->kqr_state |= KQR_THREQUESTED; |
5983 | |
5984 | /* |
5985 | * New-style thread request supported. |
5986 | * Provide the pthread kext a pointer to a workq_threadreq_s structure for |
5987 | * its use until a corresponding kqueue_threadreq_bind callback. |
5988 | */ |
5989 | if ((kq->kq_state & KQ_WORKLOOP) && current_proc() == kq->kq_p) { |
5990 | flags |= WORKQ_THREADREQ_SET_AST_ON_FAILURE; |
5991 | } |
5992 | if (qos == KQWQ_QOS_MANAGER) { |
5993 | qos = WORKQ_THREAD_QOS_MANAGER; |
5994 | } |
5995 | if (!workq_kern_threadreq_initiate(kq->kq_p, kqr, ts, qos, flags)) { |
5996 | /* |
5997 | * Process is shutting down or exec'ing. |
5998 | * All the kqueues are going to be cleaned up |
5999 | * soon. Forget we even asked for a thread - |
6000 | * and make sure we don't ask for more. |
6001 | */ |
6002 | kqr->kqr_state &= ~(KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED); |
6003 | kqueue_release(kq, KQUEUE_CANT_BE_LAST_REF); |
6004 | } |
6005 | } |
6006 | |
6007 | /* |
6008 | * kqueue_threadreq_bind_prepost - prepost the bind to kevent |
6009 | * |
6010 | * This is used when kqueue_threadreq_bind may cause a lock inversion. |
6011 | */ |
6012 | void |
6013 | kqueue_threadreq_bind_prepost(struct proc *p __unused, workq_threadreq_t req, |
6014 | thread_t thread) |
6015 | { |
6016 | struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req); |
6017 | struct uthread *ut = get_bsdthread_info(thread); |
6018 | |
6019 | req->tr_binding_thread = thread; |
6020 | ut->uu_kqr_bound = kqr; |
6021 | req->tr_state = TR_STATE_BINDING; |
6022 | |
6023 | struct kqworkloop *kqwl = kqr_kqworkloop(kqr); |
6024 | if (kqwl && kqwl->kqwl_turnstile) { |
6025 | struct turnstile *ts = kqwl->kqwl_turnstile; |
6026 | /* |
6027 | * While a thread request is in flight, the workqueue |
6028 | * is the interlock for the turnstile and can update the inheritor. |
6029 | */ |
6030 | turnstile_update_inheritor(ts, thread, TURNSTILE_IMMEDIATE_UPDATE | |
6031 | TURNSTILE_INHERITOR_THREAD); |
6032 | turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD); |
6033 | } |
6034 | } |
6035 | |
6036 | /* |
6037 | * kqueue_threadreq_bind_commit - commit a bind prepost |
6038 | * |
6039 | * The workq code has to commit any binding prepost before the thread has |
6040 | * a chance to come back to userspace (and do kevent syscalls) or be aborted. |
6041 | */ |
6042 | void |
6043 | kqueue_threadreq_bind_commit(struct proc *p, thread_t thread) |
6044 | { |
6045 | struct uthread *ut = get_bsdthread_info(thread); |
6046 | struct kqrequest *kqr = ut->uu_kqr_bound; |
6047 | kqueue_t kqu = kqr_kqueue(p, kqr); |
6048 | |
6049 | kq_req_lock(kqu); |
6050 | if (kqr->kqr_req.tr_state == TR_STATE_BINDING) { |
6051 | kqueue_threadreq_bind(p, &kqr->kqr_req, thread, 0); |
6052 | } |
6053 | kq_req_unlock(kqu); |
6054 | } |
6055 | |
6056 | static void |
6057 | kqueue_threadreq_modify(struct kqueue *kq, struct kqrequest *kqr, kq_index_t qos) |
6058 | { |
6059 | assert(kqr->kqr_state & KQR_THREQUESTED); |
6060 | assert(kqr->kqr_thread == THREAD_NULL); |
6061 | |
6062 | kq_req_held(kq); |
6063 | |
6064 | int flags = 0; |
6065 | if ((kq->kq_state & KQ_WORKLOOP) && kq->kq_p == current_proc()) { |
6066 | flags |= WORKQ_THREADREQ_SET_AST_ON_FAILURE; |
6067 | } |
6068 | workq_kern_threadreq_modify(kq->kq_p, kqr, qos, flags); |
6069 | } |
6070 | |
6071 | /* |
6072 | * kqueue_threadreq_bind - bind thread to processing kqrequest |
6073 | * |
6074 | * The provided thread will be responsible for delivering events |
6075 | * associated with the given kqrequest. Bind it and get ready for |
6076 | * the thread to eventually arrive. |
6077 | */ |
6078 | void |
6079 | kqueue_threadreq_bind(struct proc *p, workq_threadreq_t req, thread_t thread, |
6080 | unsigned int flags) |
6081 | { |
6082 | struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req); |
6083 | kqueue_t kqu = kqr_kqueue(p, kqr); |
6084 | struct uthread *ut = get_bsdthread_info(thread); |
6085 | |
6086 | kq_req_held(kqu); |
6087 | |
6088 | assert(kqr->kqr_state & KQR_THREQUESTED); |
6089 | assert(kqr->kqr_thread == THREAD_NULL); |
6090 | assert(ut->uu_kqueue_override == 0); |
6091 | |
6092 | if (kqr->kqr_req.tr_state == TR_STATE_BINDING) { |
6093 | assert(ut->uu_kqr_bound == kqr); |
6094 | assert(kqr->kqr_req.tr_binding_thread == thread); |
6095 | kqr->kqr_req.tr_state = TR_STATE_IDLE; |
6096 | kqr->kqr_req.tr_binding_thread = NULL; |
6097 | } else { |
6098 | assert(ut->uu_kqr_bound == NULL); |
6099 | } |
6100 | |
6101 | ut->uu_kqr_bound = kqr; |
6102 | kqr->kqr_thread = thread; |
6103 | |
6104 | if (kqu.kq->kq_state & KQ_WORKLOOP) { |
6105 | struct turnstile *ts = kqu.kqwl->kqwl_turnstile; |
6106 | |
6107 | if (__improbable(thread == kqu.kqwl->kqwl_owner)) { |
6108 | /* |
6109 | * <rdar://problem/38626999> shows that asserting here is not ok. |
6110 | * |
6111 | * This is not supposed to happen for correct use of the interface, |
6112 | * but it is sadly possible for userspace (with the help of memory |
6113 | * corruption, such as over-release of a dispatch queue) to make |
6114 | * the creator thread the "owner" of a workloop. |
6115 | * |
6116 | * Once that happens, and that creator thread picks up the same |
6117 | * workloop as a servicer, we trip this codepath. We need to fixup |
6118 | * the state to forget about this thread being the owner, as the |
6119 | * entire workloop state machine expects servicers to never be |
6120 | * owners and everything would basically go downhill from here. |
6121 | */ |
6122 | kqu.kqwl->kqwl_owner = THREAD_NULL; |
6123 | if (kqworkloop_owner_override(kqu.kqwl)) { |
6124 | thread_drop_ipc_override(thread); |
6125 | } |
6126 | thread_ends_owning_workloop(thread); |
6127 | } |
6128 | |
6129 | if (ts && (flags & KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE) == 0) { |
6130 | /* |
6131 | * Past this point, the interlock is the kq req lock again, |
6132 | * so we can fix the inheritor for good. |
6133 | */ |
6134 | filt_wlupdate_inheritor(kqu.kqwl, ts, TURNSTILE_IMMEDIATE_UPDATE); |
6135 | turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD); |
6136 | } |
6137 | |
6138 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid, |
6139 | thread_tid(thread), kqr->kqr_qos_index, |
6140 | (kqr->kqr_override_index << 16) | kqr->kqr_state); |
6141 | |
6142 | ut->uu_kqueue_override = kqr->kqr_override_index; |
6143 | if (kqr->kqr_override_index) { |
6144 | thread_add_ipc_override(thread, kqr->kqr_override_index); |
6145 | } |
6146 | } else { |
6147 | assert(kqr->kqr_override_index == 0); |
6148 | |
6149 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1, |
6150 | thread_tid(thread), kqr->kqr_qos_index, |
6151 | (kqr->kqr_override_index << 16) | kqr->kqr_state); |
6152 | } |
6153 | } |
6154 | |
6155 | /* |
6156 | * kqueue_threadreq_cancel - abort a pending thread request |
6157 | * |
6158 | * Called when exiting/exec'ing. Forget our pending request. |
6159 | */ |
6160 | void |
6161 | kqueue_threadreq_cancel(struct proc *p, workq_threadreq_t req) |
6162 | { |
6163 | struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req); |
6164 | kqueue_t kqu = kqr_kqueue(p, kqr); |
6165 | |
6166 | kq_req_lock(kqu); |
6167 | |
6168 | assert(kqr->kqr_thread == THREAD_NULL); |
6169 | assert(kqr->kqr_state & KQR_THREQUESTED); |
6170 | kqr->kqr_state &= ~(KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED); |
6171 | |
6172 | kq_req_unlock(kqu); |
6173 | |
6174 | kqueue_release_last(p, kqu); /* may dealloc kqu */ |
6175 | } |
6176 | |
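/*
* kqueue_threadreq_workloop_param - return the scheduling parameters
* (priority, policy, CPU limit) the workloop was configured with when
* it was created.
*/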
6177 | workq_threadreq_param_t |
6178 | kqueue_threadreq_workloop_param(workq_threadreq_t req) |
6179 | { |
6180 | struct kqrequest *kqr = __container_of(req, struct kqrequest, kqr_req); |
6181 | struct kqworkloop *kqwl; |
6182 | workq_threadreq_param_t trp; |
6183 | |
6184 | assert(kqr->kqr_state & KQR_WORKLOOP); |
6185 | kqwl = __container_of(kqr, struct kqworkloop, kqwl_request); |
6186 | trp.trp_value = kqwl->kqwl_params; |
6187 | return trp; |
6188 | } |
6189 | |
6190 | /* |
6191 | * kqueue_threadreq_unbind - unbind thread from processing kqueue |
6192 | * |
6193 | * End processing the per-QoS bucket of events and allow other threads |
6194 | * to be requested for future servicing. |
6195 | * |
6196 | * caller holds a reference on the kqueue. |
6197 | */ |
6198 | void |
6199 | kqueue_threadreq_unbind(struct proc *p, struct kqrequest *kqr) |
6200 | { |
6201 | if (kqr->kqr_state & KQR_WORKLOOP) { |
6202 | kqworkloop_unbind(p, kqr_kqworkloop(kqr)); |
6203 | } else { |
6204 | kqworkq_unbind(p, kqr); |
6205 | } |
6206 | } |
6207 | |
6208 | /* |
6209 | * If we aren't already busy processing events [for this QoS], |
6210 | * request workq thread support as appropriate. |
6211 | * |
6212 | * TBD - for now, we don't segregate out processing by QoS. |
6213 | * |
6214 | * - May be called with the kqueue's wait queue set locked, |
6215 | * so cannot do anything that could recurse on that. |
6216 | */ |
6217 | static void |
6218 | kqworkq_request_help(struct kqworkq *kqwq, kq_index_t qos_index) |
6219 | { |
6220 | struct kqrequest *kqr; |
6221 | |
6222 | /* convert to thread qos value */ |
6223 | assert(qos_index < KQWQ_NBUCKETS); |
6224 | |
6225 | kq_req_lock(kqwq); |
6226 | kqr = kqworkq_get_request(kqwq, qos_index); |
6227 | |
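/*
* Note the wakeup on this QoS bucket, and only issue a new thread
* request if one isn't already outstanding for it.
*/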
6228 | if ((kqr->kqr_state & KQR_WAKEUP) == 0) { |
6229 | kqr->kqr_state |= KQR_WAKEUP; |
6230 | if ((kqr->kqr_state & KQR_THREQUESTED) == 0) { |
6231 | kqueue_threadreq_initiate(&kqwq->kqwq_kqueue, kqr, qos_index, 0); |
6232 | } |
6233 | } |
6234 | kq_req_unlock(kqwq); |
6235 | } |
6236 | |
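/*
* kqworkloop_owner_override - QoS at which the workloop owner should run
*
* This is the max of the workloop's own QoS and the current override
* pushed by fired or suppressed knotes.
*/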
6237 | static kq_index_t |
6238 | kqworkloop_owner_override(struct kqworkloop *kqwl) |
6239 | { |
6240 | struct kqrequest *kqr = &kqwl->kqwl_request; |
6241 | return MAX(kqr->kqr_qos_index, kqr->kqr_override_index); |
6242 | } |
6243 | |
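/*
* If the return-to-kernel notification is armed, disarm it and set an
* AST on the servicer thread so it comes back to the kernel to pick up
* the newly fired events.
*/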
6244 | static inline void |
6245 | kqworkloop_request_fire_r2k_notification(struct kqworkloop *kqwl) |
6246 | { |
6247 | struct kqrequest *kqr = &kqwl->kqwl_request; |
6248 | |
6249 | kq_req_held(kqwl); |
6250 | |
6251 | if (kqr->kqr_state & KQR_R2K_NOTIF_ARMED) { |
6252 | assert(kqr->kqr_thread); |
6253 | kqr->kqr_state &= ~KQR_R2K_NOTIF_ARMED; |
6254 | act_set_astkevent(kqr->kqr_thread, AST_KEVENT_RETURN_TO_KERNEL); |
6255 | } |
6256 | } |
6257 | |
6258 | static void |
6259 | kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos) |
6260 | { |
6261 | struct kqrequest *kqr = &kqwl->kqwl_request; |
6262 | struct kqueue *kq = &kqwl->kqwl_kqueue; |
6263 | kq_index_t old_owner_override = kqworkloop_owner_override(kqwl); |
6264 | kq_index_t i; |
6265 | |
6266 | /* must hold the kqr lock */ |
6267 | kq_req_held(kqwl); |
6268 | |
6269 | switch (op) { |
6270 | case KQWL_UTQ_UPDATE_WAKEUP_QOS: |
6271 | if (qos == KQWL_BUCKET_STAYACTIVE) { |
6272 | /* |
* KQWL_BUCKET_STAYACTIVE is not a QoS bucket; we only remember
6274 | * a high watermark (kqr_stayactive_qos) of any stay active knote |
6275 | * that was ever registered with this workloop. |
6276 | * |
6277 | * When waitq_set__CALLING_PREPOST_HOOK__() wakes up any stay active |
6278 | * knote, we use this high-watermark as a wakeup-index, and also set |
6279 | * the magic KQWL_BUCKET_STAYACTIVE bit to make sure we remember |
6280 | * there is at least one stay active knote fired until the next full |
6281 | * processing of this bucket. |
6282 | */ |
6283 | kqr->kqr_wakeup_indexes |= KQWL_STAYACTIVE_FIRED_BIT; |
6284 | qos = kqr->kqr_stayactive_qos; |
6285 | assert(qos); |
6286 | } |
6287 | if (kqr->kqr_wakeup_indexes & (1 << qos)) { |
6288 | assert(kqr->kqr_state & KQR_WAKEUP); |
6289 | break; |
6290 | } |
6291 | |
6292 | kqr->kqr_wakeup_indexes |= (1 << qos); |
6293 | kqr->kqr_state |= KQR_WAKEUP; |
6294 | kqworkloop_request_fire_r2k_notification(kqwl); |
6295 | goto recompute; |
6296 | |
6297 | case KQWL_UTQ_UPDATE_STAYACTIVE_QOS: |
6298 | assert(qos); |
6299 | if (kqr->kqr_stayactive_qos < qos) { |
6300 | kqr->kqr_stayactive_qos = qos; |
6301 | if (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT) { |
6302 | assert(kqr->kqr_state & KQR_WAKEUP); |
6303 | kqr->kqr_wakeup_indexes |= (1 << qos); |
6304 | goto recompute; |
6305 | } |
6306 | } |
6307 | break; |
6308 | |
6309 | case KQWL_UTQ_PARKING: |
6310 | case KQWL_UTQ_UNBINDING: |
6311 | kqr->kqr_override_index = qos; |
6312 | /* FALLTHROUGH */ |
6313 | case KQWL_UTQ_RECOMPUTE_WAKEUP_QOS: |
6314 | if (op == KQWL_UTQ_RECOMPUTE_WAKEUP_QOS) { |
6315 | assert(qos == THREAD_QOS_UNSPECIFIED); |
6316 | } |
6317 | kqlock_held(kqwl); // to look at kq_queues |
6318 | i = KQWL_BUCKET_STAYACTIVE; |
6319 | if (TAILQ_EMPTY(&kqr->kqr_suppressed)) { |
6320 | kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED; |
6321 | } |
6322 | if (!TAILQ_EMPTY(&kqwl->kqwl_queue[i]) && |
6323 | (kqr->kqr_wakeup_indexes & KQWL_STAYACTIVE_FIRED_BIT)) { |
6324 | /* |
6325 | * If the KQWL_STAYACTIVE_FIRED_BIT is set, it means a stay active |
6326 | * knote may have fired, so we need to merge in kqr_stayactive_qos. |
6327 | * |
6328 | * Unlike other buckets, this one is never empty but could be idle. |
6329 | */ |
6330 | kqr->kqr_wakeup_indexes &= KQWL_STAYACTIVE_FIRED_BIT; |
6331 | kqr->kqr_wakeup_indexes |= (1 << kqr->kqr_stayactive_qos); |
6332 | } else { |
6333 | kqr->kqr_wakeup_indexes = 0; |
6334 | } |
6335 | for (i = THREAD_QOS_UNSPECIFIED + 1; i < KQWL_BUCKET_STAYACTIVE; i++) { |
6336 | if (!TAILQ_EMPTY(&kqwl->kqwl_queue[i])) { |
6337 | kqr->kqr_wakeup_indexes |= (1 << i); |
6338 | } |
6339 | } |
6340 | if (kqr->kqr_wakeup_indexes) { |
6341 | kqr->kqr_state |= KQR_WAKEUP; |
6342 | kqworkloop_request_fire_r2k_notification(kqwl); |
6343 | } else { |
6344 | kqr->kqr_state &= ~KQR_WAKEUP; |
6345 | } |
6346 | goto recompute; |
6347 | |
6348 | case KQWL_UTQ_RESET_WAKEUP_OVERRIDE: |
6349 | kqr->kqr_override_index = qos; |
6350 | goto recompute; |
6351 | |
6352 | case KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE: |
6353 | recompute: |
6354 | /* |
6355 | * When modifying the wakeup QoS or the override QoS, we always need to |
6356 | * maintain our invariant that kqr_override_index is at least as large |
6357 | * as the highest QoS for which an event is fired. |
6358 | * |
* However, this override index can be larger when there is an overridden
* suppressed knote pushing on the kqueue.
6361 | */ |
6362 | if (kqr->kqr_wakeup_indexes > (1 << qos)) { |
6363 | qos = fls(kqr->kqr_wakeup_indexes) - 1; /* fls is 1-based */ |
6364 | } |
6365 | if (kqr->kqr_override_index < qos) { |
6366 | kqr->kqr_override_index = qos; |
6367 | } |
6368 | break; |
6369 | |
6370 | case KQWL_UTQ_REDRIVE_EVENTS: |
6371 | break; |
6372 | |
6373 | case KQWL_UTQ_SET_QOS_INDEX: |
6374 | kqr->kqr_qos_index = qos; |
6375 | break; |
6376 | |
6377 | default: |
panic("unknown kqwl thread qos update operation: %d", op);
6379 | } |
6380 | |
6381 | thread_t kqwl_owner = kqwl->kqwl_owner; |
6382 | thread_t servicer = kqr->kqr_thread; |
6383 | boolean_t qos_changed = FALSE; |
6384 | kq_index_t new_owner_override = kqworkloop_owner_override(kqwl); |
6385 | |
6386 | /* |
6387 | * Apply the diffs to the owner if applicable |
6388 | */ |
6389 | if (kqwl_owner) { |
6390 | #if 0 |
6391 | /* JMM - need new trace hooks for owner overrides */ |
6392 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), |
6393 | kqwl->kqwl_dynamicid, thread_tid(kqwl_owner), kqr->kqr_qos_index, |
6394 | (kqr->kqr_override_index << 16) | kqr->kqr_state); |
6395 | #endif |
6396 | if (new_owner_override == old_owner_override) { |
6397 | // nothing to do |
6398 | } else if (old_owner_override == THREAD_QOS_UNSPECIFIED) { |
6399 | thread_add_ipc_override(kqwl_owner, new_owner_override); |
6400 | } else if (new_owner_override == THREAD_QOS_UNSPECIFIED) { |
6401 | thread_drop_ipc_override(kqwl_owner); |
6402 | } else /* old_owner_override != new_owner_override */ { |
6403 | thread_update_ipc_override(kqwl_owner, new_owner_override); |
6404 | } |
6405 | } |
6406 | |
6407 | /* |
6408 | * apply the diffs to the servicer |
6409 | */ |
6410 | if ((kqr->kqr_state & KQR_THREQUESTED) == 0) { |
6411 | /* |
6412 | * No servicer, nor thread-request |
6413 | * |
6414 | * Make a new thread request, unless there is an owner (or the workloop |
6415 | * is suspended in userland) or if there is no asynchronous work in the |
6416 | * first place. |
6417 | */ |
6418 | |
6419 | if (kqwl_owner == NULL && (kqr->kqr_state & KQR_WAKEUP)) { |
6420 | int initiate_flags = 0; |
6421 | if (op == KQWL_UTQ_UNBINDING) { |
6422 | initiate_flags = WORKQ_THREADREQ_ATTEMPT_REBIND; |
6423 | } |
6424 | kqueue_threadreq_initiate(kq, kqr, new_owner_override, |
6425 | initiate_flags); |
6426 | } |
6427 | } else if (servicer) { |
6428 | /* |
6429 | * Servicer in flight |
6430 | * |
6431 | * Just apply the diff to the servicer |
6432 | */ |
6433 | struct uthread *ut = get_bsdthread_info(servicer); |
6434 | if (ut->uu_kqueue_override != kqr->kqr_override_index) { |
6435 | if (ut->uu_kqueue_override == THREAD_QOS_UNSPECIFIED) { |
6436 | thread_add_ipc_override(servicer, kqr->kqr_override_index); |
6437 | } else if (kqr->kqr_override_index == THREAD_QOS_UNSPECIFIED) { |
6438 | thread_drop_ipc_override(servicer); |
6439 | } else /* ut->uu_kqueue_override != kqr->kqr_override_index */ { |
6440 | thread_update_ipc_override(servicer, kqr->kqr_override_index); |
6441 | } |
6442 | ut->uu_kqueue_override = kqr->kqr_override_index; |
6443 | qos_changed = TRUE; |
6444 | } |
6445 | } else if (new_owner_override == THREAD_QOS_UNSPECIFIED) { |
6446 | /* |
6447 | * No events to deliver anymore. |
6448 | * |
* However, canceling with turnstiles is challenging, so the fact that
* the request isn't useful will be discovered by the servicer itself
6451 | * later on. |
6452 | */ |
6453 | } else if (old_owner_override != new_owner_override) { |
6454 | /* |
6455 | * Request is in flight |
6456 | * |
6457 | * Apply the diff to the thread request |
6458 | */ |
6459 | kqueue_threadreq_modify(kq, kqr, new_owner_override); |
6460 | qos_changed = TRUE; |
6461 | } |
6462 | |
6463 | if (qos_changed) { |
6464 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid, |
6465 | thread_tid(kqr->kqr_thread), kqr->kqr_qos_index, |
6466 | (kqr->kqr_override_index << 16) | kqr->kqr_state); |
6467 | } |
6468 | } |
6469 | |
6470 | static void |
6471 | kqworkloop_request_help(struct kqworkloop *kqwl, kq_index_t qos_index) |
6472 | { |
6473 | /* convert to thread qos value */ |
6474 | assert(qos_index < KQWL_NBUCKETS); |
6475 | |
6476 | kq_req_lock(kqwl); |
6477 | kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_QOS, qos_index); |
6478 | kq_req_unlock(kqwl); |
6479 | } |
6480 | |
6481 | static struct kqtailq * |
6482 | kqueue_get_queue(struct kqueue *kq, kq_index_t qos_index) |
6483 | { |
6484 | if (kq->kq_state & KQ_WORKQ) { |
6485 | assert(qos_index < KQWQ_NBUCKETS); |
6486 | } else if (kq->kq_state & KQ_WORKLOOP) { |
6487 | assert(qos_index < KQWL_NBUCKETS); |
6488 | } else { |
6489 | assert(qos_index == QOS_INDEX_KQFILE); |
6490 | } |
static_assert(offsetof(struct kqueue, kq_queue) == sizeof(struct kqueue),
"struct kqueue::kq_queue must be exactly at the end");
6493 | return &kq->kq_queue[qos_index]; |
6494 | } |
6495 | |
6496 | static int |
6497 | kqueue_queue_empty(struct kqueue *kq, kq_index_t qos_index) |
6498 | { |
6499 | return TAILQ_EMPTY(kqueue_get_queue(kq, qos_index)); |
6500 | } |
6501 | |
6502 | static struct kqtailq * |
6503 | kqueue_get_suppressed_queue(kqueue_t kq, struct knote *kn) |
6504 | { |
6505 | if (kq.kq->kq_state & KQ_WORKQ) { |
6506 | return &kqworkq_get_request(kq.kqwq, kn->kn_qos_index)->kqr_suppressed; |
6507 | } else if (kq.kq->kq_state & KQ_WORKLOOP) { |
6508 | return &kq.kqwl->kqwl_request.kqr_suppressed; |
6509 | } else { |
6510 | return &kq.kqf->kqf_suppressed; |
6511 | } |
6512 | } |
6513 | |
6514 | static struct turnstile * |
6515 | kqueue_get_turnstile(kqueue_t kqu, bool can_alloc) |
6516 | { |
6517 | uint8_t kqr_state; |
6518 | |
6519 | if ((kqu.kq->kq_state & KQ_WORKLOOP) == 0) { |
6520 | return TURNSTILE_NULL; |
6521 | } |
6522 | |
6523 | kqr_state = os_atomic_load(&kqu.kqwl->kqwl_request.kqr_state, relaxed); |
6524 | if (kqr_state & KQR_ALLOCATED_TURNSTILE) { |
6525 | /* force a dependency to pair with the atomic or with release below */ |
6526 | return os_atomic_load_with_dependency_on(&kqu.kqwl->kqwl_turnstile, |
6527 | kqr_state); |
6528 | } |
6529 | |
6530 | if (!can_alloc) { |
6531 | return TURNSTILE_NULL; |
6532 | } |
6533 | |
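/*
* Allocate a turnstile optimistically before taking the request lock;
* if another thread won the race and published one already, free ours
* and use theirs.
*/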
6534 | struct turnstile *ts = turnstile_alloc(), *free_ts = TURNSTILE_NULL; |
6535 | |
6536 | kq_req_lock(kqu); |
6537 | if (filt_wlturnstile_interlock_is_workq(kqu.kqwl)) { |
6538 | workq_kern_threadreq_lock(kqu.kqwl->kqwl_p); |
6539 | } |
6540 | |
6541 | if (kqu.kqwl->kqwl_request.kqr_state & KQR_ALLOCATED_TURNSTILE) { |
6542 | free_ts = ts; |
6543 | ts = kqu.kqwl->kqwl_turnstile; |
6544 | } else { |
6545 | ts = turnstile_prepare((uintptr_t)kqu.kqwl, &kqu.kqwl->kqwl_turnstile, |
6546 | ts, TURNSTILE_WORKLOOPS); |
6547 | |
6548 | /* release-barrier to pair with the unlocked load of kqwl_turnstile above */ |
6549 | os_atomic_or(&kqu.kqwl->kqwl_request.kqr_state, |
6550 | KQR_ALLOCATED_TURNSTILE, release); |
6551 | } |
6552 | |
6553 | if (filt_wlturnstile_interlock_is_workq(kqu.kqwl)) { |
6554 | workq_kern_threadreq_unlock(kqu.kqwl->kqwl_p); |
6555 | } |
6556 | kq_req_unlock(kqu.kqwl); |
6557 | |
6558 | if (free_ts) { |
6559 | turnstile_deallocate(free_ts); |
6560 | } |
6561 | return ts; |
6562 | } |
6563 | |
6564 | struct turnstile * |
6565 | kqueue_turnstile(struct kqueue *kq) |
6566 | { |
6567 | return kqueue_get_turnstile(kq, false); |
6568 | } |
6569 | |
6570 | struct turnstile * |
6571 | kqueue_alloc_turnstile(struct kqueue *kq) |
6572 | { |
6573 | return kqueue_get_turnstile(kq, true); |
6574 | } |
6575 | |
6576 | static struct kqtailq * |
6577 | knote_get_queue(struct knote *kn) |
6578 | { |
6579 | return kqueue_get_queue(knote_get_kq(kn), kn->kn_qos_index); |
6580 | } |
6581 | |
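/*
* knote_reset_priority - recompute a knote's QoS from the pthread
* priority supplied at registration
*
* Workq kqueues map an unspecified QoS to the manager bucket, workloops
* normalize the priority, and plain kqueue files force it back to
* unspecified.
*/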
6582 | static void |
6583 | knote_reset_priority(struct knote *kn, pthread_priority_t pp) |
6584 | { |
6585 | struct kqueue *kq = knote_get_kq(kn); |
6586 | kq_index_t qos = _pthread_priority_thread_qos(pp); |
6587 | |
6588 | assert((kn->kn_status & KN_QUEUED) == 0); |
6589 | |
6590 | if (kq->kq_state & KQ_WORKQ) { |
6591 | if (qos == THREAD_QOS_UNSPECIFIED) { |
6592 | /* On workqueues, outside of QoS means MANAGER */ |
6593 | qos = KQWQ_QOS_MANAGER; |
6594 | pp = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; |
6595 | } else { |
6596 | pp = _pthread_priority_normalize(pp); |
6597 | } |
6598 | } else if (kq->kq_state & KQ_WORKLOOP) { |
6599 | assert((pp & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) == 0); |
6600 | pp = _pthread_priority_normalize(pp); |
6601 | } else { |
6602 | pp = _pthread_unspecified_priority(); |
6603 | qos = THREAD_QOS_UNSPECIFIED; |
6604 | } |
6605 | |
6606 | kn->kn_qos = pp; |
6607 | kn->kn_req_index = qos; |
6608 | |
6609 | if ((kn->kn_status & KN_MERGE_QOS) == 0 || qos > kn->kn_qos_override) { |
6610 | /* Never lower QoS when in "Merge" mode */ |
6611 | kn->kn_qos_override = qos; |
6612 | } |
6613 | |
6614 | /* only adjust in-use qos index when not suppressed */ |
6615 | if ((kn->kn_status & KN_SUPPRESSED) == 0) { |
6616 | kn->kn_qos_index = qos; |
6617 | } else if (kq->kq_state & KQ_WORKQ) { |
6618 | kqworkq_update_override((struct kqworkq *)kq, kn, qos); |
6619 | } else if (kq->kq_state & KQ_WORKLOOP) { |
6620 | kqworkloop_update_override((struct kqworkloop *)kq, qos); |
6621 | } |
6622 | } |
6623 | |
6624 | static void |
6625 | knote_set_qos_overcommit(struct knote *kn) |
6626 | { |
6627 | struct kqueue *kq = knote_get_kq(kn); |
6628 | |
6629 | /* turn overcommit on for the appropriate thread request? */ |
6630 | if ((kn->kn_qos & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) && |
6631 | (kq->kq_state & KQ_WORKLOOP)) { |
6632 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
6633 | struct kqrequest *kqr = &kqwl->kqwl_request; |
6634 | |
6635 | /* |
6636 | * This test is racy, but since we never remove this bit, |
6637 | * it allows us to avoid taking a lock. |
6638 | */ |
6639 | if (kqr->kqr_state & KQR_THOVERCOMMIT) { |
6640 | return; |
6641 | } |
6642 | |
6643 | kq_req_lock(kqwl); |
6644 | kqr->kqr_state |= KQR_THOVERCOMMIT; |
6645 | if (!kqr->kqr_thread && (kqr->kqr_state & KQR_THREQUESTED)) { |
6646 | kqueue_threadreq_modify(kq, kqr, kqr->kqr_req.tr_qos); |
6647 | } |
6648 | kq_req_unlock(kqwl); |
6649 | } |
6650 | } |
6651 | |
6652 | static kq_index_t |
6653 | knote_get_qos_override_index(struct knote *kn) |
6654 | { |
6655 | return kn->kn_qos_override; |
6656 | } |
6657 | |
6658 | static void |
6659 | kqworkq_update_override(struct kqworkq *kqwq, struct knote *kn, |
6660 | kq_index_t override_index) |
6661 | { |
6662 | struct kqrequest *kqr; |
6663 | kq_index_t old_override_index; |
6664 | kq_index_t queue_index = kn->kn_qos_index; |
6665 | |
6666 | if (override_index <= queue_index) { |
6667 | return; |
6668 | } |
6669 | |
6670 | kqr = kqworkq_get_request(kqwq, queue_index); |
6671 | |
6672 | kq_req_lock(kqwq); |
6673 | old_override_index = kqr->kqr_override_index; |
6674 | if (override_index > MAX(kqr->kqr_qos_index, old_override_index)) { |
6675 | kqr->kqr_override_index = override_index; |
6676 | |
/* apply the override to the (possibly incoming) servicing thread */
6678 | if (kqr->kqr_thread) { |
6679 | if (old_override_index) |
6680 | thread_update_ipc_override(kqr->kqr_thread, override_index); |
6681 | else |
6682 | thread_add_ipc_override(kqr->kqr_thread, override_index); |
6683 | } |
6684 | } |
6685 | kq_req_unlock(kqwq); |
6686 | } |
6687 | |
6688 | static void |
6689 | kqworkloop_update_override(struct kqworkloop *kqwl, kq_index_t override_index) |
6690 | { |
6691 | kq_req_lock(kqwl); |
6692 | kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE, |
6693 | override_index); |
6694 | kq_req_unlock(kqwl); |
6695 | } |
6696 | |
6697 | static thread_qos_t |
6698 | kqworkloop_unbind_locked(struct kqworkloop *kqwl, thread_t thread) |
6699 | { |
6700 | struct uthread *ut = get_bsdthread_info(thread); |
6701 | struct kqrequest *kqr = &kqwl->kqwl_request; |
6702 | kq_index_t ipc_override = ut->uu_kqueue_override; |
6703 | |
6704 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND), kqwl->kqwl_dynamicid, |
6705 | thread_tid(thread), 0, 0); |
6706 | |
6707 | kq_req_held(kqwl); |
6708 | assert(ut->uu_kqr_bound == kqr); |
6709 | ut->uu_kqr_bound = NULL; |
6710 | ut->uu_kqueue_override = THREAD_QOS_UNSPECIFIED; |
6711 | |
6712 | if (kqwl->kqwl_owner == NULL && kqwl->kqwl_turnstile) { |
6713 | turnstile_update_inheritor(kqwl->kqwl_turnstile, |
6714 | TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE); |
6715 | turnstile_update_inheritor_complete(kqwl->kqwl_turnstile, |
6716 | TURNSTILE_INTERLOCK_HELD); |
6717 | } |
6718 | |
6719 | kqr->kqr_thread = NULL; |
6720 | kqr->kqr_state &= ~(KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED); |
6721 | return ipc_override; |
6722 | } |
6723 | |
6724 | /* |
6725 | * kqworkloop_unbind - Unbind the servicer thread of a workloop kqueue |
6726 | * |
6727 | * It will acknowledge events, and possibly request a new thread if: |
6728 | * - there were active events left |
6729 | * - we pended waitq hook callouts during processing |
6730 | * - we pended wakeups while processing (or unsuppressing) |
6731 | * |
6732 | * Called with kqueue lock held. |
6733 | */ |
6734 | static void |
6735 | kqworkloop_unbind(proc_t p, struct kqworkloop *kqwl) |
6736 | { |
6737 | struct kqueue *kq = &kqwl->kqwl_kqueue; |
6738 | struct kqrequest *kqr = &kqwl->kqwl_request; |
6739 | thread_t thread = kqr->kqr_thread; |
6740 | int op = KQWL_UTQ_PARKING; |
6741 | kq_index_t ipc_override, qos_override = THREAD_QOS_UNSPECIFIED; |
6742 | |
6743 | assert(thread == current_thread()); |
6744 | |
6745 | kqlock(kqwl); |
6746 | |
6747 | /* |
 * Forcing the KQ_PROCESSING flag ensures that QoS updates caused by
 * unsuppressing knotes are not applied until the eventual call to
 * kqworkloop_update_threads_qos() below.
6751 | */ |
6752 | assert((kq->kq_state & KQ_PROCESSING) == 0); |
6753 | if (!TAILQ_EMPTY(&kqr->kqr_suppressed)) { |
6754 | kq->kq_state |= KQ_PROCESSING; |
6755 | qos_override = kqworkloop_acknowledge_events(kqwl); |
6756 | kq->kq_state &= ~KQ_PROCESSING; |
6757 | } |
6758 | |
6759 | kq_req_lock(kqwl); |
6760 | |
6761 | ipc_override = kqworkloop_unbind_locked(kqwl, thread); |
6762 | kqworkloop_update_threads_qos(kqwl, op, qos_override); |
6763 | |
6764 | kq_req_unlock(kqwl); |
6765 | |
6766 | kqunlock(kqwl); |
6767 | |
6768 | /* |
6769 | * Drop the override on the current thread last, after the call to |
6770 | * kqworkloop_update_threads_qos above. |
6771 | */ |
6772 | if (ipc_override) { |
6773 | thread_drop_ipc_override(thread); |
6774 | } |
6775 | |
6776 | /* If last reference, dealloc the workloop kq */ |
6777 | kqueue_release_last(p, kqwl); |
6778 | } |
6779 | |
6780 | static thread_qos_t |
6781 | kqworkq_unbind_locked(__assert_only struct kqworkq *kqwq, |
6782 | struct kqrequest *kqr, thread_t thread) |
6783 | { |
6784 | struct uthread *ut = get_bsdthread_info(thread); |
6785 | kq_index_t old_override = kqr->kqr_override_index; |
6786 | |
6787 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND), -1, |
6788 | thread_tid(kqr->kqr_thread), kqr->kqr_qos_index, 0); |
6789 | |
6790 | kq_req_held(kqwq); |
6791 | assert(ut->uu_kqr_bound == kqr); |
6792 | ut->uu_kqr_bound = NULL; |
6793 | kqr->kqr_thread = NULL; |
6794 | kqr->kqr_state &= ~(KQR_THREQUESTED | KQR_R2K_NOTIF_ARMED); |
6795 | kqr->kqr_override_index = THREAD_QOS_UNSPECIFIED; |
6796 | |
6797 | return old_override; |
6798 | } |
6799 | |
6800 | /* |
6801 | * kqworkq_unbind - unbind of a workq kqueue from a thread |
6802 | * |
6803 | * We may have to request new threads. |
 * This can happen when there are no waiting processing threads and:
6805 | * - there were active events we never got to (count > 0) |
6806 | * - we pended waitq hook callouts during processing |
6807 | * - we pended wakeups while processing (or unsuppressing) |
6808 | */ |
6809 | static void |
6810 | kqworkq_unbind(proc_t p, struct kqrequest *kqr) |
6811 | { |
6812 | struct kqworkq *kqwq = (struct kqworkq *)p->p_fd->fd_wqkqueue; |
6813 | __assert_only int rc; |
6814 | |
6815 | kqlock(kqwq); |
6816 | rc = kqworkq_acknowledge_events(kqwq, kqr, 0, KQWQAE_UNBIND); |
6817 | assert(rc == -1); |
6818 | kqunlock(kqwq); |
6819 | } |
6820 | |
6821 | struct kqrequest * |
6822 | kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index) |
6823 | { |
6824 | assert(qos_index < KQWQ_NBUCKETS); |
6825 | return &kqwq->kqwq_request[qos_index]; |
6826 | } |
6827 | |
6828 | static void |
6829 | knote_apply_qos_override(struct knote *kn, kq_index_t qos_index) |
6830 | { |
6831 | assert((kn->kn_status & KN_QUEUED) == 0); |
6832 | |
6833 | kn->kn_qos_override = qos_index; |
6834 | |
6835 | if (kn->kn_status & KN_SUPPRESSED) { |
6836 | struct kqueue *kq = knote_get_kq(kn); |
6837 | /* |
6838 | * For suppressed events, the kn_qos_index field cannot be touched as it |
 * tells us which suppress queue the knote is on for a kqworkq.
6840 | * |
6841 | * Also, there's no natural push applied on the kqueues when this field |
6842 | * changes anyway. We hence need to apply manual overrides in this case, |
6843 | * which will be cleared when the events are later acknowledged. |
6844 | */ |
6845 | if (kq->kq_state & KQ_WORKQ) { |
6846 | kqworkq_update_override((struct kqworkq *)kq, kn, qos_index); |
6847 | } else { |
6848 | kqworkloop_update_override((struct kqworkloop *)kq, qos_index); |
6849 | } |
6850 | } else { |
6851 | kn->kn_qos_index = qos_index; |
6852 | } |
6853 | } |
6854 | |
6855 | static bool |
6856 | knote_should_apply_qos_override(struct kqueue *kq, struct knote *kn, int result, |
6857 | thread_qos_t *qos_out) |
6858 | { |
6859 | thread_qos_t qos_index = (result >> FILTER_ADJUST_EVENT_QOS_SHIFT) & 7; |
6860 | |
6861 | kqlock_held(kq); |
6862 | |
6863 | assert(result & FILTER_ADJUST_EVENT_QOS_BIT); |
6864 | assert(qos_index < THREAD_QOS_LAST); |
6865 | |
6866 | /* |
6867 | * Early exit for knotes that should not change QoS |
6868 | * |
6869 | * It is safe to test kn_req_index against MANAGER / STAYACTIVE because |
6870 | * knotes with such kn_req_index values never change for their entire |
6871 | * lifetime. |
6872 | */ |
6873 | if (__improbable(!knote_fops(kn)->f_adjusts_qos)) { |
6874 | panic("filter %d cannot change QoS" , kn->kn_filtid); |
6875 | } else if (kq->kq_state & KQ_WORKLOOP) { |
6876 | if (kn->kn_req_index == KQWL_BUCKET_STAYACTIVE) { |
6877 | return false; |
6878 | } |
6879 | } else if (kq->kq_state & KQ_WORKQ) { |
6880 | if (kn->kn_req_index == KQWQ_QOS_MANAGER) { |
6881 | return false; |
6882 | } |
6883 | } else { |
6884 | return false; |
6885 | } |
6886 | |
6887 | /* |
6888 | * knotes with the FALLBACK flag will only use their registration QoS if the |
 * incoming event has no QoS; otherwise the registration QoS acts as a floor.
6890 | */ |
6891 | if (kn->kn_qos & _PTHREAD_PRIORITY_FALLBACK_FLAG) { |
6892 | if (qos_index == THREAD_QOS_UNSPECIFIED) |
6893 | qos_index = kn->kn_req_index; |
6894 | } else { |
6895 | if (qos_index < kn->kn_req_index) |
6896 | qos_index = kn->kn_req_index; |
6897 | } |
6898 | if ((kn->kn_status & KN_MERGE_QOS) && (qos_index < kn->kn_qos_override)) { |
6899 | /* Never lower QoS when in "Merge" mode */ |
6900 | return false; |
6901 | } |
6902 | |
6903 | if ((kn->kn_status & KN_LOCKED) && kn->kn_inuse) { |
6904 | /* |
 * When we're trying to update the QoS override while both an f_event()
 * and other f_* calls are running concurrently, any of these in-flight
 * calls may want to perform overrides that aren't properly serialized
 * with each other.
6909 | * |
6910 | * The first update that observes this racy situation enters a "Merge" |
6911 | * mode which causes subsequent override requests to saturate the |
6912 | * override instead of replacing its value. |
6913 | * |
6914 | * This mode is left when knote_unlock() or knote_call_filter_event() |
6915 | * observe that no other f_* routine is in flight. |
6916 | */ |
6917 | kn->kn_status |= KN_MERGE_QOS; |
6918 | } |
6919 | |
6920 | if (kn->kn_qos_override == qos_index) { |
6921 | return false; |
6922 | } |
6923 | |
6924 | *qos_out = qos_index; |
6925 | return true; |
6926 | } |
6927 | |
6928 | static void |
6929 | knote_adjust_qos(struct kqueue *kq, struct knote *kn, int result) |
6930 | { |
6931 | thread_qos_t qos; |
6932 | if (knote_should_apply_qos_override(kq, kn, result, &qos)) { |
6933 | knote_dequeue(kn); |
6934 | knote_apply_qos_override(kn, qos); |
6935 | if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) { |
6936 | knote_wakeup(kn); |
6937 | } |
6938 | } |
6939 | } |
6940 | |
6941 | static void |
6942 | knote_wakeup(struct knote *kn) |
6943 | { |
6944 | struct kqueue *kq = knote_get_kq(kn); |
6945 | |
6946 | kqlock_held(kq); |
6947 | |
6948 | if (kq->kq_state & KQ_WORKQ) { |
6949 | struct kqworkq *kqwq = (struct kqworkq *)kq; |
6950 | |
6951 | kqworkq_request_help(kqwq, kn->kn_qos_index); |
6952 | } else if (kq->kq_state & KQ_WORKLOOP) { |
6953 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
6954 | |
6955 | /* |
6956 | * kqworkloop_end_processing() will perform the required QoS |
6957 | * computations when it unsets the processing mode. |
6958 | */ |
6959 | if (!kqworkloop_is_processing_on_current_thread(kqwl)) { |
6960 | kqworkloop_request_help(kqwl, kn->kn_qos_index); |
6961 | } |
6962 | } else { |
6963 | struct kqfile *kqf = (struct kqfile *)kq; |
6964 | |
6965 | /* flag wakeups during processing */ |
6966 | if (kq->kq_state & KQ_PROCESSING) |
6967 | kq->kq_state |= KQ_WAKEUP; |
6968 | |
6969 | /* wakeup a thread waiting on this queue */ |
6970 | if (kq->kq_state & (KQ_SLEEP | KQ_SEL)) { |
6971 | kq->kq_state &= ~(KQ_SLEEP | KQ_SEL); |
6972 | waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, KQ_EVENT, |
6973 | THREAD_AWAKENED, WAITQ_ALL_PRIORITIES); |
6974 | } |
6975 | |
6976 | /* wakeup other kqueues/select sets we're inside */ |
6977 | KNOTE(&kqf->kqf_sel.si_note, 0); |
6978 | } |
6979 | } |
6980 | |
6981 | /* |
6982 | * Called with the kqueue locked |
6983 | */ |
6984 | static void |
6985 | kqueue_interrupt(struct kqueue *kq) |
6986 | { |
6987 | assert((kq->kq_state & KQ_WORKQ) == 0); |
6988 | |
6989 | /* wakeup sleeping threads */ |
6990 | if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0) { |
6991 | kq->kq_state &= ~(KQ_SLEEP | KQ_SEL); |
6992 | (void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, |
6993 | KQ_EVENT, |
6994 | THREAD_RESTART, |
6995 | WAITQ_ALL_PRIORITIES); |
6996 | } |
6997 | |
6998 | /* wakeup threads waiting their turn to process */ |
6999 | if (kq->kq_state & KQ_PROCWAIT) { |
7000 | struct kqtailq *suppressq; |
7001 | |
7002 | assert(kq->kq_state & KQ_PROCESSING); |
7003 | |
7004 | kq->kq_state &= ~KQ_PROCWAIT; |
7005 | suppressq = kqueue_get_suppressed_queue(kq, NULL); |
7006 | (void)waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, |
7007 | CAST_EVENT64_T(suppressq), |
7008 | THREAD_RESTART, |
7009 | WAITQ_ALL_PRIORITIES); |
7010 | } |
7011 | } |
7012 | |
7013 | /* |
7014 | * Called back from waitq code when no threads waiting and the hook was set. |
7015 | * |
7016 | * Interrupts are likely disabled and spin locks are held - minimal work |
7017 | * can be done in this context!!! |
7018 | * |
7019 | * JMM - in the future, this will try to determine which knotes match the |
7020 | * wait queue wakeup and apply these wakeups against those knotes themselves. |
7021 | * For now, all the events dispatched this way are dispatch-manager handled, |
7022 | * so hard-code that for now. |
7023 | */ |
7024 | void |
7025 | waitq_set__CALLING_PREPOST_HOOK__(void *kq_hook, void *knote_hook, int qos) |
7026 | { |
7027 | #pragma unused(knote_hook, qos) |
7028 | |
7029 | struct kqueue *kq = (struct kqueue *)kq_hook; |
7030 | |
7031 | if (kq->kq_state & KQ_WORKQ) { |
7032 | struct kqworkq *kqwq = (struct kqworkq *)kq; |
7033 | |
7034 | kqworkq_request_help(kqwq, KQWQ_QOS_MANAGER); |
7035 | } else if (kq->kq_state & KQ_WORKLOOP) { |
7036 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
7037 | |
7038 | kqworkloop_request_help(kqwl, KQWL_BUCKET_STAYACTIVE); |
7039 | } |
7040 | } |
7041 | |
7042 | void |
7043 | klist_init(struct klist *list) |
7044 | { |
7045 | SLIST_INIT(list); |
7046 | } |
7047 | |
7048 | |
7049 | /* |
7050 | * Query/Post each knote in the object's list |
7051 | * |
7052 | * The object lock protects the list. It is assumed |
7053 | * that the filter/event routine for the object can |
7054 | * determine that the object is already locked (via |
7055 | * the hint) and not deadlock itself. |
7056 | * |
7057 | * The object lock should also hold off pending |
7058 | * detach/drop operations. |
7059 | */ |
7060 | void |
7061 | knote(struct klist *list, long hint) |
7062 | { |
7063 | struct knote *kn; |
7064 | |
7065 | SLIST_FOREACH(kn, list, kn_selnext) { |
7066 | struct kqueue *kq = knote_get_kq(kn); |
7067 | kqlock(kq); |
7068 | knote_call_filter_event(kq, kn, hint); |
7069 | kqunlock(kq); |
7070 | } |
7071 | } |
7072 | |
7073 | /* |
7074 | * attach a knote to the specified list. Return true if this is the first entry. |
7075 | * The list is protected by whatever lock the object it is associated with uses. |
7076 | */ |
7077 | int |
7078 | knote_attach(struct klist *list, struct knote *kn) |
7079 | { |
7080 | int ret = SLIST_EMPTY(list); |
7081 | SLIST_INSERT_HEAD(list, kn, kn_selnext); |
7082 | return (ret); |
7083 | } |
7084 | |
7085 | /* |
7086 | * detach a knote from the specified list. Return true if that was the last entry. |
7087 | * The list is protected by whatever lock the object it is associated with uses. |
7088 | */ |
7089 | int |
7090 | knote_detach(struct klist *list, struct knote *kn) |
7091 | { |
7092 | SLIST_REMOVE(list, kn, knote, kn_selnext); |
7093 | return (SLIST_EMPTY(list)); |
7094 | } |
7095 | |
7096 | /* |
7097 | * knote_vanish - Indicate that the source has vanished |
7098 | * |
7099 | * If the knote has requested EV_VANISHED delivery, |
7100 | * arrange for that. Otherwise, deliver a NOTE_REVOKE |
7101 | * event for backward compatibility. |
7102 | * |
7103 | * The knote is marked as having vanished, but is not |
7104 | * actually detached from the source in this instance. |
7105 | * The actual detach is deferred until the knote drop. |
7106 | * |
7107 | * Our caller already has the object lock held. Calling |
7108 | * the detach routine would try to take that lock |
7109 | * recursively - which likely is not supported. |
7110 | */ |
7111 | void |
7112 | knote_vanish(struct klist *list) |
7113 | { |
7114 | struct knote *kn; |
7115 | struct knote *kn_next; |
7116 | |
7117 | SLIST_FOREACH_SAFE(kn, list, kn_selnext, kn_next) { |
7118 | struct kqueue *kq = knote_get_kq(kn); |
7119 | |
7120 | kqlock(kq); |
7121 | if (kn->kn_status & KN_REQVANISH) { |
/* If EV_VANISHED delivery was requested - prepare to deliver one */
7123 | kn->kn_status |= KN_VANISHED; |
7124 | knote_activate(kn); |
7125 | } else { |
7126 | knote_call_filter_event(kq, kn, NOTE_REVOKE); |
7127 | } |
7128 | kqunlock(kq); |
7129 | } |
7130 | } |
7131 | |
7132 | /* |
7133 | * Force a lazy allocation of the waitqset link |
7134 | * of the kq_wqs associated with the kn |
7135 | * if it wasn't already allocated. |
7136 | * |
7137 | * This allows knote_link_waitq to never block |
7138 | * if reserved_link is not NULL. |
7139 | */ |
7140 | void |
7141 | knote_link_waitqset_lazy_alloc(struct knote *kn) |
7142 | { |
7143 | struct kqueue *kq = knote_get_kq(kn); |
7144 | waitq_set_lazy_init_link(&kq->kq_wqs); |
7145 | } |
7146 | |
7147 | /* |
7148 | * Check if a lazy allocation for the waitqset link |
7149 | * of the kq_wqs is needed. |
7150 | */ |
7151 | boolean_t |
7152 | knote_link_waitqset_should_lazy_alloc(struct knote *kn) |
7153 | { |
7154 | struct kqueue *kq = knote_get_kq(kn); |
7155 | return waitq_set_should_lazy_init_link(&kq->kq_wqs); |
7156 | } |
7157 | |
7158 | /* |
7159 | * For a given knote, link a provided wait queue directly with the kqueue. |
7160 | * Wakeups will happen via recursive wait queue support. But nothing will move |
7161 | * the knote to the active list at wakeup (nothing calls knote()). Instead, |
7162 | * we permanently enqueue them here. |
7163 | * |
7164 | * kqueue and knote references are held by caller. |
7165 | * waitq locked by caller. |
7166 | * |
 * caller provides the wait queue link structure and ensures that the kq->kq_wqs
7168 | * is linked by previously calling knote_link_waitqset_lazy_alloc. |
7169 | */ |
7170 | int |
7171 | knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link) |
7172 | { |
7173 | struct kqueue *kq = knote_get_kq(kn); |
7174 | kern_return_t kr; |
7175 | |
7176 | kr = waitq_link(wq, &kq->kq_wqs, WAITQ_ALREADY_LOCKED, reserved_link); |
7177 | if (kr == KERN_SUCCESS) { |
7178 | knote_markstayactive(kn); |
7179 | return (0); |
7180 | } else { |
7181 | return (EINVAL); |
7182 | } |
7183 | } |
7184 | |
7185 | /* |
7186 | * Unlink the provided wait queue from the kqueue associated with a knote. |
7187 | * Also remove it from the magic list of directly attached knotes. |
7188 | * |
7189 | * Note that the unlink may have already happened from the other side, so |
7190 | * ignore any failures to unlink and just remove it from the kqueue list. |
7191 | * |
7192 | * On success, caller is responsible for the link structure |
7193 | */ |
7194 | int |
7195 | knote_unlink_waitq(struct knote *kn, struct waitq *wq) |
7196 | { |
7197 | struct kqueue *kq = knote_get_kq(kn); |
7198 | kern_return_t kr; |
7199 | |
7200 | kr = waitq_unlink(wq, &kq->kq_wqs); |
7201 | knote_clearstayactive(kn); |
7202 | return ((kr != KERN_SUCCESS) ? EINVAL : 0); |
7203 | } |
7204 | |
7205 | /* |
7206 | * remove all knotes referencing a specified fd |
7207 | * |
7208 | * Entered with the proc_fd lock already held. |
7209 | * It returns the same way, but may drop it temporarily. |
7210 | */ |
7211 | void |
7212 | knote_fdclose(struct proc *p, int fd) |
7213 | { |
7214 | struct klist *list; |
7215 | struct knote *kn; |
7216 | KNOTE_LOCK_CTX(knlc); |
7217 | |
7218 | restart: |
7219 | list = &p->p_fd->fd_knlist[fd]; |
7220 | SLIST_FOREACH(kn, list, kn_link) { |
7221 | struct kqueue *kq = knote_get_kq(kn); |
7222 | |
7223 | kqlock(kq); |
7224 | |
7225 | if (kq->kq_p != p) |
7226 | panic("%s: proc mismatch (kq->kq_p=%p != p=%p)" , |
7227 | __func__, kq->kq_p, p); |
7228 | |
7229 | /* |
7230 | * If the knote supports EV_VANISHED delivery, |
7231 | * transition it to vanished mode (or skip over |
7232 | * it if already vanished). |
7233 | */ |
7234 | if (kn->kn_status & KN_VANISHED) { |
7235 | kqunlock(kq); |
7236 | continue; |
7237 | } |
7238 | |
7239 | proc_fdunlock(p); |
7240 | if (!knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ON_SUCCESS)) { |
7241 | /* the knote was dropped by someone, nothing to do */ |
7242 | } else if (kn->kn_status & KN_REQVANISH) { |
7243 | kn->kn_status |= KN_VANISHED; |
7244 | kn->kn_status &= ~KN_ATTACHED; |
7245 | |
7246 | kqunlock(kq); |
7247 | knote_fops(kn)->f_detach(kn); |
7248 | if (knote_fops(kn)->f_isfd) |
7249 | fp_drop(p, kn->kn_id, kn->kn_fp, 0); |
7250 | kqlock(kq); |
7251 | |
7252 | knote_activate(kn); |
7253 | knote_unlock(kq, kn, &knlc, KNOTE_KQ_UNLOCK); |
7254 | } else { |
7255 | knote_drop(kq, kn, &knlc); |
7256 | } |
7257 | |
7258 | proc_fdlock(p); |
7259 | goto restart; |
7260 | } |
7261 | } |
7262 | |
7263 | /* |
7264 | * knote_fdfind - lookup a knote in the fd table for process |
7265 | * |
7266 | * If the filter is file-based, lookup based on fd index. |
7267 | * Otherwise use a hash based on the ident. |
7268 | * |
7269 | * Matching is based on kq, filter, and ident. Optionally, |
7270 | * it may also be based on the udata field in the kevent - |
7271 | * allowing multiple event registration for the file object |
7272 | * per kqueue. |
7273 | * |
7274 | * fd_knhashlock or fdlock held on entry (and exit) |
7275 | */ |
7276 | static struct knote * |
7277 | knote_fdfind(struct kqueue *kq, |
7278 | struct kevent_internal_s *kev, |
7279 | bool is_fd, |
7280 | struct proc *p) |
7281 | { |
7282 | struct filedesc *fdp = p->p_fd; |
7283 | struct klist *list = NULL; |
7284 | struct knote *kn = NULL; |
7285 | |
7286 | /* |
7287 | * determine where to look for the knote |
7288 | */ |
7289 | if (is_fd) { |
7290 | /* fd-based knotes are linked off the fd table */ |
7291 | if (kev->ident < (u_int)fdp->fd_knlistsize) { |
7292 | list = &fdp->fd_knlist[kev->ident]; |
7293 | } |
7294 | } else if (fdp->fd_knhashmask != 0) { |
7295 | /* hash non-fd knotes here too */ |
7296 | list = &fdp->fd_knhash[KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; |
7297 | } |
7298 | |
7299 | /* |
7300 | * scan the selected list looking for a match |
7301 | */ |
7302 | if (list != NULL) { |
7303 | SLIST_FOREACH(kn, list, kn_link) { |
7304 | if (kq == knote_get_kq(kn) && |
7305 | kev->ident == kn->kn_id && |
7306 | kev->filter == kn->kn_filter) { |
7307 | if (kev->flags & EV_UDATA_SPECIFIC) { |
7308 | if ((kn->kn_status & KN_UDATA_SPECIFIC) && |
7309 | kev->udata == kn->kn_udata) { |
7310 | break; /* matching udata-specific knote */ |
7311 | } |
7312 | } else if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0) { |
7313 | break; /* matching non-udata-specific knote */ |
7314 | } |
7315 | } |
7316 | } |
7317 | } |
7318 | return kn; |
7319 | } |
7320 | |
7321 | /* |
 * kq_add_knote - Add knote to the fd table for process
7323 | * while checking for duplicates. |
7324 | * |
7325 | * All file-based filters associate a list of knotes by file |
7326 | * descriptor index. All other filters hash the knote by ident. |
7327 | * |
7328 | * May have to grow the table of knote lists to cover the |
7329 | * file descriptor index presented. |
7330 | * |
7331 | * fd_knhashlock and fdlock unheld on entry (and exit). |
7332 | * |
7333 | * Takes a rwlock boost if inserting the knote is successful. |
7334 | */ |
7335 | static int |
7336 | kq_add_knote(struct kqueue *kq, struct knote *kn, struct knote_lock_ctx *knlc, |
7337 | struct proc *p) |
7338 | { |
7339 | struct filedesc *fdp = p->p_fd; |
7340 | struct klist *list = NULL; |
7341 | int ret = 0; |
7342 | bool is_fd = knote_fops(kn)->f_isfd; |
7343 | |
7344 | if (is_fd) |
7345 | proc_fdlock(p); |
7346 | else |
7347 | knhash_lock(p); |
7348 | |
7349 | if (knote_fdfind(kq, &kn->kn_kevent, is_fd, p) != NULL) { |
7350 | /* found an existing knote: we can't add this one */ |
7351 | ret = ERESTART; |
7352 | goto out_locked; |
7353 | } |
7354 | |
7355 | /* knote was not found: add it now */ |
7356 | if (!is_fd) { |
7357 | if (fdp->fd_knhashmask == 0) { |
7358 | u_long size = 0; |
7359 | |
7360 | list = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE, &size); |
7361 | if (list == NULL) { |
7362 | ret = ENOMEM; |
7363 | goto out_locked; |
7364 | } |
7365 | |
7366 | fdp->fd_knhash = list; |
7367 | fdp->fd_knhashmask = size; |
7368 | } |
7369 | |
7370 | list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; |
7371 | SLIST_INSERT_HEAD(list, kn, kn_link); |
7372 | ret = 0; |
7373 | goto out_locked; |
7374 | |
7375 | } else { |
7376 | /* knote is fd based */ |
7377 | |
7378 | if ((u_int)fdp->fd_knlistsize <= kn->kn_id) { |
7379 | u_int size = 0; |
7380 | |
7381 | if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur |
7382 | || kn->kn_id >= (uint64_t)maxfiles) { |
7383 | ret = EINVAL; |
7384 | goto out_locked; |
7385 | } |
7386 | /* have to grow the fd_knlist */ |
7387 | size = fdp->fd_knlistsize; |
7388 | while (size <= kn->kn_id) |
7389 | size += KQEXTENT; |
7390 | |
7391 | if (size >= (UINT_MAX/sizeof(struct klist *))) { |
7392 | ret = EINVAL; |
7393 | goto out_locked; |
7394 | } |
7395 | |
7396 | MALLOC(list, struct klist *, |
7397 | size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); |
7398 | if (list == NULL) { |
7399 | ret = ENOMEM; |
7400 | goto out_locked; |
7401 | } |
7402 | |
7403 | bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, |
7404 | fdp->fd_knlistsize * sizeof(struct klist *)); |
7405 | bzero((caddr_t)list + |
7406 | fdp->fd_knlistsize * sizeof(struct klist *), |
7407 | (size - fdp->fd_knlistsize) * sizeof(struct klist *)); |
7408 | FREE(fdp->fd_knlist, M_KQUEUE); |
7409 | fdp->fd_knlist = list; |
7410 | fdp->fd_knlistsize = size; |
7411 | } |
7412 | |
7413 | list = &fdp->fd_knlist[kn->kn_id]; |
7414 | SLIST_INSERT_HEAD(list, kn, kn_link); |
7415 | ret = 0; |
7416 | goto out_locked; |
7417 | |
7418 | } |
7419 | |
7420 | out_locked: |
7421 | if (ret == 0) { |
7422 | kqlock(kq); |
7423 | assert((kn->kn_status & KN_LOCKED) == 0); |
7424 | (void)knote_lock(kq, kn, knlc, KNOTE_KQ_UNLOCK); |
7425 | } |
7426 | if (is_fd) |
7427 | proc_fdunlock(p); |
7428 | else |
7429 | knhash_unlock(p); |
7430 | |
7431 | return ret; |
7432 | } |
7433 | |
7434 | /* |
7435 | * kq_remove_knote - remove a knote from the fd table for process |
7436 | * |
7437 | * If the filter is file-based, remove based on fd index. |
7438 | * Otherwise remove from the hash based on the ident. |
7439 | * |
7440 | * fd_knhashlock and fdlock unheld on entry (and exit). |
7441 | */ |
7442 | static void |
7443 | kq_remove_knote(struct kqueue *kq, struct knote *kn, struct proc *p, |
7444 | struct knote_lock_ctx *knlc) |
7445 | { |
7446 | struct filedesc *fdp = p->p_fd; |
7447 | struct klist *list = NULL; |
7448 | uint16_t kq_state; |
7449 | bool is_fd; |
7450 | |
7451 | is_fd = knote_fops(kn)->f_isfd; |
7452 | |
7453 | if (is_fd) |
7454 | proc_fdlock(p); |
7455 | else |
7456 | knhash_lock(p); |
7457 | |
7458 | if (is_fd) { |
assert((u_int)fdp->fd_knlistsize > kn->kn_id);
7460 | list = &fdp->fd_knlist[kn->kn_id]; |
7461 | } else { |
7462 | list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; |
7463 | } |
7464 | SLIST_REMOVE(list, kn, knote, kn_link); |
7465 | |
7466 | kqlock(kq); |
7467 | kq_state = kq->kq_state; |
7468 | if (knlc) { |
7469 | knote_unlock_cancel(kq, kn, knlc, KNOTE_KQ_UNLOCK); |
7470 | } else { |
7471 | kqunlock(kq); |
7472 | } |
7473 | if (is_fd) |
7474 | proc_fdunlock(p); |
7475 | else |
7476 | knhash_unlock(p); |
7477 | |
7478 | if (kq_state & KQ_DYNAMIC) |
7479 | kqueue_release_last(p, kq); |
7480 | } |
7481 | |
7482 | /* |
7483 | * kq_find_knote_and_kq_lock - lookup a knote in the fd table for process |
7484 | * and, if the knote is found, acquires the kqlock while holding the fd table lock/spinlock. |
7485 | * |
7486 | * fd_knhashlock or fdlock unheld on entry (and exit) |
7487 | */ |
7488 | |
7489 | static struct knote * |
7490 | kq_find_knote_and_kq_lock(struct kqueue *kq, struct kevent_internal_s *kev, |
7491 | bool is_fd, struct proc *p) |
7492 | { |
7493 | struct knote * ret; |
7494 | |
7495 | if (is_fd) |
7496 | proc_fdlock(p); |
7497 | else |
7498 | knhash_lock(p); |
7499 | |
7500 | ret = knote_fdfind(kq, kev, is_fd, p); |
7501 | |
7502 | if (ret) { |
7503 | kqlock(kq); |
7504 | } |
7505 | |
7506 | if (is_fd) |
7507 | proc_fdunlock(p); |
7508 | else |
7509 | knhash_unlock(p); |
7510 | |
7511 | return ret; |
7512 | } |
7513 | /* |
7514 | * knote_drop - disconnect and drop the knote |
7515 | * |
7516 | * Called with the kqueue locked, returns with the kqueue unlocked. |
7517 | * |
7518 | * If a knote locking context is passed, it is canceled. |
7519 | * |
7520 | * The knote may have already been detached from |
7521 | * (or not yet attached to) its source object. |
7522 | */ |
7523 | static void |
7524 | knote_drop(struct kqueue *kq, struct knote *kn, struct knote_lock_ctx *knlc) |
7525 | { |
7526 | struct proc *p = kq->kq_p; |
7527 | |
7528 | kqlock_held(kq); |
7529 | |
7530 | assert((kn->kn_status & KN_DROPPING) == 0); |
7531 | if (knlc == NULL) { |
7532 | assert((kn->kn_status & KN_LOCKED) == 0); |
7533 | } |
7534 | kn->kn_status |= KN_DROPPING; |
7535 | |
7536 | knote_unsuppress(kn); |
7537 | knote_dequeue(kn); |
7538 | knote_wait_for_filter_events(kq, kn); |
7539 | |
7540 | /* If we are attached, disconnect from the source first */ |
7541 | if (kn->kn_status & KN_ATTACHED) { |
7542 | knote_fops(kn)->f_detach(kn); |
7543 | } |
7544 | |
7545 | /* kq may be freed when kq_remove_knote() returns */ |
7546 | kq_remove_knote(kq, kn, p, knlc); |
7547 | if (knote_fops(kn)->f_isfd && ((kn->kn_status & KN_VANISHED) == 0)) |
7548 | fp_drop(p, kn->kn_id, kn->kn_fp, 0); |
7549 | |
7550 | knote_free(kn); |
7551 | } |
7552 | |
7553 | /* called with kqueue lock held */ |
7554 | static void |
7555 | knote_activate(struct knote *kn) |
7556 | { |
7557 | if (kn->kn_status & KN_ACTIVE) |
7558 | return; |
7559 | |
7560 | KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE), |
7561 | kn->kn_udata, kn->kn_status | (kn->kn_id << 32), |
7562 | kn->kn_filtid); |
7563 | |
7564 | kn->kn_status |= KN_ACTIVE; |
7565 | if (knote_enqueue(kn)) |
7566 | knote_wakeup(kn); |
7567 | } |
7568 | |
7569 | /* called with kqueue lock held */ |
7570 | static void |
7571 | knote_deactivate(struct knote *kn) |
7572 | { |
7573 | kn->kn_status &= ~KN_ACTIVE; |
7574 | if ((kn->kn_status & KN_STAYACTIVE) == 0) |
7575 | knote_dequeue(kn); |
7576 | } |
7577 | |
7578 | /* called with kqueue lock held */ |
7579 | static void |
7580 | knote_enable(struct knote *kn) |
7581 | { |
7582 | if ((kn->kn_status & KN_DISABLED) == 0) |
7583 | return; |
7584 | |
7585 | kn->kn_status &= ~KN_DISABLED; |
7586 | |
7587 | if (kn->kn_status & KN_SUPPRESSED) { |
7588 | /* |
7589 | * it is possible for userland to have knotes registered for a given |
7590 | * workloop `wl_orig` but really handled on another workloop `wl_new`. |
7591 | * |
 * In that case, rearming will happen from the servicer thread of
 * `wl_new`, which, if `wl_orig` is no longer being serviced, would cause
 * this knote to stay suppressed forever if we only relied on
 * kqworkloop_acknowledge_events() being called by `wl_orig`.
 *
 * However, if we see the KQ_PROCESSING bit set on `wl_orig`, we can't
 * unsuppress because that would interfere with the processing phase of
 * `wl_orig`; it also means kqworkloop_acknowledge_events() will be
 * called.
7601 | */ |
7602 | struct kqueue *kq = knote_get_kq(kn); |
7603 | if ((kq->kq_state & KQ_PROCESSING) == 0) { |
7604 | knote_unsuppress(kn); |
7605 | } |
7606 | } else if (knote_enqueue(kn)) { |
7607 | knote_wakeup(kn); |
7608 | } |
7609 | } |
7610 | |
7611 | /* called with kqueue lock held */ |
7612 | static void |
7613 | knote_disable(struct knote *kn) |
7614 | { |
7615 | if (kn->kn_status & KN_DISABLED) |
7616 | return; |
7617 | |
7618 | kn->kn_status |= KN_DISABLED; |
7619 | knote_dequeue(kn); |
7620 | } |
7621 | |
7622 | /* called with kqueue lock held */ |
7623 | static void |
7624 | knote_suppress(struct knote *kn) |
7625 | { |
7626 | struct kqtailq *suppressq; |
7627 | struct kqueue *kq = knote_get_kq(kn); |
7628 | |
7629 | kqlock_held(kq); |
7630 | |
7631 | if (kn->kn_status & KN_SUPPRESSED) |
7632 | return; |
7633 | |
7634 | knote_dequeue(kn); |
7635 | kn->kn_status |= KN_SUPPRESSED; |
7636 | suppressq = kqueue_get_suppressed_queue(kq, kn); |
7637 | TAILQ_INSERT_TAIL(suppressq, kn, kn_tqe); |
7638 | } |
7639 | |
7640 | /* called with kqueue lock held */ |
7641 | static void |
7642 | knote_unsuppress(struct knote *kn) |
7643 | { |
7644 | struct kqtailq *suppressq; |
7645 | struct kqueue *kq = knote_get_kq(kn); |
7646 | |
7647 | kqlock_held(kq); |
7648 | |
7649 | if ((kn->kn_status & KN_SUPPRESSED) == 0) |
7650 | return; |
7651 | |
7652 | kn->kn_status &= ~KN_SUPPRESSED; |
7653 | suppressq = kqueue_get_suppressed_queue(kq, kn); |
7654 | TAILQ_REMOVE(suppressq, kn, kn_tqe); |
7655 | |
7656 | /* |
7657 | * If the knote is no longer active, reset its push, |
7658 | * and resynchronize kn_qos_index with kn_qos_override |
7659 | */ |
7660 | if ((kn->kn_status & KN_ACTIVE) == 0) { |
7661 | kn->kn_qos_override = kn->kn_req_index; |
7662 | } |
7663 | kn->kn_qos_index = kn->kn_qos_override; |
7664 | |
7665 | /* don't wakeup if unsuppressing just a stay-active knote */ |
7666 | if (knote_enqueue(kn) && (kn->kn_status & KN_ACTIVE)) { |
7667 | knote_wakeup(kn); |
7668 | } |
7669 | |
7670 | if ((kq->kq_state & KQ_WORKLOOP) && TAILQ_EMPTY(suppressq)) { |
7671 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
7672 | |
7673 | if (kqworkloop_is_processing_on_current_thread(kqwl)) { |
7674 | /* |
7675 | * kqworkloop_end_processing() or kqworkloop_begin_processing() |
7676 | * will perform the required QoS computations when it unsets the |
7677 | * processing mode. |
7678 | */ |
7679 | } else { |
7680 | kq_req_lock(kqwl); |
7681 | kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_RESET_WAKEUP_OVERRIDE, 0); |
7682 | kq_req_unlock(kqwl); |
7683 | } |
7684 | } |
7685 | } |
7686 | |
7687 | /* called with kqueue lock held */ |
7688 | static int |
7689 | knote_enqueue(struct knote *kn) |
7690 | { |
7691 | if ((kn->kn_status & (KN_ACTIVE | KN_STAYACTIVE)) == 0 || |
7692 | (kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING))) |
7693 | return 0; |
7694 | |
7695 | if ((kn->kn_status & KN_QUEUED) == 0) { |
7696 | struct kqtailq *queue = knote_get_queue(kn); |
7697 | struct kqueue *kq = knote_get_kq(kn); |
7698 | |
7699 | kqlock_held(kq); |
7700 | TAILQ_INSERT_TAIL(queue, kn, kn_tqe); |
7701 | kn->kn_status |= KN_QUEUED; |
7702 | kq->kq_count++; |
7703 | return 1; |
7704 | } |
7705 | return ((kn->kn_status & KN_STAYACTIVE) != 0); |
7706 | } |
7707 | |
7708 | |
7709 | /* called with kqueue lock held */ |
7710 | static void |
7711 | knote_dequeue(struct knote *kn) |
7712 | { |
7713 | struct kqueue *kq = knote_get_kq(kn); |
7714 | struct kqtailq *queue; |
7715 | |
7716 | kqlock_held(kq); |
7717 | |
7718 | if ((kn->kn_status & KN_QUEUED) == 0) |
7719 | return; |
7720 | |
7721 | queue = knote_get_queue(kn); |
7722 | TAILQ_REMOVE(queue, kn, kn_tqe); |
7723 | kn->kn_status &= ~KN_QUEUED; |
7724 | kq->kq_count--; |
7725 | } |
7726 | |
7727 | void |
7728 | knote_init(void) |
7729 | { |
knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote),
8192, "knote zone");

kqfile_zone = zinit(sizeof(struct kqfile), 8192*sizeof(struct kqfile),
8192, "kqueue file zone");

kqworkq_zone = zinit(sizeof(struct kqworkq), 8192*sizeof(struct kqworkq),
8192, "kqueue workq zone");

kqworkloop_zone = zinit(sizeof(struct kqworkloop), 8192*sizeof(struct kqworkloop),
8192, "kqueue workloop zone");
7741 | |
7742 | /* allocate kq lock group attribute and group */ |
7743 | kq_lck_grp_attr = lck_grp_attr_alloc_init(); |
7744 | |
kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr);
7746 | |
7747 | /* Allocate kq lock attribute */ |
7748 | kq_lck_attr = lck_attr_alloc_init(); |
7749 | |
7750 | #if CONFIG_MEMORYSTATUS |
7751 | /* Initialize the memorystatus list lock */ |
7752 | memorystatus_kevent_init(kq_lck_grp, kq_lck_attr); |
7753 | #endif |
7754 | } |
7755 | SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL) |
7756 | |
7757 | const struct filterops * |
7758 | knote_fops(struct knote *kn) |
7759 | { |
7760 | return sysfilt_ops[kn->kn_filtid]; |
7761 | } |
7762 | |
7763 | static struct knote * |
7764 | knote_alloc(void) |
7765 | { |
7766 | struct knote *kn = ((struct knote *)zalloc(knote_zone)); |
7767 | bzero(kn, sizeof(struct knote)); |
7768 | return kn; |
7769 | } |
7770 | |
7771 | static void |
7772 | knote_free(struct knote *kn) |
7773 | { |
7774 | assert(kn->kn_inuse == 0); |
7775 | assert((kn->kn_status & KN_LOCKED) == 0); |
7776 | zfree(knote_zone, kn); |
7777 | } |
7778 | |
7779 | #if SOCKETS |
7780 | #include <sys/param.h> |
7781 | #include <sys/socket.h> |
7782 | #include <sys/protosw.h> |
7783 | #include <sys/domain.h> |
7784 | #include <sys/mbuf.h> |
7785 | #include <sys/kern_event.h> |
7786 | #include <sys/malloc.h> |
7787 | #include <sys/sys_domain.h> |
7788 | #include <sys/syslog.h> |
7789 | |
7790 | #ifndef ROUNDUP64 |
7791 | #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t)) |
7792 | #endif |
7793 | |
7794 | #ifndef ADVANCE64 |
7795 | #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n)) |
7796 | #endif |
7797 | |
7798 | static lck_grp_attr_t *kev_lck_grp_attr; |
7799 | static lck_attr_t *kev_lck_attr; |
7800 | static lck_grp_t *kev_lck_grp; |
7801 | static decl_lck_rw_data(,kev_lck_data); |
7802 | static lck_rw_t *kev_rwlock = &kev_lck_data; |
7803 | |
7804 | static int kev_attach(struct socket *so, int proto, struct proc *p); |
7805 | static int kev_detach(struct socket *so); |
7806 | static int kev_control(struct socket *so, u_long cmd, caddr_t data, |
7807 | struct ifnet *ifp, struct proc *p); |
7808 | static lck_mtx_t * event_getlock(struct socket *, int); |
7809 | static int event_lock(struct socket *, int, void *); |
7810 | static int event_unlock(struct socket *, int, void *); |
7811 | |
7812 | static int event_sofreelastref(struct socket *); |
7813 | static void kev_delete(struct kern_event_pcb *); |
7814 | |
7815 | static struct pr_usrreqs event_usrreqs = { |
7816 | .pru_attach = kev_attach, |
7817 | .pru_control = kev_control, |
7818 | .pru_detach = kev_detach, |
7819 | .pru_soreceive = soreceive, |
7820 | }; |
7821 | |
7822 | static struct protosw eventsw[] = { |
7823 | { |
7824 | .pr_type = SOCK_RAW, |
7825 | .pr_protocol = SYSPROTO_EVENT, |
7826 | .pr_flags = PR_ATOMIC, |
7827 | .pr_usrreqs = &event_usrreqs, |
7828 | .pr_lock = event_lock, |
7829 | .pr_unlock = event_unlock, |
7830 | .pr_getlock = event_getlock, |
7831 | } |
7832 | }; |
7833 | |
7834 | __private_extern__ int kevt_getstat SYSCTL_HANDLER_ARGS; |
7835 | __private_extern__ int kevt_pcblist SYSCTL_HANDLER_ARGS; |
7836 | |
7837 | SYSCTL_NODE(_net_systm, OID_AUTO, kevt, |
CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel event family");
7839 | |
7840 | struct kevtstat kevtstat; |
7841 | SYSCTL_PROC(_net_systm_kevt, OID_AUTO, stats, |
7842 | CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, |
7843 | kevt_getstat, "S,kevtstat" , "" ); |
7844 | |
7845 | SYSCTL_PROC(_net_systm_kevt, OID_AUTO, pcblist, |
7846 | CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, |
kevt_pcblist, "S,xkevtpcb", "");
7848 | |
7849 | static lck_mtx_t * |
7850 | event_getlock(struct socket *so, int flags) |
7851 | { |
7852 | #pragma unused(flags) |
7853 | struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; |
7854 | |
7855 | if (so->so_pcb != NULL) { |
7856 | if (so->so_usecount < 0) |
7857 | panic("%s: so=%p usecount=%d lrh= %s\n" , __func__, |
7858 | so, so->so_usecount, solockhistory_nr(so)); |
7859 | /* NOTREACHED */ |
7860 | } else { |
7861 | panic("%s: so=%p NULL NO so_pcb %s\n" , __func__, |
7862 | so, solockhistory_nr(so)); |
7863 | /* NOTREACHED */ |
7864 | } |
7865 | return (&ev_pcb->evp_mtx); |
7866 | } |
7867 | |
7868 | static int |
7869 | event_lock(struct socket *so, int refcount, void *lr) |
7870 | { |
7871 | void *lr_saved; |
7872 | |
7873 | if (lr == NULL) |
7874 | lr_saved = __builtin_return_address(0); |
7875 | else |
7876 | lr_saved = lr; |
7877 | |
7878 | if (so->so_pcb != NULL) { |
7879 | lck_mtx_lock(&((struct kern_event_pcb *)so->so_pcb)->evp_mtx); |
7880 | } else { |
7881 | panic("%s: so=%p NO PCB! lr=%p lrh= %s\n" , __func__, |
7882 | so, lr_saved, solockhistory_nr(so)); |
7883 | /* NOTREACHED */ |
7884 | } |
7885 | |
7886 | if (so->so_usecount < 0) { |
7887 | panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n" , __func__, |
7888 | so, so->so_pcb, lr_saved, so->so_usecount, |
7889 | solockhistory_nr(so)); |
7890 | /* NOTREACHED */ |
7891 | } |
7892 | |
7893 | if (refcount) |
7894 | so->so_usecount++; |
7895 | |
7896 | so->lock_lr[so->next_lock_lr] = lr_saved; |
7897 | so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; |
7898 | return (0); |
7899 | } |
7900 | |
7901 | static int |
7902 | event_unlock(struct socket *so, int refcount, void *lr) |
7903 | { |
7904 | void *lr_saved; |
7905 | lck_mtx_t *mutex_held; |
7906 | |
7907 | if (lr == NULL) |
7908 | lr_saved = __builtin_return_address(0); |
7909 | else |
7910 | lr_saved = lr; |
7911 | |
7912 | if (refcount) { |
7913 | so->so_usecount--; |
7914 | } |
7915 | if (so->so_usecount < 0) { |
7916 | panic("%s: so=%p usecount=%d lrh= %s\n" , __func__, |
7917 | so, so->so_usecount, solockhistory_nr(so)); |
7918 | /* NOTREACHED */ |
7919 | } |
7920 | if (so->so_pcb == NULL) { |
7921 | panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n" , __func__, |
7922 | so, so->so_usecount, (void *)lr_saved, |
7923 | solockhistory_nr(so)); |
7924 | /* NOTREACHED */ |
7925 | } |
7926 | mutex_held = (&((struct kern_event_pcb *)so->so_pcb)->evp_mtx); |
7927 | |
7928 | LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED); |
7929 | so->unlock_lr[so->next_unlock_lr] = lr_saved; |
7930 | so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; |
7931 | |
7932 | if (so->so_usecount == 0) { |
7933 | VERIFY(so->so_flags & SOF_PCBCLEARING); |
7934 | event_sofreelastref(so); |
7935 | } else { |
7936 | lck_mtx_unlock(mutex_held); |
7937 | } |
7938 | |
7939 | return (0); |
7940 | } |
7941 | |
7942 | static int |
7943 | event_sofreelastref(struct socket *so) |
7944 | { |
7945 | struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; |
7946 | |
7947 | LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_OWNED); |
7948 | |
7949 | so->so_pcb = NULL; |
7950 | |
7951 | /* |
7952 | * Disable upcall in the event another thread is in kev_post_msg() |
 * appending a record to the receive socket buffer, since sbwakeup()
7954 | * may release the socket lock otherwise. |
7955 | */ |
7956 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
7957 | so->so_snd.sb_flags &= ~SB_UPCALL; |
7958 | so->so_event = sonullevent; |
7959 | lck_mtx_unlock(&(ev_pcb->evp_mtx)); |
7960 | |
7961 | LCK_MTX_ASSERT(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_NOTOWNED); |
7962 | lck_rw_lock_exclusive(kev_rwlock); |
7963 | LIST_REMOVE(ev_pcb, evp_link); |
7964 | kevtstat.kes_pcbcount--; |
7965 | kevtstat.kes_gencnt++; |
7966 | lck_rw_done(kev_rwlock); |
7967 | kev_delete(ev_pcb); |
7968 | |
7969 | sofreelastref(so, 1); |
7970 | return (0); |
7971 | } |
7972 | |
7973 | static int event_proto_count = (sizeof (eventsw) / sizeof (struct protosw)); |
7974 | |
7975 | static |
7976 | struct kern_event_head kern_event_head; |
7977 | |
7978 | static u_int32_t static_event_id = 0; |
7979 | |
7980 | #define EVPCB_ZONE_MAX 65536 |
7981 | #define EVPCB_ZONE_NAME "kerneventpcb" |
7982 | static struct zone *ev_pcb_zone; |
7983 | |
7984 | /* |
7985 | * Install the protosw's for the NKE manager. Invoked at extension load time |
7986 | */ |
7987 | void |
7988 | kern_event_init(struct domain *dp) |
7989 | { |
7990 | struct protosw *pr; |
7991 | int i; |
7992 | |
7993 | VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); |
7994 | VERIFY(dp == systemdomain); |
7995 | |
7996 | kev_lck_grp_attr = lck_grp_attr_alloc_init(); |
7997 | if (kev_lck_grp_attr == NULL) { |
7998 | panic("%s: lck_grp_attr_alloc_init failed\n" , __func__); |
7999 | /* NOTREACHED */ |
8000 | } |
8001 | |
kev_lck_grp = lck_grp_alloc_init("Kernel Event Protocol",
8003 | kev_lck_grp_attr); |
8004 | if (kev_lck_grp == NULL) { |
8005 | panic("%s: lck_grp_alloc_init failed\n" , __func__); |
8006 | /* NOTREACHED */ |
8007 | } |
8008 | |
8009 | kev_lck_attr = lck_attr_alloc_init(); |
8010 | if (kev_lck_attr == NULL) { |
8011 | panic("%s: lck_attr_alloc_init failed\n" , __func__); |
8012 | /* NOTREACHED */ |
8013 | } |
8014 | |
8015 | lck_rw_init(kev_rwlock, kev_lck_grp, kev_lck_attr); |
8016 | if (kev_rwlock == NULL) { |
8017 | panic("%s: lck_mtx_alloc_init failed\n" , __func__); |
8018 | /* NOTREACHED */ |
8019 | } |
8020 | |
8021 | for (i = 0, pr = &eventsw[0]; i < event_proto_count; i++, pr++) |
8022 | net_add_proto(pr, dp, 1); |
8023 | |
8024 | ev_pcb_zone = zinit(sizeof(struct kern_event_pcb), |
8025 | EVPCB_ZONE_MAX * sizeof(struct kern_event_pcb), 0, EVPCB_ZONE_NAME); |
8026 | if (ev_pcb_zone == NULL) { |
8027 | panic("%s: failed allocating ev_pcb_zone" , __func__); |
8028 | /* NOTREACHED */ |
8029 | } |
8030 | zone_change(ev_pcb_zone, Z_EXPAND, TRUE); |
8031 | zone_change(ev_pcb_zone, Z_CALLERACCT, TRUE); |
8032 | } |
8033 | |
8034 | static int |
8035 | kev_attach(struct socket *so, __unused int proto, __unused struct proc *p) |
8036 | { |
8037 | int error = 0; |
8038 | struct kern_event_pcb *ev_pcb; |
8039 | |
8040 | error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE); |
8041 | if (error != 0) |
8042 | return (error); |
8043 | |
8044 | if ((ev_pcb = (struct kern_event_pcb *)zalloc(ev_pcb_zone)) == NULL) { |
8045 | return (ENOBUFS); |
8046 | } |
8047 | bzero(ev_pcb, sizeof(struct kern_event_pcb)); |
8048 | lck_mtx_init(&ev_pcb->evp_mtx, kev_lck_grp, kev_lck_attr); |
8049 | |
8050 | ev_pcb->evp_socket = so; |
8051 | ev_pcb->evp_vendor_code_filter = 0xffffffff; |
8052 | |
8053 | so->so_pcb = (caddr_t) ev_pcb; |
8054 | lck_rw_lock_exclusive(kev_rwlock); |
8055 | LIST_INSERT_HEAD(&kern_event_head, ev_pcb, evp_link); |
8056 | kevtstat.kes_pcbcount++; |
8057 | kevtstat.kes_gencnt++; |
8058 | lck_rw_done(kev_rwlock); |
8059 | |
8060 | return (error); |
8061 | } |
8062 | |
8063 | static void |
8064 | kev_delete(struct kern_event_pcb *ev_pcb) |
8065 | { |
8066 | VERIFY(ev_pcb != NULL); |
8067 | lck_mtx_destroy(&ev_pcb->evp_mtx, kev_lck_grp); |
8068 | zfree(ev_pcb_zone, ev_pcb); |
8069 | } |
8070 | |
8071 | static int |
8072 | kev_detach(struct socket *so) |
8073 | { |
8074 | struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb; |
8075 | |
8076 | if (ev_pcb != NULL) { |
8077 | soisdisconnected(so); |
8078 | so->so_flags |= SOF_PCBCLEARING; |
8079 | } |
8080 | |
8081 | return (0); |
8082 | } |
8083 | |
8084 | /* |
8085 | * For now, kev_vendor_code and mbuf_tags use the same |
8086 | * mechanism. |
8087 | */ |
8088 | errno_t kev_vendor_code_find( |
8089 | const char *string, |
8090 | u_int32_t *out_vendor_code) |
8091 | { |
8092 | if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) { |
8093 | return (EINVAL); |
8094 | } |
8095 | return (net_str_id_find_internal(string, out_vendor_code, |
8096 | NSI_VENDOR_CODE, 1)); |
8097 | } |
8098 | |
8099 | errno_t |
8100 | kev_msg_post(struct kev_msg *event_msg) |
8101 | { |
8102 | mbuf_tag_id_t min_vendor, max_vendor; |
8103 | |
8104 | net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE); |
8105 | |
8106 | if (event_msg == NULL) |
8107 | return (EINVAL); |
8108 | |
8109 | /* |
8110 | * Limit third parties to posting events for registered vendor codes |
8111 | * only |
8112 | */ |
8113 | if (event_msg->vendor_code < min_vendor || |
8114 | event_msg->vendor_code > max_vendor) { |
8115 | OSIncrementAtomic64((SInt64 *)&kevtstat.kes_badvendor); |
8116 | return (EINVAL); |
8117 | } |
8118 | return (kev_post_msg(event_msg)); |
8119 | } |
8120 | |
8121 | int |
8122 | kev_post_msg(struct kev_msg *event_msg) |
8123 | { |
8124 | struct mbuf *m, *m2; |
8125 | struct kern_event_pcb *ev_pcb; |
8126 | struct kern_event_msg *ev; |
8127 | char *tmp; |
8128 | u_int32_t total_size; |
8129 | int i; |
8130 | |
8131 | /* Verify the message is small enough to fit in one mbuf w/o cluster */ |
8132 | total_size = KEV_MSG_HEADER_SIZE; |
8133 | |
8134 | for (i = 0; i < 5; i++) { |
8135 | if (event_msg->dv[i].data_length == 0) |
8136 | break; |
8137 | total_size += event_msg->dv[i].data_length; |
8138 | } |
8139 | |
8140 | if (total_size > MLEN) { |
8141 | OSIncrementAtomic64((SInt64 *)&kevtstat.kes_toobig); |
8142 | return (EMSGSIZE); |
8143 | } |
8144 | |
8145 | m = m_get(M_WAIT, MT_DATA); |
8146 | if (m == 0) { |
8147 | OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem); |
8148 | return (ENOMEM); |
8149 | } |
8150 | ev = mtod(m, struct kern_event_msg *); |
8151 | total_size = KEV_MSG_HEADER_SIZE; |
8152 | |
8153 | tmp = (char *) &ev->event_data[0]; |
8154 | for (i = 0; i < 5; i++) { |
8155 | if (event_msg->dv[i].data_length == 0) |
8156 | break; |
8157 | |
8158 | total_size += event_msg->dv[i].data_length; |
8159 | bcopy(event_msg->dv[i].data_ptr, tmp, |
8160 | event_msg->dv[i].data_length); |
8161 | tmp += event_msg->dv[i].data_length; |
8162 | } |
8163 | |
8164 | ev->id = ++static_event_id; |
8165 | ev->total_size = total_size; |
8166 | ev->vendor_code = event_msg->vendor_code; |
8167 | ev->kev_class = event_msg->kev_class; |
8168 | ev->kev_subclass = event_msg->kev_subclass; |
8169 | ev->event_code = event_msg->event_code; |
8170 | |
8171 | m->m_len = total_size; |
8172 | lck_rw_lock_shared(kev_rwlock); |
8173 | for (ev_pcb = LIST_FIRST(&kern_event_head); |
8174 | ev_pcb; |
8175 | ev_pcb = LIST_NEXT(ev_pcb, evp_link)) { |
8176 | lck_mtx_lock(&ev_pcb->evp_mtx); |
8177 | if (ev_pcb->evp_socket->so_pcb == NULL) { |
8178 | lck_mtx_unlock(&ev_pcb->evp_mtx); |
8179 | continue; |
8180 | } |
8181 | if (ev_pcb->evp_vendor_code_filter != KEV_ANY_VENDOR) { |
8182 | if (ev_pcb->evp_vendor_code_filter != ev->vendor_code) { |
8183 | lck_mtx_unlock(&ev_pcb->evp_mtx); |
8184 | continue; |
8185 | } |
8186 | |
8187 | if (ev_pcb->evp_class_filter != KEV_ANY_CLASS) { |
8188 | if (ev_pcb->evp_class_filter != ev->kev_class) { |
8189 | lck_mtx_unlock(&ev_pcb->evp_mtx); |
8190 | continue; |
8191 | } |
8192 | |
8193 | if ((ev_pcb->evp_subclass_filter != |
8194 | KEV_ANY_SUBCLASS) && |
8195 | (ev_pcb->evp_subclass_filter != |
8196 | ev->kev_subclass)) { |
8197 | lck_mtx_unlock(&ev_pcb->evp_mtx); |
8198 | continue; |
8199 | } |
8200 | } |
8201 | } |
8202 | |
8203 | m2 = m_copym(m, 0, m->m_len, M_WAIT); |
8204 | if (m2 == 0) { |
8205 | OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem); |
8206 | m_free(m); |
8207 | lck_mtx_unlock(&ev_pcb->evp_mtx); |
8208 | lck_rw_done(kev_rwlock); |
8209 | return (ENOMEM); |
8210 | } |
8211 | if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) { |
8212 | /* |
8213 | * We use "m" for the socket stats as it would be |
8214 | * unsafe to use "m2" |
8215 | */ |
8216 | so_inc_recv_data_stat(ev_pcb->evp_socket, |
8217 | 1, m->m_len, MBUF_TC_BE); |
8218 | |
8219 | sorwakeup(ev_pcb->evp_socket); |
8220 | OSIncrementAtomic64((SInt64 *)&kevtstat.kes_posted); |
8221 | } else { |
8222 | OSIncrementAtomic64((SInt64 *)&kevtstat.kes_fullsock); |
8223 | } |
8224 | lck_mtx_unlock(&ev_pcb->evp_mtx); |
8225 | } |
8226 | m_free(m); |
8227 | lck_rw_done(kev_rwlock); |
8228 | |
8229 | return (0); |
8230 | } |
8231 | |
8232 | static int |
8233 | kev_control(struct socket *so, |
8234 | u_long cmd, |
8235 | caddr_t data, |
8236 | __unused struct ifnet *ifp, |
8237 | __unused struct proc *p) |
8238 | { |
8239 | struct kev_request *kev_req = (struct kev_request *) data; |
8240 | struct kern_event_pcb *ev_pcb; |
8241 | struct kev_vendor_code *kev_vendor; |
8242 | u_int32_t *id_value = (u_int32_t *) data; |
8243 | |
8244 | switch (cmd) { |
8245 | case SIOCGKEVID: |
8246 | *id_value = static_event_id; |
8247 | break; |
8248 | case SIOCSKEVFILT: |
8249 | ev_pcb = (struct kern_event_pcb *) so->so_pcb; |
8250 | ev_pcb->evp_vendor_code_filter = kev_req->vendor_code; |
8251 | ev_pcb->evp_class_filter = kev_req->kev_class; |
8252 | ev_pcb->evp_subclass_filter = kev_req->kev_subclass; |
8253 | break; |
8254 | case SIOCGKEVFILT: |
8255 | ev_pcb = (struct kern_event_pcb *) so->so_pcb; |
8256 | kev_req->vendor_code = ev_pcb->evp_vendor_code_filter; |
8257 | kev_req->kev_class = ev_pcb->evp_class_filter; |
8258 | kev_req->kev_subclass = ev_pcb->evp_subclass_filter; |
8259 | break; |
8260 | case SIOCGKEVVENDOR: |
8261 | kev_vendor = (struct kev_vendor_code *)data; |
8262 | /* Make sure string is NULL terminated */ |
8263 | kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0; |
8264 | return (net_str_id_find_internal(kev_vendor->vendor_string, |
8265 | &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0)); |
8266 | default: |
8267 | return (ENOTSUP); |
8268 | } |
8269 | |
8270 | return (0); |
8271 | } |
8272 | |
8273 | int |
8274 | kevt_getstat SYSCTL_HANDLER_ARGS |
8275 | { |
8276 | #pragma unused(oidp, arg1, arg2) |
8277 | int error = 0; |
8278 | |
8279 | lck_rw_lock_shared(kev_rwlock); |
8280 | |
8281 | if (req->newptr != USER_ADDR_NULL) { |
8282 | error = EPERM; |
8283 | goto done; |
8284 | } |
8285 | if (req->oldptr == USER_ADDR_NULL) { |
8286 | req->oldidx = sizeof(struct kevtstat); |
8287 | goto done; |
8288 | } |
8289 | |
8290 | error = SYSCTL_OUT(req, &kevtstat, |
8291 | MIN(sizeof(struct kevtstat), req->oldlen)); |
8292 | done: |
8293 | lck_rw_done(kev_rwlock); |
8294 | |
8295 | return (error); |
8296 | } |
8297 | |
8298 | __private_extern__ int |
8299 | kevt_pcblist SYSCTL_HANDLER_ARGS |
8300 | { |
8301 | #pragma unused(oidp, arg1, arg2) |
8302 | int error = 0; |
8303 | int n, i; |
8304 | struct xsystmgen xsg; |
8305 | void *buf = NULL; |
8306 | size_t item_size = ROUNDUP64(sizeof (struct xkevtpcb)) + |
8307 | ROUNDUP64(sizeof (struct xsocket_n)) + |
8308 | 2 * ROUNDUP64(sizeof (struct xsockbuf_n)) + |
8309 | ROUNDUP64(sizeof (struct xsockstat_n)); |
8310 | struct kern_event_pcb *ev_pcb; |
8311 | |
8312 | buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO); |
8313 | if (buf == NULL) |
8314 | return (ENOMEM); |
8315 | |
8316 | lck_rw_lock_shared(kev_rwlock); |
8317 | |
8318 | n = kevtstat.kes_pcbcount; |
8319 | |
8320 | if (req->oldptr == USER_ADDR_NULL) { |
8321 | req->oldidx = (n + n/8) * item_size; |
8322 | goto done; |
8323 | } |
8324 | if (req->newptr != USER_ADDR_NULL) { |
8325 | error = EPERM; |
8326 | goto done; |
8327 | } |
8328 | bzero(&xsg, sizeof (xsg)); |
8329 | xsg.xg_len = sizeof (xsg); |
8330 | xsg.xg_count = n; |
8331 | xsg.xg_gen = kevtstat.kes_gencnt; |
8332 | xsg.xg_sogen = so_gencnt; |
8333 | error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); |
8334 | if (error) { |
8335 | goto done; |
8336 | } |
8337 | /* |
8338 | * We are done if there is no pcb |
8339 | */ |
8340 | if (n == 0) { |
8341 | goto done; |
8342 | } |
8343 | |
8344 | i = 0; |
8345 | for (i = 0, ev_pcb = LIST_FIRST(&kern_event_head); |
8346 | i < n && ev_pcb != NULL; |
8347 | i++, ev_pcb = LIST_NEXT(ev_pcb, evp_link)) { |
8348 | struct xkevtpcb *xk = (struct xkevtpcb *)buf; |
8349 | struct xsocket_n *xso = (struct xsocket_n *) |
8350 | ADVANCE64(xk, sizeof (*xk)); |
8351 | struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *) |
8352 | ADVANCE64(xso, sizeof (*xso)); |
8353 | struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *) |
8354 | ADVANCE64(xsbrcv, sizeof (*xsbrcv)); |
8355 | struct xsockstat_n *xsostats = (struct xsockstat_n *) |
8356 | ADVANCE64(xsbsnd, sizeof (*xsbsnd)); |
8357 | |
8358 | bzero(buf, item_size); |
8359 | |
8360 | lck_mtx_lock(&ev_pcb->evp_mtx); |
8361 | |
8362 | xk->kep_len = sizeof(struct xkevtpcb); |
8363 | xk->kep_kind = XSO_EVT; |
8364 | xk->kep_evtpcb = (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb); |
8365 | xk->kep_vendor_code_filter = ev_pcb->evp_vendor_code_filter; |
8366 | xk->kep_class_filter = ev_pcb->evp_class_filter; |
8367 | xk->kep_subclass_filter = ev_pcb->evp_subclass_filter; |
8368 | |
8369 | sotoxsocket_n(ev_pcb->evp_socket, xso); |
8370 | sbtoxsockbuf_n(ev_pcb->evp_socket ? |
8371 | &ev_pcb->evp_socket->so_rcv : NULL, xsbrcv); |
8372 | sbtoxsockbuf_n(ev_pcb->evp_socket ? |
8373 | &ev_pcb->evp_socket->so_snd : NULL, xsbsnd); |
8374 | sbtoxsockstat_n(ev_pcb->evp_socket, xsostats); |
8375 | |
8376 | lck_mtx_unlock(&ev_pcb->evp_mtx); |
8377 | |
8378 | error = SYSCTL_OUT(req, buf, item_size); |
     | if (error != 0) { |
     | break; |
     | } |
8379 | } |
8380 | |
8381 | if (error == 0) { |
8382 | /* |
8383 | * Give the caller an updated view of our state: if |
8384 | * the generation count differs from the one we |
8385 | * reported above, something changed while this |
8386 | * request was being processed, and the caller may |
8387 | * need to retry. |
8388 | */ |
8389 | bzero(&xsg, sizeof (xsg)); |
8390 | xsg.xg_len = sizeof (xsg); |
8391 | xsg.xg_count = n; |
8392 | xsg.xg_gen = kevtstat.kes_gencnt; |
8393 | xsg.xg_sogen = so_gencnt; |
8394 | error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); |
8395 | if (error) { |
8396 | goto done; |
8397 | } |
8398 | } |
8399 | |
8400 | done: |
8401 | lck_rw_done(kev_rwlock); |
8402 | |
8403 | return (error); |
8404 | } |
8405 | |
8406 | #endif /* SOCKETS */ |
8407 | |
8408 | |
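     | /* |
     |  * fill_kqueueinfo: fill in a kqueue_info record for the given kqueue: |
     |  * the pending event count, the kevent structure size implied by the |
     |  * kqueue flavor, the dynamic ID for workloops, and the subset of |
     |  * state flags exported to libproc via PROC_KQUEUE_*. |
     |  */ |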
8409 | int |
8410 | fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) |
8411 | { |
8412 | struct vinfo_stat * st; |
8413 | |
8414 | st = &kinfo->kq_stat; |
8415 | |
8416 | st->vst_size = kq->kq_count; |
8417 | if (kq->kq_state & KQ_KEV_QOS) |
8418 | st->vst_blksize = sizeof(struct kevent_qos_s); |
8419 | else if (kq->kq_state & KQ_KEV64) |
8420 | st->vst_blksize = sizeof(struct kevent64_s); |
8421 | else |
8422 | st->vst_blksize = sizeof(struct kevent); |
8423 | st->vst_mode = S_IFIFO; |
8424 | st->vst_ino = (kq->kq_state & KQ_DYNAMIC) ? |
8425 | ((struct kqworkloop *)kq)->kqwl_dynamicid : 0; |
8426 | |
8427 | /* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */ |
8428 | #define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS|KQ_WORKQ|KQ_WORKLOOP) |
8429 | kinfo->kq_state = kq->kq_state & PROC_KQUEUE_MASK; |
8430 | |
8431 | return (0); |
8432 | } |
8433 | |
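     | /* |
     |  * fill_kqueue_dyninfo: extended variant of fill_kqueueinfo for |
     |  * workloop kqueues.  Adds servicer and owner thread IDs, the thread |
     |  * request state and QoS indexes, plus any scheduling parameters |
     |  * (priority, policy, CPU percent) configured on the workloop. |
     |  * Returns EINVAL if the kqueue is not a workloop. |
     |  */ |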
8434 | static int |
8435 | fill_kqueue_dyninfo(struct kqueue *kq, struct kqueue_dyninfo *kqdi) |
8436 | { |
8437 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
8438 | struct kqrequest *kqr = &kqwl->kqwl_request; |
8439 | workq_threadreq_param_t trp = {}; |
8440 | int err; |
8441 | |
8442 | if ((kq->kq_state & KQ_WORKLOOP) == 0) { |
8443 | return EINVAL; |
8444 | } |
8445 | |
8446 | if ((err = fill_kqueueinfo(kq, &kqdi->kqdi_info))) { |
8447 | return err; |
8448 | } |
8449 | |
8450 | kq_req_lock(kqwl); |
8451 | |
8452 | kqdi->kqdi_servicer = thread_tid(kqr->kqr_thread); |
8453 | kqdi->kqdi_owner = thread_tid(kqwl->kqwl_owner); |
8454 | kqdi->kqdi_request_state = kqr->kqr_state; |
8455 | kqdi->kqdi_async_qos = kqr->kqr_qos_index; |
8456 | kqdi->kqdi_events_qos = kqr->kqr_override_index; |
8457 | kqdi->kqdi_sync_waiters = kqr->kqr_dsync_waiters; |
8458 | kqdi->kqdi_sync_waiter_qos = 0; |
8459 | |
8460 | trp.trp_value = kqwl->kqwl_params; |
8461 | if (trp.trp_flags & TRP_PRIORITY) |
8462 | kqdi->kqdi_pri = trp.trp_pri; |
8463 | else |
8464 | kqdi->kqdi_pri = 0; |
8465 | |
8466 | if (trp.trp_flags & TRP_POLICY) |
8467 | kqdi->kqdi_pol = trp.trp_pol; |
8468 | else |
8469 | kqdi->kqdi_pol = 0; |
8470 | |
8471 | if (trp.trp_flags & TRP_CPUPERCENT) |
8472 | kqdi->kqdi_cpupercent = trp.trp_cpupercent; |
8473 | else |
8474 | kqdi->kqdi_cpupercent = 0; |
8475 | |
8476 | kq_req_unlock(kqwl); |
8477 | |
8478 | return 0; |
8479 | } |
8480 | |
8481 | |
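     | /* |
     |  * knote_markstayactive: mark a knote as stay-active so it remains |
     |  * active across kevent scans regardless of its filter state.  Must |
     |  * be called while the knote is still attaching; for workq and |
     |  * workloop kqueues the knote is routed to the manager / stay-active |
     |  * QoS bucket. |
     |  */ |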
8482 | void |
8483 | knote_markstayactive(struct knote *kn) |
8484 | { |
8485 | struct kqueue *kq = knote_get_kq(kn); |
8486 | kq_index_t qos; |
8487 | |
8488 | kqlock(kq); |
8489 | kn->kn_status |= KN_STAYACTIVE; |
8490 | |
8491 | /* |
8492 | * Making a knote stay active is a property of the knote that must be |
8493 | * established before it is fully attached. |
8494 | */ |
8495 | assert(kn->kn_status & KN_ATTACHING); |
8496 | assert((kn->kn_status & (KN_QUEUED | KN_SUPPRESSED)) == 0); |
8497 | |
8498 | /* handle all stayactive knotes on the (appropriate) manager */ |
8499 | if (kq->kq_state & KQ_WORKQ) { |
8500 | qos = KQWQ_QOS_MANAGER; |
8501 | } else if (kq->kq_state & KQ_WORKLOOP) { |
8502 | struct kqworkloop *kqwl = (struct kqworkloop *)kq; |
8503 | |
8504 | qos = _pthread_priority_thread_qos(kn->kn_qos); |
8505 | assert(qos && qos < THREAD_QOS_LAST); |
8506 | kq_req_lock(kq); |
8507 | kqworkloop_update_threads_qos(kqwl, KQWL_UTQ_UPDATE_STAYACTIVE_QOS, qos); |
8508 | kq_req_unlock(kq); |
8509 | qos = KQWL_BUCKET_STAYACTIVE; |
8510 | } else { |
8511 | qos = THREAD_QOS_UNSPECIFIED; |
8512 | } |
8513 | |
8514 | kn->kn_req_index = qos; |
8515 | kn->kn_qos_override = qos; |
8516 | kn->kn_qos_index = qos; |
8517 | |
8518 | knote_activate(kn); |
8519 | kqunlock(kq); |
8520 | } |
8521 | |
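     | /* |
     |  * knote_clearstayactive: undo knote_markstayactive() by clearing the |
     |  * stay-active bit and deactivating the knote. |
     |  */ |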
8522 | void |
8523 | knote_clearstayactive(struct knote *kn) |
8524 | { |
8525 | kqlock(knote_get_kq(kn)); |
8526 | kn->kn_status &= ~KN_STAYACTIVE; |
8527 | knote_deactivate(kn); |
8528 | kqunlock(knote_get_kq(kn)); |
8529 | } |
8530 | |
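     | /* |
     |  * kevent_extinfo_emit: walk the knote list starting at 'kn' and copy |
     |  * extended info for each knote belonging to 'kq' into 'buf', starting |
     |  * at index 'nknotes'.  Returns the running total of matching knotes, |
     |  * which may exceed 'buflen'; entries past the end of the buffer are |
     |  * counted but not copied. |
     |  */ |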
8531 | static unsigned long |
8532 | kevent_extinfo_emit(struct kqueue *kq, struct knote *kn, struct kevent_extinfo *buf, |
8533 | unsigned long buflen, unsigned long nknotes) |
8534 | { |
8535 | for (; kn; kn = SLIST_NEXT(kn, kn_link)) { |
8536 | if (kq == knote_get_kq(kn)) { |
8537 | if (nknotes < buflen) { |
8538 | struct kevent_extinfo *info = &buf[nknotes]; |
8539 | struct kevent_internal_s *kevp = &kn->kn_kevent; |
8540 | |
8541 | kqlock(kq); |
8542 | |
8543 | info->kqext_kev = (struct kevent_qos_s){ |
8544 | .ident = kevp->ident, |
8545 | .filter = kevp->filter, |
8546 | .flags = kevp->flags, |
8547 | .fflags = kevp->fflags, |
8548 | .data = (int64_t)kevp->data, |
8549 | .udata = kevp->udata, |
8550 | .ext[0] = kevp->ext[0], |
8551 | .ext[1] = kevp->ext[1], |
8552 | .ext[2] = kevp->ext[2], |
8553 | .ext[3] = kevp->ext[3], |
8554 | .qos = kn->kn_req_index, |
8555 | }; |
8556 | info->kqext_sdata = kn->kn_sdata; |
8557 | info->kqext_status = kn->kn_status; |
8558 | info->kqext_sfflags = kn->kn_sfflags; |
8559 | |
8560 | kqunlock(kq); |
8561 | } |
8562 | |
8563 | /* we return total number of knotes, which may be more than requested */ |
8564 | nknotes++; |
8565 | } |
8566 | } |
8567 | |
8568 | return nknotes; |
8569 | } |
8570 | |
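     | /* |
     |  * kevent_copyout_proc_dynkqids: copy the dynamic kqueue (workloop) |
     |  * IDs of a process out to userspace.  At most PROC_PIDDYNKQUEUES_MAX |
     |  * IDs are copied; the reported count covers every workloop found |
     |  * (also capped at PROC_PIDDYNKQUEUES_MAX) so callers can detect |
     |  * truncation. |
     |  */ |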
8571 | int |
8572 | kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf, uint32_t ubufsize, |
8573 | int32_t *nkqueues_out) |
8574 | { |
8575 | proc_t p = (proc_t)proc; |
8576 | struct filedesc *fdp = p->p_fd; |
8577 | unsigned int nkqueues = 0; |
8578 | unsigned long ubuflen = ubufsize / sizeof(kqueue_id_t); |
8579 | size_t buflen, bufsize; |
8580 | kqueue_id_t *kq_ids = NULL; |
8581 | int err = 0; |
8582 | |
8583 | assert(p != NULL); |
8584 | |
8585 | if (ubuf == USER_ADDR_NULL && ubufsize != 0) { |
8586 | err = EINVAL; |
8587 | goto out; |
8588 | } |
8589 | |
8590 | buflen = min(ubuflen, PROC_PIDDYNKQUEUES_MAX); |
8591 | |
8592 | if (ubuflen != 0) { |
8593 | if (os_mul_overflow(sizeof(kqueue_id_t), buflen, &bufsize)) { |
8594 | err = ERANGE; |
8595 | goto out; |
8596 | } |
8597 | kq_ids = kalloc(bufsize); |
8598 | assert(kq_ids != NULL); |
8599 | } |
8600 | |
8601 | kqhash_lock(p); |
8602 | |
8603 | if (fdp->fd_kqhashmask > 0) { |
8604 | for (uint32_t i = 0; i < fdp->fd_kqhashmask + 1; i++) { |
8605 | struct kqworkloop *kqwl; |
8606 | |
8607 | SLIST_FOREACH(kqwl, &fdp->fd_kqhash[i], kqwl_hashlink) { |
8608 | /* report the number of kqueues, even if they don't all fit */ |
8609 | if (nkqueues < buflen) { |
8610 | kq_ids[nkqueues] = kqwl->kqwl_dynamicid; |
8611 | } |
8612 | nkqueues++; |
8613 | } |
8614 | } |
8615 | } |
8616 | |
8617 | kqhash_unlock(p); |
8618 | |
8619 | if (kq_ids) { |
8620 | size_t copysize; |
8621 | if (os_mul_overflow(sizeof(kqueue_id_t), min(buflen, nkqueues), &copysize)) { |
8622 | err = ERANGE; |
8623 | goto out; |
8624 | } |
8625 | |
8626 | assert(ubufsize >= copysize); |
8627 | err = copyout(kq_ids, ubuf, copysize); |
8628 | } |
8629 | |
8630 | out: |
8631 | if (kq_ids) { |
8632 | kfree(kq_ids, bufsize); |
8633 | } |
8634 | |
8635 | if (!err) { |
8636 | *nkqueues_out = (int)min(nkqueues, PROC_PIDDYNKQUEUES_MAX); |
8637 | } |
8638 | return err; |
8639 | } |
8640 | |
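     | /* |
     |  * kevent_copyout_dynkqinfo: copy out info for the dynamic kqueue |
     |  * identified by kq_id.  A buffer large enough for a struct |
     |  * kqueue_dyninfo receives the extended record; a smaller one receives |
     |  * only a struct kqueue_info. |
     |  */ |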
8641 | int |
8642 | kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf, |
8643 | uint32_t ubufsize, int32_t *size_out) |
8644 | { |
8645 | proc_t p = (proc_t)proc; |
8646 | struct kqueue *kq; |
8647 | int err = 0; |
8648 | struct kqueue_dyninfo kqdi = { }; |
8649 | |
8650 | assert(p != NULL); |
8651 | |
8652 | if (ubufsize < sizeof(struct kqueue_info)) { |
8653 | return ENOBUFS; |
8654 | } |
8655 | |
8656 | kqhash_lock(p); |
8657 | kq = kqueue_hash_lookup(p, kq_id); |
8658 | if (!kq) { |
8659 | kqhash_unlock(p); |
8660 | return ESRCH; |
8661 | } |
8662 | kqueue_retain(kq); |
8663 | kqhash_unlock(p); |
8664 | |
8665 | /* |
8666 | * Backward compatibility: allow the buffer passed to this call to |
8667 | * hold only a struct kqueue_info. |
8668 | */ |
8669 | if (ubufsize >= sizeof(struct kqueue_dyninfo)) { |
8670 | ubufsize = sizeof(struct kqueue_dyninfo); |
8671 | err = fill_kqueue_dyninfo(kq, &kqdi); |
8672 | } else { |
8673 | ubufsize = sizeof(struct kqueue_info); |
8674 | err = fill_kqueueinfo(kq, &kqdi.kqdi_info); |
8675 | } |
8676 | if (err == 0 && (err = copyout(&kqdi, ubuf, ubufsize)) == 0) { |
8677 | *size_out = ubufsize; |
8678 | } |
8679 | kqueue_release_last(p, kq); |
8680 | return err; |
8681 | } |
8682 | |
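     | /* |
     |  * kevent_copyout_dynkqextinfo: copy out extended knote info for the |
     |  * dynamic kqueue identified by kq_id, looked up in the process's |
     |  * kqueue hash. |
     |  */ |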
8683 | int |
8684 | kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf, |
8685 | uint32_t ubufsize, int32_t *nknotes_out) |
8686 | { |
8687 | proc_t p = (proc_t)proc; |
8688 | struct kqueue *kq; |
8689 | int err; |
8690 | |
8691 | assert(p != NULL); |
8692 | |
8693 | kqhash_lock(p); |
8694 | kq = kqueue_hash_lookup(p, kq_id); |
8695 | if (!kq) { |
8696 | kqhash_unlock(p); |
8697 | return ESRCH; |
8698 | } |
8699 | kqueue_retain(kq); |
8700 | kqhash_unlock(p); |
8701 | |
8702 | err = pid_kqueue_extinfo(p, kq, ubuf, ubufsize, nknotes_out); |
8703 | kqueue_release_last(p, kq); |
8704 | return err; |
8705 | } |
8706 | |
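     | /* |
     |  * pid_kqueue_extinfo: gather kevent_extinfo records for every knote |
     |  * of 'kq', scanning both the fd-indexed knote lists and the knote |
     |  * hash, and copy them out to 'ubuf'.  The record count is capped at |
     |  * PROC_PIDFDKQUEUE_KNOTES_MAX to bound the kernel allocation and the |
     |  * copyout size. |
     |  */ |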
8707 | int |
8708 | pid_kqueue_extinfo(proc_t p, struct kqueue *kq, user_addr_t ubuf, |
8709 | uint32_t bufsize, int32_t *retval) |
8710 | { |
8711 | struct knote *kn; |
8712 | int i; |
8713 | int err = 0; |
8714 | struct filedesc *fdp = p->p_fd; |
8715 | unsigned long nknotes = 0; |
8716 | unsigned long buflen = bufsize / sizeof(struct kevent_extinfo); |
8717 | struct kevent_extinfo *kqext = NULL; |
8718 | |
8719 | /* arbitrary upper limit to cap kernel memory usage, copyout size, etc. */ |
8720 | buflen = min(buflen, PROC_PIDFDKQUEUE_KNOTES_MAX); |
8721 | |
8722 | kqext = kalloc(buflen * sizeof(struct kevent_extinfo)); |
8723 | if (kqext == NULL) { |
8724 | err = ENOMEM; |
8725 | goto out; |
8726 | } |
8727 | bzero(kqext, buflen * sizeof(struct kevent_extinfo)); |
8728 | |
8729 | proc_fdlock(p); |
8730 | for (i = 0; i < fdp->fd_knlistsize; i++) { |
8731 | kn = SLIST_FIRST(&fdp->fd_knlist[i]); |
8732 | nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes); |
8733 | } |
8734 | proc_fdunlock(p); |
8735 | |
8736 | if (fdp->fd_knhashmask != 0) { |
8737 | for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) { |
8738 | kqhash_lock(p); |
8739 | kn = SLIST_FIRST(&fdp->fd_knhash[i]); |
8740 | nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes); |
8741 | kqhash_unlock(p); |
8742 | } |
8743 | } |
8744 | |
8745 | assert(bufsize >= sizeof(struct kevent_extinfo) * min(buflen, nknotes)); |
8746 | err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * min(buflen, nknotes)); |
8747 | |
8748 | out: |
8749 | if (kqext) { |
8750 | kfree(kqext, buflen * sizeof(struct kevent_extinfo)); |
8751 | kqext = NULL; |
8752 | } |
8753 | |
8754 | if (!err) { |
8755 | *retval = min(nknotes, PROC_PIDFDKQUEUE_KNOTES_MAX); |
8756 | } |
8757 | return err; |
8758 | } |
8759 | |
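     | /* |
     |  * klist_copy_udata: copy the udata value of each knote on 'list' |
     |  * into 'buf', starting at index 'nknotes'.  Returns the running |
     |  * total, which may exceed 'buflen'. |
     |  */ |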
8760 | static unsigned int |
8761 | klist_copy_udata(struct klist *list, uint64_t *buf, |
8762 | unsigned int buflen, unsigned int nknotes) |
8763 | { |
8764 | struct kevent_internal_s *kev; |
8765 | struct knote *kn; |
8766 | SLIST_FOREACH(kn, list, kn_link) { |
8767 | if (nknotes < buflen) { |
8768 | struct kqueue *kq = knote_get_kq(kn); |
8769 | kqlock(kq); |
8770 | kev = &(kn->kn_kevent); |
8771 | buf[nknotes] = kev->udata; |
8772 | kqunlock(kq); |
8773 | } |
8774 | /* we return total number of knotes, which may be more than requested */ |
8775 | nknotes++; |
8776 | } |
8777 | |
8778 | return nknotes; |
8779 | } |
8780 | |
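     | /* |
     |  * kqlist_copy_dynamicids: copy the dynamic ID of each workloop on |
     |  * the kqueue hash chain 'list' into 'buf'.  Called with the kqueue |
     |  * hash lock held. |
     |  */ |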
8781 | static unsigned int |
8782 | kqlist_copy_dynamicids(__assert_only proc_t p, struct kqlist *list, |
8783 | uint64_t *buf, unsigned int buflen, unsigned int nids) |
8784 | { |
8785 | kqhash_lock_held(p); |
8786 | struct kqworkloop *kqwl; |
8787 | SLIST_FOREACH(kqwl, list, kqwl_hashlink) { |
8788 | if (nids < buflen) { |
8789 | buf[nids] = kqwl->kqwl_dynamicid; |
8790 | } |
8791 | nids++; |
8792 | } |
8793 | return nids; |
8794 | } |
8795 | |
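     | /* |
     |  * kevent_proc_copy_uptrs: collect the user-visible pointers tied to |
     |  * a process's kevent usage (knote udata values and workloop dynamic |
     |  * IDs).  Returns the total count even when it exceeds the supplied |
     |  * buffer, so callers can size a retry accordingly. |
     |  */ |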
8796 | int |
8797 | kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize) |
8798 | { |
8799 | proc_t p = (proc_t)proc; |
8800 | struct filedesc *fdp = p->p_fd; |
8801 | unsigned int nuptrs = 0; |
8802 | unsigned long buflen = bufsize / sizeof(uint64_t); |
8803 | |
8804 | if (buflen > 0) { |
8805 | assert(buf != NULL); |
8806 | } |
8807 | |
8808 | proc_fdlock(p); |
8809 | for (int i = 0; i < fdp->fd_knlistsize; i++) { |
8810 | nuptrs = klist_copy_udata(&fdp->fd_knlist[i], buf, buflen, nuptrs); |
8811 | } |
8812 | knhash_lock(p); |
8813 | proc_fdunlock(p); |
8814 | if (fdp->fd_knhashmask != 0) { |
8815 | for (int i = 0; i < (int)fdp->fd_knhashmask + 1; i++) { |
8816 | nuptrs = klist_copy_udata(&fdp->fd_knhash[i], buf, buflen, nuptrs); |
8817 | } |
8818 | } |
8819 | knhash_unlock(p); |
8820 | |
8821 | kqhash_lock(p); |
8822 | if (fdp->fd_kqhashmask != 0) { |
8823 | for (int i = 0; i < (int)fdp->fd_kqhashmask + 1; i++) { |
8824 | nuptrs = kqlist_copy_dynamicids(p, &fdp->fd_kqhash[i], buf, buflen, |
8825 | nuptrs); |
8826 | } |
8827 | } |
8828 | kqhash_unlock(p); |
8829 | |
8830 | return (int)nuptrs; |
8831 | } |
8832 | |
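     | /* |
     |  * kevent_set_return_to_kernel_user_tsd: publish "return to kernel" |
     |  * flags into the thread's user-visible TSD slot reported by |
     |  * thread_rettokern_addr().  Currently only R2K_WORKLOOP_PENDING_EVENTS |
     |  * is set, for threads still bound to a kqueue request; the flag word |
     |  * is written as 32 or 64 bits depending on the process's address size. |
     |  */ |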
8833 | static void |
8834 | kevent_set_return_to_kernel_user_tsd(proc_t p, thread_t thread) |
8835 | { |
8836 | uint64_t ast_addr; |
8837 | bool proc_is_64bit = !!(p->p_flag & P_LP64); |
8838 | size_t user_addr_size = proc_is_64bit ? 8 : 4; |
8839 | uint32_t ast_flags32 = 0; |
8840 | uint64_t ast_flags64 = 0; |
8841 | struct uthread *ut = get_bsdthread_info(thread); |
8842 | |
8843 | if (ut->uu_kqr_bound != NULL) { |
8844 | ast_flags64 |= R2K_WORKLOOP_PENDING_EVENTS; |
8845 | } |
8846 | |
8847 | if (ast_flags64 == 0) { |
8848 | return; |
8849 | } |
8850 | |
8851 | if (!proc_is_64bit) { |
8852 | ast_flags32 = (uint32_t)ast_flags64; |
8853 | assert(ast_flags64 < 0x100000000ull); |
8854 | } |
8855 | |
8856 | ast_addr = thread_rettokern_addr(thread); |
8857 | if (ast_addr == 0) { |
8858 | return; |
8859 | } |
8860 | |
8861 | if (copyout((proc_is_64bit ? (void *)&ast_flags64 : (void *)&ast_flags32), |
8862 | (user_addr_t)ast_addr, |
8863 | user_addr_size) != 0) { |
8864 | printf("pid %d (tid:%llu): copyout of return_to_kernel ast flags failed with " |
8865 | "ast_addr = %llu\n" , p->p_pid, thread_tid(current_thread()), ast_addr); |
8866 | } |
8867 | } |
8868 | |
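     | /* |
     |  * kevent_ast: handle kevent-related AST bits on the way back to |
     |  * userspace: redrive pending workqueue thread requests and/or |
     |  * publish return-to-kernel flags for the current thread. |
     |  */ |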
8869 | void |
8870 | kevent_ast(thread_t thread, uint16_t bits) |
8871 | { |
8872 | proc_t p = current_proc(); |
8873 | |
8874 | if (bits & AST_KEVENT_REDRIVE_THREADREQ) { |
8875 | workq_kern_threadreq_redrive(p, WORKQ_THREADREQ_CAN_CREATE_THREADS); |
8876 | } |
8877 | if (bits & AST_KEVENT_RETURN_TO_KERNEL) { |
8878 | kevent_set_return_to_kernel_user_tsd(p, thread); |
8879 | } |
8880 | } |
8881 | |
8882 | #if DEVELOPMENT || DEBUG |
8883 | |
8884 | #define KEVENT_SYSCTL_BOUND_ID 1 |
8885 | |
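     | /* |
     |  * kevent_sysctl: handler for the kern.kevent debug sysctls.  For |
     |  * KEVENT_SYSCTL_BOUND_ID it reports the dynamic ID of the workloop |
     |  * the current thread is servicing, -1 if the thread is bound to a |
     |  * workq kqueue, or 0 if it is not bound at all. |
     |  */ |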
8886 | static int |
8887 | kevent_sysctl SYSCTL_HANDLER_ARGS |
8888 | { |
8889 | #pragma unused(oidp, arg2) |
8890 | uintptr_t type = (uintptr_t)arg1; |
8891 | uint64_t bound_id = 0; |
8892 | |
8893 | if (type != KEVENT_SYSCTL_BOUND_ID) { |
8894 | return EINVAL; |
8895 | } |
8896 | |
8897 | if (req->newptr) { |
8898 | return EINVAL; |
8899 | } |
8900 | |
8901 | struct uthread *ut = get_bsdthread_info(current_thread()); |
8902 | if (!ut) { |
8903 | return EFAULT; |
8904 | } |
8905 | |
8906 | struct kqrequest *kqr = ut->uu_kqr_bound; |
8907 | if (kqr) { |
8908 | if (kqr->kqr_state & KQR_WORKLOOP) { |
8909 | bound_id = kqr_kqworkloop(kqr)->kqwl_dynamicid; |
8910 | } else { |
8911 | bound_id = -1; |
8912 | } |
8913 | } |
8914 | |
8915 | return sysctl_io_number(req, bound_id, sizeof(bound_id), NULL, NULL); |
8916 | } |
8917 | |
8918 | SYSCTL_NODE(_kern, OID_AUTO, kevent, CTLFLAG_RW | CTLFLAG_LOCKED, 0, |
8919 | "kevent information" ); |
8920 | |
8921 | SYSCTL_PROC(_kern_kevent, OID_AUTO, bound_id, |
8922 | CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED, |
8923 | (void *)KEVENT_SYSCTL_BOUND_ID, |
8924 | sizeof(kqueue_id_t), kevent_sysctl, "Q", |
8925 | "get the ID of the bound kqueue"); |
8926 | |
8927 | #endif /* DEVELOPMENT || DEBUG */ |
8928 | |