/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/sys/eventvar.h,v 1.1.2.2 2000/07/18 21:49:12 jlemon Exp $
 */

#ifndef _SYS_EVENTVAR_H_
#define _SYS_EVENTVAR_H_

#include <sys/event.h>
#include <sys/select.h>
#include <kern/kern_types.h>
#include <kern/waitq.h>

#if defined(XNU_KERNEL_PRIVATE)

typedef int (*kevent_callback_t)(struct kevent_qos_s *, struct kevent_ctx_s *);

#include <stdint.h>
#include <kern/locks.h>
#include <mach/thread_policy.h>
#include <pthread/workqueue_internal.h>
#include <os/refcnt.h>
/*
 * Lock ordering:
 *
 * The kqueue locking order can follow a few different patterns:
 *
 * Standard file-based kqueues (from above):
 *     proc fd lock -> kq lock -> kq-waitq-set lock -> thread lock
 *
 * WorkQ/WorkLoop kqueues (from above):
 *     proc fd lock -> kq lock -> workq lock -> thread lock
 *
 * Whenever kqueues interact with source locks, they drop all of their own
 * locks in exchange for a use-reference on the knote used to synchronize
 * with the source code. When those sources post events from below, they
 * have the following lock hierarchy.
 *
 * Standard file-based kqueues (from below):
 *     XXX lock -> kq lock -> kq-waitq-set lock -> thread lock
 *
 * WorkQ/WorkLoop kqueues (from below):
 *     XXX lock -> kq lock -> workq lock -> thread lock
 */
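
/*
 * Illustrative sketch (not part of the original header) of the "from above"
 * ordering for a file-based kqueue: the proc fd lock is taken before the kq
 * lock. proc_fdlock()/proc_fdunlock() are the proc-layer helpers; taking the
 * kq lock is shown as a raw spinlock acquire purely for illustration.
 *
 *	proc_fdlock(p);                  // proc fd lock first
 *	lck_spin_lock(&kq->kq_lock);     // then the kq lock
 *	// ... then kq-waitq-set or workq lock, then thread lock ...
 *	lck_spin_unlock(&kq->kq_lock);
 *	proc_fdunlock(p);
 */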

#define KQEXTENT 256 /* linear growth by this amount */

struct knote_lock_ctx {
	struct knote *knlc_knote;
	thread_t knlc_thread;
	uintptr_t knlc_waiters;
	LIST_ENTRY(knote_lock_ctx) knlc_link;
#if DEBUG || DEVELOPMENT
#define KNOTE_LOCK_CTX_UNLOCKED 0
#define KNOTE_LOCK_CTX_LOCKED 1
#define KNOTE_LOCK_CTX_WAITING 2
	int knlc_state;
#endif
};
LIST_HEAD(knote_locks, knote_lock_ctx);
#if DEBUG || DEVELOPMENT
/*
 * KNOTE_LOCK_CTX(name) is a convenience macro to define a knote lock
 * context on the stack named `name`. In development kernels, it uses
 * tricks to make sure no locks are still held when exiting the C scope
 * that contains this context.
 */
static inline void
knote_lock_ctx_chk(struct knote_lock_ctx *knlc)
{
	/* evil hackery to make sure no one forgets to unlock */
	assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED);
}
#define KNOTE_LOCK_CTX(n) \
	struct knote_lock_ctx n __attribute__((cleanup(knote_lock_ctx_chk))); \
	n.knlc_state = KNOTE_LOCK_CTX_UNLOCKED
#else
#define KNOTE_LOCK_CTX(n) \
	struct knote_lock_ctx n
#endif
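
/*
 * Usage sketch for KNOTE_LOCK_CTX (illustrative, not from the original
 * header): the context lives on the stack for the enclosing scope. On
 * DEBUG/DEVELOPMENT kernels, the cleanup attribute asserts that the context
 * is back in the UNLOCKED state when the scope is left.
 *
 *	{
 *		KNOTE_LOCK_CTX(knlc);
 *		// lock the knote through &knlc, work on it, then unlock it
 *		// before the closing brace, or the DEBUG assertion fires
 *	}
 */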


__options_decl(kq_state_t, uint16_t, {
	KQ_SLEEP = 0x0002, /* thread is waiting for events */
	KQ_PROCWAIT = 0x0004, /* thread waiting for processing */
	KQ_KEV32 = 0x0008, /* kq is used with 32-bit events */
	KQ_KEV64 = 0x0010, /* kq is used with 64-bit events */
	KQ_KEV_QOS = 0x0020, /* kq events carry QoS info */
	KQ_WORKQ = 0x0040, /* KQ is bound to process workq */
	KQ_WORKLOOP = 0x0080, /* KQ is part of a workloop */
	KQ_PROCESSING = 0x0100, /* KQ is being processed */
	KQ_DRAIN = 0x0200, /* kq is draining */
	KQ_DYNAMIC = 0x0800, /* kqueue is dynamically managed */
	KQ_R2K_ARMED = 0x1000, /* ast notification armed */
	KQ_HAS_TURNSTILE = 0x2000, /* this kqueue has a turnstile */
});
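
/*
 * Example (illustrative): kq_state_t is a bit mask, so the kqueue variants
 * are told apart by testing flags on the common core, e.g.:
 *
 *	if (kq->kq_state & KQ_WORKLOOP) {
 *		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
 *		// workloop-specific handling
 *	} else if (kq->kq_state & KQ_WORKQ) {
 *		// workq-specific handling
 *	}
 */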

/*
 * kqueue - common core definition of a kqueue
 *
 * No structures of this bare type are allocated. They are either
 * kqfile, kqworkq, or kqworkloop objects, each of which is derived
 * from this definition.
 */
struct kqueue {
	lck_spin_t kq_lock; /* kqueue lock */
	kq_state_t kq_state; /* state of the kq */
	uint16_t kq_level; /* nesting level of the kqfile */
	uint32_t kq_count; /* number of queued events */
	struct proc *kq_p; /* process containing kqueue */
	struct knote_locks kq_knlocks; /* list of knote locks held */
};

/*
 * kqfile - definition of a typical kqueue opened as a file descriptor
 *          via the kqueue() system call.
 *
 * Adds selinfo support to the base kqueue definition, as these
 * fds can be fed into select().
 */
struct kqfile {
	struct kqueue kqf_kqueue; /* common kqueue core */
	struct kqtailq kqf_queue; /* queue of woken up knotes */
	struct kqtailq kqf_suppressed; /* suppression queue */
	struct selinfo kqf_sel; /* parent select/kqueue info */
#define kqf_lock kqf_kqueue.kq_lock
#define kqf_state kqf_kqueue.kq_state
#define kqf_level kqf_kqueue.kq_level
#define kqf_count kqf_kqueue.kq_count
#define kqf_p kqf_kqueue.kq_p
};
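
/*
 * Example (illustrative): because the common core is the first member, a
 * kqfile is passed around as a struct kqueue, and the kqf_* shorthands
 * above resolve to the embedded core's fields:
 *
 *	struct kqfile *kqf = ...;
 *	struct kqueue *kq = &kqf->kqf_kqueue;     // upcast to the common core
 *	assert(&kqf->kqf_state == &kq->kq_state); // same storage
 */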

#define QOS_INDEX_KQFILE 0 /* number of qos levels in a file kq */

/*
 * WorkQ kqueues need to request threads to service the triggered
 * knotes in the queue. These threads are brought up on an
 * effective-requested-QoS basis. Knotes are segregated based on
 * that value - calculated by computing max(event-QoS, kevent-QoS).
 * Only one servicing thread is requested at a time for all the
 * knotes at a given effective-requested-QoS.
 */

#if !defined(KQWQ_QOS_MANAGER)
#define KQWQ_QOS_MANAGER (THREAD_QOS_LAST)
#endif

#if !defined(KQWQ_NBUCKETS)
#define KQWQ_NBUCKETS (KQWQ_QOS_MANAGER)
#endif
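
/*
 * Sketch (an assumption about how the buckets are used, not code from this
 * header): the effective-requested QoS selects the bucket, computed as
 * max(event QoS, kevent QoS); the manager bucket is the last one.
 *
 *	thread_qos_t eff = MAX(event_qos, kevent_qos);
 *	struct kqtailq *q = &kqwq->kqwq_queue[eff - 1]; // hypothetical indexing
 */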

/*
 * kqworkq - definition of a private kqueue used to coordinate event
 *           handling for pthread work queues.
 *
 * These have per-qos processing queues and state to coordinate with
 * the pthread kext to ask for threads at corresponding pthread priority
 * values.
 */
struct kqworkq {
	struct kqueue kqwq_kqueue;
	struct kqtailq kqwq_queue[KQWQ_NBUCKETS]; /* array of queues */
	struct kqtailq kqwq_suppressed[KQWQ_NBUCKETS]; /* per-QoS suppression queues */
	workq_threadreq_s kqwq_request[KQWQ_NBUCKETS]; /* per-QoS request states */
};

#define kqwq_lock kqwq_kqueue.kq_lock
#define kqwq_state kqwq_kqueue.kq_state
#define kqwq_waitq_hook kqwq_kqueue.kq_waitq_hook
#define kqwq_count kqwq_kqueue.kq_count
#define kqwq_p kqwq_kqueue.kq_p
/*
 * WorkLoop kqueues need to request a thread to service the triggered
 * knotes in the queue. The thread is brought up on an
 * effective-requested-QoS basis. Knotes are segregated based on
 * that value. Once a request is made, it cannot be undone. If
 * events with higher QoS arrive afterwards, they are stored in their
 * own queues and an override is applied to the original request based
 * on the delta between the two QoS values.
 */

#if !defined(KQWL_NBUCKETS)
#define KQWL_NBUCKETS (THREAD_QOS_LAST - 1)
#endif
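
/*
 * Sketch (an assumption, not code from this header): once the request is
 * outstanding at some QoS and a higher-QoS event arrives, the request is
 * not reissued; an override covering the difference is applied instead.
 *
 *	if (event_qos > requested_qos) {
 *		// hypothetical helper; the real override plumbing lives in
 *		// the workqueue subsystem
 *		workq_threadreq_apply_override(req, event_qos);
 *	}
 */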

/*
 * kqworkloop - definition of a private kqueue used to coordinate event
 *              handling for pthread workloops.
 *
 * Workloops vary from workqs in that only a single thread is ever
 * requested to service a workloop at a time. But unlike workqs,
 * workloops may be "owned" by user-space threads that are
 * synchronously draining an event off the workloop. In those cases,
 * any overrides have to be applied to the owner until it relinquishes
 * ownership.
 *
 * NOTE: "lane" support is TBD.
 */

#if CONFIG_PREADOPT_TG_DEBUG
__options_decl(kqwl_preadopt_tg_op_t, uint8_t, {
	KQWL_PREADOPT_OP_SERVICER_BIND = 0x01,
	KQWL_PREADOPT_OP_SERVICER_REBIND = 0x02,
	KQWL_PREADOPT_OP_SERVICER_UNBIND = 0x03,
	KQWL_PREADOPT_OP_INCOMING_IPC = 0x04,
});
#endif

#if CONFIG_PREADOPT_TG
/*
 * This typedef exists to distinguish when there is a thread_qos_t embedded
 * in the last 3 bits inside the pointer.
 */
typedef struct thread_group *thread_group_qos_t;

/*
 * The possible states for kqwl_preadopt_tg:
 *
 * 1) A valid thread group with a QoS masked into the last 3 bits. This is
 *    used today by the sync IPC thread group preadoption path, with a max
 *    QoS < THREAD_QOS_LAST.
 * 2) A known constant value (enumerated below). For these known constant
 *    values, no QoS is merged into them.
 * 3) Permanently associated with a thread group from a work interval that
 *    this kqwl is configured with. The QoS masked into the last 3 bits will
 *    be THREAD_QOS_LAST to uniquely identify it from (1). See
 *    KQWL_HAS_PERMANENT_PREADOPTED_TG.
 *
 * @const KQWL_PREADOPTED_TG_NULL
 * NULL implies that the kqwl is capable of preadopting a thread group but
 * has not yet been given such a thread group to preadopt.
 * @const KQWL_PREADOPTED_TG_SENTINEL
 * SENTINEL is set when the kqwl is no longer capable of preadopting a thread
 * group because it has bound to a servicer - the reference on the thread
 * group is passed to the servicer.
 * @const KQWL_PREADOPTED_TG_PROCESSED
 * PROCESSED is set when the kqwl's servicer has processed and preadopted the
 * thread group of the first EVFILT_MACHPORT knote that it is going to deliver
 * to userspace.
 * @const KQWL_PREADOPTED_TG_NEVER
 * NEVER is set when the kqwl is not capable of preadopting a thread
 * group because it is an app.
 */

#define KQWL_PREADOPTED_TG_NULL ((struct thread_group *) 0)
#define KQWL_PREADOPTED_TG_SENTINEL ((struct thread_group *) -1)
#define KQWL_PREADOPTED_TG_PROCESSED ((struct thread_group *) -2)
#define KQWL_PREADOPTED_TG_NEVER ((struct thread_group *) -3)

#define KQWL_ENCODE_PREADOPTED_TG_QOS(tg, qos) \
	((struct thread_group *) ((uintptr_t) (tg) | (uintptr_t) (qos)))

#define KQWL_PREADOPT_TG_MASK ~((uint64_t) THREAD_QOS_LAST)
#define KQWL_GET_PREADOPTED_TG(tg) \
	(struct thread_group *)(((uintptr_t) tg) & KQWL_PREADOPT_TG_MASK)

#define KQWL_PREADOPT_TG_QOS_MASK ((uint64_t) THREAD_QOS_LAST)
#define KQWL_GET_PREADOPTED_TG_QOS(tg) \
	(thread_qos_t) (((uintptr_t) tg) & KQWL_PREADOPT_TG_QOS_MASK)

#define KQWL_HAS_VALID_PREADOPTED_TG(tg) \
	((tg != KQWL_PREADOPTED_TG_NULL) && \
	(tg != KQWL_PREADOPTED_TG_SENTINEL) && \
	(tg != KQWL_PREADOPTED_TG_NEVER) && \
	(tg != KQWL_PREADOPTED_TG_PROCESSED) && \
	(KQWL_GET_PREADOPTED_TG(tg) != NULL))

/*
 * The preadopt thread group on a kqwl can be permanently configured when the
 * kqwl is created, in which case it does not change over the course of the
 * kqwl's lifetime. Such a kqwl does not participate in thread group
 * preadoption for incoming sync IPCs. Today, this happens for kqwls
 * configured with os workgroups.
 */
#define KQWL_ENCODE_PERMANENT_PREADOPTED_TG(tg) \
	KQWL_ENCODE_PREADOPTED_TG_QOS(tg, THREAD_QOS_LAST)

#define KQWL_HAS_PERMANENT_PREADOPTED_TG(tg) \
	(KQWL_HAS_VALID_PREADOPTED_TG(tg) && \
	(KQWL_GET_PREADOPTED_TG_QOS(tg) == THREAD_QOS_LAST))

#define KQWL_CAN_ADOPT_PREADOPT_TG(tg) \
	((tg != KQWL_PREADOPTED_TG_SENTINEL) && \
	(tg != KQWL_PREADOPTED_TG_NEVER) && \
	(tg != KQWL_PREADOPTED_TG_PROCESSED) && \
	(!KQWL_HAS_PERMANENT_PREADOPTED_TG(tg)))
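
/*
 * Example (illustrative round trip through the macros above): a thread
 * group pointer and a QoS are packed into a single word and recovered
 * independently; `tg`/`qos` stand for any valid values.
 *
 *	thread_group_qos_t packed = KQWL_ENCODE_PREADOPTED_TG_QOS(tg, qos);
 *	struct thread_group *tg2 = KQWL_GET_PREADOPTED_TG(packed);
 *	thread_qos_t qos2 = KQWL_GET_PREADOPTED_TG_QOS(packed);
 *	// tg2 == tg and qos2 == qos, provided tg is aligned enough that
 *	// its low 3 bits are clear
 */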

struct thread_group *
kqr_preadopt_thread_group(workq_threadreq_t req);

_Atomic(struct thread_group *) *
kqr_preadopt_thread_group_addr(workq_threadreq_t req);

#endif /* CONFIG_PREADOPT_TG */

struct kqworkloop {
	struct kqueue kqwl_kqueue; /* queue of events */
	struct kqtailq kqwl_queue[KQWL_NBUCKETS]; /* array of queues */
	struct kqtailq kqwl_suppressed; /* suppression queue */
	workq_threadreq_s kqwl_request; /* thread request state */
#if CONFIG_PREADOPT_TG
	_Atomic thread_group_qos_t kqwl_preadopt_tg;
#endif

	lck_spin_t kqwl_statelock; /* state/debounce lock */
	thread_t kqwl_owner; /* current [sync] owner thread */
	os_ref_atomic_t kqwl_retains; /* retain references */
	thread_qos_t kqwl_wakeup_qos; /* QoS/override that woke the kq */
	_Atomic uint8_t kqwl_iotier_override; /* iotier override */

#if CONFIG_PREADOPT_TG
	/*
	 * The kqwl_preadopt_tg_needs_redrive bit coordinates which thread is
	 * going to push information about modifications to the preadoption
	 * thread group on the kqwl down to the workqueue subsystem. This
	 * coordination is needed because the preadoption thread group is set
	 * on the kqwl in the filter call without the kqlock held.
	 *
	 * If another thread is holding the kqlock at that time and observes
	 * the write to the preadoption thread group and the need for a
	 * redrive request, that thread takes the responsibility of pushing
	 * that information down to the workqueue subsystem, thereby acking
	 * the request.
	 *
	 * Otherwise, the original thread which modified the kqwl will do so
	 * when it gets the kqlock.
	 *
	 * Note: only a single bit is required here, but 2 bytes would
	 * otherwise be wasted to struct packing, so a separate atomic field
	 * is used. Only the bottom bit is in use; the remaining bits can be
	 * reused for other purposes.
	 */
#define KQWL_PREADOPT_TG_NEEDS_REDRIVE (uint16_t) 0x1
#define KQWL_PREADOPT_TG_CLEAR_REDRIVE (uint16_t) 0x0
	_Atomic uint16_t kqwl_preadopt_tg_needs_redrive;
#endif
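	/*
	 * Illustrative sketch (not from this header): the flag is meant to
	 * be set and consumed with atomic operations, e.g.:
	 *
	 *	os_atomic_store(&kqwl->kqwl_preadopt_tg_needs_redrive,
	 *	    KQWL_PREADOPT_TG_NEEDS_REDRIVE, release);
	 *	...
	 *	if (os_atomic_xchg(&kqwl->kqwl_preadopt_tg_needs_redrive,
	 *	    KQWL_PREADOPT_TG_CLEAR_REDRIVE, relaxed) ==
	 *	    KQWL_PREADOPT_TG_NEEDS_REDRIVE) {
	 *		// this thread consumed the redrive and pushes the
	 *		// update down to the workqueue subsystem
	 *	}
	 */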

#if CONFIG_PREADOPT_TG_DEBUG
	/* Tracks the history of events that happened to the kqworkloop with respect to tg preadoption */
#define KQWL_PREADOPT_TG_HISTORY_COUNT 32
#define KQWL_PREADOPT_TG_HISTORY_WRITE_ENTRY(kqwl, ...) ({\
	struct kqworkloop *__kqwl = (kqwl); \
	unsigned int __index = os_atomic_inc_orig(&__kqwl->kqwl_preadopt_tg_history_index, relaxed); \
	struct kqwl_preadopt_tg _preadopt_tg = { mach_approximate_time(), __VA_ARGS__ }; \
	__kqwl->kqwl_preadopt_tg_history[__index % KQWL_PREADOPT_TG_HISTORY_COUNT] = \
	    (struct kqwl_preadopt_tg) _preadopt_tg; \
})

	struct kqwl_preadopt_tg {
		uint64_t time;
		kqwl_preadopt_tg_op_t op;
		struct thread_group *old_preadopt_tg;
		struct thread_group *new_preadopt_tg;
	} kqwl_preadopt_tg_history[KQWL_PREADOPT_TG_HISTORY_COUNT];
	unsigned int kqwl_preadopt_tg_history_index;
#else
#define KQWL_PREADOPT_TG_HISTORY_WRITE_ENTRY(kqwl, ...)
#endif /* CONFIG_PREADOPT_TG_DEBUG */

	struct turnstile *kqwl_turnstile; /* turnstile for sync IPC/waiters */
	kqueue_id_t kqwl_dynamicid; /* dynamic identity */
	uint64_t kqwl_params; /* additional parameters */
	LIST_ENTRY(kqworkloop) kqwl_hashlink; /* linkage for search list */
#if CONFIG_WORKLOOP_DEBUG
#define KQWL_HISTORY_COUNT 32
#define KQWL_HISTORY_WRITE_ENTRY(kqwl, ...) ({ \
	struct kqworkloop *__kqwl = (kqwl); \
	unsigned int __index = os_atomic_inc_orig(&__kqwl->kqwl_index, relaxed); \
	__kqwl->kqwl_history[__index % KQWL_HISTORY_COUNT] = \
	    (struct kqwl_history)__VA_ARGS__; \
})
	struct kqwl_history {
		thread_t updater; /* Note: updates can be reordered */
		thread_t servicer;
		thread_t old_owner;
		thread_t new_owner;

		uint64_t kev_ident;
		int16_t error;
		uint16_t kev_flags;
		uint32_t kev_fflags;

		uint64_t kev_mask;
		uint64_t kev_value;
		uint64_t in_value;
	} kqwl_history[KQWL_HISTORY_COUNT];
	unsigned int kqwl_index;
#endif // CONFIG_WORKLOOP_DEBUG
};
LIST_HEAD(kqwllist, kqworkloop);

typedef union {
	struct kqueue *kq;
	struct kqworkq *kqwq;
	struct kqfile *kqf;
	struct kqworkloop *kqwl;
} __attribute__((transparent_union)) kqueue_t;
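
/*
 * Example (illustrative): thanks to the transparent union, a function
 * taking kqueue_t accepts any of the four pointer types without a cast.
 * The function name below is hypothetical.
 *
 *	extern void kqueue_example_retain(kqueue_t kqu);
 *
 *	struct kqworkloop *kqwl = ...;
 *	kqueue_example_retain(kqwl);    // passed as kqueue_t directly
 */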

#define kqwl_lock kqwl_kqueue.kq_lock
#define kqwl_state kqwl_kqueue.kq_state
#define kqwl_waitq_hook kqwl_kqueue.kq_waitq_hook
#define kqwl_count kqwl_kqueue.kq_count
#define kqwl_p kqwl_kqueue.kq_p

#define KQ_WORKLOOP_RETAINS_MAX UINT32_MAX

extern void kqueue_threadreq_unbind(struct proc *p, workq_threadreq_t);

// called with the kq req held
#define KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE 0x1
extern void kqueue_threadreq_bind(struct proc *p, workq_threadreq_t req,
    thread_t thread, unsigned int flags);

struct turnstile *kqueue_threadreq_get_turnstile(workq_threadreq_t kqr);

// called with the wq lock held
extern void
kqueue_threadreq_bind_prepost(struct proc *p, workq_threadreq_t req,
    struct uthread *uth);

// called with no lock held
extern void kqueue_threadreq_bind_commit(struct proc *p, thread_t thread);

extern void kqueue_threadreq_cancel(struct proc *p, workq_threadreq_t req);

// lock not held as kqwl_params is immutable after creation
extern workq_threadreq_param_t kqueue_threadreq_workloop_param(workq_threadreq_t req);
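
/*
 * Sketch of the two-phase bind (inferred from the lock comments above,
 * not code from this header): the prepost happens under the wq lock, and
 * the commit runs once that lock has been dropped.
 *
 *	// with the wq lock held
 *	kqueue_threadreq_bind_prepost(p, req, uth);
 *	// ... drop the wq lock ...
 *	kqueue_threadreq_bind_commit(p, thread);
 */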

extern struct kqueue *kqueue_alloc(struct proc *);
extern void kqueue_dealloc(struct kqueue *);
extern void kqworkq_dealloc(struct kqworkq *kqwq);

extern void knotes_dealloc(struct proc *);
extern void kqworkloops_dealloc(struct proc *);

extern int kevent_register(struct kqueue *, struct kevent_qos_s *,
    struct knote **);
extern int kqueue_scan(struct kqueue *, int flags,
    struct kevent_ctx_s *, kevent_callback_t);
extern int kqueue_stat(struct kqueue *, void *, int, proc_t);
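
/*
 * Sketch (illustrative, with a hypothetical callback): kqueue_scan()
 * hands each delivered event to a kevent_callback_t.
 *
 *	static int
 *	copyout_one_kevent(struct kevent_qos_s *kevp, struct kevent_ctx_s *kectx)
 *	{
 *		// copy *kevp out to user space, account for it in kectx, ...
 *		return 0;
 *	}
 *
 *	error = kqueue_scan(kq, flags, kectx, copyout_one_kevent);
 */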

extern void kevent_set_workq_quantum_expiry_user_tsd(proc_t p, thread_t t,
    uint64_t flags);

#endif /* XNU_KERNEL_PRIVATE */

#endif /* !_SYS_EVENTVAR_H_ */