/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/sys/eventvar.h,v 1.1.2.2 2000/07/18 21:49:12 jlemon Exp $
 */

#ifndef _SYS_EVENTVAR_H_
#define _SYS_EVENTVAR_H_

#include <sys/event.h>
#include <sys/select.h>
#include <kern/kern_types.h>
#include <kern/waitq.h>

#if defined(XNU_KERNEL_PRIVATE)

typedef int (*kevent_callback_t)(struct kevent_qos_s *, struct kevent_ctx_s *);

#include <stdint.h>
#include <kern/locks.h>
#include <mach/thread_policy.h>
#include <pthread/workqueue_internal.h>
#include <os/refcnt.h>
/*
 * Lock ordering:
 *
 * The kqueue locking order can follow a few different patterns:
 *
 * Standard file-based kqueues (from above):
 *     proc fd lock -> kq lock -> kq-waitq-set lock -> thread lock
 *
 * WorkQ/WorkLoop kqueues (from above):
 *     proc fd lock -> kq lock -> workq lock -> thread lock
 *
 * Whenever kqueues interact with source locks, they drop all of their own
 * locks in exchange for a use-reference on the knote used to synchronize
 * with the source code. When those sources post events from below, they
 * have the following lock hierarchy.
 *
 * Standard file-based kqueues (from below):
 *     XXX lock -> kq lock -> kq-waitq-set lock -> thread lock
 *
 * WorkQ/WorkLoop kqueues (from below):
 *     XXX lock -> kq lock -> workq lock -> thread lock
 */
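
/*
 * Illustrative sketch (not part of the original header) of the "from above"
 * ordering for a file-based kqueue: the proc fd lock is taken before the kq
 * lock. proc_fdlock()/proc_fdunlock() are the proc-layer helpers; taking the
 * kq lock is shown as a raw spinlock acquire purely for illustration.
 *
 *	proc_fdlock(p);                  // proc fd lock first
 *	lck_spin_lock(&kq->kq_lock);     // then the kq lock
 *	// ... then kq-waitq-set or workq lock, then thread lock ...
 *	lck_spin_unlock(&kq->kq_lock);
 *	proc_fdunlock(p);
 */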

#define KQEXTENT 256 /* linear growth by this amount */

struct knote_lock_ctx {
	struct knote *knlc_knote;
	thread_t knlc_thread;
	uintptr_t knlc_waiters;
	LIST_ENTRY(knote_lock_ctx) knlc_link;
#if DEBUG || DEVELOPMENT
#define KNOTE_LOCK_CTX_UNLOCKED 0
#define KNOTE_LOCK_CTX_LOCKED 1
#define KNOTE_LOCK_CTX_WAITING 2
	int knlc_state;
#endif
};
LIST_HEAD(knote_locks, knote_lock_ctx);
#if DEBUG || DEVELOPMENT
/*
 * KNOTE_LOCK_CTX(name) is a convenience macro to define a knote lock
 * context on the stack named `name`. In development kernels, it uses
 * tricks to make sure no locks are still held when exiting the C scope
 * that contains this context.
 */
static inline void
knote_lock_ctx_chk(struct knote_lock_ctx *knlc)
{
	/* evil hackery to make sure no one forgets to unlock */
	assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED);
}
#define KNOTE_LOCK_CTX(n) \
	struct knote_lock_ctx n __attribute__((cleanup(knote_lock_ctx_chk))); \
	n.knlc_state = KNOTE_LOCK_CTX_UNLOCKED
#else
#define KNOTE_LOCK_CTX(n) \
	struct knote_lock_ctx n
#endif
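
/*
 * Usage sketch for KNOTE_LOCK_CTX (illustrative, not from the original
 * header): the context lives on the stack for the enclosing scope. On
 * DEBUG/DEVELOPMENT kernels, the cleanup attribute asserts that the context
 * is back in the UNLOCKED state when the scope is left.
 *
 *	{
 *		KNOTE_LOCK_CTX(knlc);
 *		// lock the knote through &knlc, work on it, then unlock it
 *		// before the closing brace, or the DEBUG assertion fires
 *	}
 */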


__options_decl(kq_state_t, uint16_t, {
	KQ_SLEEP = 0x0002, /* thread is waiting for events */
	KQ_PROCWAIT = 0x0004, /* thread waiting for processing */
	KQ_KEV32 = 0x0008, /* kq is used with 32-bit events */
	KQ_KEV64 = 0x0010, /* kq is used with 64-bit events */
	KQ_KEV_QOS = 0x0020, /* kq events carry QoS info */
	KQ_WORKQ = 0x0040, /* KQ is bound to process workq */
	KQ_WORKLOOP = 0x0080, /* KQ is part of a workloop */
	KQ_PROCESSING = 0x0100, /* KQ is being processed */
	KQ_DRAIN = 0x0200, /* kq is draining */
	KQ_DYNAMIC = 0x0800, /* kqueue is dynamically managed */
	KQ_R2K_ARMED = 0x1000, /* ast notification armed */
	KQ_HAS_TURNSTILE = 0x2000, /* this kqueue has a turnstile */
});
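
/*
 * Example (illustrative): kq_state_t is a bit mask, so the kqueue variants
 * are told apart by testing flags on the common core, e.g.:
 *
 *	if (kq->kq_state & KQ_WORKLOOP) {
 *		struct kqworkloop *kqwl = (struct kqworkloop *)kq;
 *		// workloop-specific handling
 *	} else if (kq->kq_state & KQ_WORKQ) {
 *		// workq-specific handling
 *	}
 */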

/*
 * kqueue - common core definition of a kqueue
 *
 * No structures of this bare type are allocated. They are either
 * kqfile, kqworkq, or kqworkloop objects, each of which is derived
 * from this definition.
 */
struct kqueue {
	lck_spin_t kq_lock; /* kqueue lock */
	kq_state_t kq_state; /* state of the kq */
	uint16_t kq_level; /* nesting level of the kqfile */
	uint32_t kq_count; /* number of queued events */
	struct proc *kq_p; /* process containing kqueue */
	struct knote_locks kq_knlocks; /* list of knote locks held */
};

/*
 * kqfile - definition of a typical kqueue opened as a file descriptor
 *          via the kqueue() system call.
 *
 * Adds selinfo support to the base kqueue definition, as these
 * fds can be fed into select().
 */
struct kqfile {
	struct kqueue kqf_kqueue; /* common kqueue core */
	struct kqtailq kqf_queue; /* queue of woken up knotes */
	struct kqtailq kqf_suppressed; /* suppression queue */
	struct selinfo kqf_sel; /* parent select/kqueue info */
#define kqf_lock kqf_kqueue.kq_lock
#define kqf_state kqf_kqueue.kq_state
#define kqf_level kqf_kqueue.kq_level
#define kqf_count kqf_kqueue.kq_count
#define kqf_p kqf_kqueue.kq_p
};
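
/*
 * Example (illustrative): because the common core is the first member, a
 * kqfile is passed around as a struct kqueue, and the kqf_* shorthands
 * above resolve to the embedded core's fields:
 *
 *	struct kqfile *kqf = ...;
 *	struct kqueue *kq = &kqf->kqf_kqueue;     // upcast to the common core
 *	assert(&kqf->kqf_state == &kq->kq_state); // same storage
 */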

#define QOS_INDEX_KQFILE 0 /* number of qos levels in a file kq */

/*
 * WorkQ kqueues need to request threads to service the triggered
 * knotes in the queue. These threads are brought up on an
 * effective-requested-QoS basis. Knotes are segregated based on
 * that value - calculated by computing max(event-QoS, kevent-QoS).
 * Only one servicing thread is requested at a time for all the
 * knotes at a given effective-requested-QoS.
 */

#if !defined(KQWQ_QOS_MANAGER)
#define KQWQ_QOS_MANAGER (THREAD_QOS_LAST)
#endif

#if !defined(KQWQ_NBUCKETS)
#define KQWQ_NBUCKETS (KQWQ_QOS_MANAGER)
#endif
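
/*
 * Sketch (an assumption about how the buckets are used, not code from this
 * header): the effective-requested QoS selects the bucket, computed as
 * max(event QoS, kevent QoS); the manager bucket is the last one.
 *
 *	thread_qos_t eff = MAX(event_qos, kevent_qos);
 *	struct kqtailq *q = &kqwq->kqwq_queue[eff - 1]; // hypothetical indexing
 */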

/*
 * kqworkq - definition of a private kqueue used to coordinate event
 *           handling for pthread work queues.
 *
 * These have per-qos processing queues and state to coordinate with
 * the pthread kext to ask for threads at corresponding pthread priority
 * values.
 */
struct kqworkq {
	struct kqueue kqwq_kqueue;
	struct kqtailq kqwq_queue[KQWQ_NBUCKETS]; /* array of queues */
	struct kqtailq kqwq_suppressed[KQWQ_NBUCKETS]; /* per-QoS suppression queues */
	workq_threadreq_s kqwq_request[KQWQ_NBUCKETS]; /* per-QoS request states */
};

#define kqwq_lock kqwq_kqueue.kq_lock
#define kqwq_state kqwq_kqueue.kq_state
#define kqwq_waitq_hook kqwq_kqueue.kq_waitq_hook
#define kqwq_count kqwq_kqueue.kq_count
#define kqwq_p kqwq_kqueue.kq_p
/*
 * WorkLoop kqueues need to request a thread to service the triggered
 * knotes in the queue. The thread is brought up on an
 * effective-requested-QoS basis. Knotes are segregated based on
 * that value. Once a request is made, it cannot be undone. If
 * events with higher QoS arrive afterwards, they are stored in their
 * own queues and an override is applied to the original request based
 * on the delta between the two QoS values.
 */

#if !defined(KQWL_NBUCKETS)
#define KQWL_NBUCKETS (THREAD_QOS_LAST - 1)
#endif
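
/*
 * Sketch (an assumption, not code from this header): once the request is
 * outstanding at some QoS and a higher-QoS event arrives, the request is
 * not reissued; an override covering the difference is applied instead.
 *
 *	if (event_qos > requested_qos) {
 *		// hypothetical helper; the real override plumbing lives in
 *		// the workqueue subsystem
 *		workq_threadreq_apply_override(req, event_qos);
 *	}
 */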

/*
 * kqworkloop - definition of a private kqueue used to coordinate event
 *              handling for pthread workloops.
 *
 * Workloops vary from workqs in that only a single thread is ever
 * requested to service a workloop at a time. But unlike workqs,
 * workloops may be "owned" by user-space threads that are
 * synchronously draining an event off the workloop. In those cases,
 * any overrides have to be applied to the owner until it relinquishes
 * ownership.
 *
 * NOTE: "lane" support is TBD.
 */

#if CONFIG_PREADOPT_TG_DEBUG
__options_decl(kqwl_preadopt_tg_op_t, uint8_t, {
	KQWL_PREADOPT_OP_SERVICER_BIND = 0x01,
	KQWL_PREADOPT_OP_SERVICER_REBIND = 0x02,
	KQWL_PREADOPT_OP_SERVICER_UNBIND = 0x03,
	KQWL_PREADOPT_OP_INCOMING_IPC = 0x04,
});
#endif

#if CONFIG_PREADOPT_TG
/*
 * This typedef exists to distinguish when there is a thread_qos_t embedded
 * in the last 3 bits inside the pointer.
 */
typedef struct thread_group *thread_group_qos_t;

/*
 * The possible states for kqwl_preadopt_tg:
 *
 * 1) A valid thread group with a QoS masked into the last 3 bits. This is
 *    used today by the sync IPC thread group preadoption path, with a max
 *    QoS < THREAD_QOS_LAST.
 * 2) A known constant value (enumerated below). For these known constant
 *    values, no QoS is merged into them.
 * 3) Permanently associated with a thread group from a work interval that
 *    this kqwl is configured with. The QoS masked into the last 3 bits will
 *    be THREAD_QOS_LAST to uniquely identify it from (1). See
 *    KQWL_HAS_PERMANENT_PREADOPTED_TG.
 *
 * @const KQWL_PREADOPTED_TG_NULL
 * NULL implies that the kqwl is capable of preadopting a thread group but
 * has not yet been given such a thread group to preadopt.
 * @const KQWL_PREADOPTED_TG_SENTINEL
 * SENTINEL is set when the kqwl is no longer capable of preadopting a thread
 * group because it has bound to a servicer - the reference on the thread
 * group is passed to the servicer.
 * @const KQWL_PREADOPTED_TG_PROCESSED
 * PROCESSED is set when the kqwl's servicer has processed and preadopted the
 * thread group of the first EVFILT_MACHPORT knote that it is going to deliver
 * to userspace.
 * @const KQWL_PREADOPTED_TG_NEVER
 * NEVER is set when the kqwl is not capable of preadopting a thread
 * group because it is an app.
 */

#define KQWL_PREADOPTED_TG_NULL ((struct thread_group *) 0)
#define KQWL_PREADOPTED_TG_SENTINEL ((struct thread_group *) -1)
#define KQWL_PREADOPTED_TG_PROCESSED ((struct thread_group *) -2)
#define KQWL_PREADOPTED_TG_NEVER ((struct thread_group *) -3)

#define KQWL_ENCODE_PREADOPTED_TG_QOS(tg, qos) \
	((struct thread_group *) ((uintptr_t) (tg) | (uintptr_t) (qos)))

#define KQWL_PREADOPT_TG_MASK ~((uint64_t) THREAD_QOS_LAST)
#define KQWL_GET_PREADOPTED_TG(tg) \
	(struct thread_group *)(((uintptr_t) tg) & KQWL_PREADOPT_TG_MASK)

#define KQWL_PREADOPT_TG_QOS_MASK ((uint64_t) THREAD_QOS_LAST)
#define KQWL_GET_PREADOPTED_TG_QOS(tg) \
	(thread_qos_t) (((uintptr_t) tg) & KQWL_PREADOPT_TG_QOS_MASK)

#define KQWL_HAS_VALID_PREADOPTED_TG(tg) \
	((tg != KQWL_PREADOPTED_TG_NULL) && \
	(tg != KQWL_PREADOPTED_TG_SENTINEL) && \
	(tg != KQWL_PREADOPTED_TG_NEVER) && \
	(tg != KQWL_PREADOPTED_TG_PROCESSED) && \
	(KQWL_GET_PREADOPTED_TG(tg) != NULL))

/*
 * The preadopt thread group on a kqwl can be permanently configured when the
 * kqwl is created, in which case it does not change over the course of the
 * kqwl's lifetime. Such a kqwl does not participate in thread group
 * preadoption for incoming sync IPCs. Today, this happens for kqwls
 * configured with os workgroups.
 */
#define KQWL_ENCODE_PERMANENT_PREADOPTED_TG(tg) \
	KQWL_ENCODE_PREADOPTED_TG_QOS(tg, THREAD_QOS_LAST)

#define KQWL_HAS_PERMANENT_PREADOPTED_TG(tg) \
	(KQWL_HAS_VALID_PREADOPTED_TG(tg) && \
	(KQWL_GET_PREADOPTED_TG_QOS(tg) == THREAD_QOS_LAST))

#define KQWL_CAN_ADOPT_PREADOPT_TG(tg) \
	((tg != KQWL_PREADOPTED_TG_SENTINEL) && \
	(tg != KQWL_PREADOPTED_TG_NEVER) && \
	(tg != KQWL_PREADOPTED_TG_PROCESSED) && \
	(!KQWL_HAS_PERMANENT_PREADOPTED_TG(tg)))
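
/*
 * Example (illustrative round trip through the macros above): a thread
 * group pointer and a QoS are packed into a single word and recovered
 * independently; `tg`/`qos` stand for any valid values.
 *
 *	thread_group_qos_t packed = KQWL_ENCODE_PREADOPTED_TG_QOS(tg, qos);
 *	struct thread_group *tg2 = KQWL_GET_PREADOPTED_TG(packed);
 *	thread_qos_t qos2 = KQWL_GET_PREADOPTED_TG_QOS(packed);
 *	// tg2 == tg and qos2 == qos, provided tg is aligned enough that
 *	// its low 3 bits are clear
 */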

struct thread_group *
kqr_preadopt_thread_group(workq_threadreq_t req);

_Atomic(struct thread_group *) *
kqr_preadopt_thread_group_addr(workq_threadreq_t req);

#endif /* CONFIG_PREADOPT_TG */

struct kqworkloop {
	struct kqueue kqwl_kqueue; /* queue of events */
	struct kqtailq kqwl_queue[KQWL_NBUCKETS]; /* array of queues */
	struct kqtailq kqwl_suppressed; /* suppression queue */
	workq_threadreq_s kqwl_request; /* thread request state */
#if CONFIG_PREADOPT_TG
	_Atomic thread_group_qos_t kqwl_preadopt_tg;
#endif

	lck_spin_t kqwl_statelock; /* state/debounce lock */
	thread_t kqwl_owner; /* current [sync] owner thread */
	os_ref_atomic_t kqwl_retains; /* retain references */
	thread_qos_t kqwl_wakeup_qos; /* QoS/override that woke the kq */
	_Atomic uint8_t kqwl_iotier_override; /* iotier override */

#if CONFIG_PREADOPT_TG
	/*
	 * The kqwl_preadopt_tg_needs_redrive bit coordinates which thread is
	 * going to push information about modifications to the preadoption
	 * thread group on the kqwl down to the workqueue subsystem. This
	 * coordination is needed because the preadoption thread group is set
	 * on the kqwl in the filter call without the kqlock held.
	 *
	 * If another thread is holding the kqlock at that time and observes
	 * the write to the preadoption thread group and the need for a
	 * redrive request, that thread takes the responsibility of pushing
	 * that information down to the workqueue subsystem, thereby acking
	 * the request.
	 *
	 * Otherwise, the original thread which modified the kqwl will do so
	 * when it gets the kqlock.
	 *
	 * Note: only a single bit is required here, but 2 bytes would
	 * otherwise be wasted to struct packing, so a separate atomic field
	 * is used. Only the bottom bit is in use; the remaining bits can be
	 * reused for other purposes.
	 */
#define KQWL_PREADOPT_TG_NEEDS_REDRIVE (uint16_t) 0x1
#define KQWL_PREADOPT_TG_CLEAR_REDRIVE (uint16_t) 0x0
	_Atomic uint16_t kqwl_preadopt_tg_needs_redrive;
#endif
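	/*
	 * Illustrative sketch (not from this header): the flag is meant to
	 * be set and consumed with atomic operations, e.g.:
	 *
	 *	os_atomic_store(&kqwl->kqwl_preadopt_tg_needs_redrive,
	 *	    KQWL_PREADOPT_TG_NEEDS_REDRIVE, release);
	 *	...
	 *	if (os_atomic_xchg(&kqwl->kqwl_preadopt_tg_needs_redrive,
	 *	    KQWL_PREADOPT_TG_CLEAR_REDRIVE, relaxed) ==
	 *	    KQWL_PREADOPT_TG_NEEDS_REDRIVE) {
	 *		// this thread consumed the redrive and pushes the
	 *		// update down to the workqueue subsystem
	 *	}
	 */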

#if CONFIG_PREADOPT_TG_DEBUG
	/* Tracks the history of events that happened to the kqworkloop with respect to tg preadoption */
#define KQWL_PREADOPT_TG_HISTORY_COUNT 32
#define KQWL_PREADOPT_TG_HISTORY_WRITE_ENTRY(kqwl, ...) ({\
	struct kqworkloop *__kqwl = (kqwl); \
	unsigned int __index = os_atomic_inc_orig(&__kqwl->kqwl_preadopt_tg_history_index, relaxed); \
	struct kqwl_preadopt_tg _preadopt_tg = { mach_approximate_time(), __VA_ARGS__ }; \
	__kqwl->kqwl_preadopt_tg_history[__index % KQWL_PREADOPT_TG_HISTORY_COUNT] = \
	    (struct kqwl_preadopt_tg) _preadopt_tg; \
})

	struct kqwl_preadopt_tg {
		uint64_t time;
		kqwl_preadopt_tg_op_t op;
		struct thread_group *old_preadopt_tg;
		struct thread_group *new_preadopt_tg;
	} kqwl_preadopt_tg_history[KQWL_PREADOPT_TG_HISTORY_COUNT];
	unsigned int kqwl_preadopt_tg_history_index;
#else
#define KQWL_PREADOPT_TG_HISTORY_WRITE_ENTRY(kqwl, ...)
#endif /* CONFIG_PREADOPT_TG_DEBUG */

	struct turnstile *kqwl_turnstile; /* turnstile for sync IPC/waiters */
	kqueue_id_t kqwl_dynamicid; /* dynamic identity */
	uint64_t kqwl_params; /* additional parameters */
	LIST_ENTRY(kqworkloop) kqwl_hashlink; /* linkage for search list */
#if CONFIG_WORKLOOP_DEBUG
#define KQWL_HISTORY_COUNT 32
#define KQWL_HISTORY_WRITE_ENTRY(kqwl, ...) ({ \
	struct kqworkloop *__kqwl = (kqwl); \
	unsigned int __index = os_atomic_inc_orig(&__kqwl->kqwl_index, relaxed); \
	__kqwl->kqwl_history[__index % KQWL_HISTORY_COUNT] = \
	    (struct kqwl_history)__VA_ARGS__; \
})
	struct kqwl_history {
		thread_t updater; /* Note: updates can be reordered */
		thread_t servicer;
		thread_t old_owner;
		thread_t new_owner;

		uint64_t kev_ident;
		int16_t error;
		uint16_t kev_flags;
		uint32_t kev_fflags;

		uint64_t kev_mask;
		uint64_t kev_value;
		uint64_t in_value;
	} kqwl_history[KQWL_HISTORY_COUNT];
	unsigned int kqwl_index;
#endif // CONFIG_WORKLOOP_DEBUG
};
LIST_HEAD(kqwllist, kqworkloop);

typedef union {
	struct kqueue *kq;
	struct kqworkq *kqwq;
	struct kqfile *kqf;
	struct kqworkloop *kqwl;
} __attribute__((transparent_union)) kqueue_t;
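
/*
 * Example (illustrative): thanks to the transparent union, a function
 * taking kqueue_t accepts any of the four pointer types without a cast.
 * The function name below is hypothetical.
 *
 *	extern void kqueue_example_retain(kqueue_t kqu);
 *
 *	struct kqworkloop *kqwl = ...;
 *	kqueue_example_retain(kqwl);    // passed as kqueue_t directly
 */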

#define kqwl_lock kqwl_kqueue.kq_lock
#define kqwl_state kqwl_kqueue.kq_state
#define kqwl_waitq_hook kqwl_kqueue.kq_waitq_hook
#define kqwl_count kqwl_kqueue.kq_count
#define kqwl_p kqwl_kqueue.kq_p

#define KQ_WORKLOOP_RETAINS_MAX UINT32_MAX

extern void kqueue_threadreq_unbind(struct proc *p, workq_threadreq_t);

// called with the kq req held
#define KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE 0x1
extern void kqueue_threadreq_bind(struct proc *p, workq_threadreq_t req,
    thread_t thread, unsigned int flags);

struct turnstile *kqueue_threadreq_get_turnstile(workq_threadreq_t kqr);

// called with the wq lock held
extern void
kqueue_threadreq_bind_prepost(struct proc *p, workq_threadreq_t req,
    struct uthread *uth);

// called with no lock held
extern void kqueue_threadreq_bind_commit(struct proc *p, thread_t thread);

extern void kqueue_threadreq_cancel(struct proc *p, workq_threadreq_t req);

// lock not held as kqwl_params is immutable after creation
extern workq_threadreq_param_t kqueue_threadreq_workloop_param(workq_threadreq_t req);
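
/*
 * Sketch of the two-phase bind (inferred from the lock comments above,
 * not code from this header): the prepost happens under the wq lock, and
 * the commit runs once that lock has been dropped.
 *
 *	// with the wq lock held
 *	kqueue_threadreq_bind_prepost(p, req, uth);
 *	// ... drop the wq lock ...
 *	kqueue_threadreq_bind_commit(p, thread);
 */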

extern struct kqueue *kqueue_alloc(struct proc *);
extern void kqueue_dealloc(struct kqueue *);
extern void kqworkq_dealloc(struct kqworkq *kqwq);

extern void knotes_dealloc(struct proc *);
extern void kqworkloops_dealloc(struct proc *);

extern int kevent_register(struct kqueue *, struct kevent_qos_s *,
    struct knote **);
extern int kqueue_scan(struct kqueue *, int flags,
    struct kevent_ctx_s *, kevent_callback_t);
extern int kqueue_stat(struct kqueue *, void *, int, proc_t);
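
/*
 * Sketch (illustrative, with a hypothetical callback): kqueue_scan()
 * hands each delivered event to a kevent_callback_t.
 *
 *	static int
 *	copyout_one_kevent(struct kevent_qos_s *kevp, struct kevent_ctx_s *kectx)
 *	{
 *		// copy *kevp out to user space, account for it in kectx, ...
 *		return 0;
 *	}
 *
 *	error = kqueue_scan(kq, flags, kectx, copyout_one_kevent);
 */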

extern void kevent_set_workq_quantum_expiry_user_tsd(proc_t p, thread_t t,
    uint64_t flags);

#endif /* XNU_KERNEL_PRIVATE */

#endif /* !_SYS_EVENTVAR_H_ */