/*
 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <stdarg.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/event.h> // for kqueue related stuff
#include <sys/fsevents.h>

#if CONFIG_FSE
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/vnode_internal.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/uio.h>
#include <kern/kalloc.h>
#include <sys/dirent.h>
#include <sys/attr.h>
#include <sys/sysctl.h>
#include <sys/ubc.h>
#include <machine/cons.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/devfs/devfs.h>
#include <sys/filio.h>
#include <kern/locks.h>
#include <libkern/OSAtomic.h>
#include <kern/zalloc.h>
#include <mach/mach_time.h>
#include <kern/thread_call.h>
#include <kern/clock.h>
#include <IOKit/IOBSD.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <pexpert/pexpert.h>
#include <libkern/section_keywords.h>

typedef struct kfs_event {
	LIST_ENTRY(kfs_event) kevent_list;
	uint64_t abstime;   // when this event happened (mach_absolute_time())
	int16_t type;       // type code of this event
	uint16_t flags;     // per-event flags
	int32_t refcount;   // number of clients referencing this
	pid_t pid;
	int32_t spare;

	union {
		struct regular_event {
			// This must match the layout of fse_info
			// exactly, except that the "nlink" field is
			// not included here.  See copy_out_kfse()
			// for all of the sordid details, and also
			// the _Static_assert() statements below.
			ino64_t ino;
			dev_t dev;
			int32_t mode;
			uid_t uid;
			uint32_t document_id;
			struct kfs_event *dest;   // if this is a two-file op
			const char *str;
			uint16_t len;
		} regular_event;

		struct {
			ino64_t src_ino;
			ino64_t dst_ino;
			uint64_t docid;
			dev_t dev;
		} docid_event;

		struct {
			uint32_t version;
			dev_t dev;
			ino64_t ino;
			uint64_t origin_id;
			uint64_t age;
			uint32_t use_state;
			uint32_t urgency;
			uint64_t size;
		} activity_event;

		struct {
			audit_token_t audit_token;
			const char *str;
			uint16_t len;
		} access_granted_event;
	};
} kfs_event;

_Static_assert(offsetof(struct regular_event, ino) == offsetof(fse_info, ino),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, dev) == offsetof(fse_info, dev),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, mode) == offsetof(fse_info, mode),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, uid) == offsetof(fse_info, uid),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, document_id) == offsetof(fse_info, document_id),
    "kfs_event and fse_info out-of-sync");

#define KFSE_INFO_COPYSIZE      offsetof(fse_info, nlink)
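
/*
 * Illustrative sketch (not part of the driver logic): because struct
 * regular_event mirrors the leading fields of fse_info, copy_out_kfse()
 * can hand the whole payload to fill_buff() in one shot instead of
 * copying field by field, e.g.:
 *
 *   error = fill_buff(FSE_ARG_FINFO, KFSE_INFO_COPYSIZE,
 *       &kfse->regular_event, evbuff, &evbuff_idx, sizeof(evbuff), uio);
 *
 * KFSE_INFO_COPYSIZE stops just short of fse_info's "nlink" field, which
 * regular_event does not carry; the _Static_asserts above keep the two
 * layouts from drifting apart.
 */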

// flags for the flags field
#define KFSE_COMBINED_EVENTS            0x0001
#define KFSE_CONTAINS_DROPPED_EVENTS    0x0002
#define KFSE_ON_LIST                    0x0004
#define KFSE_BEING_CREATED              0x0008

LIST_HEAD(kfse_list, kfs_event) kfse_list_head = LIST_HEAD_INITIALIZER(x);
int num_events_outstanding = 0;
int num_pending_rename = 0;


struct fsevent_handle;

typedef struct fs_event_watcher {
	int8_t *event_list;            // the events we're interested in
	int32_t num_events;
	dev_t *devices_not_to_watch;   // report events from devices not in this list
	uint32_t num_devices;
	int32_t flags;
	kfs_event **event_queue;
	int32_t eventq_size;           // number of event pointers in queue
	int32_t num_readers;
	int32_t rd;                    // read index into the event_queue
	int32_t wr;                    // write index into the event_queue
	int32_t blockers;
	int32_t my_id;
	uint32_t num_dropped;
	uint64_t max_event_id;
	struct fsevent_handle *fseh;
	pid_t pid;
	char proc_name[(2 * MAXCOMLEN) + 1];
} fs_event_watcher;

// fs_event_watcher flags
#define WATCHER_DROPPED_EVENTS          0x0001
#define WATCHER_CLOSING                 0x0002
#define WATCHER_WANTS_COMPACT_EVENTS    0x0004
#define WATCHER_WANTS_EXTENDED_INFO     0x0008
#define WATCHER_APPLE_SYSTEM_SERVICE    0x0010   // fseventsd, coreservicesd, mds, revisiond

#define MAX_WATCHERS  8
static fs_event_watcher *watcher_table[MAX_WATCHERS];

#define DEFAULT_MAX_KFS_EVENTS  4096
static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;

// we allocate kfs_event structures out of this zone
static zone_t event_zone;
static int fs_event_init = 0;

//
// this array records whether anyone is interested in a
// particular type of event.  if no one is, we bail out
// early from the event delivery
//
static int16_t fs_event_type_watchers[FSE_MAX_EVENTS];

// the device currently being unmounted:
static dev_t fsevent_unmount_dev = 0;
// how many ACKs are still outstanding:
static int fsevent_unmount_ack_count = 0;

static int  watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse);
static void fsevents_wakeup(fs_event_watcher *watcher);

//
// Locks
//
static LCK_ATTR_DECLARE(fsevent_lock_attr, 0, 0);
static LCK_GRP_DECLARE(fsevent_mutex_group, "fsevent-mutex");
static LCK_GRP_DECLARE(fsevent_rw_group, "fsevent-rw");

static LCK_RW_DECLARE_ATTR(event_handling_lock, // handles locking for event manipulation and recycling
    &fsevent_rw_group, &fsevent_lock_attr);
static LCK_MTX_DECLARE_ATTR(watch_table_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);
static LCK_MTX_DECLARE_ATTR(event_buf_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);
static LCK_MTX_DECLARE_ATTR(event_writer_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);


/* Explicitly declare qsort so compiler doesn't complain */
__private_extern__ void qsort(
	void * array,
	size_t nmembers,
	size_t member_size,
	int (*)(const void *, const void *));

static int
is_ignored_directory(const char *path)
{
	if (!path) {
		return 0;
	}

#define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
	if (IS_TLD("/.Spotlight-V100/") ||
	    IS_TLD("/.MobileBackups/") ||
	    IS_TLD("/Backups.backupdb/")) {
		return 1;
	}
#undef IS_TLD

	return 0;
}
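
/*
 * Example (illustrative): a path such as
 * "/Volumes/Backup/Backups.backupdb/MyMac/2021-01-01/..." contains the
 * "/Backups.backupdb/" component checked above, so it is reported as an
 * ignored directory; the same applies to Spotlight index and mobile
 * backup trees.
 */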

static void
fsevents_internal_init(void)
{
	int i;

	if (fs_event_init++ != 0) {
		return;
	}

	for (i = 0; i < FSE_MAX_EVENTS; i++) {
		fs_event_type_watchers[i] = 0;
	}

	memset(watcher_table, 0, sizeof(watcher_table));

	PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));

	event_zone = zone_create_ext("fs-event-buf", sizeof(kfs_event),
	    ZC_NOGC | ZC_NOCALLOUT, ZONE_ID_ANY, ^(zone_t z) {
		// mark the zone as exhaustible so that it will not
		// ever grow beyond what we initially filled it with
		zone_set_exhaustible(z, max_kfs_events, /* exhausts */ true);
	});

	zone_fill_initially(event_zone, max_kfs_events);
}
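
/*
 * Note (summary of behavior implemented below): because the zone is
 * exhaustible and pre-filled with max_kfs_events entries, a
 * zalloc_noblock() in add_fsevent() can return NULL once every entry is
 * in flight.  When that happens, add_fsevent() flags all watchers with
 * WATCHER_DROPPED_EVENTS and returns ENOSPC rather than blocking.
 */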

static void
lock_watch_table(void)
{
	lck_mtx_lock(&watch_table_lock);
}

static void
unlock_watch_table(void)
{
	lck_mtx_unlock(&watch_table_lock);
}

static void
lock_fs_event_list(void)
{
	lck_mtx_lock(&event_buf_lock);
}

static void
unlock_fs_event_list(void)
{
	lck_mtx_unlock(&event_buf_lock);
}

// forward prototype
static void release_event_ref(kfs_event *kfse);

static boolean_t
watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
{
	unsigned int i;

	// if devices_not_to_watch is NULL then we care about all
	// events from all devices
	if (watcher->devices_not_to_watch == NULL) {
		return true;
	}

	for (i = 0; i < watcher->num_devices; i++) {
		if (dev == watcher->devices_not_to_watch[i]) {
			// found a match! that means we do not
			// want events from this device.
			return false;
		}
	}

	// if we're here it's not in the devices_not_to_watch[]
	// list so that means we do care about it
	return true;
}


int
need_fsevent(int type, vnode_t vp)
{
	if (type >= 0 && type < FSE_MAX_EVENTS && fs_event_type_watchers[type] == 0) {
		return 0;
	}

	// events in /dev aren't really interesting...
	if (vp->v_tag == VT_DEVFS) {
		return 0;
	}

	return 1;
}


#define is_throw_away(x)  ((x) == FSE_STAT_CHANGED || (x) == FSE_CONTENT_MODIFIED)


int num_dropped = 0;

static struct timeval last_print;

//
// These variables are used to track coalescing multiple identical
// events for the same vnode/pathname.  If we get the same event
// type and same vnode/pathname as the previous event, we just drop
// the event since it's superfluous.  This improves some micro-
// benchmarks considerably and actually has a real-world impact on
// tests like a Finder copy where multiple stat-changed events can
// get coalesced.
//
static int last_event_type = -1;
static void *last_ptr = NULL;
static char last_str[MAXPATHLEN];
static int last_nlen = 0;
static int last_vid = -1;
static uint64_t last_coalesced_time = 0;
static void *last_event_ptr = NULL;
static pid_t last_pid = -1;
int last_coalesced = 0;
static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 };
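
/*
 * Worked example (illustrative): if a process writes the same file in a
 * tight loop, the second and later FSE_CONTENT_MODIFIED events arrive
 * with the same type, the same vnode id, and the same pid within one
 * second of the previous event, so add_fsevent() drops them and only
 * bumps last_coalesced.  A different pid, a different vnode/path, or a
 * gap of more than a second starts a new event.
 */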

#define MAX_HARDLINK_NOTIFICATIONS 128

static inline void
kfse_init(kfs_event *kfse, int type, uint64_t time, proc_t p)
{
	memset(kfse, 0, sizeof(*kfse));
	kfse->refcount = 1;
	kfse->type = (int16_t)type;
	kfse->abstime = time;
	kfse->pid = proc_getpid(p);

	OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags);
}
375
376int
377add_fsevent(int type, vfs_context_t ctx, ...)
378{
379 struct proc *p = vfs_context_proc(ctx);
380 int i, arg_type, ret;
381 kfs_event *kfse, *kfse_dest = NULL, *cur;
382 fs_event_watcher *watcher;
383 va_list ap;
384 int error = 0, did_alloc = 0;
385 int64_t orig_linkcount = -1;
386 dev_t dev = 0;
387 uint64_t now, elapsed;
388 uint64_t orig_linkid = 0, next_linkid = 0;
389 uint64_t link_parentid = 0;
390 char *pathbuff = NULL, *path_override = NULL;
391 char *link_name = NULL;
392 vnode_t link_vp = NULL;
393 int pathbuff_len = 0;
394 uthread_t ut = get_bsdthread_info(current_thread());
395 bool do_all_links = true;
396 bool do_cache_reset = false;
397
398 if (type == FSE_CONTENT_MODIFIED_NO_HLINK) {
399 do_all_links = false;
400 type = FSE_CONTENT_MODIFIED;
401 }
402
403
404restart:
405 va_start(ap, ctx);
406
407 // ignore bogus event types..
408 if (type < 0 || type >= FSE_MAX_EVENTS) {
409 return EINVAL;
410 }
411
412 // if no one cares about this type of event, bail out
413 if (fs_event_type_watchers[type] == 0) {
414 va_end(ap);
415
416 return 0;
417 }
418
419 now = mach_absolute_time();
420
421 // find a free event and snag it for our use
422 // NOTE: do not do anything that would block until
423 // the lock is dropped.
424 lock_fs_event_list();
425
426 //
427 // check if this event is identical to the previous one...
428 // (as long as it's not an event type that can never be the
429 // same as a previous event)
430 //
431 if (path_override == NULL &&
432 type != FSE_CREATE_FILE &&
433 type != FSE_DELETE &&
434 type != FSE_RENAME &&
435 type != FSE_EXCHANGE &&
436 type != FSE_CHOWN &&
437 type != FSE_DOCID_CHANGED &&
438 type != FSE_DOCID_CREATED &&
439 type != FSE_CLONE &&
440 type != FSE_ACTIVITY &&
441 // don't coalesce FSE_ACCESS_GRANTED because it could
442 // have been granted to a different process.
443 type != FSE_ACCESS_GRANTED) {
444 void *ptr = NULL;
445 int vid = 0, was_str = 0, nlen = 0;
446
447 for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
448 switch (arg_type) {
449 case FSE_ARG_VNODE: {
450 ptr = va_arg(ap, void *);
451 vid = vnode_vid(vp: (struct vnode *)ptr);
452 last_str[0] = '\0';
453 break;
454 }
455 case FSE_ARG_STRING: {
456 nlen = va_arg(ap, int32_t);
457 ptr = va_arg(ap, void *);
458 was_str = 1;
459 break;
460 }
461 }
462 if (ptr != NULL) {
463 break;
464 }
465 }
466
467 if (sTimebaseInfo.denom == 0) {
468 (void) clock_timebase_info(info: &sTimebaseInfo);
469 }
470
471 elapsed = (now - last_coalesced_time);
472 if (sTimebaseInfo.denom != sTimebaseInfo.numer) {
473 if (sTimebaseInfo.denom == 1) {
474 elapsed *= sTimebaseInfo.numer;
475 } else {
476 // this could overflow... the worst that will happen is that we'll
477 // send (or not send) an extra event so I'm not going to worry about
478 // doing the math right like dtrace_abs_to_nano() does.
479 elapsed = (elapsed * sTimebaseInfo.numer) / (uint64_t)sTimebaseInfo.denom;
480 }
481 }
482
483 if (type == last_event_type
484 && (elapsed < 1000000000)
485 && (last_pid == proc_getpid(p))
486 &&
487 ((vid && vid == last_vid && last_ptr == ptr)
488 ||
489 (last_str[0] && last_nlen == nlen && ptr && strcmp(s1: last_str, s2: ptr) == 0))
490 ) {
491 last_coalesced++;
492 unlock_fs_event_list();
493 va_end(ap);
494
495 return 0;
496 } else {
497 last_ptr = ptr;
498 if (ptr && was_str) {
499 strlcpy(dst: last_str, src: ptr, n: sizeof(last_str));
500 }
501 last_nlen = nlen;
502 last_vid = vid;
503 last_event_type = type;
504 last_coalesced_time = now;
505 last_pid = proc_getpid(p);
506 }
507 }
508 va_start(ap, ctx);
509
510
511 kfse = zalloc_noblock(zone: event_zone);
512 if (kfse && (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE)) {
513 kfse_dest = zalloc_noblock(zone: event_zone);
514 if (kfse_dest == NULL) {
515 did_alloc = 1;
516 zfree(event_zone, kfse);
517 kfse = NULL;
518 }
519 }
520
521
522 if (kfse == NULL) { // yikes! no free events
523 unlock_fs_event_list();
524 lock_watch_table();
525
526 for (i = 0; i < MAX_WATCHERS; i++) {
527 watcher = watcher_table[i];
528 if (watcher == NULL) {
529 continue;
530 }
531
532 watcher->flags |= WATCHER_DROPPED_EVENTS;
533 fsevents_wakeup(watcher);
534 }
535 unlock_watch_table();
536
537 {
538 struct timeval current_tv;
539
540 num_dropped++;
541
			// only print a message at most once every 10 seconds
			microuptime(&current_tv);
			if ((current_tv.tv_sec - last_print.tv_sec) > 10) {
545 int ii;
546 void *junkptr = zalloc_noblock(zone: event_zone), *listhead = kfse_list_head.lh_first;
547
548 printf("add_fsevent: event queue is full! dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding);
549 printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename);
550 printf("add_fsevent: zalloc sez: %p\n", junkptr);
551 printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]);
552 lock_watch_table();
553 for (ii = 0; ii < MAX_WATCHERS; ii++) {
554 if (watcher_table[ii] == NULL) {
555 continue;
556 }
557
558 printf("add_fsevent: watcher %s %p: rd %4d wr %4d q_size %4d flags 0x%x\n",
559 watcher_table[ii]->proc_name,
560 watcher_table[ii],
561 watcher_table[ii]->rd, watcher_table[ii]->wr,
562 watcher_table[ii]->eventq_size, watcher_table[ii]->flags);
563 }
564 unlock_watch_table();
565
566 last_print = current_tv;
567 if (junkptr) {
568 zfree(event_zone, junkptr);
569 }
570 }
571 }
572
573 if (pathbuff) {
574 release_pathbuff(path: pathbuff);
575 pathbuff = NULL;
576 }
577 return ENOSPC;
578 }
579
580 kfse_init(kfse, type, time: now, p);
581 last_event_ptr = kfse;
582 if (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE) {
583 kfse_init(kfse: kfse_dest, type, time: now, p);
584 kfse->regular_event.dest = kfse_dest;
585 }
586
587 num_events_outstanding++;
588 if (kfse->type == FSE_RENAME) {
589 num_pending_rename++;
590 }
591 LIST_INSERT_HEAD(&kfse_list_head, kfse, kevent_list);
592 OSBitOrAtomic16(KFSE_ON_LIST, address: &kfse->flags);
593
	if (kfse->refcount < 1) {
		panic("add_fsevent: line %d: kfse refcount %d but should be at least 1", __LINE__, kfse->refcount);
	}
597
598 unlock_fs_event_list(); // at this point it's safe to unlock
599
600 //
601 // now process the arguments passed in and copy them into
602 // the kfse
603 //
604
605 cur = kfse;
606
607 if (type == FSE_DOCID_CREATED || type == FSE_DOCID_CHANGED) {
608 //
609 // These events are special and not like the other events.
610 // They only have a dev_t, src inode #, dest inode #, and
611 // a doc-id (va_arg'd to us in that order). If we don't
612 // get one of them, then the error-check filler will
613 // catch it.
614 //
615 do_all_links = false;
616 arg_type = va_arg(ap, int32_t);
617 if (arg_type == FSE_ARG_DEV) {
618 cur->docid_event.dev = (dev_t)(va_arg(ap, dev_t));
619 }
620
621 arg_type = va_arg(ap, int32_t);
622 if (arg_type == FSE_ARG_INO) {
623 cur->docid_event.src_ino =
624 (ino64_t)(va_arg(ap, ino64_t));
625 }
626
627 arg_type = va_arg(ap, int32_t);
628 if (arg_type == FSE_ARG_INO) {
629 cur->docid_event.dst_ino =
630 (ino64_t)(va_arg(ap, ino64_t));
631 }
632
633 arg_type = va_arg(ap, int32_t);
634 if (arg_type == FSE_ARG_INT32) {
635 cur->docid_event.docid =
636 (uint64_t)va_arg(ap, uint32_t);
637 } else if (arg_type == FSE_ARG_INT64) {
638 cur->docid_event.docid =
639 (uint64_t)va_arg(ap, uint64_t);
640 }
641
642 goto done_with_args;
643 }
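
	/*
	 * Illustrative caller sketch (variable names are hypothetical): a
	 * document-ID event is expected to be queued with exactly the
	 * argument order consumed above, e.g.:
	 *
	 *   add_fsevent(FSE_DOCID_CHANGED, ctx,
	 *       FSE_ARG_DEV, dev,
	 *       FSE_ARG_INO, src_ino,
	 *       FSE_ARG_INO, dst_ino,
	 *       FSE_ARG_INT64, (uint64_t)docid,
	 *       FSE_ARG_DONE);
	 */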
644
645 if (type == FSE_ACTIVITY) {
646 do_all_links = false;
647
648 arg_type = va_arg(ap, int32_t);
649 if (arg_type == FSE_ARG_INT32) {
650 cur->activity_event.version = (uint32_t)(va_arg(ap, uint32_t));
651 }
652
653 arg_type = va_arg(ap, int32_t);
654 if (arg_type == FSE_ARG_DEV) {
655 cur->activity_event.dev = (dev_t)(va_arg(ap, dev_t));
656 }
657
658 arg_type = va_arg(ap, int32_t);
659 if (arg_type == FSE_ARG_INO) {
660 cur->activity_event.ino = (ino64_t)(va_arg(ap, ino64_t));
661 }
662
663 arg_type = va_arg(ap, int32_t);
664 if (arg_type == FSE_ARG_INT64) {
665 cur->activity_event.origin_id = (uint64_t)(va_arg(ap, uint64_t));
666 }
667
668 arg_type = va_arg(ap, int32_t);
669 if (arg_type == FSE_ARG_INT64) {
670 cur->activity_event.age = (uint64_t)(va_arg(ap, uint64_t));
671 }
672
673 arg_type = va_arg(ap, int32_t);
674 if (arg_type == FSE_ARG_INT32) {
675 cur->activity_event.use_state = (uint32_t)(va_arg(ap, uint32_t));
676 }
677
678 arg_type = va_arg(ap, int32_t);
679 if (arg_type == FSE_ARG_INT32) {
680 cur->activity_event.urgency = (uint32_t)(va_arg(ap, uint32_t));
681 }
682
683 arg_type = va_arg(ap, int32_t);
684 if (arg_type == FSE_ARG_INT64) {
685 cur->activity_event.size = (uint64_t)(va_arg(ap, uint64_t));
686 }
687
688 goto done_with_args;
689 }
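
	/*
	 * Illustrative caller sketch (values are hypothetical): FSE_ACTIVITY
	 * consumes its arguments in the fixed order parsed above -- version,
	 * dev, inode, origin id, age, use state, urgency, size:
	 *
	 *   add_fsevent(FSE_ACTIVITY, ctx,
	 *       FSE_ARG_INT32, version, FSE_ARG_DEV, dev, FSE_ARG_INO, ino,
	 *       FSE_ARG_INT64, origin_id, FSE_ARG_INT64, age,
	 *       FSE_ARG_INT32, use_state, FSE_ARG_INT32, urgency,
	 *       FSE_ARG_INT64, size, FSE_ARG_DONE);
	 */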
690#if CONFIG_FSE_ACCESS_GRANTED
691 if (type == FSE_ACCESS_GRANTED) {
		//
		// This one is also different.  We get a path string
		// and (maybe) an audit token.  If we don't get the
		// audit token, we extract it from the vfs_context_t.
		//
697 audit_token_t *atokenp = NULL;
698 vnode_t vp = NULL;
699 char *path_str = NULL;
700 size_t path_strlen = 0;
701 void *arg;
702 int32_t len32;
703
704 do_all_links = false;
705
706 while ((arg_type = va_arg(ap, int32_t)) != FSE_ARG_DONE) {
707 switch (arg_type) {
708 case FSE_ARG_STRING:
709 len32 = va_arg(ap, int32_t);
710 arg = va_arg(ap, char *);
711 if (path_str == NULL) {
712 path_str = arg;
713 path_strlen = len32;
714 }
715 break;
716
717 case FSE_ARG_PATH:
718 arg = va_arg(ap, char *);
719 if (path_str == NULL) {
720 path_str = arg;
721 }
722 break;
723
724 case FSE_ARG_VNODE:
725 arg = va_arg(ap, vnode_t);
726 if (vp == NULL) {
727 vp = arg;
728 }
729 break;
730
731 case FSE_ARG_AUDIT_TOKEN:
732 arg = va_arg(ap, audit_token_t *);
733 if (atokenp == NULL) {
734 atokenp = arg;
735 }
736 break;
737
738 default:
739 printf("add_fsevent: FSE_ACCESS_GRANTED unknown type %d\n", arg_type);
740 // just skip one 32-bit word and hope we
741 // sync up...
742 (void)va_arg(ap, int32_t);
743 }
744 }
745
746 if (atokenp != NULL) {
747 memcpy(&cur->access_granted_event.audit_token,
748 atokenp,
749 sizeof(cur->access_granted_event.audit_token));
750 } else if (vfs_context_copy_audit_token(ctx,
751 &cur->access_granted_event.audit_token) != 0) {
752 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
753 &cur->flags);
754 goto done_with_args;
755 }
756
757 //
758 // If we got FSE_ARG_STRING, the length includes the
759 // terminating NUL. If we got FSE_ARG_PATH, all we
760 // got was the string pointer, so get the length and
761 // adjust. If we didn't get either, then the caller
762 // needs to have provided us with a vnode, and with
763 // that we can get the path.
764 //
765 if (path_str != NULL) {
766 if (path_strlen == 0) {
767 path_strlen = strlen(path_str) + 1;
768 }
769 } else if (vp != NULL) {
770 pathbuff = get_pathbuff();
771 pathbuff_len = MAXPATHLEN;
772 pathbuff[0] = '\0';
773 if (vn_getpath_no_firmlink(vp, pathbuff,
774 &pathbuff_len) == 0) {
775 path_str = pathbuff;
776 path_strlen = pathbuff_len;
777 }
778 }
779
780 if (path_str != NULL) {
781 assert(path_strlen <= INT16_MAX);
782 cur->access_granted_event.str =
783 vfs_addname(path_str, (uint32_t)path_strlen, 0, 0);
784 if (path_str == pathbuff) {
785 release_pathbuff(pathbuff);
786 pathbuff = NULL;
787 }
788 }
789 if (cur->access_granted_event.str == NULL) {
790 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
791 &cur->flags);
792 }
793
794 goto done_with_args;
795 }
796#endif
797 if (type == FSE_UNMOUNT_PENDING) {
798 // Just a dev_t
799 // We use the same fields as the regular event, but we
800 // don't have all of the data.
801 do_all_links = false;
802
803 arg_type = va_arg(ap, int32_t);
804 if (arg_type == FSE_ARG_DEV) {
805 cur->regular_event.dev = (dev_t)(va_arg(ap, dev_t));
806 }
807
808 cur->regular_event.dest = NULL;
809 cur->regular_event.str = NULL;
810 cur->regular_event.len = 0;
811
812 goto done_with_args;
813 }
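
	/*
	 * Illustrative caller sketch: an unmount-pending notification only
	 * carries the device, e.g.
	 *
	 *   add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
	 */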
814
815 for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
816 switch (arg_type) {
817 case FSE_ARG_VNODE: {
818 // this expands out into multiple arguments to the client
819 struct vnode *vp;
820 struct vnode_attr va;
821
822 if (kfse->regular_event.str != NULL) {
823 cur = kfse_dest;
824 }
825
826 vp = va_arg(ap, struct vnode *);
827 if (vp == NULL) {
828 panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!",
829 cur->type);
830 }
831
832 VATTR_INIT(&va);
833 VATTR_WANTED(&va, va_fsid);
834 VATTR_WANTED(&va, va_fileid);
835 VATTR_WANTED(&va, va_mode);
836 VATTR_WANTED(&va, va_uid);
837 VATTR_WANTED(&va, va_document_id);
838 VATTR_WANTED(&va, va_nlink);
839 if ((ret = vnode_getattr(vp, vap: &va, ctx: vfs_context_kernel())) != 0) {
840 // printf("add_fsevent: failed to getattr on vp %p (%d)\n", cur->fref.vp, ret);
841 cur->regular_event.str = NULL;
842 error = EINVAL;
843 goto clean_up;
844 }
845
846 cur->regular_event.dev = dev = (dev_t)va.va_fsid;
847 cur->regular_event.ino = (ino64_t)va.va_fileid;
848 cur->regular_event.mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode;
849 cur->regular_event.uid = va.va_uid;
850 cur->regular_event.document_id = va.va_document_id;
851 if (vp->v_flag & VISHARDLINK) {
852 cur->regular_event.mode |= FSE_MODE_HLINK;
853 if ((vp->v_type == VDIR && va.va_dirlinkcount == 0) || (vp->v_type == VREG && va.va_nlink == 0)) {
854 cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
855 }
856 if (orig_linkid == 0) {
857 orig_linkid = cur->regular_event.ino;
858 orig_linkcount = MIN(va.va_nlink, MAX_HARDLINK_NOTIFICATIONS);
859 link_vp = vp;
860 if (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID && !link_name) {
861 VATTR_INIT(&va);
862 VATTR_WANTED(&va, va_parentid);
863 VATTR_WANTED(&va, va_name);
864 link_name = zalloc(view: ZV_NAMEI);
865 va.va_name = link_name;
866 if ((ret = vnode_getattr(vp, vap: &va, ctx: vfs_context_kernel()) != 0) ||
867 !(VATTR_IS_SUPPORTED(&va, va_name)) ||
868 !(VATTR_IS_SUPPORTED(&va, va_parentid))) {
869 zfree(ZV_NAMEI, link_name);
870 link_name = NULL;
871 }
872 if (link_name) {
873 link_parentid = va.va_parentid;
874 }
875 va.va_name = NULL;
876 }
877 }
878 }
879
880 // if we haven't gotten the path yet, get it.
881 if (pathbuff == NULL && path_override == NULL) {
882 pathbuff = get_pathbuff();
883 pathbuff_len = MAXPATHLEN;
884
885 pathbuff[0] = '\0';
886 if ((ret = vn_getpath_no_firmlink(vp, pathbuf: pathbuff, len: &pathbuff_len)) != 0 || pathbuff[0] == '\0') {
887 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
888 address: &cur->flags);
889
890 do {
891 if (vp->v_parent != NULL) {
892 vp = vp->v_parent;
893 } else if (vp->v_mount) {
894 strlcpy(dst: pathbuff, src: vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN);
895 break;
896 } else {
897 vp = NULL;
898 }
899
900 if (vp == NULL) {
901 break;
902 }
903
904 pathbuff_len = MAXPATHLEN;
905 ret = vn_getpath_no_firmlink(vp, pathbuf: pathbuff, len: &pathbuff_len);
906 } while (ret == ENOSPC);
907
908 if (ret != 0 || vp == NULL) {
909 error = ENOENT;
910 goto clean_up;
911 }
912 }
913 } else if (path_override) {
914 pathbuff = path_override;
915 pathbuff_len = (int)strlen(s: path_override) + 1;
916 } else {
917 strlcpy(dst: pathbuff, src: "NOPATH", MAXPATHLEN);
918 pathbuff_len = (int)strlen(s: pathbuff) + 1;
919 }
920
921 // store the path by adding it to the global string table
922 cur->regular_event.len = (u_int16_t)pathbuff_len;
923 cur->regular_event.str =
924 vfs_addname(name: pathbuff, len: pathbuff_len, nc_hash: 0, flags: 0);
925 if (cur->regular_event.str == NULL ||
926 cur->regular_event.str[0] == '\0') {
927 panic("add_fsevent: was not able to add path %s to event %p.", pathbuff, cur);
928 }
929
930 if (pathbuff != path_override) {
931 release_pathbuff(path: pathbuff);
932 }
933 pathbuff = NULL;
934
935 break;
936 }
937
938 case FSE_ARG_FINFO: {
939 fse_info *fse;
940
941 fse = va_arg(ap, fse_info *);
942
943 cur->regular_event.dev = dev = (dev_t)fse->dev;
944 cur->regular_event.ino = (ino64_t)fse->ino;
945 cur->regular_event.mode = (int32_t)fse->mode;
946 cur->regular_event.uid = (uid_t)fse->uid;
947 cur->regular_event.document_id = (uint32_t)fse->document_id;
948 // if it's a hard-link and this is the last link, flag it
949 if (fse->mode & FSE_MODE_HLINK) {
950 if (fse->nlink == 0) {
951 cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
952 }
953 if (orig_linkid == 0) {
954 orig_linkid = cur->regular_event.ino;
955 orig_linkcount = MIN(fse->nlink, MAX_HARDLINK_NOTIFICATIONS);
956 }
957 }
958 if (cur->regular_event.mode & FSE_TRUNCATED_PATH) {
959 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
960 address: &cur->flags);
961 cur->regular_event.mode &= ~FSE_TRUNCATED_PATH;
962 }
963 break;
964 }
965
966 case FSE_ARG_STRING:
967 if (kfse->regular_event.str != NULL) {
968 cur = kfse_dest;
969 }
970
971 cur->regular_event.len =
972 (int16_t)(va_arg(ap, int32_t) & 0x7fff);
973 if (cur->regular_event.len >= 1) {
974 cur->regular_event.str =
975 vfs_addname(va_arg(ap, char *),
976 len: cur->regular_event.len, nc_hash: 0, flags: 0);
977 } else {
978 printf("add_fsevent: funny looking string length: %d\n", (int)cur->regular_event.len);
979 cur->regular_event.len = 2;
980 cur->regular_event.str = vfs_addname(name: "/",
981 len: cur->regular_event.len, nc_hash: 0, flags: 0);
982 }
983 if (cur->regular_event.str[0] == 0) {
984 printf("add_fsevent: bogus looking string (len %d)\n", cur->regular_event.len);
985 }
986 break;
987
988 case FSE_ARG_INT32: {
989 uint32_t ival = (uint32_t)va_arg(ap, int32_t);
990 kfse->regular_event.uid = ival;
991 break;
992 }
993
994 default:
995 printf("add_fsevent: unknown type %d\n", arg_type);
996 // just skip one 32-bit word and hope we sync up...
997 (void)va_arg(ap, int32_t);
998 }
999 }
1000
1001done_with_args:
1002 va_end(ap);
1003
1004 // XXX Memory barrier here?
1005 if (kfse_dest) {
1006 OSBitAndAtomic16(mask: ~KFSE_BEING_CREATED, address: &kfse_dest->flags);
1007 }
1008 OSBitAndAtomic16(mask: ~KFSE_BEING_CREATED, address: &kfse->flags);
1009
	//
	// now we have to go and let everyone who is interested in
	// this type of event know that it happened
	//
1014 lock_watch_table();
1015
1016 for (i = 0; i < MAX_WATCHERS; i++) {
1017 watcher = watcher_table[i];
1018 if (watcher == NULL) {
1019 continue;
1020 }
1021
1022 if (type < watcher->num_events
1023 && watcher->event_list[type] == FSE_REPORT
1024 && watcher_cares_about_dev(watcher, dev)) {
1025 if (watcher_add_event(watcher, kfse) != 0) {
1026 watcher->num_dropped++;
1027 continue;
1028 }
1029 }
1030
1031 // if (kfse->refcount < 1) {
1032 // panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
1033 // }
1034 }
1035
1036 unlock_watch_table();
1037
1038clean_up:
1039
1040 if (pathbuff) {
1041 release_pathbuff(path: pathbuff);
1042 pathbuff = NULL;
1043 }
1044 // replicate events for sibling hardlinks
1045 if (do_all_links &&
1046 (kfse->regular_event.mode & FSE_MODE_HLINK) &&
1047 !(kfse->regular_event.mode & FSE_MODE_LAST_HLINK) &&
1048 (type == FSE_STAT_CHANGED ||
1049 type == FSE_CONTENT_MODIFIED ||
1050 type == FSE_FINDER_INFO_CHANGED ||
1051 type == FSE_XATTR_MODIFIED)) {
1052 if (orig_linkcount > 0 && orig_linkid != 0) {
1053#ifndef APFSIOC_NEXT_LINK
1054#define APFSIOC_NEXT_LINK _IOWR('J', 10, uint64_t)
1055#endif
1056 if (path_override == NULL) {
1057 path_override = get_pathbuff();
1058 }
1059 if (next_linkid == 0) {
1060 next_linkid = orig_linkid;
1061 }
1062
1063 if (link_vp) {
1064 mount_t mp = NULL;
1065 vnode_t mnt_rootvp = NULL;
1066 int iret = -1;
1067
1068 mp = vnode_mount(vp: link_vp);
1069 if (mp) {
1070 iret = VFS_ROOT(mp, &mnt_rootvp, vfs_context_kernel());
1071 }
1072
1073 if (iret == 0 && mnt_rootvp) {
1074 iret = VNOP_IOCTL(vp: mnt_rootvp, APFSIOC_NEXT_LINK, data: (char *)&next_linkid, fflag: (int)0, ctx: vfs_context_kernel());
1075 vnode_put(vp: mnt_rootvp);
1076 }
1077
1078 int32_t fsid0;
1079 int path_override_len = MAXPATHLEN;
1080
1081 // continue resolving hardlink paths if there is a valid next_linkid retrieved
1082 // file systems not supporting APFSIOC_NEXT_LINK will skip replicating events for sibling hardlinks
1083 if (iret == 0 && next_linkid != 0) {
1084 fsid0 = link_vp->v_mount->mnt_vfsstat.f_fsid.val[0];
1085 ut->uu_flag |= UT_KERN_RAGE_VNODES;
1086 if (!do_cache_reset) {
1087 do_cache_reset = true;
1088 }
1089 if ((iret = fsgetpath_internal(ctx, fsid0, next_linkid, MAXPATHLEN, path_override, FSOPT_NOFIRMLINKPATH, &path_override_len)) == 0) {
1090 orig_linkcount--;
1091 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1092
1093 if (orig_linkcount >= 0) {
1094 release_event_ref(kfse);
1095 goto restart;
1096 }
1097 } else {
1098 // failed to get override path
1099 // encountered a broken link or the linkid has been deleted before retrieving the path
1100 orig_linkcount--;
1101 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1102
1103 if (orig_linkcount >= 0) {
1104 goto clean_up;
1105 }
1106 }
1107 }
1108 }
1109 }
1110 }
1111
1112 if (link_name) {
1113 /*
1114 * If we call fsgetpath on all the links, it will set the link origin cache
1115 * to the last link that the path was obtained for.
		 * To restore the original link id cache in APFS we need to issue a
1117 * lookup on the original directory + name for the link.
1118 */
1119 if (do_cache_reset) {
1120 vnode_t dvp = NULLVP;
1121
1122 if ((ret = VFS_VGET(link_vp->v_mount, (ino64_t)link_parentid, &dvp, vfs_context_kernel())) == 0) {
1123 vnode_t lvp = NULLVP;
1124
1125 ret = vnode_lookupat(path: link_name, flags: 0, vpp: &lvp, ctx, start_dvp: dvp);
1126 if (!ret) {
1127 vnode_put(vp: lvp);
1128 lvp = NULLVP;
1129 }
1130 vnode_put(vp: dvp);
1131 dvp = NULLVP;
1132 }
1133 ret = 0;
1134 }
1135 zfree(ZV_NAMEI, link_name);
1136 link_name = NULL;
1137 }
1138
1139 if (path_override) {
1140 release_pathbuff(path: path_override);
1141 path_override = NULL;
1142 }
1143
1144 release_event_ref(kfse);
1145
1146 return error;
1147}
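
/*
 * Typical usage sketch (illustrative; names are hypothetical): most
 * callers hand add_fsevent() a vnode and let it derive the path and file
 * info itself, while two-file operations pass a string/fse_info pair for
 * each side so that both the source and destination kfs_events get
 * populated:
 *
 *   add_fsevent(FSE_CREATE_FILE, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
 *
 *   add_fsevent(FSE_RENAME, ctx,
 *       FSE_ARG_STRING, from_len, from_path, FSE_ARG_FINFO, &from_finfo,
 *       FSE_ARG_STRING, to_len, to_path, FSE_ARG_FINFO, &to_finfo,
 *       FSE_ARG_DONE);
 */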
1148
1149int
1150test_fse_access_granted(vnode_t vp, unsigned long type, vfs_context_t ctx)
1151{
1152 audit_token_t atoken;
1153 char *pathbuff;
1154 int error, pathbuff_len;
1155
1156 if (type == 0) {
1157 return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1158 FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1159 }
1160
1161 if (type == 1) {
1162 error = vfs_context_copy_audit_token(ctx, token: &atoken);
1163 if (error) {
1164 return error;
1165 }
1166 return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1167 FSE_ARG_VNODE, vp, FSE_ARG_AUDIT_TOKEN, &atoken,
1168 FSE_ARG_DONE);
1169 }
1170
1171 if (type == 2 || type == 3) {
1172 pathbuff = get_pathbuff();
1173 pathbuff_len = MAXPATHLEN;
1174 pathbuff[0] = '\0';
1175 error = vn_getpath_no_firmlink(vp, pathbuf: pathbuff, len: &pathbuff_len);
1176 if (error) {
1177 release_pathbuff(path: pathbuff);
1178 return error;
1179 }
1180 if (type == 2) {
1181 error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1182 FSE_ARG_STRING, pathbuff_len, pathbuff,
1183 FSE_ARG_DONE);
1184 } else {
1185 error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1186 FSE_ARG_PATH, pathbuff, FSE_ARG_DONE);
1187 }
1188 release_pathbuff(path: pathbuff);
1189 return error;
1190 }
1191
1192 return ENOTSUP;
1193}
1194
1195static void
1196release_event_ref(kfs_event *kfse)
1197{
1198 int old_refcount;
1199 kfs_event *dest = NULL;
1200 const char *path_str = NULL, *dest_path_str = NULL;
1201
1202 lock_fs_event_list();
1203
1204 old_refcount = OSAddAtomic(-1, &kfse->refcount);
1205 if (old_refcount > 1) {
1206 unlock_fs_event_list();
1207 return;
1208 }
1209
1210 if (last_event_ptr == kfse) {
1211 last_event_ptr = NULL;
1212 last_event_type = -1;
1213 last_coalesced_time = 0;
1214 }
1215
1216 if (kfse->refcount < 0) {
1217 panic("release_event_ref: bogus kfse refcount %d", kfse->refcount);
1218 }
1219
1220 assert(kfse->refcount == 0);
1221 assert(kfse->type != FSE_INVALID);
1222
1223 //
1224 // Get pointers to all the things so we can free without
1225 // holding any locks.
1226 //
1227 if (kfse->type != FSE_DOCID_CREATED &&
1228 kfse->type != FSE_DOCID_CHANGED &&
1229 kfse->type != FSE_ACTIVITY) {
1230 path_str = kfse->regular_event.str;
1231
1232 dest = kfse->regular_event.dest;
1233 if (dest != NULL) {
1234 assert(dest->type != FSE_INVALID);
1235 if (OSAddAtomic(-1,
1236 &kfse->regular_event.dest->refcount) == 1) {
1237 dest_path_str = dest->regular_event.str;
1238 } else {
1239 dest = NULL;
1240 }
1241 }
1242 }
1243
1244 if (dest != NULL) {
1245 if (dest->flags & KFSE_ON_LIST) {
1246 num_events_outstanding--;
1247 LIST_REMOVE(dest, kevent_list);
1248 }
1249 }
1250
1251 if (kfse->flags & KFSE_ON_LIST) {
1252 num_events_outstanding--;
1253 LIST_REMOVE(kfse, kevent_list);
1254 if (kfse->type == FSE_RENAME) {
1255 num_pending_rename--;
1256 }
1257 }
1258
1259 unlock_fs_event_list();
1260
1261 zfree(event_zone, kfse);
1262 if (dest != NULL) {
1263 zfree(event_zone, dest);
1264 }
1265
1266 if (path_str != NULL) {
1267 vfs_removename(name: path_str);
1268 }
1269 if (dest_path_str != NULL) {
1270 vfs_removename(name: dest_path_str);
1271 }
1272}
1273
1274#define FSEVENTS_WATCHER_ENTITLEMENT \
1275 "com.apple.private.vfs.fsevents-watcher"
1276
1277#define FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT \
1278 "com.apple.private.vfs.fsevents-activity-watcher"
1279
//
// We restrict this for two reasons:
//
// 1- So that naive processes don't get this firehose by default.
//
// 2- Because this event, when delivered to watchers, includes the
//    audit token of the process granted the access, and we don't
//    want to leak that to random watchers.
//
#define FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT \
	"com.apple.private.vfs.fsevents-access-granted-watcher"
1291
static bool
watcher_is_entitled(task_t task)
{
	//
	// We consider a process to be entitled to watch /dev/fsevents
	// if it has any of FSEVENTS_WATCHER_ENTITLEMENT,
	// FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT, or
	// FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT.
	//
	return !!(IOTaskHasEntitlement(task, FSEVENTS_WATCHER_ENTITLEMENT) ||
	       IOTaskHasEntitlement(task,
	       FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT) ||
	       IOTaskHasEntitlement(task,
	       FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT));
}
1306#if CONFIG_FSE_ACCESS_GRANTED
1307static bool
1308watcher_is_entitled_for_access_granted(task_t task)
1309{
1310 return !!IOTaskHasEntitlement(task,
1311 FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT);
1312}
1313#endif
1314static bool
1315watcher_is_entitled_for_activity(task_t task)
1316{
1317 return !!IOTaskHasEntitlement(task,
1318 FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT);
1319}
1320
1321static int
1322add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out, void *fseh)
1323{
1324 int i;
1325 fs_event_watcher *watcher;
1326
1327 if (eventq_size <= 0 || eventq_size > 100 * max_kfs_events) {
1328 eventq_size = max_kfs_events;
1329 }
1330 if (num_events > FSE_ACTIVITY &&
1331 event_list[FSE_ACTIVITY] != FSE_IGNORE &&
1332 !watcher_is_entitled_for_activity(task: current_task())) {
1333 event_list[FSE_ACTIVITY] = FSE_IGNORE;
1334 }
1335#if CONFIG_FSE_ACCESS_GRANTED
1336 // If the watcher wants FSE_ACCESS_GRANTED, ensure it has the
1337 // correct entitlement. If not, just silently drop that event.
1338 if (num_events > FSE_ACCESS_GRANTED &&
1339 event_list[FSE_ACCESS_GRANTED] != FSE_IGNORE &&
1340 !watcher_is_entitled_for_access_granted(current_task())) {
1341 event_list[FSE_ACCESS_GRANTED] = FSE_IGNORE;
1342 }
1343#endif
1344 // Note: the event_queue follows the fs_event_watcher struct
1345 // in memory so we only have to do one allocation
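	// Rough memory layout of that single allocation (illustrative):
	//
	//   +-------------------+--------------------------------------+
	//   | fs_event_watcher  | kfs_event *event_queue[eventq_size]  |
	//   +-------------------+--------------------------------------+
	//   ^ watcher            ^ watcher->event_queue == &watcher[1]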
1346 watcher = kalloc_type(fs_event_watcher, kfs_event *, eventq_size, Z_WAITOK);
1347 if (watcher == NULL) {
1348 return ENOMEM;
1349 }
1350
1351 watcher->event_list = event_list;
1352 watcher->num_events = num_events;
1353 watcher->devices_not_to_watch = NULL;
1354 watcher->num_devices = 0;
1355 watcher->flags = 0;
1356 watcher->event_queue = (kfs_event **)&watcher[1];
1357 watcher->eventq_size = eventq_size;
1358 watcher->rd = 0;
1359 watcher->wr = 0;
1360 watcher->blockers = 0;
1361 watcher->num_readers = 0;
1362 watcher->max_event_id = 0;
1363 watcher->fseh = fseh;
1364 watcher->pid = proc_selfpid();
1365 proc_selfname(buf: watcher->proc_name, size: sizeof(watcher->proc_name));
1366
1367 watcher->num_dropped = 0; // XXXdbg - debugging
1368
1369 if (watcher_is_entitled(task: current_task())) {
1370 watcher->flags |= WATCHER_APPLE_SYSTEM_SERVICE;
1371 } else {
1372 printf("fsevents: watcher %s (pid: %d) - Using /dev/fsevents directly is unsupported. Migrate to FSEventsFramework\n",
1373 watcher->proc_name, watcher->pid);
1374 }
1375
1376 lock_watch_table();
1377
1378 // find a slot for the new watcher
1379 for (i = 0; i < MAX_WATCHERS; i++) {
1380 if (watcher_table[i] == NULL) {
1381 watcher->my_id = i;
1382 watcher_table[i] = watcher;
1383 break;
1384 }
1385 }
1386
1387 if (i >= MAX_WATCHERS) {
1388 printf("fsevents: too many watchers!\n");
1389 unlock_watch_table();
1390 kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1391 return ENOSPC;
1392 }
1393
1394 // now update the global list of who's interested in
1395 // events of a particular type...
1396 for (i = 0; i < num_events; i++) {
1397 if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1398 fs_event_type_watchers[i]++;
1399 }
1400 }
1401
1402 unlock_watch_table();
1403
1404 *watcher_out = watcher;
1405
1406 return 0;
1407}
1408
1409
1410
1411static void
1412remove_watcher(fs_event_watcher *target)
1413{
1414 int i, j, counter = 0;
1415 fs_event_watcher *watcher;
1416 kfs_event *kfse;
1417
1418 lock_watch_table();
1419
1420 for (j = 0; j < MAX_WATCHERS; j++) {
1421 watcher = watcher_table[j];
1422 if (watcher != target) {
1423 continue;
1424 }
1425
1426 watcher_table[j] = NULL;
1427
1428 for (i = 0; i < watcher->num_events; i++) {
1429 if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1430 fs_event_type_watchers[i]--;
1431 }
1432 }
1433
1434 if (watcher->flags & WATCHER_CLOSING) {
1435 unlock_watch_table();
1436 return;
1437 }
1438
1439 // printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags);
1440 watcher->flags |= WATCHER_CLOSING;
1441 OSAddAtomic(1, &watcher->num_readers);
1442
1443 unlock_watch_table();
1444
1445 while (watcher->num_readers > 1 && counter++ < 5000) {
1446 lock_watch_table();
1447 fsevents_wakeup(watcher); // in case they're asleep
1448 unlock_watch_table();
1449
1450 tsleep(chan: watcher, PRIBIO, wmesg: "fsevents-close", timo: 1);
1451 }
1452 if (counter++ >= 5000) {
1453 // printf("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1454 panic("fsevents: close: still have readers! (%d)", watcher->num_readers);
1455 }
1456
1457 // drain the event_queue
1458
1459 lck_rw_lock_exclusive(lck: &event_handling_lock);
1460 while (watcher->rd != watcher->wr) {
1461 kfse = watcher->event_queue[watcher->rd];
1462 watcher->event_queue[watcher->rd] = NULL;
1463 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1464 OSSynchronizeIO();
1465 if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1466 release_event_ref(kfse);
1467 }
1468 }
1469 lck_rw_unlock_exclusive(lck: &event_handling_lock);
1470
1471 kfree_data(watcher->event_list, watcher->num_events * sizeof(int8_t));
1472 kfree_data(watcher->devices_not_to_watch, watcher->num_devices * sizeof(dev_t));
1473 kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1474 return;
1475 }
1476
1477 unlock_watch_table();
1478}
1479
1480
1481#define EVENT_DELAY_IN_MS 10
1482static thread_call_t event_delivery_timer = NULL;
1483static int timer_set = 0;
1484
1485
1486static void
1487delayed_event_delivery(__unused void *param0, __unused void *param1)
1488{
1489 int i;
1490
1491 lock_watch_table();
1492
1493 for (i = 0; i < MAX_WATCHERS; i++) {
1494 if (watcher_table[i] != NULL && watcher_table[i]->rd != watcher_table[i]->wr) {
1495 fsevents_wakeup(watcher: watcher_table[i]);
1496 }
1497 }
1498
1499 timer_set = 0;
1500
1501 unlock_watch_table();
1502}
1503
1504
1505//
1506// The watch table must be locked before calling this function.
1507//
1508static void
1509schedule_event_wakeup(void)
1510{
1511 uint64_t deadline;
1512
1513 if (event_delivery_timer == NULL) {
1514 event_delivery_timer = thread_call_allocate(func: (thread_call_func_t)delayed_event_delivery, NULL);
1515 }
1516
1517 clock_interval_to_deadline(EVENT_DELAY_IN_MS, scale_factor: 1000 * 1000, result: &deadline);
1518
1519 thread_call_enter_delayed(call: event_delivery_timer, deadline);
1520 timer_set = 1;
1521}
1522
1523
1524
1525#define MAX_NUM_PENDING 16
1526
1527//
1528// NOTE: the watch table must be locked before calling
1529// this routine.
1530//
1531static int
1532watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse)
1533{
1534 if (kfse->abstime > watcher->max_event_id) {
1535 watcher->max_event_id = kfse->abstime;
1536 }
1537
1538 if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) {
1539 watcher->flags |= WATCHER_DROPPED_EVENTS;
1540 fsevents_wakeup(watcher);
1541 return ENOSPC;
1542 }
1543
1544 OSAddAtomic(1, &kfse->refcount);
1545 watcher->event_queue[watcher->wr] = kfse;
1546 OSSynchronizeIO();
1547 watcher->wr = (watcher->wr + 1) % watcher->eventq_size;
1548
1549 //
1550 // wake up the watcher if there are more than MAX_NUM_PENDING events.
1551 // otherwise schedule a timer (if one isn't already set) which will
1552 // send any pending events if no more are received in the next
1553 // EVENT_DELAY_IN_MS milli-seconds.
1554 //
1555 int32_t num_pending = 0;
1556 if (watcher->rd < watcher->wr) {
1557 num_pending = watcher->wr - watcher->rd;
1558 }
1559
1560 if (watcher->rd > watcher->wr) {
1561 num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
1562 }
1563
1564 if (num_pending > (watcher->eventq_size * 3 / 4) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
1565 /* Non-Apple Service is falling behind, start dropping events for this process */
1566 lck_rw_lock_exclusive(lck: &event_handling_lock);
1567 while (watcher->rd != watcher->wr) {
1568 kfse = watcher->event_queue[watcher->rd];
1569 watcher->event_queue[watcher->rd] = NULL;
1570 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1571 OSSynchronizeIO();
1572 if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1573 release_event_ref(kfse);
1574 }
1575 }
1576 watcher->flags |= WATCHER_DROPPED_EVENTS;
1577 lck_rw_unlock_exclusive(lck: &event_handling_lock);
1578
1579 printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
1580 watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
1581 watcher->eventq_size, watcher->flags);
1582
1583 fsevents_wakeup(watcher);
1584 } else if (num_pending > MAX_NUM_PENDING) {
1585 fsevents_wakeup(watcher);
1586 } else if (timer_set == 0) {
1587 schedule_event_wakeup();
1588 }
1589
1590 return 0;
1591}
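
/*
 * Worked example (illustrative): with eventq_size == 8, rd == 6 and
 * wr == 2, the queue has wrapped, so num_pending is computed as
 * wr + eventq_size - rd == 2 + 8 - 6 == 4.  The queue is declared full
 * one slot early -- when (wr + 1) % eventq_size == rd -- so one slot is
 * always left empty to distinguish "full" from "empty".
 */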
1592
1593static int
1594fill_buff(uint16_t type, int32_t size, const void *data,
1595 char *buff, int32_t *_buff_idx, int32_t buff_sz,
1596 struct uio *uio)
1597{
1598 int32_t amt, error = 0, buff_idx = *_buff_idx;
1599 uint16_t tmp;
1600
1601 //
1602 // the +1 on the size is to guarantee that the main data
1603 // copy loop will always copy at least 1 byte
1604 //
1605 if ((buff_sz - buff_idx) <= (int)(2 * sizeof(uint16_t) + 1)) {
1606 if (buff_idx > uio_resid(a_uio: uio)) {
1607 error = ENOSPC;
1608 goto get_out;
1609 }
1610
1611 error = uiomove(cp: buff, n: buff_idx, uio);
1612 if (error) {
1613 goto get_out;
1614 }
1615 buff_idx = 0;
1616 }
1617
1618 // copy out the header (type & size)
1619 memcpy(dst: &buff[buff_idx], src: &type, n: sizeof(uint16_t));
1620 buff_idx += sizeof(uint16_t);
1621
1622 tmp = size & 0xffff;
1623 memcpy(dst: &buff[buff_idx], src: &tmp, n: sizeof(uint16_t));
1624 buff_idx += sizeof(uint16_t);
1625
1626 // now copy the body of the data, flushing along the way
1627 // if the buffer fills up.
1628 //
1629 while (size > 0) {
1630 amt = (size < (buff_sz - buff_idx)) ? size : (buff_sz - buff_idx);
1631 memcpy(dst: &buff[buff_idx], src: data, n: amt);
1632
1633 size -= amt;
1634 buff_idx += amt;
1635 data = (const char *)data + amt;
1636 if (size > (buff_sz - buff_idx)) {
1637 if (buff_idx > uio_resid(a_uio: uio)) {
1638 error = ENOSPC;
1639 goto get_out;
1640 }
1641 error = uiomove(cp: buff, n: buff_idx, uio);
1642 if (error) {
1643 goto get_out;
1644 }
1645 buff_idx = 0;
1646 }
1647
1648 if (amt == 0) { // just in case...
1649 break;
1650 }
1651 }
1652
1653get_out:
1654 *_buff_idx = buff_idx;
1655
1656 return error;
1657}
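
/*
 * Output format sketch (as produced by fill_buff() and copy_out_kfse()):
 * each event copied to the reader starts with a 32-bit type and a 32-bit
 * pid, followed by a sequence of arguments, each encoded as
 *
 *   uint16_t arg_type;   // FSE_ARG_DEV, FSE_ARG_INO, FSE_ARG_STRING, ...
 *   uint16_t arg_len;    // payload length in bytes (low 16 bits)
 *   uint8_t  payload[arg_len];
 *
 * and terminated by a bare FSE_ARG_DONE uint16_t with no length or payload.
 */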
1658
1659
1660static int copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio) __attribute__((noinline));
1661
1662static int
1663copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)
1664{
1665 int error;
1666 uint16_t tmp16;
1667 int32_t type;
1668 kfs_event *cur;
1669 char evbuff[512];
1670 int evbuff_idx = 0;
1671
1672 if (kfse->type == FSE_INVALID) {
1673 panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d)", kfse, kfse->refcount);
1674 }
1675
1676 if (kfse->flags & KFSE_BEING_CREATED) {
1677 return 0;
1678 }
1679
1680 if (((kfse->type == FSE_RENAME) || (kfse->type == FSE_CLONE)) &&
1681 kfse->regular_event.dest == NULL) {
1682 //
1683 // This can happen if an event gets recycled but we had a
1684 // pointer to it in our event queue. The event is the
1685 // destination of a rename or clone which we'll process
1686 // separately (that is, another kfse points to this one
1687 // so it's ok to skip this guy because we'll process it
1688 // when we process the other one)
1689 error = 0;
1690 goto get_out;
1691 }
1692
1693 if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
1694 type = (kfse->type & 0xfff);
1695
1696 if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1697 type |= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
1698 } else if (kfse->flags & KFSE_COMBINED_EVENTS) {
1699 type |= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
1700 }
1701 } else {
1702 type = (int32_t)kfse->type;
1703 }
1704
1705 // copy out the type of the event
1706 memcpy(dst: evbuff, src: &type, n: sizeof(int32_t));
1707 evbuff_idx += sizeof(int32_t);
1708
1709 // copy out the pid of the person that generated the event
1710 memcpy(dst: &evbuff[evbuff_idx], src: &kfse->pid, n: sizeof(pid_t));
1711 evbuff_idx += sizeof(pid_t);
1712
1713 cur = kfse;
1714
1715copy_again:
1716
1717 if (kfse->type == FSE_DOCID_CHANGED ||
1718 kfse->type == FSE_DOCID_CREATED) {
1719 dev_t dev = cur->docid_event.dev;
1720 ino64_t src_ino = cur->docid_event.src_ino;
1721 ino64_t dst_ino = cur->docid_event.dst_ino;
1722 uint64_t docid = cur->docid_event.docid;
1723
1724 error = fill_buff(FSE_ARG_DEV, size: sizeof(dev_t), data: &dev, buff: evbuff,
1725 buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1726 if (error != 0) {
1727 goto get_out;
1728 }
1729
1730 error = fill_buff(FSE_ARG_INO, size: sizeof(ino64_t), data: &src_ino,
1731 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1732 if (error != 0) {
1733 goto get_out;
1734 }
1735
1736 error = fill_buff(FSE_ARG_INO, size: sizeof(ino64_t), data: &dst_ino,
1737 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1738 if (error != 0) {
1739 goto get_out;
1740 }
1741
1742 error = fill_buff(FSE_ARG_INT64, size: sizeof(uint64_t), data: &docid,
1743 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1744 if (error != 0) {
1745 goto get_out;
1746 }
1747
1748 goto done;
1749 }
1750
1751 if (kfse->type == FSE_UNMOUNT_PENDING) {
1752 dev_t dev = cur->regular_event.dev;
1753
1754 error = fill_buff(FSE_ARG_DEV, size: sizeof(dev_t), data: &dev,
1755 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1756 if (error != 0) {
1757 goto get_out;
1758 }
1759
1760 goto done;
1761 }
1762
1763 if (kfse->type == FSE_ACTIVITY) {
1764 error = fill_buff(FSE_ARG_INT32, size: sizeof(cur->activity_event.version), data: &cur->activity_event.version,
1765 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1766 if (error != 0) {
1767 goto get_out;
1768 }
1769 error = fill_buff(FSE_ARG_DEV, size: sizeof(cur->activity_event.dev), data: &cur->activity_event.dev, buff: evbuff,
1770 buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1771 if (error != 0) {
1772 goto get_out;
1773 }
1774
1775 error = fill_buff(FSE_ARG_INO, size: sizeof(cur->activity_event.ino), data: &cur->activity_event.ino,
1776 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1777 if (error != 0) {
1778 goto get_out;
1779 }
1780
1781 error = fill_buff(FSE_ARG_INT64, size: sizeof(cur->activity_event.origin_id), data: &cur->activity_event.origin_id,
1782 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1783 if (error != 0) {
1784 goto get_out;
1785 }
1786
1787 error = fill_buff(FSE_ARG_INT64, size: sizeof(cur->activity_event.age), data: &cur->activity_event.age,
1788 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1789 if (error != 0) {
1790 goto get_out;
1791 }
1792
1793 error = fill_buff(FSE_ARG_INT32, size: sizeof(cur->activity_event.use_state), data: &cur->activity_event.use_state,
1794 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1795 if (error != 0) {
1796 goto get_out;
1797 }
1798
1799 error = fill_buff(FSE_ARG_INT32, size: sizeof(cur->activity_event.urgency), data: &cur->activity_event.urgency,
1800 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1801 if (error != 0) {
1802 goto get_out;
1803 }
1804
1805 error = fill_buff(FSE_ARG_INT64, size: sizeof(cur->activity_event.size), data: &cur->activity_event.size,
1806 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1807 if (error != 0) {
1808 goto get_out;
1809 }
1810
1811 goto done;
1812 }
1813#if CONFIG_FSE_ACCESS_GRANTED
1814 if (kfse->type == FSE_ACCESS_GRANTED) {
1815 //
1816 // KFSE_CONTAINS_DROPPED_EVENTS will be set if either
1817 // the path or audit token are bogus; don't copy out
1818 // either in that case.
1819 //
1820 if (cur->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1821 goto done;
1822 }
1823 error = fill_buff(FSE_ARG_STRING,
1824 cur->access_granted_event.len,
1825 cur->access_granted_event.str,
1826 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1827 if (error != 0) {
1828 goto get_out;
1829 }
1830 error = fill_buff(FSE_ARG_AUDIT_TOKEN,
1831 sizeof(cur->access_granted_event.audit_token),
1832 &cur->access_granted_event.audit_token,
1833 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1834 if (error != 0) {
1835 goto get_out;
1836 }
1837
1838 goto done;
1839 }
1840#endif
1841 if (cur->regular_event.str == NULL ||
1842 cur->regular_event.str[0] == '\0') {
1843 printf("copy_out_kfse:2: empty/short path (%s)\n",
1844 cur->regular_event.str);
1845 error = fill_buff(FSE_ARG_STRING, size: 2, data: "/", buff: evbuff, buff_idx: &evbuff_idx,
1846 buff_sz: sizeof(evbuff), uio);
1847 } else {
1848 error = fill_buff(FSE_ARG_STRING, size: cur->regular_event.len,
1849 data: cur->regular_event.str, buff: evbuff, buff_idx: &evbuff_idx,
1850 buff_sz: sizeof(evbuff), uio);
1851 }
1852 if (error != 0) {
1853 goto get_out;
1854 }
1855
1856 if (cur->regular_event.dev == 0 && cur->regular_event.ino == 0) {
1857 // this happens when a rename event happens and the
1858 // destination of the rename did not previously exist.
1859 // it thus has no other file info so skip copying out
1860 // the stuff below since it isn't initialized
1861 goto done;
1862 }
1863
1864 if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
1865 // We rely on the layout of the "regular_event"
1866 // structure being the same as fse_info in order
1867 // to speed up this copy. The nlink field in
1868 // fse_info is not included.
1869 error = fill_buff(FSE_ARG_FINFO, KFSE_INFO_COPYSIZE,
1870 data: &cur->regular_event, buff: evbuff, buff_idx: &evbuff_idx,
1871 buff_sz: sizeof(evbuff), uio);
1872 if (error != 0) {
1873 goto get_out;
1874 }
1875 } else {
1876 error = fill_buff(FSE_ARG_DEV, size: sizeof(dev_t),
1877 data: &cur->regular_event.dev, buff: evbuff, buff_idx: &evbuff_idx,
1878 buff_sz: sizeof(evbuff), uio);
1879 if (error != 0) {
1880 goto get_out;
1881 }
1882
1883 error = fill_buff(FSE_ARG_INO, size: sizeof(ino64_t),
1884 data: &cur->regular_event.ino, buff: evbuff, buff_idx: &evbuff_idx,
1885 buff_sz: sizeof(evbuff), uio);
1886 if (error != 0) {
1887 goto get_out;
1888 }
1889
1890 error = fill_buff(FSE_ARG_MODE, size: sizeof(int32_t),
1891 data: &cur->regular_event.mode, buff: evbuff, buff_idx: &evbuff_idx,
1892 buff_sz: sizeof(evbuff), uio);
1893 if (error != 0) {
1894 goto get_out;
1895 }
1896
1897 error = fill_buff(FSE_ARG_UID, size: sizeof(uid_t),
1898 data: &cur->regular_event.uid, buff: evbuff, buff_idx: &evbuff_idx,
1899 buff_sz: sizeof(evbuff), uio);
1900 if (error != 0) {
1901 goto get_out;
1902 }
1903
1904 error = fill_buff(FSE_ARG_GID, size: sizeof(gid_t),
1905 data: &cur->regular_event.document_id, buff: evbuff, buff_idx: &evbuff_idx,
1906 buff_sz: sizeof(evbuff), uio);
1907 if (error != 0) {
1908 goto get_out;
1909 }
1910 }
1911
1912 if (cur->regular_event.dest) {
1913 cur = cur->regular_event.dest;
1914 goto copy_again;
1915 }
1916
1917done:
1918 // very last thing: the time stamp
1919 error = fill_buff(FSE_ARG_INT64, size: sizeof(uint64_t), data: &cur->abstime,
1920 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1921 if (error != 0) {
1922 goto get_out;
1923 }
1924
1925 // check if the FSE_ARG_DONE will fit
1926 if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
1927 if (evbuff_idx > uio_resid(uio)) {
1928 error = ENOSPC;
1929 goto get_out;
1930 }
1931 error = uiomove(evbuff, evbuff_idx, uio);
1932 if (error) {
1933 goto get_out;
1934 }
1935 evbuff_idx = 0;
1936 }
1937
1938 tmp16 = FSE_ARG_DONE;
1939 memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
1940 evbuff_idx += sizeof(uint16_t);
1941
1942 // flush any remaining data in the buffer (and hopefully
1943 // in most cases this is the only uiomove we'll do)
1944 if (evbuff_idx > uio_resid(uio)) {
1945 error = ENOSPC;
1946 } else {
1947 error = uiomove(evbuff, evbuff_idx, uio);
1948 }
1949
1950get_out:
1951
1952 return error;
1953}
1954
1955
1956
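/*
 * fmod_watch() drains a watcher's ring buffer into the caller's uio.
 * As a rough summary pieced together from copy_out_kfse() above and
 * the FSE_EVENTS_DROPPED record assembled below (a sketch, not a
 * formal spec): each record begins with an int32 event type and an
 * int32 pid, followed by FSE_ARG_* items emitted via fill_buff(),
 * and ends with a bare uint16 FSE_ARG_DONE tag.
 */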
1957static int
1958fmod_watch(fs_event_watcher *watcher, struct uio *uio)
1959{
1960 int error = 0;
1961 user_ssize_t last_full_event_resid;
1962 kfs_event *kfse;
1963 uint16_t tmp16;
1964 int skipped;
1965
1966 last_full_event_resid = uio_resid(uio);
1967
1968 // need at least 2048 bytes of space (maxpathlen + 1 event buf)
1969 if (uio_resid(uio) < 2048 || watcher == NULL) {
1970 return EINVAL;
1971 }
1972
1973 if (watcher->flags & WATCHER_CLOSING) {
1974 return 0;
1975 }
1976
1977 if (OSAddAtomic(1, &watcher->num_readers) != 0) {
1978 // don't allow multiple threads to read from the fd at the same time
1979 OSAddAtomic(-1, &watcher->num_readers);
1980 return EAGAIN;
1981 }
1982
1983restart_watch:
1984 if (watcher->rd == watcher->wr) {
1985 if (watcher->flags & WATCHER_CLOSING) {
1986 OSAddAtomic(-1, &watcher->num_readers);
1987 return 0;
1988 }
1989 OSAddAtomic(1, &watcher->blockers);
1990
1991 // there's nothing to do, go to sleep
1992 error = tsleep((caddr_t)watcher, PUSER | PCATCH, "fsevents_empty", 0);
1993
1994 OSAddAtomic(-1, &watcher->blockers);
1995
1996 if (error != 0 || (watcher->flags & WATCHER_CLOSING)) {
1997 OSAddAtomic(-1, &watcher->num_readers);
1998 return error;
1999 }
2000 }
2001
2002 // if we dropped events, return that as an event first
2003 if (watcher->flags & WATCHER_DROPPED_EVENTS) {
2004 int32_t val = FSE_EVENTS_DROPPED;
2005
2006 error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
2007 if (error == 0) {
2008 val = 0; // a fake pid
2009 error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
2010
2011 tmp16 = FSE_ARG_DONE; // makes it a consistent msg
2012 error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
2013
2014 last_full_event_resid = uio_resid(uio);
2015 }
2016
2017 if (error) {
2018 OSAddAtomic(-1, &watcher->num_readers);
2019 return error;
2020 }
2021
2022 watcher->flags &= ~WATCHER_DROPPED_EVENTS;
2023 }
2024
2025 skipped = 0;
2026
2027 lck_rw_lock_shared(&event_handling_lock);
2028 while (uio_resid(uio) > 0 && watcher->rd != watcher->wr) {
2029 if (watcher->flags & WATCHER_CLOSING) {
2030 break;
2031 }
2032
2033 //
2034 // check if the event is something of interest to us
2035 // (since it may have been recycled/reused and changed
2036 // its type or which device it is for)
2037 //
2038 kfse = watcher->event_queue[watcher->rd];
2039 if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
2040 break;
2041 }
2042
2043 if (watcher->event_list[kfse->type] == FSE_REPORT) {
2044 if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) &&
2045 kfse->type != FSE_DOCID_CREATED &&
2046 kfse->type != FSE_DOCID_CHANGED &&
2047 kfse->type != FSE_ACTIVITY &&
2048 is_ignored_directory(kfse->regular_event.str)) {
2049 // If this is not an Apple System Service, skip specified directories
2050 // radar://12034844
2051 error = 0;
2052 skipped = 1;
2053 } else {
2054 skipped = 0;
2055 if (last_event_ptr == kfse) {
2056 last_event_ptr = NULL;
2057 last_event_type = -1;
2058 last_coalesced_time = 0;
2059 }
2060 error = copy_out_kfse(watcher, kfse, uio);
2061 if (error != 0) {
2062 // if an event won't fit or we encountered an error
2063 // while copying it out, back up to the last full
2064 // event and bail out. if the error was ENOENT
2065 // we can continue regular processing; otherwise
2066 // unlock things and return.
2067 uio_setresid(uio, last_full_event_resid);
2068 if (error != ENOENT) {
2069 lck_rw_unlock_shared(&event_handling_lock);
2070 error = 0;
2071 goto get_out;
2072 }
2073 }
2074
2075 last_full_event_resid = uio_resid(uio);
2076 }
2077 }
2078
2079 watcher->event_queue[watcher->rd] = NULL;
2080 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
2081 OSSynchronizeIO();
2082 release_event_ref(kfse);
2083 }
2084 lck_rw_unlock_shared(&event_handling_lock);
2085
2086 if (skipped && error == 0) {
2087 goto restart_watch;
2088 }
2089
2090get_out:
2091 OSAddAtomic(-1, &watcher->num_readers);
2092
2093 return error;
2094}
2095
2096
2097//
2098// Shoo watchers away from a volume that's about to be unmounted
2099// (so that it can be cleanly unmounted).
2100//
2101void
2102fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
2103{
2104#if !defined(XNU_TARGET_OS_OSX)
2105 dev_t dev = mp->mnt_vfsstat.f_fsid.val[0];
2106 int error, waitcount = 0;
2107 struct timespec ts = {.tv_sec = 1, .tv_nsec = 0};
2108
2109 // wait for any other pending unmounts to complete
2110 lock_watch_table();
2111 while (fsevent_unmount_dev != 0) {
2112 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
2113 if (error == EWOULDBLOCK) {
2114 error = 0;
2115 }
2116 if (!error && (++waitcount >= 10)) {
2117 error = EWOULDBLOCK;
2118 printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
2119 }
2120 if (error) {
2121 // there's a problem, bail out
2122 unlock_watch_table();
2123 return;
2124 }
2125 }
2126 if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == 0) {
2127 // nobody watching for unmount pending events
2128 unlock_watch_table();
2129 return;
2130 }
2131 // this is now the current unmount pending
2132 fsevent_unmount_dev = dev;
2133 fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
2134 unlock_watch_table();
2135
2136 // send an event to notify the watchers that they need to get off the mount
2137 error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
2138
2139 // wait for acknowledgment(s) (give up if it takes too long)
2140 lock_watch_table();
2141 waitcount = 0;
2142 while (fsevent_unmount_dev == dev) {
2143 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
2144 if (error == EWOULDBLOCK) {
2145 error = 0;
2146 }
2147 if (!error && (++waitcount >= 10)) {
2148 error = EWOULDBLOCK;
2149 printf("unmount pending ack timeout for dev %d\n", dev);
2150 }
2151 if (error) {
2152 // there's a problem, bail out
2153 if (fsevent_unmount_dev == dev) {
2154 fsevent_unmount_dev = 0;
2155 fsevent_unmount_ack_count = 0;
2156 }
2157 wakeup((caddr_t)&fsevent_unmount_dev);
2158 break;
2159 }
2160 }
2161 unlock_watch_table();
2162#endif /* ! XNU_TARGET_OS_OSX */
2163}
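
/*
 * The unmount-pending handshake, pieced together from the code above
 * and the FSEVENTS_UNMOUNT_PENDING_ACK ioctl below (a sketch of the
 * intended flow):
 *
 *   1. fsevent_unmount() records the device in fsevent_unmount_dev and
 *      posts FSE_UNMOUNT_PENDING with FSE_ARG_DEV.
 *   2. Each watcher registered for FSE_UNMOUNT_PENDING is expected to
 *      release its references on the volume and then issue the
 *      FSEVENTS_UNMOUNT_PENDING_ACK ioctl with that dev_t.
 *   3. When fsevent_unmount_ack_count drops to zero (or the roughly
 *      ten second timeout above expires), the unmount proceeds.
 */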
2164
2165
2166//
2167// /dev/fsevents device code
2168//
2169static int fsevents_installed = 0;
2170
2171typedef struct fsevent_handle {
2172 UInt32 flags;
2173 SInt32 active;
2174 fs_event_watcher *watcher;
2175 struct klist knotes;
2176 struct selinfo si;
2177} fsevent_handle;
2178
2179#define FSEH_CLOSING 0x0001
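
/*
 * One fsevent_handle is allocated per cloned /dev/fsevents descriptor
 * (see the FSEVENTS_CLONE_* handling in fseventsioctl() below) and is
 * torn down in fseventsf_close().  "active" counts in-flight ioctls so
 * that close can wait them out; FSEH_CLOSING tells late arrivals to
 * bail out early.
 */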
2180
2181static int
2182fseventsf_read(struct fileproc *fp, struct uio *uio,
2183 __unused int flags, __unused vfs_context_t ctx)
2184{
2185 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2186 int error;
2187
2188 error = fmod_watch(fseh->watcher, uio);
2189
2190 return error;
2191}
2192
2193
2194#pragma pack(push, 4)
2195typedef struct fsevent_dev_filter_args32 {
2196 uint32_t num_devices;
2197 user32_addr_t devices;
2198} fsevent_dev_filter_args32;
2199typedef struct fsevent_dev_filter_args64 {
2200 uint32_t num_devices;
2201 user64_addr_t devices;
2202} fsevent_dev_filter_args64;
2203#pragma pack(pop)
2204
2205#define FSEVENTS_DEVICE_FILTER_32 _IOW('s', 100, fsevent_dev_filter_args32)
2206#define FSEVENTS_DEVICE_FILTER_64 _IOW('s', 100, fsevent_dev_filter_args64)
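
/*
 * A rough sketch of how a client might set a device filter, i.e. the
 * list of devices it does NOT want events for ("watcher_fd" is assumed
 * to be a cloned /dev/fsevents descriptor and dev1/dev2 are example
 * values; a real client would use the matching definitions from
 * <sys/fsevents.h>):
 *
 *     dev_t ignore[] = { dev1, dev2 };
 *     fsevent_dev_filter_args64 args = {
 *         .num_devices = 2,
 *         .devices = (user64_addr_t)(uintptr_t)ignore,
 *     };
 *     ioctl(watcher_fd, FSEVENTS_DEVICE_FILTER_64, &args);
 *
 * Passing num_devices == 0 clears the filter; more than 256 devices is
 * rejected with EINVAL (see below).
 */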
2207
2208static int
2209fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
2210{
2211 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2212 int ret = 0;
2213 fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
2214
2215 OSAddAtomic(1, &fseh->active);
2216 if (fseh->flags & FSEH_CLOSING) {
2217 OSAddAtomic(-1, &fseh->active);
2218 return 0;
2219 }
2220
2221 switch (cmd) {
2222 case FIONBIO:
2223 case FIOASYNC:
2224 break;
2225
2226 case FSEVENTS_WANT_COMPACT_EVENTS: {
2227 fseh->watcher->flags |= WATCHER_WANTS_COMPACT_EVENTS;
2228 break;
2229 }
2230
2231 case FSEVENTS_WANT_EXTENDED_INFO: {
2232 fseh->watcher->flags |= WATCHER_WANTS_EXTENDED_INFO;
2233 break;
2234 }
2235
2236 case FSEVENTS_GET_CURRENT_ID: {
2237 *(uint64_t *)data = fseh->watcher->max_event_id;
2238 ret = 0;
2239 break;
2240 }
2241
2242 case FSEVENTS_DEVICE_FILTER_32: {
2243 if (proc_is64bit(vfs_context_proc(ctx))) {
2244 ret = EINVAL;
2245 break;
2246 }
2247 fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
2248
2249 devfilt_args = &_devfilt_args;
2250 memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
2251 devfilt_args->num_devices = devfilt_args32->num_devices;
2252 devfilt_args->devices = CAST_USER_ADDR_T(devfilt_args32->devices);
2253 goto handle_dev_filter;
2254 }
2255
2256 case FSEVENTS_DEVICE_FILTER_64:
2257 if (!proc_is64bit(vfs_context_proc(ctx))) {
2258 ret = EINVAL;
2259 break;
2260 }
2261 devfilt_args = (fsevent_dev_filter_args64 *)data;
2262
2263handle_dev_filter:
2264 {
2265 int new_num_devices, old_num_devices = 0;
2266 dev_t *devices_not_to_watch, *tmp = NULL;
2267
2268 if (devfilt_args->num_devices > 256) {
2269 ret = EINVAL;
2270 break;
2271 }
2272
2273 new_num_devices = devfilt_args->num_devices;
2274 if (new_num_devices == 0) {
2275 lock_watch_table();
2276
2277 tmp = fseh->watcher->devices_not_to_watch;
2278 fseh->watcher->devices_not_to_watch = NULL;
2279 old_num_devices = fseh->watcher->num_devices;
2280 fseh->watcher->num_devices = new_num_devices;
2281
2282 unlock_watch_table();
2283 kfree_data(tmp, old_num_devices * sizeof(dev_t));
2284 break;
2285 }
2286
2287 devices_not_to_watch = kalloc_data(new_num_devices * sizeof(dev_t), Z_WAITOK);
2288 if (devices_not_to_watch == NULL) {
2289 ret = ENOMEM;
2290 break;
2291 }
2292
2293 ret = copyin((user_addr_t)devfilt_args->devices,
2294 (void *)devices_not_to_watch,
2295 new_num_devices * sizeof(dev_t));
2296 if (ret) {
2297 kfree_data(devices_not_to_watch, new_num_devices * sizeof(dev_t));
2298 break;
2299 }
2300
2301 lock_watch_table();
2302 old_num_devices = fseh->watcher->num_devices;
2303 fseh->watcher->num_devices = new_num_devices;
2304 tmp = fseh->watcher->devices_not_to_watch;
2305 fseh->watcher->devices_not_to_watch = devices_not_to_watch;
2306 unlock_watch_table();
2307
2308 kfree_data(tmp, old_num_devices * sizeof(dev_t));
2309
2310 break;
2311 }
2312
2313 case FSEVENTS_UNMOUNT_PENDING_ACK: {
2314 lock_watch_table();
2315 dev_t dev = *(dev_t *)data;
2316 if (fsevent_unmount_dev == dev) {
2317 if (--fsevent_unmount_ack_count <= 0) {
2318 fsevent_unmount_dev = 0;
2319 wakeup((caddr_t)&fsevent_unmount_dev);
2320 }
2321 } else {
2322 printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
2323 ret = EINVAL;
2324 }
2325 unlock_watch_table();
2326 break;
2327 }
2328
2329 default:
2330 ret = EINVAL;
2331 break;
2332 }
2333
2334 OSAddAtomic(-1, &fseh->active);
2335 return ret;
2336}
2337
2338
2339static int
2340fseventsf_select(struct fileproc *fp, int which, __unused void *wql, vfs_context_t ctx)
2341{
2342 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2343 int ready = 0;
2344
2345 if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) {
2346 return 0;
2347 }
2348
2349
2350 // if there's nothing in the queue, we're not ready
2351 if (fseh->watcher->rd != fseh->watcher->wr) {
2352 ready = 1;
2353 }
2354
2355 if (!ready) {
2356 lock_watch_table();
2357 selrecord(vfs_context_proc(ctx), &fseh->si, wql);
2358 unlock_watch_table();
2359 }
2360
2361 return ready;
2362}
2363
2364
2365#if NOTUSED
2366static int
2367fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
2368{
2369 return ENOTSUP;
2370}
2371#endif
2372
2373static int
2374fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
2375{
2376 fsevent_handle *fseh = (struct fsevent_handle *)fg_get_data(fg);
2377 fs_event_watcher *watcher;
2378
2379 OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
2380 while (OSAddAtomic(0, &fseh->active) > 0) {
2381 tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", 1);
2382 }
2383
2384 watcher = fseh->watcher;
2385 fg_set_data(fg, NULL);
2386 fseh->watcher = NULL;
2387
2388 remove_watcher(watcher);
2389 selthreadclear(&fseh->si);
2390 kfree_type(fsevent_handle, fseh);
2391
2392 return 0;
2393}
2394
2395static void
2396filt_fsevent_detach(struct knote *kn)
2397{
2398 fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2399
2400 lock_watch_table();
2401
2402 KNOTE_DETACH(&fseh->knotes, kn);
2403
2404 unlock_watch_table();
2405}
2406
2407/*
2408 * Determine whether this knote should be active
2409 *
2410 * This is kind of subtle.
2411 * --First, notice if the vnode has been revoked: if so, override the hint
2412 * --EVFILT_READ knotes are checked no matter what the hint is
2413 * --Other knotes activate based on hint.
2414 * --If hint is revoke, set special flags and activate
2415 */
2416static int
2417filt_fsevent_common(struct knote *kn, struct kevent_qos_s *kev, long hint)
2418{
2419 fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2420 int activate = 0;
2421 int32_t rd, wr, amt;
2422 int64_t data = 0;
2423
2424 if (NOTE_REVOKE == hint) {
2425 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2426 activate = 1;
2427 }
2428
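 // how many events are queued, accounting for ring-buffer wraparound
 // (rd > wr means the write index has already wrapped past the end)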
2429 rd = fseh->watcher->rd;
2430 wr = fseh->watcher->wr;
2431 if (rd <= wr) {
2432 amt = wr - rd;
2433 } else {
2434 amt = fseh->watcher->eventq_size - (rd - wr);
2435 }
2436
2437 switch (kn->kn_filter) {
2438 case EVFILT_READ:
2439 data = amt;
2440 activate = (data != 0);
2441 break;
2442 case EVFILT_VNODE:
2443 /* Check events this note matches against the hint */
2444 if (kn->kn_sfflags & hint) {
2445 kn->kn_fflags |= (uint32_t)hint; /* Set which event occurred */
2446 }
2447 if (kn->kn_fflags != 0) {
2448 activate = 1;
2449 }
2450 break;
2451 default:
2452 // nothing to do...
2453 break;
2454 }
2455
2456 if (activate && kev) {
2457 knote_fill_kevent(kn, kev, data);
2458 }
2459 return activate;
2460}
2461
2462static int
2463filt_fsevent(struct knote *kn, long hint)
2464{
2465 return filt_fsevent_common(kn, NULL, hint);
2466}
2467
2468static int
2469filt_fsevent_touch(struct knote *kn, struct kevent_qos_s *kev)
2470{
2471 int res;
2472
2473 lock_watch_table();
2474
2475 /* accept new fflags/data as saved */
2476 kn->kn_sfflags = kev->fflags;
2477 kn->kn_sdata = kev->data;
2478
2479 /* restrict the current results to the (smaller?) set of new interest */
2480 /*
2481 * For compatibility with previous implementations, we leave kn_fflags
2482 * as they were before.
2483 */
2484 //kn->kn_fflags &= kev->fflags;
2485
2486 /* determine if the filter is now fired */
2487 res = filt_fsevent_common(kn, NULL, 0);
2488
2489 unlock_watch_table();
2490
2491 return res;
2492}
2493
2494static int
2495filt_fsevent_process(struct knote *kn, struct kevent_qos_s *kev)
2496{
2497 int res;
2498
2499 lock_watch_table();
2500
2501 res = filt_fsevent_common(kn, kev, 0);
2502
2503 unlock_watch_table();
2504
2505 return res;
2506}
2507
2508SECURITY_READ_ONLY_EARLY(struct filterops) fsevent_filtops = {
2509 .f_isfd = 1,
2510 .f_attach = NULL,
2511 .f_detach = filt_fsevent_detach,
2512 .f_event = filt_fsevent,
2513 .f_touch = filt_fsevent_touch,
2514 .f_process = filt_fsevent_process,
2515};
2516
2517static int
2518fseventsf_kqfilter(struct fileproc *fp, struct knote *kn,
2519 __unused struct kevent_qos_s *kev)
2520{
2521 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2522 int res;
2523
2524 kn->kn_filtid = EVFILTID_FSEVENT;
2525 knote_kn_hook_set_raw(kn, (void *) fseh);
2526
2527 lock_watch_table();
2528
2529 KNOTE_ATTACH(&fseh->knotes, kn);
2530
2531 /* check to see if it is fired already */
2532 res = filt_fsevent_common(kn, NULL, 0);
2533
2534 unlock_watch_table();
2535
2536 return res;
2537}
2538
2539
2540static int
2541fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2542{
2543 int counter = 0;
2544 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2545
2546 // if there are people still waiting, sleep for 10ms to
2547 // let them clean up and get out of there. however we
2548 // also don't want to get stuck forever so if they don't
2549 // exit after 5 seconds we're tearing things down anyway.
2550 while (fseh->watcher->blockers && counter++ < 500) {
2551 // issue wakeup in case anyone is blocked waiting for an event
2552 // do this each time we wakeup in case the blocker missed
2553 // the wakeup due to the unprotected test of WATCHER_CLOSING
2554 // and decision to tsleep in fmod_watch... this bit of
2555 // latency is a decent tradeoff against not having to
2556 // take and drop a lock in fmod_watch
2557 lock_watch_table();
2558 fsevents_wakeup(fseh->watcher);
2559 unlock_watch_table();
2560
2561 tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1);
2562 }
2563
2564 return 0;
2565}
2566
2567
2568static int
2569fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2570{
2571 if (!kauth_cred_issuser(kauth_cred_get())) {
2572 return EPERM;
2573 }
2574
2575 return 0;
2576}
2577
2578static int
2579fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2580{
2581 return 0;
2582}
2583
2584static int
2585fseventsread(__unused dev_t dev, __unused struct uio *uio, __unused int ioflag)
2586{
2587 return EIO;
2588}
2589
2590
2591static int
2592parse_buffer_and_add_events(const char *buffer, size_t bufsize, vfs_context_t ctx, size_t *remainder)
2593{
2594 const fse_info *finfo, *dest_finfo;
2595 const char *path, *ptr, *dest_path, *event_start = buffer;
2596 size_t path_len, dest_path_len;
2597 int type, err = 0;
2598
2599
2600 ptr = buffer;
2601 while ((ptr + sizeof(int) + sizeof(fse_info) + 1) < buffer + bufsize) {
2602 type = *(const int *)ptr;
2603 if (type < 0 || type == FSE_ACCESS_GRANTED || type == FSE_ACTIVITY ||
2604 type >= FSE_MAX_EVENTS) {
2605 err = EINVAL;
2606 break;
2607 }
2608
2609 ptr += sizeof(int);
2610
2611 finfo = (const fse_info *)ptr;
2612 ptr += sizeof(fse_info);
2613
2614 path = ptr;
2615 while (ptr < buffer + bufsize && *ptr != '\0') {
2616 ptr++;
2617 }
2618
2619 if (ptr >= buffer + bufsize) {
2620 break;
2621 }
2622
2623 ptr++; // advance over the trailing '\0'
2624
2625 path_len = ptr - path;
2626
2627 if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
2628 event_start = ptr; // record where the next event starts
2629
2630 err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
2631 if (err) {
2632 break;
2633 }
2634 continue;
2635 }
2636
2637 //
2638 // if we're here we have to slurp up the destination finfo
2639 // and path so that we can pass them to the add_fsevent()
2640 // call. basically it's a copy of the above code.
2641 //
2642 dest_finfo = (const fse_info *)ptr;
2643 ptr += sizeof(fse_info);
2644
2645 dest_path = ptr;
2646 while (ptr < buffer + bufsize && *ptr != '\0') {
2647 ptr++;
2648 }
2649
2650 if (ptr >= buffer + bufsize) {
2651 break;
2652 }
2653
2654 ptr++; // advance over the trailing '\0'
2655 event_start = ptr; // record where the next event starts
2656
2657 dest_path_len = ptr - dest_path;
2658 //
2659 // If the destination inode number is non-zero, generate a rename
2660 // with both source and destination FSE_ARG_FINFO. Otherwise generate
2661 // a rename with only one FSE_ARG_FINFO. If you need to inject an
2662 // exchange with an inode of zero, just make that inode (and its path)
2663 // come in as the first one, not the second.
2664 //
2665 if (dest_finfo->ino) {
2666 err = add_fsevent(type, ctx,
2667 FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2668 FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
2669 FSE_ARG_DONE);
2670 } else {
2671 err = add_fsevent(type, ctx,
2672 FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2673 FSE_ARG_STRING, dest_path_len, dest_path,
2674 FSE_ARG_DONE);
2675 }
2676
2677 if (err) {
2678 break;
2679 }
2680 }
2681
2682 // if the last event wasn't complete, the remainder is everything
2683 // from the start of that partial event to the end of the buffer.
2684 //
2685 *remainder = (long)((buffer + bufsize) - event_start);
2686
2687 return err;
2688}
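
/*
 * For reference, the buffer parsed above is a packed stream of records
 * laid out as follows (derived from the parsing code, not a formal
 * spec):
 *
 *     int32     type        (FSE_ACCESS_GRANTED and FSE_ACTIVITY rejected)
 *     fse_info  finfo
 *     char      path[]      (NUL terminated)
 *   and, only for FSE_RENAME / FSE_EXCHANGE / FSE_CLONE:
 *     fse_info  dest_finfo
 *     char      dest_path[] (NUL terminated)
 */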
2689
2690
2691//
2692// Note: this buffer size can never be less than
2693// 2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
2694// because that is the max size of a single event.
2695// It is 4k as a convenient size; making it
2696// smaller is not a good idea.
2697//
2698#define WRITE_BUFFER_SIZE 4096
2699static char *write_buffer = NULL;
2700
2701static int
2702fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
2703{
2704 int error = 0;
2705 size_t count, offset = 0, remainder = 0;
2706 vfs_context_t ctx = vfs_context_current();
2707
2708 lck_mtx_lock(&event_writer_lock);
2709
2710 if (write_buffer == NULL) {
2711 write_buffer = zalloc_permanent(WRITE_BUFFER_SIZE, ZALIGN_64);
2712 }
2713
2714 //
2715 // this loop copies in and processes the events written.
2716 // it takes care to copy in reasonable size chunks and
2717 // process them. if there is an event that spans a chunk
2718 // boundary we're careful to copy those bytes down to the
2719 // beginning of the buffer and read the next chunk in just
2720 // after it.
2721 //
2722 while (uio_resid(uio)) {
2723 count = MIN(WRITE_BUFFER_SIZE - offset, (size_t)uio_resid(uio));
2724
2725 error = uiomove(write_buffer + offset, (int)count, uio);
2726 if (error) {
2727 break;
2728 }
2729
2730 error = parse_buffer_and_add_events(write_buffer, offset + count, ctx, &remainder);
2731 if (error) {
2732 break;
2733 }
2734
2735 //
2736 // if there's any remainder, copy it down to the beginning
2737 // of the buffer so that it will get processed the next time
2738 // through the loop. note that the remainder always starts
2739 // at an event boundary.
2740 //
2741 memmove(write_buffer, (write_buffer + count + offset) - remainder, remainder);
2742 offset = remainder;
2743 }
2744
2745 lck_mtx_unlock(&event_writer_lock);
2746
2747 return error;
2748}
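
/*
 * Writes to /dev/fsevents inject synthetic events using the format
 * parsed by parse_buffer_and_add_events() above.  FSE_ACCESS_GRANTED
 * and FSE_ACTIVITY events cannot be injected this way (the parser
 * rejects them), and opening the device requires super-user
 * credentials (see fseventsopen() above).
 */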
2749
2750
2751static const struct fileops fsevents_fops = {
2752 .fo_type = DTYPE_FSEVENTS,
2753 .fo_read = fseventsf_read,
2754 .fo_write = fo_no_write,
2755 .fo_ioctl = fseventsf_ioctl,
2756 .fo_select = fseventsf_select,
2757 .fo_close = fseventsf_close,
2758 .fo_kqfilter = fseventsf_kqfilter,
2759 .fo_drain = fseventsf_drain,
2760};
2761
2762typedef struct fsevent_clone_args32 {
2763 user32_addr_t event_list;
2764 int32_t num_events;
2765 int32_t event_queue_depth;
2766 user32_addr_t fd;
2767} fsevent_clone_args32;
2768
2769typedef struct fsevent_clone_args64 {
2770 user64_addr_t event_list;
2771 int32_t num_events;
2772 int32_t event_queue_depth;
2773 user64_addr_t fd;
2774} fsevent_clone_args64;
2775
2776#define FSEVENTS_CLONE_32 _IOW('s', 1, fsevent_clone_args32)
2777#define FSEVENTS_CLONE_64 _IOW('s', 1, fsevent_clone_args64)
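
/*
 * A rough sketch of the userspace flow this ioctl supports (the
 * unsuffixed names come from <sys/fsevents.h>; treat this as an
 * illustration rather than a reference, and note the queue depth of
 * 1024 is just an example):
 *
 *     int dev_fd = open("/dev/fsevents", O_RDONLY);   // root only
 *     int8_t events[FSE_MAX_EVENTS];
 *     // mark each event type FSE_REPORT or FSE_IGNORE ...
 *     int32_t watcher_fd = -1;
 *     fsevent_clone_args args = {
 *         .event_list = events,
 *         .num_events = FSE_MAX_EVENTS,
 *         .event_queue_depth = 1024,
 *         .fd = &watcher_fd,
 *     };
 *     ioctl(dev_fd, FSEVENTS_CLONE, &args);
 *     // fsevents are then read() from watcher_fd
 */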
2778
2779static int
2780fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
2781{
2782 struct fileproc *f;
2783 int fd, error;
2784 fsevent_handle *fseh = NULL;
2785 fsevent_clone_args64 *fse_clone_args, _fse_clone;
2786 int8_t *event_list;
2787 int is64bit = proc_is64bit(p);
2788
2789 switch (cmd) {
2790 case FSEVENTS_CLONE_32: {
2791 if (is64bit) {
2792 return EINVAL;
2793 }
2794 fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
2795
2796 fse_clone_args = &_fse_clone;
2797 memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
2798
2799 fse_clone_args->event_list = CAST_USER_ADDR_T(args32->event_list);
2800 fse_clone_args->num_events = args32->num_events;
2801 fse_clone_args->event_queue_depth = args32->event_queue_depth;
2802 fse_clone_args->fd = CAST_USER_ADDR_T(args32->fd);
2803 goto handle_clone;
2804 }
2805
2806 case FSEVENTS_CLONE_64:
2807 if (!is64bit) {
2808 return EINVAL;
2809 }
2810 fse_clone_args = (fsevent_clone_args64 *)data;
2811
2812handle_clone:
2813 if (fse_clone_args->num_events <= 0 || fse_clone_args->num_events > 4096) {
2814 return EINVAL;
2815 }
2816
2817 fseh = kalloc_type(fsevent_handle, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2818
2819 klist_init(&fseh->knotes);
2820
2821 event_list = kalloc_data(fse_clone_args->num_events * sizeof(int8_t), Z_WAITOK);
2822 if (event_list == NULL) {
2823 kfree_type(fsevent_handle, fseh);
2824 return ENOMEM;
2825 }
2826
2827 error = copyin((user_addr_t)fse_clone_args->event_list,
2828 (void *)event_list,
2829 fse_clone_args->num_events * sizeof(int8_t));
2830 if (error) {
2831 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2832 kfree_type(fsevent_handle, fseh);
2833 return error;
2834 }
2835
2836 /*
2837 * Lock down the user's "fd" result buffer so it's safe
2838 * to hold locks while we copy it out.
2839 */
2840 error = vslock((user_addr_t)fse_clone_args->fd,
2841 sizeof(int32_t));
2842 if (error) {
2843 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2844 kfree_type(fsevent_handle, fseh);
2845 return error;
2846 }
2847
2848 error = add_watcher(event_list,
2849 fse_clone_args->num_events,
2850 fse_clone_args->event_queue_depth,
2851 &fseh->watcher,
2852 fseh);
2853 if (error) {
2854 vsunlock((user_addr_t)fse_clone_args->fd,
2855 sizeof(int32_t), 0);
2856 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2857 kfree_type(fsevent_handle, fseh);
2858 return error;
2859 }
2860
2861 fseh->watcher->fseh = fseh;
2862
2863 error = falloc(p, &f, &fd);
2864 if (error) {
2865 remove_watcher(fseh->watcher);
2866 vsunlock((user_addr_t)fse_clone_args->fd,
2867 sizeof(int32_t), 0);
2868 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2869 kfree_type(fsevent_handle, fseh);
2870 return error;
2871 }
2872 proc_fdlock(p);
2873 f->fp_glob->fg_flag = FREAD | FWRITE;
2874 f->fp_glob->fg_ops = &fsevents_fops;
2875 fp_set_data(f, fseh);
2876
2877 /*
2878 * We can safely hold the proc_fdlock across this copyout()
2879 * because of the vslock() call above. The vslock() call
2880 * also ensures that we will never get an error, so assert
2881 * this.
2882 */
2883 error = copyout((void *)&fd, (user_addr_t)fse_clone_args->fd, sizeof(int32_t));
2884 assert(error == 0);
2885
2886 procfdtbl_releasefd(p, fd, NULL);
2887 fp_drop(p, fd, f, 1);
2888 proc_fdunlock(p);
2889
2890 vsunlock((user_addr_t)fse_clone_args->fd,
2891 sizeof(int32_t), 1);
2892 break;
2893
2894 default:
2895 error = EINVAL;
2896 break;
2897 }
2898
2899 return error;
2900}
2901
2902static void
2903fsevents_wakeup(fs_event_watcher *watcher)
2904{
2905 selwakeup(&watcher->fseh->si);
2906 KNOTE(&watcher->fseh->knotes, NOTE_WRITE | NOTE_NONE);
2907 wakeup((caddr_t)watcher);
2908}
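
/*
 * Note: a wakeup pokes all three ways a client may be waiting:
 * select()/poll() via selwakeup(), kqueue via KNOTE(), and readers
 * blocked in fmod_watch() via wakeup() on the watcher itself.
 */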
2909
2910
2911/*
2912 * A struct describing which functions will get invoked for certain
2913 * actions.
2914 */
2915static const struct cdevsw fsevents_cdevsw =
2916{
2917 .d_open = fseventsopen,
2918 .d_close = fseventsclose,
2919 .d_read = fseventsread,
2920 .d_write = fseventswrite,
2921 .d_ioctl = fseventsioctl,
2922 .d_stop = eno_stop,
2923 .d_reset = eno_reset,
2924 .d_select = eno_select,
2925 .d_mmap = eno_mmap,
2926 .d_strategy = eno_strat,
2927 .d_reserved_1 = eno_getc,
2928 .d_reserved_2 = eno_putc,
2929};
2930
2931
2932/*
2933 * Called to initialize our device,
2934 * and to register ourselves with devfs
2935 */
2936
2937void
2938fsevents_init(void)
2939{
2940 int ret;
2941
2942 if (fsevents_installed) {
2943 return;
2944 }
2945
2946 fsevents_installed = 1;
2947
2948 ret = cdevsw_add(-1, &fsevents_cdevsw);
2949 if (ret < 0) {
2950 fsevents_installed = 0;
2951 return;
2952 }
2953
2954 devfs_make_node(makedev(ret, 0), DEVFS_CHAR,
2955 UID_ROOT, GID_WHEEL, 0644, "fsevents");
2956
2957 fsevents_internal_init();
2958}
2959
2960
2961char *
2962get_pathbuff(void)
2963{
2964 return zalloc(ZV_NAMEI);
2965}
2966
2967void
2968release_pathbuff(char *path)
2969{
2970 if (path == NULL) {
2971 return;
2972 }
2973 zfree(ZV_NAMEI, path);
2974}
2975
2976int
2977get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx)
2978{
2979 struct vnode_attr va;
2980
2981 VATTR_INIT(&va);
2982 VATTR_WANTED(&va, va_fsid);
2983 va.va_vaflags |= VA_REALFSID;
2984 VATTR_WANTED(&va, va_fileid);
2985 VATTR_WANTED(&va, va_mode);
2986 VATTR_WANTED(&va, va_uid);
2987 VATTR_WANTED(&va, va_document_id);
2988 if (vp->v_flag & VISHARDLINK) {
2989 if (vp->v_type == VDIR) {
2990 VATTR_WANTED(&va, va_dirlinkcount);
2991 } else {
2992 VATTR_WANTED(&va, va_nlink);
2993 }
2994 }
2995
2996 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
2997 memset(fse, 0, sizeof(fse_info));
2998 return -1;
2999 }
3000
3001 return vnode_get_fse_info_from_vap(vp, fse, &va);
3002}
3003
3004int
3005vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap)
3006{
3007 fse->ino = (ino64_t)vap->va_fileid;
3008 fse->dev = (dev_t)vap->va_fsid;
3009 fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | vap->va_mode;
3010 fse->uid = (uid_t)vap->va_uid;
3011 fse->document_id = (uint32_t)vap->va_document_id;
3012 if (vp->v_flag & VISHARDLINK) {
3013 fse->mode |= FSE_MODE_HLINK;
3014 if (vp->v_type == VDIR) {
3015 fse->nlink = (uint64_t)vap->va_dirlinkcount;
3016 } else {
3017 fse->nlink = (uint64_t)vap->va_nlink;
3018 }
3019 }
3020
3021 return 0;
3022}
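
/*
 * Typical usage by a filesystem generating an event (a minimal sketch;
 * "vp", "path", and "ctx" stand for whatever vnode, path buffer, and
 * context the caller already has in hand):
 *
 *     fse_info finfo;
 *     if (need_fsevent(FSE_CONTENT_MODIFIED, vp) &&
 *         get_fse_info(vp, &finfo, ctx) == 0) {
 *         add_fsevent(FSE_CONTENT_MODIFIED, ctx,
 *             FSE_ARG_STRING, (int)strlen(path) + 1, path,
 *             FSE_ARG_FINFO, &finfo, FSE_ARG_DONE);
 *     }
 */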
3023
3024void
3025create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap)
3026{
3027 int fsevent_type = FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic
3028 char pathbuf[MAXPATHLEN];
3029 fse_info fse;
3030
3031
3032 if (kevents & VNODE_EVENT_DELETE) {
3033 fsevent_type = FSE_DELETE;
3034 } else if (kevents & (VNODE_EVENT_EXTEND | VNODE_EVENT_WRITE)) {
3035 fsevent_type = FSE_CONTENT_MODIFIED;
3036 } else if (kevents & VNODE_EVENT_LINK) {
3037 fsevent_type = FSE_CREATE_FILE;
3038 } else if (kevents & VNODE_EVENT_RENAME) {
3039 fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info;
3040 } else if (kevents & (VNODE_EVENT_FILE_CREATED | VNODE_EVENT_FILE_REMOVED | VNODE_EVENT_DIR_CREATED | VNODE_EVENT_DIR_REMOVED)) {
3041 fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it
3042 } else { // a catch all for VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else
3043 fsevent_type = FSE_STAT_CHANGED;
3044 }
3045
3046 // printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)");
3047
3048 fse.dev = vap->va_fsid;
3049 fse.ino = vap->va_fileid;
3050 fse.mode = vnode_vttoif(vnode_vtype(vp)) | (uint32_t)vap->va_mode;
3051 if (vp->v_flag & VISHARDLINK) {
3052 fse.mode |= FSE_MODE_HLINK;
3053 if (vp->v_type == VDIR) {
3054 fse.nlink = vap->va_dirlinkcount;
3055 } else {
3056 fse.nlink = vap->va_nlink;
3057 }
3058 }
3059
3060 if (vp->v_type == VDIR) {
3061 fse.mode |= FSE_REMOTE_DIR_EVENT;
3062 }
3063
3064
3065 fse.uid = vap->va_uid;
3066 fse.document_id = vap->va_document_id;
3067
3068 len = sizeof(pathbuf);
3069 if (vn_getpath_no_firmlink(vp, pathbuf, &len) == 0) {
3070 add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE);
3071 }
3072 return;
3073}
3074
3075#else /* CONFIG_FSE */
3076
3077#include <sys/fsevents.h>
3078
3079/*
3080 * The get_pathbuff and release_pathbuff routines are used in places not
3081 * related to fsevents, and it's a handy abstraction, so define trivial
3082 * versions that don't cache a pool of buffers. This way, we don't have
3083 * to conditionalize the callers, and they still get the advantage of the
3084 * pool of buffers if CONFIG_FSE is turned on.
3085 */
3086char *
3087get_pathbuff(void)
3088{
3089 return zalloc(ZV_NAMEI);
3090}
3091
3092void
3093release_pathbuff(char *path)
3094{
3095 zfree(ZV_NAMEI, path);
3096}
3097
3098int
3099add_fsevent(__unused int type, __unused vfs_context_t ctx, ...)
3100{
3101 return 0;
3102}
3103
3104int
3105need_fsevent(__unused int type, __unused vnode_t vp)
3106{
3107 return 0;
3108}
3109
3110#endif /* CONFIG_FSE */
3111