vfs_fsevents.c source code [xnu/bsd/vfs/vfs_fsevents.c]

1	/*
2	* Copyright (c) 2004-2014 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	#include <stdarg.h>
29	#include <sys/param.h>
30	#include <sys/systm.h>
31	#include <sys/event.h> // for kqueue related stuff
32	#include <sys/fsevents.h>
33
34	#if CONFIG_FSE
35	#include <sys/namei.h>
36	#include <sys/filedesc.h>
37	#include <sys/kernel.h>
38	#include <sys/file_internal.h>
39	#include <sys/stat.h>
40	#include <sys/vnode_internal.h>
41	#include <sys/mount_internal.h>
42	#include <sys/proc_internal.h>
43	#include <sys/kauth.h>
44	#include <sys/uio.h>
45	#include <sys/malloc.h>
46	#include <sys/dirent.h>
47	#include <sys/attr.h>
48	#include <sys/sysctl.h>
49	#include <sys/ubc.h>
50	#include <machine/cons.h>
51	#include <miscfs/specfs/specdev.h>
52	#include <miscfs/devfs/devfs.h>
53	#include <sys/filio.h>
54	#include <kern/locks.h>
55	#include <libkern/OSAtomic.h>
56	#include <kern/zalloc.h>
57	#include <mach/mach_time.h>
58	#include <kern/thread_call.h>
59	#include <kern/clock.h>
60
61	#include <security/audit/audit.h>
62	#include <bsm/audit_kevents.h>
63
64	#include <pexpert/pexpert.h>
65	#include <libkern/section_keywords.h>
66
67	typedef struct kfs_event {
68	LIST_ENTRY(kfs_event) kevent_list;
69	int16_t type; // type code of this event
70	u_int16_t flags, // per-event flags
71	len; // the length of the path in "str"
72	int32_t refcount; // number of clients referencing this
73	pid_t pid; // pid of the process that did the op
74
75	uint64_t abstime; // when this event happened (mach_absolute_time())
76	ino64_t ino;
77	dev_t dev;
78	int32_t mode;
79	uid_t uid;
80	gid_t gid;
81
82	const char *str;
83
84	struct kfs_event dest; // if this is a two-file op*
85	} kfs_event;
86
87	// flags for the flags field
88	#define KFSE_COMBINED_EVENTS 0x0001
89	#define KFSE_CONTAINS_DROPPED_EVENTS 0x0002
90	#define KFSE_RECYCLED_EVENT 0x0004
91	#define KFSE_BEING_CREATED 0x0008
92
93	LIST_HEAD(kfse_list, kfs_event) kfse_list_head = LIST_HEAD_INITIALIZER(x);
94	int num_events_outstanding = `0`;
95	int num_pending_rename = `0`;
96
97
98	struct fsevent_handle;
99
100	typedef struct fs_event_watcher {
101	int8_t event_list; // the events we're interested in*
102	int32_t num_events;
103	dev_t devices_not_to_watch; // report events from devices not in this list*
104	uint32_t num_devices;
105	int32_t flags;
106	kfs_event **event_queue;
107	int32_t eventq_size; // number of event pointers in queue
108	int32_t num_readers;
109	int32_t rd; // read index into the event_queue
110	int32_t wr; // write index into the event_queue
111	int32_t blockers;
112	int32_t my_id;
113	uint32_t num_dropped;
114	uint64_t max_event_id;
115	struct fsevent_handle *fseh;
116	pid_t pid;
117	char proc_name[(`2` * MAXCOMLEN) + `1`];
118	} fs_event_watcher;
119
120	// fs_event_watcher flags
121	#define WATCHER_DROPPED_EVENTS 0x0001
122	#define WATCHER_CLOSING 0x0002
123	#define WATCHER_WANTS_COMPACT_EVENTS 0x0004
124	#define WATCHER_WANTS_EXTENDED_INFO 0x0008
125	#define WATCHER_APPLE_SYSTEM_SERVICE 0x0010 // fseventsd, coreservicesd, mds, revisiond
126
127	#define MAX_WATCHERS 8
128	static fs_event_watcher *watcher_table[MAX_WATCHERS];
129
130	#define DEFAULT_MAX_KFS_EVENTS 4096
131	static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;
132
133	// we allocate kfs_event structures out of this zone
134	static zone_t event_zone;
135	static int fs_event_init = `0`;
136
137	//
138	// this array records whether anyone is interested in a
139	// particular type of event. if no one is, we bail out
140	// early from the event delivery
141	//
142	static int16_t fs_event_type_watchers[FSE_MAX_EVENTS];
143
144	// the device currently being unmounted:
145	static dev_t fsevent_unmount_dev = `0`;
146	// how many ACKs are still outstanding:
147	static int fsevent_unmount_ack_count = `0`;
148
149	static int watcher_add_event(fs_event_watcher watcher, kfs_event kfse);
150	static void fsevents_wakeup(fs_event_watcher *watcher);
151
152	//
153	// Locks
154	//
155	static lck_grp_attr_t * fsevent_group_attr;
156	static lck_attr_t * fsevent_lock_attr;
157	static lck_grp_t * fsevent_mutex_group;
158
159	static lck_grp_t * fsevent_rw_group;
160
161	static lck_rw_t event_handling_lock; // handles locking for event manipulation and recycling
162	static lck_mtx_t watch_table_lock;
163	static lck_mtx_t event_buf_lock;
164	static lck_mtx_t event_writer_lock;
165
166
167	/ Explicitly declare qsort so compiler doesn't complain /
168	__private_extern__ void qsort(
169	void * array,
170	size_t nmembers,
171	size_t member_size,
172	int ()(const* void , const* void *));
173
174	static int
175	is_ignored_directory(const char *path) {
176
177	if (!path) {
178	return `0`;
179	}
180
181	#define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
182	if (IS_TLD("/.Spotlight-V100/") \|\|
183	IS_TLD("/.MobileBackups/") \|\|
184	IS_TLD("/Backups.backupdb/")) {
185	return `1`;
186	}
187	#undef IS_TLD
188
189	return `0`;
190	}
191
192	static void
193	fsevents_internal_init(void)
194	{
195	int i;
196
197	if (fs_event_init++ != `0`) {
198	return;
199	}
200
201	for(i=`0`; i < FSE_MAX_EVENTS; i++) {
202	fs_event_type_watchers[i] = `0`;
203	}
204
205	memset(watcher_table, `0`, sizeof(watcher_table));
206
207	fsevent_lock_attr = lck_attr_alloc_init();
208	fsevent_group_attr = lck_grp_attr_alloc_init();
209	fsevent_mutex_group = lck_grp_alloc_init("fsevent-mutex", fsevent_group_attr);
210	fsevent_rw_group = lck_grp_alloc_init("fsevent-rw", fsevent_group_attr);
211
212	lck_mtx_init(&watch_table_lock, fsevent_mutex_group, fsevent_lock_attr);
213	lck_mtx_init(&event_buf_lock, fsevent_mutex_group, fsevent_lock_attr);
214	lck_mtx_init(&event_writer_lock, fsevent_mutex_group, fsevent_lock_attr);
215
216	lck_rw_init(&event_handling_lock, fsevent_rw_group, fsevent_lock_attr);
217
218	PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));
219
220	event_zone = zinit(sizeof(kfs_event),
221	max_kfs_events * sizeof(kfs_event),
222	max_kfs_events * sizeof(kfs_event),
223	"fs-event-buf");
224	if (event_zone == NULL) {
225	printf("fsevents: failed to initialize the event zone.\n");
226	}
227
228	// mark the zone as exhaustible so that it will not
229	// ever grow beyond what we initially filled it with
230	zone_change(event_zone, Z_EXHAUST, TRUE);
231	zone_change(event_zone, Z_COLLECT, FALSE);
232	zone_change(event_zone, Z_CALLERACCT, FALSE);
233
234	if (zfill(event_zone, max_kfs_events) < max_kfs_events) {
235	printf("fsevents: failed to pre-fill the event zone.\n");
236	}
237
238	}
239
240	static void
241	lock_watch_table(void)
242	{
243	lck_mtx_lock(&watch_table_lock);
244	}
245
246	static void
247	unlock_watch_table(void)
248	{
249	lck_mtx_unlock(&watch_table_lock);
250	}
251
252	static void
253	lock_fs_event_list(void)
254	{
255	lck_mtx_lock(&event_buf_lock);
256	}
257
258	static void
259	unlock_fs_event_list(void)
260	{
261	lck_mtx_unlock(&event_buf_lock);
262	}
263
264	// forward prototype
265	static void release_event_ref(kfs_event *kfse);
266
267	static int
268	watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
269	{
270	unsigned int i;
271
272	// if devices_not_to_watch is NULL then we care about all
273	// events from all devices
274	if (watcher->devices_not_to_watch == NULL) {
275	return `1`;
276	}
277
278	for(i=`0`; i < watcher->num_devices; i++) {
279	if (dev == watcher->devices_not_to_watch[i]) {
280	// found a match! that means we do not
281	// want events from this device.
282	return `0`;
283	}
284	}
285
286	// if we're here it's not in the devices_not_to_watch[]
287	// list so that means we do care about it
288	return `1`;
289	}
290
291
292	int
293	need_fsevent(int type, vnode_t vp)
294	{
295	if (type >= `0` && type < FSE_MAX_EVENTS && fs_event_type_watchers[type] == `0`)
296	return (`0`);
297
298	// events in /dev aren't really interesting...
299	if (vp->v_tag == VT_DEVFS) {
300	return (`0`);
301	}
302
303	return `1`;
304	}
305
306
// true for the two event types FSE_STAT_CHANGED and FSE_CONTENT_MODIFIED
// (note: evaluates its argument twice)
#define is_throw_away(x)  ((x) == FSE_STAT_CHANGED || (x) == FSE_CONTENT_MODIFIED)
308
309
// Ways that an event can be reused:
//
// "combined": two events arrived for the same vnode or path and
//     are merged into one.  The primary event gets a bit marking
//     it as combined; the secondary event is essentially dropped
//     and its kfse structure reused.
//
// "collapsed": multiple events below a given directory are
//     collapsed into a single event.  The directory collapsed
//     into, and all of its children, must be re-scanned.
//
// "recycled": the event is blown away completely because other
//     events carry info about the same vnode or path (and one of
//     those will be marked combined or collapsed as appropriate).
//
#define KFSE_COMBINED   0x0001
#define KFSE_COLLAPSED  0x0002
#define KFSE_RECYCLED   0x0004
332
333	int num_dropped = `0`;
334	int num_parent_switch = `0`;
335	int num_recycled_rename = `0`;
336
337	static struct timeval last_print;
338
339	//
340	// These variables are used to track coalescing multiple identical
341	// events for the same vnode/pathname. If we get the same event
342	// type and same vnode/pathname as the previous event, we just drop
343	// the event since it's superfluous. This improves some micro-
344	// benchmarks considerably and actually has a real-world impact on
345	// tests like a Finder copy where multiple stat-changed events can
346	// get coalesced.
347	//
348	static int last_event_type=-`1`;
349	static void *last_ptr=NULL;
350	static char last_str[MAXPATHLEN];
351	static int last_nlen=`0`;
352	static int last_vid=-`1`;
353	static uint64_t last_coalesced_time=`0`;
354	static void *last_event_ptr=NULL;
355	int last_coalesced = `0`;
356	static mach_timebase_info_data_t sTimebaseInfo = { `0`, `0` };
357
358
359	int
360	add_fsevent(int type, vfs_context_t ctx, ...)
361	{
362	struct proc *p = vfs_context_proc(ctx);
363	int i, arg_type, ret;
364	kfs_event kfse, kfse_dest=NULL, *cur;
365	fs_event_watcher *watcher;
366	va_list ap;
367	int error = `0`, did_alloc=`0`;
368	dev_t dev = `0`;
369	uint64_t now, elapsed;
370	char *pathbuff=NULL;
371	int pathbuff_len;
372
373
374
375	va_start(ap, ctx);
376
377	// ignore bogus event types..
378	if (type < `0` \|\| type >= FSE_MAX_EVENTS) {
379	return EINVAL;
380	}
381
382	// if no one cares about this type of event, bail out
383	if (fs_event_type_watchers[type] == `0`) {
384	va_end(ap);
385
386	return `0`;
387	}
388
389	now = mach_absolute_time();
390
391	// find a free event and snag it for our use
392	// NOTE: do not do anything that would block until
393	// the lock is dropped.
394	lock_fs_event_list();
395
396	//
397	// check if this event is identical to the previous one...
398	// (as long as it's not an event type that can never be the
399	// same as a previous event)
400	//
401	if (type != FSE_CREATE_FILE && type != FSE_DELETE && type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CHOWN && type != FSE_DOCID_CHANGED && type != FSE_DOCID_CREATED && type != FSE_CLONE) {
402	void *ptr=NULL;
403	int vid=`0`, was_str=`0`, nlen=`0`;
404
405	for(arg_type=va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type=va_arg(ap, int32_t)) {
406	switch(arg_type) {
407	case FSE_ARG_VNODE: {
408	ptr = va_arg(ap, void *);
409	vid = vnode_vid((struct vnode *)ptr);
410	last_str[`0`] = `'\0'`;
411	break;
412	}
413	case FSE_ARG_STRING: {
414	nlen = va_arg(ap, int32_t);
415	ptr = va_arg(ap, void *);
416	was_str = `1`;
417	break;
418	}
419	}
420	if (ptr != NULL) {
421	break;
422	}
423	}
424
425	if ( sTimebaseInfo.denom == `0` ) {
426	(void) clock_timebase_info(&sTimebaseInfo);
427	}
428
429	elapsed = (now - last_coalesced_time);
430	if (sTimebaseInfo.denom != sTimebaseInfo.numer) {
431	if (sTimebaseInfo.denom == `1`) {
432	elapsed *= sTimebaseInfo.numer;
433	} else {
434	// this could overflow... the worst that will happen is that we'll
435	// send (or not send) an extra event so I'm not going to worry about
436	// doing the math right like dtrace_abs_to_nano() does.
437	elapsed = (elapsed * sTimebaseInfo.numer) / (uint64_t)sTimebaseInfo.denom;
438	}
439	}
440
441	if (type == last_event_type
442	&& (elapsed < `1000000000`)
443	&&
444	((vid && vid == last_vid && last_ptr == ptr)
445	\|\|
446	(last_str[`0`] && last_nlen == nlen && ptr && strcmp(last_str, ptr) == `0`))
447	) {
448
449	last_coalesced++;
450	unlock_fs_event_list();
451	va_end(ap);
452
453	return `0`;
454	} else {
455	last_ptr = ptr;
456	if (was_str) {
457	strlcpy(last_str, ptr, sizeof(last_str));
458	}
459	last_nlen = nlen;
460	last_vid = vid;
461	last_event_type = type;
462	last_coalesced_time = now;
463	}
464	}
465	va_start(ap, ctx);
466
467
468	kfse = zalloc_noblock(event_zone);
469	if (kfse && (type == FSE_RENAME \|\| type == FSE_EXCHANGE \|\| type == FSE_CLONE)) {
470	kfse_dest = zalloc_noblock(event_zone);
471	if (kfse_dest == NULL) {
472	did_alloc = `1`;
473	zfree(event_zone, kfse);
474	kfse = NULL;
475	}
476	}
477
478
479	if (kfse == NULL) { // yikes! no free events
480	unlock_fs_event_list();
481	lock_watch_table();
482
483	for(i=`0`; i < MAX_WATCHERS; i++) {
484	watcher = watcher_table[i];
485	if (watcher == NULL) {
486	continue;
487	}
488
489	watcher->flags \|= WATCHER_DROPPED_EVENTS;
490	fsevents_wakeup(watcher);
491	}
492	unlock_watch_table();
493
494	{
495	struct timeval current_tv;
496
497	num_dropped++;
498
499	// only print a message at most once every 5 seconds
500	microuptime(&current_tv);
501	if ((current_tv.tv_sec - last_print.tv_sec) > `10`) {
502	int ii;
503	void junkptr=zalloc_noblock(event_zone), listhead=kfse_list_head.lh_first;
504
505	printf("add_fsevent: event queue is full! dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding);
506	printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename);
507	printf("add_fsevent: zalloc sez: %p\n", junkptr);
508	printf("add_fsevent: event_zone info: %d 0x%x\n", ((int )event_zone)[`0`], ((int* *)event_zone)[`1`]);
509	lock_watch_table();
510	for(ii=`0`; ii < MAX_WATCHERS; ii++) {
511	if (watcher_table[ii] == NULL) {
512	continue;
513	}
514
515	printf("add_fsevent: watcher %s %p: rd %4d wr %4d q_size %4d flags 0x%x\n",
516	watcher_table[ii]->proc_name,
517	watcher_table[ii],
518	watcher_table[ii]->rd, watcher_table[ii]->wr,
519	watcher_table[ii]->eventq_size, watcher_table[ii]->flags);
520	}
521	unlock_watch_table();
522
523	last_print = current_tv;
524	if (junkptr) {
525	zfree(event_zone, junkptr);
526	}
527	}
528	}
529
530	if (pathbuff) {
531	release_pathbuff(pathbuff);
532	pathbuff = NULL;
533	}
534	return ENOSPC;
535	}
536
537	memset(kfse, `0`, sizeof(kfs_event));
538	kfse->refcount = `1`;
539	OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags);
540
541	last_event_ptr = kfse;
542	kfse->type = type;
543	kfse->abstime = now;
544	kfse->pid = p->p_pid;
545	if (type == FSE_RENAME \|\| type == FSE_EXCHANGE \|\| type == FSE_CLONE) {
546	memset(kfse_dest, `0`, sizeof(kfs_event));
547	kfse_dest->refcount = `1`;
548	OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse_dest->flags);
549	kfse_dest->type = type;
550	kfse_dest->pid = p->p_pid;
551	kfse_dest->abstime = now;
552
553	kfse->dest = kfse_dest;
554	}
555
556	num_events_outstanding++;
557	if (kfse->type == FSE_RENAME) {
558	num_pending_rename++;
559	}
560	LIST_INSERT_HEAD(&kfse_list_head, kfse, kevent_list);
561
562	if (kfse->refcount < `1`) {
563	panic("add_fsevent: line %d: kfse recount %d but should be at least 1\n", __LINE__, kfse->refcount);
564	}
565
566	unlock_fs_event_list(); // at this point it's safe to unlock
567
568	//
569	// now process the arguments passed in and copy them into
570	// the kfse
571	//
572
573	cur = kfse;
574
575	if (type == FSE_DOCID_CREATED \|\| type == FSE_DOCID_CHANGED) {
576	uint64_t val;
577
578	//
579	// These events are special and not like the other events. They only
580	// have a dev_t, src inode #, dest inode #, and a doc-id. We use the
581	// fields that we can in the kfse but have to overlay the dest inode
582	// number and the doc-id on the other fields.
583	//
584
585	// First the dev_t
586	arg_type = va_arg(ap, int32_t);
587	if (arg_type == FSE_ARG_DEV) {
588	cur->dev = (dev_t)(va_arg(ap, dev_t));
589	} else {
590	cur->dev = (dev_t)`0xbadc0de1`;
591	}
592
593	// next the source inode #
594	arg_type = va_arg(ap, int32_t);
595	if (arg_type == FSE_ARG_INO) {
596	cur->ino = (ino64_t)(va_arg(ap, ino64_t));
597	} else {
598	cur->ino = `0xbadc0de2`;
599	}
600
601	// now the dest inode #
602	arg_type = va_arg(ap, int32_t);
603	if (arg_type == FSE_ARG_INO) {
604	val = (ino64_t)(va_arg(ap, ino64_t));
605	} else {
606	val = `0xbadc0de2`;
607	}
608	// overlay the dest inode number on the str/dest pointer fields
609	memcpy(&cur->str, &val, sizeof(ino64_t));
610
611
612	// and last the document-id
613	arg_type = va_arg(ap, int32_t);
614	if (arg_type == FSE_ARG_INT32) {
615	val = (uint64_t)va_arg(ap, uint32_t);
616	} else if (arg_type == FSE_ARG_INT64) {
617	val = (uint64_t)va_arg(ap, uint64_t);
618	} else {
619	val = `0xbadc0de3`;
620	}
621
622	// the docid is 64-bit and overlays the uid/gid fields
623	memcpy(&cur->uid, &val, sizeof(uint64_t));
624
625	goto done_with_args;
626	}
627
628	if (type == FSE_UNMOUNT_PENDING) {
629
630	// Just a dev_t
631	arg_type = va_arg(ap, int32_t);
632	if (arg_type == FSE_ARG_DEV) {
633	cur->dev = (dev_t)(va_arg(ap, dev_t));
634	} else {
635	cur->dev = (dev_t)`0xbadc0de1`;
636	}
637
638	goto done_with_args;
639	}
640
641	for(arg_type=va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type=va_arg(ap, int32_t))
642
643	switch(arg_type) {
644	case FSE_ARG_VNODE: {
645	// this expands out into multiple arguments to the client
646	struct vnode *vp;
647	struct vnode_attr va;
648
649	if (kfse->str != NULL) {
650	cur = kfse_dest;
651	}
652
653	vp = va_arg(ap, struct vnode *);
654	if (vp == NULL) {
655	panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!\n",
656	cur->type);
657	}
658
659	VATTR_INIT(&va);
660	VATTR_WANTED(&va, va_fsid);
661	VATTR_WANTED(&va, va_fileid);
662	VATTR_WANTED(&va, va_mode);
663	VATTR_WANTED(&va, va_uid);
664	VATTR_WANTED(&va, va_gid);
665	VATTR_WANTED(&va, va_nlink);
666	if ((ret = vnode_getattr(vp, &va, vfs_context_kernel())) != `0`) {
667	// printf("add_fsevent: failed to getattr on vp %p (%d)\n", cur->fref.vp, ret);
668	cur->str = NULL;
669	error = EINVAL;
670	goto clean_up;
671	}
672
673	cur->dev = dev = (dev_t)va.va_fsid;
674	cur->ino = (ino64_t)va.va_fileid;
675	cur->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) \| va.va_mode;
676	cur->uid = va.va_uid;
677	cur->gid = va.va_gid;
678	if (vp->v_flag & VISHARDLINK) {
679	cur->mode \|= FSE_MODE_HLINK;
680	if ((vp->v_type == VDIR && va.va_dirlinkcount == `0`) \|\| (vp->v_type == VREG && va.va_nlink == `0`)) {
681	cur->mode \|= FSE_MODE_LAST_HLINK;
682	}
683	}
684
685	// if we haven't gotten the path yet, get it.
686	if (pathbuff == NULL) {
687	pathbuff = get_pathbuff();
688	pathbuff_len = MAXPATHLEN;
689
690	pathbuff[`0`] = `'\0'`;
691	if ((ret = vn_getpath(vp, pathbuff, &pathbuff_len)) != `0` \|\| pathbuff[`0`] == `'\0'`) {
692
693	cur->flags \|= KFSE_CONTAINS_DROPPED_EVENTS;
694
695	do {
696	if (vp->v_parent != NULL) {
697	vp = vp->v_parent;
698	} else if (vp->v_mount) {
699	strlcpy(pathbuff, vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN);
700	break;
701	} else {
702	vp = NULL;
703	}
704
705	if (vp == NULL) {
706	break;
707	}
708
709	pathbuff_len = MAXPATHLEN;
710	ret = vn_getpath(vp, pathbuff, &pathbuff_len);
711	} while (ret == ENOSPC);
712
713	if (ret != `0` \|\| vp == NULL) {
714	error = ENOENT;
715	goto clean_up;
716	}
717	}
718	}
719
720	// store the path by adding it to the global string table
721	cur->len = pathbuff_len;
722	cur->str = vfs_addname(pathbuff, pathbuff_len, `0`, `0`);
723	if (cur->str == NULL \|\| cur->str[`0`] == `'\0'`) {
724	panic("add_fsevent: was not able to add path %s to event %p.\n", pathbuff, cur);
725	}
726
727	release_pathbuff(pathbuff);
728	pathbuff = NULL;
729
730	break;
731	}
732
733	case FSE_ARG_FINFO: {
734	fse_info *fse;
735
736	fse = va_arg(ap, fse_info *);
737
738	cur->dev = dev = (dev_t)fse->dev;
739	cur->ino = (ino64_t)fse->ino;
740	cur->mode = (int32_t)fse->mode;
741	cur->uid = (uid_t)fse->uid;
742	cur->gid = (uid_t)fse->gid;
743	// if it's a hard-link and this is the last link, flag it
744	if ((fse->mode & FSE_MODE_HLINK) && fse->nlink == `0`) {
745	cur->mode \|= FSE_MODE_LAST_HLINK;
746	}
747	if (cur->mode & FSE_TRUNCATED_PATH) {
748	cur->flags \|= KFSE_CONTAINS_DROPPED_EVENTS;
749	cur->mode &= ~FSE_TRUNCATED_PATH;
750	}
751	break;
752	}
753
754	case FSE_ARG_STRING:
755	if (kfse->str != NULL) {
756	cur = kfse_dest;
757	}
758
759	cur->len = (int16_t)(va_arg(ap, int32_t) & `0x7fff`);
760	if (cur->len >= `1`) {
761	cur->str = vfs_addname(va_arg(ap, char *), cur->len, `0`, `0`);
762	} else {
763	printf("add_fsevent: funny looking string length: %d\n", (int)cur->len);
764	cur->len = `2`;
765	cur->str = vfs_addname("/", cur->len, `0`, `0`);
766	}
767	if (cur->str[`0`] == `0`) {
768	printf("add_fsevent: bogus looking string (len %d)\n", cur->len);
769	}
770	break;
771
772	case FSE_ARG_INT32: {
773	uint32_t ival = (uint32_t)va_arg(ap, int32_t);
774	kfse->uid = (ino64_t)ival;
775	break;
776	}
777
778	default:
779	printf("add_fsevent: unknown type %d\n", arg_type);
780	// just skip one 32-bit word and hope we sync up...
781	(void)va_arg(ap, int32_t);
782	}
783
784	done_with_args:
785	va_end(ap);
786
787	OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse->flags);
788	if (kfse_dest) {
789	OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse_dest->flags);
790	}
791
792	//
793	// now we have to go and let everyone know that
794	// is interested in this type of event
795	//
796	lock_watch_table();
797
798	for(i=`0`; i < MAX_WATCHERS; i++) {
799	watcher = watcher_table[i];
800	if (watcher == NULL) {
801	continue;
802	}
803
804	if ( type < watcher->num_events
805	&& watcher->event_list[type] == FSE_REPORT
806	&& watcher_cares_about_dev(watcher, dev)) {
807
808	if (watcher_add_event(watcher, kfse) != `0`) {
809	watcher->num_dropped++;
810	continue;
811	}
812	}
813
814	// if (kfse->refcount < 1) {
815	// panic("add_fsevent: line %d: kfse recount %d but should be at least 1\n", __LINE__, kfse->refcount);
816	// }
817	}
818
819	unlock_watch_table();
820
821	clean_up:
822
823	if (pathbuff) {
824	release_pathbuff(pathbuff);
825	pathbuff = NULL;
826	}
827
828	release_event_ref(kfse);
829
830	return error;
831	}
832
833
834	static void
835	release_event_ref(kfs_event *kfse)
836	{
837	int old_refcount;
838	kfs_event copy, dest_copy;
839
840
841	old_refcount = OSAddAtomic(-`1`, &kfse->refcount);
842	if (old_refcount > `1`) {
843	return;
844	}
845
846	lock_fs_event_list();
847	if (last_event_ptr == kfse) {
848	last_event_ptr = NULL;
849	last_event_type = -`1`;
850	last_coalesced_time = `0`;
851	}
852
853	if (kfse->refcount < `0`) {
854	panic("release_event_ref: bogus kfse refcount %d\n", kfse->refcount);
855	}
856
857	if (kfse->refcount > `0` \|\| kfse->type == FSE_INVALID) {
858	// This is very subtle. Either of these conditions can
859	// be true if an event got recycled while we were waiting
860	// on the fs_event_list lock or the event got recycled,
861	// delivered, _and_ free'd by someone else while we were
862	// waiting on the fs event list lock. In either case
863	// we need to just unlock the list and return without
864	// doing anything because if the refcount is > 0 then
865	// someone else will take care of free'ing it and when
866	// the kfse->type is invalid then someone else already
867	// has handled free'ing the event (while we were blocked
868	// on the event list lock).
869	//
870	unlock_fs_event_list();
871	return;
872	}
873
874	//
875	// make a copy of this so we can free things without
876	// holding the fs_event_buf lock
877	//
878	copy = *kfse;
879	if (kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && kfse->dest && OSAddAtomic(-`1`, &kfse->dest->refcount) == `1`) {
880	dest_copy = *kfse->dest;
881	} else {
882	dest_copy.str = NULL;
883	dest_copy.len = `0`;
884	dest_copy.type = FSE_INVALID;
885	}
886
887	kfse->pid = kfse->type; // save this off for debugging...
888	kfse->uid = (uid_t)(long)kfse->str; // save this off for debugging...
889	kfse->gid = (gid_t)(long)current_thread();
890
891	kfse->str = (char )`0xdeadbeef`; // XXXdbg - catch any cheaters...*
892
893	if (dest_copy.type != FSE_INVALID) {
894	kfse->dest->str = (char )`0xbadc0de`; // XXXdbg - catch any cheaters...*
895	kfse->dest->type = FSE_INVALID;
896
897	if (kfse->dest->kevent_list.le_prev != NULL) {
898	num_events_outstanding--;
899	LIST_REMOVE(kfse->dest, kevent_list);
900	memset(&kfse->dest->kevent_list, `0xa5`, sizeof(kfse->dest->kevent_list));
901	}
902
903	zfree(event_zone, kfse->dest);
904	}
905
906	// mark this fsevent as invalid
907	{
908	int otype;
909
910	otype = kfse->type;
911	kfse->type = FSE_INVALID;
912
913	if (kfse->kevent_list.le_prev != NULL) {
914	num_events_outstanding--;
915	if (otype == FSE_RENAME) {
916	num_pending_rename--;
917	}
918	LIST_REMOVE(kfse, kevent_list);
919	memset(&kfse->kevent_list, `0`, sizeof(kfse->kevent_list));
920	}
921	}
922
923	zfree(event_zone, kfse);
924
925	unlock_fs_event_list();
926
927	// if we have a pointer in the union
928	if (copy.str && copy.type != FSE_DOCID_CREATED && copy.type != FSE_DOCID_CHANGED) {
929	if (copy.len == `0`) { // and it's not a string
930	panic("%s:%d: no more fref.vp!\n", __FILE__, __LINE__);
931	// vnode_rele_ext(copy.fref.vp, O_EVTONLY, 0);
932	} else { // else it's a string
933	vfs_removename(copy.str);
934	}
935	}
936
937	if (dest_copy.type != FSE_INVALID && dest_copy.str) {
938	if (dest_copy.len == `0`) {
939	panic("%s:%d: no more fref.vp!\n", __FILE__, __LINE__);
940	// vnode_rele_ext(dest_copy.fref.vp, O_EVTONLY, 0);
941	} else {
942	vfs_removename(dest_copy.str);
943	}
944	}
945	}
946
947	static int
948	add_watcher(int8_t event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher watcher_out, void* *fseh)
949	{
950	int i;
951	fs_event_watcher *watcher;
952
953	if (eventq_size <= `0` \|\| eventq_size > `100`*max_kfs_events) {
954	eventq_size = max_kfs_events;
955	}
956
957	// Note: the event_queue follows the fs_event_watcher struct
958	// in memory so we only have to do one allocation
959	MALLOC(watcher,
960	fs_event_watcher *,
961	sizeof(fs_event_watcher) + eventq_size * sizeof(kfs_event *),
962	M_TEMP, M_WAITOK);
963	if (watcher == NULL) {
964	return ENOMEM;
965	}
966
967	watcher->event_list = event_list;
968	watcher->num_events = num_events;
969	watcher->devices_not_to_watch = NULL;
970	watcher->num_devices = `0`;
971	watcher->flags = `0`;
972	watcher->event_queue = (kfs_event **)&watcher[`1`];
973	watcher->eventq_size = eventq_size;
974	watcher->rd = `0`;
975	watcher->wr = `0`;
976	watcher->blockers = `0`;
977	watcher->num_readers = `0`;
978	watcher->max_event_id = `0`;
979	watcher->fseh = fseh;
980	watcher->pid = proc_selfpid();
981	proc_selfname(watcher->proc_name, sizeof(watcher->proc_name));
982
983	watcher->num_dropped = `0`; // XXXdbg - debugging
984
985	if (!strncmp(watcher->proc_name, "fseventsd", sizeof(watcher->proc_name)) \|\|
986	!strncmp(watcher->proc_name, "coreservicesd", sizeof(watcher->proc_name)) \|\|
987	!strncmp(watcher->proc_name, "revisiond", sizeof(watcher->proc_name)) \|\|
988	!strncmp(watcher->proc_name, "mds", sizeof(watcher->proc_name))) {
989	watcher->flags \|= WATCHER_APPLE_SYSTEM_SERVICE;
990	} else {
991	printf("fsevents: watcher %s (pid: %d) - Using /dev/fsevents directly is unsupported. Migrate to FSEventsFramework\n",
992	watcher->proc_name, watcher->pid);
993	}
994
995	lock_watch_table();
996
997	// find a slot for the new watcher
998	for(i=`0`; i < MAX_WATCHERS; i++) {
999	if (watcher_table[i] == NULL) {
1000	watcher->my_id = i;
1001	watcher_table[i] = watcher;
1002	break;
1003	}
1004	}
1005
1006	if (i >= MAX_WATCHERS) {
1007	printf("fsevents: too many watchers!\n");
1008	unlock_watch_table();
1009	FREE(watcher, M_TEMP);
1010	return ENOSPC;
1011	}
1012
1013	// now update the global list of who's interested in
1014	// events of a particular type...
1015	for(i=`0`; i < num_events; i++) {
1016	if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1017	fs_event_type_watchers[i]++;
1018	}
1019	}
1020
1021	unlock_watch_table();
1022
1023	*watcher_out = watcher;
1024
1025	return `0`;
1026	}
1027
1028
1029
1030	static void
1031	remove_watcher(fs_event_watcher *target)
1032	{
1033	int i, j, counter=`0`;
1034	fs_event_watcher *watcher;
1035	kfs_event *kfse;
1036
1037	lock_watch_table();
1038
1039	for(j=`0`; j < MAX_WATCHERS; j++) {
1040	watcher = watcher_table[j];
1041	if (watcher != target) {
1042	continue;
1043	}
1044
1045	watcher_table[j] = NULL;
1046
1047	for(i=`0`; i < watcher->num_events; i++) {
1048	if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1049	fs_event_type_watchers[i]--;
1050	}
1051	}
1052
1053	if (watcher->flags & WATCHER_CLOSING) {
1054	unlock_watch_table();
1055	return;
1056	}
1057
1058	// printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags);
1059	watcher->flags \|= WATCHER_CLOSING;
1060	OSAddAtomic(`1`, &watcher->num_readers);
1061
1062	unlock_watch_table();
1063
1064	while (watcher->num_readers > `1` && counter++ < `5000`) {
1065	lock_watch_table();
1066	fsevents_wakeup(watcher); // in case they're asleep
1067	unlock_watch_table();
1068
1069	tsleep(watcher, PRIBIO, "fsevents-close", `1`);
1070	}
1071	if (counter++ >= `5000`) {
1072	// printf("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1073	panic("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1074	}
1075
1076	// drain the event_queue
1077
1078	lck_rw_lock_exclusive(&event_handling_lock);
1079	while(watcher->rd != watcher->wr) {
1080	kfse = watcher->event_queue[watcher->rd];
1081	watcher->event_queue[watcher->rd] = NULL;
1082	watcher->rd = (watcher->rd+`1`) % watcher->eventq_size;
1083	OSSynchronizeIO();
1084	if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= `1`) {
1085	release_event_ref(kfse);
1086	}
1087	}
1088	lck_rw_unlock_exclusive(&event_handling_lock);
1089
1090	if (watcher->event_list) {
1091	FREE(watcher->event_list, M_TEMP);
1092	watcher->event_list = NULL;
1093	}
1094	if (watcher->devices_not_to_watch) {
1095	FREE(watcher->devices_not_to_watch, M_TEMP);
1096	watcher->devices_not_to_watch = NULL;
1097	}
1098	FREE(watcher, M_TEMP);
1099
1100	return;
1101	}
1102
1103	unlock_watch_table();
1104	}
1105
1106
1107	#define EVENT_DELAY_IN_MS 10
1108	static thread_call_t event_delivery_timer = NULL;
1109	static int timer_set = `0`;
1110
1111
1112	static void
1113	delayed_event_delivery(__unused void param0, __unused void* *param1)
1114	{
1115	int i;
1116
1117	lock_watch_table();
1118
1119	for(i=`0`; i < MAX_WATCHERS; i++) {
1120	if (watcher_table[i] != NULL && watcher_table[i]->rd != watcher_table[i]->wr) {
1121	fsevents_wakeup(watcher_table[i]);
1122	}
1123	}
1124
1125	timer_set = `0`;
1126
1127	unlock_watch_table();
1128	}
1129
1130
1131	//
1132	// The watch table must be locked before calling this function.
1133	//
1134	static void
1135	schedule_event_wakeup(void)
1136	{
1137	uint64_t deadline;
1138
1139	if (event_delivery_timer == NULL) {
1140	event_delivery_timer = thread_call_allocate((thread_call_func_t)delayed_event_delivery, NULL);
1141	}
1142
1143	clock_interval_to_deadline(EVENT_DELAY_IN_MS, `1000` * `1000`, &deadline);
1144
1145	thread_call_enter_delayed(event_delivery_timer, deadline);
1146	timer_set = `1`;
1147	}
1148
1149
1150
1151	#define MAX_NUM_PENDING 16
1152
1153	//
1154	// NOTE: the watch table must be locked before calling
1155	// this routine.
1156	//
1157	static int
1158	watcher_add_event(fs_event_watcher watcher, kfs_event kfse)
1159	{
1160	if (kfse->abstime > watcher->max_event_id) {
1161	watcher->max_event_id = kfse->abstime;
1162	}
1163
1164	if (((watcher->wr + `1`) % watcher->eventq_size) == watcher->rd) {
1165	watcher->flags \|= WATCHER_DROPPED_EVENTS;
1166	fsevents_wakeup(watcher);
1167	return ENOSPC;
1168	}
1169
1170	OSAddAtomic(`1`, &kfse->refcount);
1171	watcher->event_queue[watcher->wr] = kfse;
1172	OSSynchronizeIO();
1173	watcher->wr = (watcher->wr + `1`) % watcher->eventq_size;
1174
1175	//
1176	// wake up the watcher if there are more than MAX_NUM_PENDING events.
1177	// otherwise schedule a timer (if one isn't already set) which will
1178	// send any pending events if no more are received in the next
1179	// EVENT_DELAY_IN_MS milli-seconds.
1180	//
1181	int32_t num_pending = `0`;
1182	if (watcher->rd < watcher->wr) {
1183	num_pending = watcher->wr - watcher->rd;
1184	}
1185
1186	if (watcher->rd > watcher->wr) {
1187	num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
1188	}
1189
1190	if (num_pending > (watcher->eventq_size*`3`/`4`) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
1191	/ Non-Apple Service is falling behind, start dropping events for this process /
1192	lck_rw_lock_exclusive(&event_handling_lock);
1193	while (watcher->rd != watcher->wr) {
1194	kfse = watcher->event_queue[watcher->rd];
1195	watcher->event_queue[watcher->rd] = NULL;
1196	watcher->rd = (watcher->rd+`1`) % watcher->eventq_size;
1197	OSSynchronizeIO();
1198	if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= `1`) {
1199	release_event_ref(kfse);
1200	}
1201	}
1202	watcher->flags \|= WATCHER_DROPPED_EVENTS;
1203	lck_rw_unlock_exclusive(&event_handling_lock);
1204
1205	printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
1206	watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
1207	watcher->eventq_size, watcher->flags);
1208
1209	fsevents_wakeup(watcher);
1210	} else if (num_pending > MAX_NUM_PENDING) {
1211	fsevents_wakeup(watcher);
1212	} else if (timer_set == `0`) {
1213	schedule_event_wakeup();
1214	}
1215
1216	return `0`;
1217	}
1218
/*
 * Append one tagged argument to an event staging buffer, flushing the
 * staged bytes to the caller's uio whenever the buffer runs out of room.
 *
 * The argument is written as a uint16_t type, a uint16_t length (the low
 * 16 bits of size) and then size bytes of payload.
 *
 *   type       argument tag written into the header
 *   size       number of payload bytes at data
 *   data       payload to copy
 *   buff       staging buffer of buff_sz bytes
 *   _buff_idx  in/out: number of bytes currently staged in buff
 *   buff_sz    capacity of buff
 *   uio        destination for flushed data
 *
 * Returns 0 on success, ENOSPC when the staged bytes do not fit in the
 * space remaining in uio, or the error returned by uiomove().  On every
 * path *_buff_idx is updated to the current staging offset.
 */
static int
fill_buff(uint16_t type, int32_t size, const void *data,
    char *buff, int32_t *_buff_idx, int32_t buff_sz,
    struct uio *uio)
{
	int32_t amt, error = 0, buff_idx = *_buff_idx;
	uint16_t tmp;

	//
	// the +1 on the size is to guarantee that the main data
	// copy loop will always copy at least 1 byte
	//
	if ((buff_sz - buff_idx) <= (int)(2*sizeof(uint16_t) + 1)) {
		if (buff_idx > uio_resid(uio)) {
			error = ENOSPC;
			goto get_out;
		}

		error = uiomove(buff, buff_idx, uio);
		if (error) {
			goto get_out;
		}
		buff_idx = 0;
	}

	// copy out the header (type & size)
	memcpy(&buff[buff_idx], &type, sizeof(uint16_t));
	buff_idx += sizeof(uint16_t);

	tmp = size & 0xffff;
	memcpy(&buff[buff_idx], &tmp, sizeof(uint16_t));
	buff_idx += sizeof(uint16_t);

	// now copy the body of the data, flushing along the way
	// if the buffer fills up.
	//
	while (size > 0) {
		amt = (size < (buff_sz - buff_idx)) ? size : (buff_sz - buff_idx);
		memcpy(&buff[buff_idx], data, amt);

		size -= amt;
		buff_idx += amt;
		data = (const char *)data + amt;
		if (size > (buff_sz - buff_idx)) {
			// what is left will not fit: flush the staged bytes first
			if (buff_idx > uio_resid(uio)) {
				error = ENOSPC;
				goto get_out;
			}
			error = uiomove(buff, buff_idx, uio);
			if (error) {
				goto get_out;
			}
			buff_idx = 0;
		}

		if (amt == 0) {   // just in case...
			break;
		}
	}

get_out:
	*_buff_idx = buff_idx;

	return error;
}
1284
1285
1286	static int copy_out_kfse(fs_event_watcher watcher, kfs_event kfse, struct uio uio) __attribute__*((noinline));
1287
1288	static int
1289	copy_out_kfse(fs_event_watcher watcher, kfs_event kfse, struct uio *uio)
1290	{
1291	int error;
1292	uint16_t tmp16;
1293	int32_t type;
1294	kfs_event *cur;
1295	char evbuff[`512`];
1296	int evbuff_idx = `0`;
1297
1298	if (kfse->type == FSE_INVALID) {
1299	panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d fref ptr %p)\n", kfse, kfse->refcount, kfse->str);
1300	}
1301
1302	if (kfse->flags & KFSE_BEING_CREATED) {
1303	return `0`;
1304	}
1305
1306	if (((kfse->type == FSE_RENAME) \|\| (kfse->type == FSE_CLONE)) && kfse->dest == NULL) {
1307	//
1308	// This can happen if an event gets recycled but we had a
1309	// pointer to it in our event queue. The event is the
1310	// destination of a rename or clone which we'll process separately
1311	// (that is, another kfse points to this one so it's ok
1312	// to skip this guy because we'll process it when we process
1313	// the other one)
1314	error = `0`;
1315	goto get_out;
1316	}
1317
1318	if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
1319
1320	type = (kfse->type & `0xfff`);
1321
1322	if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1323	type \|= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
1324	} else if (kfse->flags & KFSE_COMBINED_EVENTS) {
1325	type \|= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
1326	}
1327
1328	} else {
1329	type = (int32_t)kfse->type;
1330	}
1331
1332	// copy out the type of the event
1333	memcpy(evbuff, &type, sizeof(int32_t));
1334	evbuff_idx += sizeof(int32_t);
1335
1336	// copy out the pid of the person that generated the event
1337	memcpy(&evbuff[evbuff_idx], &kfse->pid, sizeof(pid_t));
1338	evbuff_idx += sizeof(pid_t);
1339
1340	cur = kfse;
1341
1342	copy_again:
1343
1344	if (kfse->type == FSE_DOCID_CHANGED \|\| kfse->type == FSE_DOCID_CREATED) {
1345	dev_t dev = cur->dev;
1346	ino64_t ino = cur->ino;
1347	uint64_t ival;
1348
1349	error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1350	if (error != `0`) {
1351	goto get_out;
1352	}
1353
1354	error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1355	if (error != `0`) {
1356	goto get_out;
1357	}
1358
1359	memcpy(&ino, &cur->str, sizeof(ino64_t));
1360	error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1361	if (error != `0`) {
1362	goto get_out;
1363	}
1364
1365	memcpy(&ival, &cur->uid, sizeof(uint64_t)); // the docid gets stuffed into the ino field
1366	error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &ival, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1367	if (error != `0`) {
1368	goto get_out;
1369	}
1370
1371	goto done;
1372	}
1373
1374	if (kfse->type == FSE_UNMOUNT_PENDING) {
1375	dev_t dev = cur->dev;
1376
1377	error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1378	if (error != `0`) {
1379	goto get_out;
1380	}
1381
1382	goto done;
1383	}
1384
1385	if (cur->str == NULL \|\| cur->str[`0`] == `'\0'`) {
1386	printf("copy_out_kfse:2: empty/short path (%s)\n", cur->str);
1387	error = fill_buff(FSE_ARG_STRING, `2`, "/", evbuff, &evbuff_idx, sizeof(evbuff), uio);
1388	} else {
1389	error = fill_buff(FSE_ARG_STRING, cur->len, cur->str, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1390	}
1391	if (error != `0`) {
1392	goto get_out;
1393	}
1394
1395	if (cur->dev == `0` && cur->ino == `0`) {
1396	// this happens when a rename event happens and the
1397	// destination of the rename did not previously exist.
1398	// it thus has no other file info so skip copying out
1399	// the stuff below since it isn't initialized
1400	goto done;
1401	}
1402
1403
1404	if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
1405	int32_t finfo_size;
1406
1407	finfo_size = sizeof(dev_t) + sizeof(ino64_t) + sizeof(int32_t) + sizeof(uid_t) + sizeof(gid_t);
1408	error = fill_buff(FSE_ARG_FINFO, finfo_size, &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1409	if (error != `0`) {
1410	goto get_out;
1411	}
1412	} else {
1413	error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &cur->dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1414	if (error != `0`) {
1415	goto get_out;
1416	}
1417
1418	error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1419	if (error != `0`) {
1420	goto get_out;
1421	}
1422
1423	error = fill_buff(FSE_ARG_MODE, sizeof(int32_t), &cur->mode, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1424	if (error != `0`) {
1425	goto get_out;
1426	}
1427
1428	error = fill_buff(FSE_ARG_UID, sizeof(uid_t), &cur->uid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1429	if (error != `0`) {
1430	goto get_out;
1431	}
1432
1433	error = fill_buff(FSE_ARG_GID, sizeof(gid_t), &cur->gid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1434	if (error != `0`) {
1435	goto get_out;
1436	}
1437	}
1438
1439
1440	if (cur->dest) {
1441	cur = cur->dest;
1442	goto copy_again;
1443	}
1444
1445	done:
1446	// very last thing: the time stamp
1447	error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &cur->abstime, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1448	if (error != `0`) {
1449	goto get_out;
1450	}
1451
1452	// check if the FSE_ARG_DONE will fit
1453	if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
1454	if (evbuff_idx > uio_resid(uio)) {
1455	error = ENOSPC;
1456	goto get_out;
1457	}
1458	error = uiomove(evbuff, evbuff_idx, uio);
1459	if (error) {
1460	goto get_out;
1461	}
1462	evbuff_idx = `0`;
1463	}
1464
1465	tmp16 = FSE_ARG_DONE;
1466	memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
1467	evbuff_idx += sizeof(uint16_t);
1468
1469	// flush any remaining data in the buffer (and hopefully
1470	// in most cases this is the only uiomove we'll do)
1471	if (evbuff_idx > uio_resid(uio)) {
1472	error = ENOSPC;
1473	} else {
1474	error = uiomove(evbuff, evbuff_idx, uio);
1475	}
1476
1477	get_out:
1478
1479	return error;
1480	}
1481
1482
1483
1484	static int
1485	fmod_watch(fs_event_watcher watcher, struct* uio *uio)
1486	{
1487	int error=`0`;
1488	user_ssize_t last_full_event_resid;
1489	kfs_event *kfse;
1490	uint16_t tmp16;
1491	int skipped;
1492
1493	last_full_event_resid = uio_resid(uio);
1494
1495	// need at least 2048 bytes of space (maxpathlen + 1 event buf)
1496	if (uio_resid(uio) < `2048` \|\| watcher == NULL) {
1497	return EINVAL;
1498	}
1499
1500	if (watcher->flags & WATCHER_CLOSING) {
1501	return `0`;
1502	}
1503
1504	if (OSAddAtomic(`1`, &watcher->num_readers) != `0`) {
1505	// don't allow multiple threads to read from the fd at the same time
1506	OSAddAtomic(-`1`, &watcher->num_readers);
1507	return EAGAIN;
1508	}
1509
1510	restart_watch:
1511	if (watcher->rd == watcher->wr) {
1512	if (watcher->flags & WATCHER_CLOSING) {
1513	OSAddAtomic(-`1`, &watcher->num_readers);
1514	return `0`;
1515	}
1516	OSAddAtomic(`1`, &watcher->blockers);
1517
1518	// there's nothing to do, go to sleep
1519	error = tsleep((caddr_t)watcher, PUSER\|PCATCH, "fsevents_empty", `0`);
1520
1521	OSAddAtomic(-`1`, &watcher->blockers);
1522
1523	if (error != `0` \|\| (watcher->flags & WATCHER_CLOSING)) {
1524	OSAddAtomic(-`1`, &watcher->num_readers);
1525	return error;
1526	}
1527	}
1528
1529	// if we dropped events, return that as an event first
1530	if (watcher->flags & WATCHER_DROPPED_EVENTS) {
1531	int32_t val = FSE_EVENTS_DROPPED;
1532
1533	error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1534	if (error == `0`) {
1535	val = `0`; // a fake pid
1536	error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1537
1538	tmp16 = FSE_ARG_DONE; // makes it a consistent msg
1539	error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
1540
1541	last_full_event_resid = uio_resid(uio);
1542	}
1543
1544	if (error) {
1545	OSAddAtomic(-`1`, &watcher->num_readers);
1546	return error;
1547	}
1548
1549	watcher->flags &= ~WATCHER_DROPPED_EVENTS;
1550	}
1551
1552	skipped = `0`;
1553
1554	lck_rw_lock_shared(&event_handling_lock);
1555	while (uio_resid(uio) > `0` && watcher->rd != watcher->wr) {
1556	if (watcher->flags & WATCHER_CLOSING) {
1557	break;
1558	}
1559
1560	//
1561	// check if the event is something of interest to us
1562	// (since it may have been recycled/reused and changed
1563	// its type or which device it is for)
1564	//
1565	kfse = watcher->event_queue[watcher->rd];
1566	if (!kfse \|\| kfse->type == FSE_INVALID \|\| kfse->type >= watcher->num_events \|\| kfse->refcount < `1`) {
1567	break;
1568	}
1569
1570	if (watcher->event_list[kfse->type] == FSE_REPORT && watcher_cares_about_dev(watcher, kfse->dev)) {
1571
1572	if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) && kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && is_ignored_directory(kfse->str)) {
1573	// If this is not an Apple System Service, skip specified directories
1574	// radar://12034844
1575	error = `0`;
1576	skipped = `1`;
1577	} else {
1578
1579	skipped = `0`;
1580	if (last_event_ptr == kfse) {
1581	last_event_ptr = NULL;
1582	last_event_type = -`1`;
1583	last_coalesced_time = `0`;
1584	}
1585	error = copy_out_kfse(watcher, kfse, uio);
1586	if (error != `0`) {
1587	// if an event won't fit or encountered an error while
1588	// we were copying it out, then backup to the last full
1589	// event and just bail out. if the error was ENOENT
1590	// then we can continue regular processing, otherwise
1591	// we should unlock things and return.
1592	uio_setresid(uio, last_full_event_resid);
1593	if (error != ENOENT) {
1594	lck_rw_unlock_shared(&event_handling_lock);
1595	error = `0`;
1596	goto get_out;
1597	}
1598	}
1599
1600	last_full_event_resid = uio_resid(uio);
1601	}
1602	}
1603
1604	watcher->event_queue[watcher->rd] = NULL;
1605	watcher->rd = (watcher->rd + `1`) % watcher->eventq_size;
1606	OSSynchronizeIO();
1607	release_event_ref(kfse);
1608	}
1609	lck_rw_unlock_shared(&event_handling_lock);
1610
1611	if (skipped && error == `0`) {
1612	goto restart_watch;
1613	}
1614
1615	get_out:
1616	OSAddAtomic(-`1`, &watcher->num_readers);
1617
1618	return error;
1619	}
1620
1621
1622	//
1623	// Shoo watchers away from a volume that's about to be unmounted
1624	// (so that it can be cleanly unmounted).
1625	//
1626	void
1627	fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
1628	{
1629	#if CONFIG_EMBEDDED
1630	dev_t dev = mp->mnt_vfsstat.f_fsid.val[`0`];
1631	int error, waitcount = `0`;
1632	struct timespec ts = {`1`, `0`};
1633
1634	// wait for any other pending unmounts to complete
1635	lock_watch_table();
1636	while (fsevent_unmount_dev != `0`) {
1637	error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
1638	if (error == EWOULDBLOCK)
1639	error = `0`;
1640	if (!error && (++waitcount >= `10`)) {
1641	error = EWOULDBLOCK;
1642	printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
1643	}
1644	if (error) {
1645	// there's a problem, bail out
1646	unlock_watch_table();
1647	return;
1648	}
1649	}
1650	if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == `0`) {
1651	// nobody watching for unmount pending events
1652	unlock_watch_table();
1653	return;
1654	}
1655	// this is now the current unmount pending
1656	fsevent_unmount_dev = dev;
1657	fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
1658	unlock_watch_table();
1659
1660	// send an event to notify the watcher they need to get off the mount
1661	error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
1662
1663	// wait for acknowledgment(s) (give up if it takes too long)
1664	lock_watch_table();
1665	waitcount = `0`;
1666	while (fsevent_unmount_dev == dev) {
1667	error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
1668	if (error == EWOULDBLOCK)
1669	error = `0`;
1670	if (!error && (++waitcount >= `10`)) {
1671	error = EWOULDBLOCK;
1672	printf("unmount pending ack timeout for dev %d\n", dev);
1673	}
1674	if (error) {
1675	// there's a problem, bail out
1676	if (fsevent_unmount_dev == dev) {
1677	fsevent_unmount_dev = `0`;
1678	fsevent_unmount_ack_count = `0`;
1679	}
1680	wakeup((caddr_t)&fsevent_unmount_dev);
1681	break;
1682	}
1683	}
1684	unlock_watch_table();
1685	#endif
1686	}
1687
1688
1689	//
1690	// /dev/fsevents device code
1691	//
1692	static int fsevents_installed = `0`;
1693
1694	typedef struct fsevent_handle {
1695	UInt32 flags;
1696	SInt32 active;
1697	fs_event_watcher *watcher;
1698	struct klist knotes;
1699	struct selinfo si;
1700	} fsevent_handle;
1701
1702	#define FSEH_CLOSING 0x0001
1703
1704	static int
1705	fseventsf_read(struct fileproc fp, struct* uio *uio,
1706	__unused int flags, __unused vfs_context_t ctx)
1707	{
1708	fsevent_handle fseh = (struct* fsevent_handle *)fp->f_fglob->fg_data;
1709	int error;
1710
1711	error = fmod_watch(fseh->watcher, uio);
1712
1713	return error;
1714	}
1715
1716
1717	static int
1718	fseventsf_write(__unused struct fileproc fp, __unused struct* uio *uio,
1719	__unused int flags, __unused vfs_context_t ctx)
1720	{
1721	return EIO;
1722	}
1723
1724	#pragma pack(push, 4)
1725	typedef struct fsevent_dev_filter_args32 {
1726	uint32_t num_devices;
1727	user32_addr_t devices;
1728	} fsevent_dev_filter_args32;
1729	typedef struct fsevent_dev_filter_args64 {
1730	uint32_t num_devices;
1731	user64_addr_t devices;
1732	} fsevent_dev_filter_args64;
1733	#pragma pack(pop)
1734
1735	#define FSEVENTS_DEVICE_FILTER_32 _IOW('s', 100, fsevent_dev_filter_args32)
1736	#define FSEVENTS_DEVICE_FILTER_64 _IOW('s', 100, fsevent_dev_filter_args64)
1737
1738	static int
1739	fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
1740	{
1741	fsevent_handle fseh = (struct* fsevent_handle *)fp->f_fglob->fg_data;
1742	int ret = `0`;
1743	fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
1744
1745	OSAddAtomic(`1`, &fseh->active);
1746	if (fseh->flags & FSEH_CLOSING) {
1747	OSAddAtomic(-`1`, &fseh->active);
1748	return `0`;
1749	}
1750
1751	switch (cmd) {
1752	case FIONBIO:
1753	case FIOASYNC:
1754	break;
1755
1756	case FSEVENTS_WANT_COMPACT_EVENTS: {
1757	fseh->watcher->flags \|= WATCHER_WANTS_COMPACT_EVENTS;
1758	break;
1759	}
1760
1761	case FSEVENTS_WANT_EXTENDED_INFO: {
1762	fseh->watcher->flags \|= WATCHER_WANTS_EXTENDED_INFO;
1763	break;
1764	}
1765
1766	case FSEVENTS_GET_CURRENT_ID: {
1767	(uint64_t )data = fseh->watcher->max_event_id;
1768	ret = `0`;
1769	break;
1770	}
1771
1772	case FSEVENTS_DEVICE_FILTER_32: {
1773	if (proc_is64bit(vfs_context_proc(ctx))) {
1774	ret = EINVAL;
1775	break;
1776	}
1777	fsevent_dev_filter_args32 devfilt_args32 = (fsevent_dev_filter_args32 )data;
1778
1779	devfilt_args = &_devfilt_args;
1780	memset(devfilt_args, `0`, sizeof(fsevent_dev_filter_args64));
1781	devfilt_args->num_devices = devfilt_args32->num_devices;
1782	devfilt_args->devices = CAST_USER_ADDR_T(devfilt_args32->devices);
1783	goto handle_dev_filter;
1784	}
1785
1786	case FSEVENTS_DEVICE_FILTER_64:
1787	if (!proc_is64bit(vfs_context_proc(ctx))) {
1788	ret = EINVAL;
1789	break;
1790	}
1791	devfilt_args = (fsevent_dev_filter_args64 *)data;
1792
1793	handle_dev_filter:
1794	{
1795	int new_num_devices;
1796	dev_t devices_not_to_watch, tmp=NULL;
1797
1798	if (devfilt_args->num_devices > `256`) {
1799	ret = EINVAL;
1800	break;
1801	}
1802
1803	new_num_devices = devfilt_args->num_devices;
1804	if (new_num_devices == `0`) {
1805	lock_watch_table();
1806
1807	tmp = fseh->watcher->devices_not_to_watch;
1808	fseh->watcher->devices_not_to_watch = NULL;
1809	fseh->watcher->num_devices = new_num_devices;
1810
1811	unlock_watch_table();
1812	if (tmp) {
1813	FREE(tmp, M_TEMP);
1814	}
1815	break;
1816	}
1817
1818	MALLOC(devices_not_to_watch, dev_t *,
1819	new_num_devices * sizeof(dev_t),
1820	M_TEMP, M_WAITOK);
1821	if (devices_not_to_watch == NULL) {
1822	ret = ENOMEM;
1823	break;
1824	}
1825
1826	ret = copyin(devfilt_args->devices,
1827	(void *)devices_not_to_watch,
1828	new_num_devices * sizeof(dev_t));
1829	if (ret) {
1830	FREE(devices_not_to_watch, M_TEMP);
1831	break;
1832	}
1833
1834	lock_watch_table();
1835	fseh->watcher->num_devices = new_num_devices;
1836	tmp = fseh->watcher->devices_not_to_watch;
1837	fseh->watcher->devices_not_to_watch = devices_not_to_watch;
1838	unlock_watch_table();
1839
1840	if (tmp) {
1841	FREE(tmp, M_TEMP);
1842	}
1843
1844	break;
1845	}
1846
1847	case FSEVENTS_UNMOUNT_PENDING_ACK: {
1848	lock_watch_table();
1849	dev_t dev = (dev_t )data;
1850	if (fsevent_unmount_dev == dev) {
1851	if (--fsevent_unmount_ack_count <= `0`) {
1852	fsevent_unmount_dev = `0`;
1853	wakeup((caddr_t)&fsevent_unmount_dev);
1854	}
1855	} else {
1856	printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
1857	ret = EINVAL;
1858	}
1859	unlock_watch_table();
1860	break;
1861	}
1862
1863	default:
1864	ret = EINVAL;
1865	break;
1866	}
1867
1868	OSAddAtomic(-`1`, &fseh->active);
1869	return (ret);
1870	}
1871
1872
1873	static int
1874	fseventsf_select(struct fileproc fp, int* which, __unused void *wql, vfs_context_t ctx)
1875	{
1876	fsevent_handle fseh = (struct* fsevent_handle *)fp->f_fglob->fg_data;
1877	int ready = `0`;
1878
1879	if ((which != FREAD) \|\| (fseh->watcher->flags & WATCHER_CLOSING)) {
1880	return `0`;
1881	}
1882
1883
1884	// if there's nothing in the queue, we're not ready
1885	if (fseh->watcher->rd != fseh->watcher->wr) {
1886	ready = `1`;
1887	}
1888
1889	if (!ready) {
1890	selrecord(vfs_context_proc(ctx), &fseh->si, wql);
1891	}
1892
1893	return ready;
1894	}
1895
1896
#if NOTUSED
/*
 * Stat entry point, compiled only when NOTUSED is set: always returns
 * ENOTSUP.
 */
static int
fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
{
	return ENOTSUP;
}
#endif
1904
1905	static int
1906	fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
1907	{
1908	fsevent_handle fseh = (struct* fsevent_handle *)fg->fg_data;
1909	fs_event_watcher *watcher;
1910
1911	OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
1912	while (OSAddAtomic(`0`, &fseh->active) > `0`) {
1913	tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", `1`);
1914	}
1915
1916	watcher = fseh->watcher;
1917	fg->fg_data = NULL;
1918	fseh->watcher = NULL;
1919
1920	remove_watcher(watcher);
1921	FREE(fseh, M_TEMP);
1922
1923	return `0`;
1924	}
1925
1926	static void
1927	filt_fsevent_detach(struct knote *kn)
1928	{
1929	fsevent_handle fseh = (struct* fsevent_handle *)kn->kn_hook;
1930
1931	lock_watch_table();
1932
1933	KNOTE_DETACH(&fseh->knotes, kn);
1934
1935	unlock_watch_table();
1936	}
1937
1938	/*
1939	* Determine whether this knote should be active
1940	*
1941	* This is kind of subtle.
1942	* --First, notice if the vnode has been revoked: in so, override hint
1943	* --EVFILT_READ knotes are checked no matter what the hint is
1944	* --Other knotes activate based on hint.
1945	* --If hint is revoke, set special flags and activate
1946	*/
1947	static int
1948	filt_fsevent(struct knote kn, long* hint)
1949	{
1950	fsevent_handle fseh = (struct* fsevent_handle *)kn->kn_hook;
1951	int activate = `0`;
1952	int32_t rd, wr, amt;
1953
1954	if (NOTE_REVOKE == hint) {
1955	kn->kn_flags \|= (EV_EOF \| EV_ONESHOT);
1956	activate = `1`;
1957	}
1958
1959	rd = fseh->watcher->rd;
1960	wr = fseh->watcher->wr;
1961	if (rd <= wr) {
1962	amt = wr - rd;
1963	} else {
1964	amt = fseh->watcher->eventq_size - (rd - wr);
1965	}
1966
1967	switch(kn->kn_filter) {
1968	case EVFILT_READ:
1969	kn->kn_data = amt;
1970
1971	if (kn->kn_data != `0`) {
1972	activate = `1`;
1973	}
1974	break;
1975	case EVFILT_VNODE:
1976	/ Check events this note matches against the hint /
1977	if (kn->kn_sfflags & hint) {
1978	kn->kn_fflags \|= hint; / Set which event occurred /
1979	}
1980	if (kn->kn_fflags != `0`) {
1981	activate = `1`;
1982	}
1983	break;
1984	default: {
1985	// nothing to do...
1986	break;
1987	}
1988	}
1989
1990	return (activate);
1991	}
1992
1993
1994	static int
1995	filt_fsevent_touch(struct knote kn, struct* kevent_internal_s *kev)
1996	{
1997	int res;
1998
1999	lock_watch_table();
2000
2001	/ accept new fflags/data as saved /
2002	kn->kn_sfflags = kev->fflags;
2003	kn->kn_sdata = kev->data;
2004
2005	/ restrict the current results to the (smaller?) set of new interest /
2006	/*
2007	* For compatibility with previous implementations, we leave kn_fflags
2008	* as they were before.
2009	*/
2010	//kn->kn_fflags &= kev->fflags;
2011
2012	/ determine if the filter is now fired /
2013	res = filt_fsevent(kn, `0`);
2014
2015	unlock_watch_table();
2016
2017	return res;
2018	}
2019
2020	static int
2021	filt_fsevent_process(struct knote kn, struct* filt_process_s data, struct* kevent_internal_s *kev)
2022	{
2023	#pragma unused(data)
2024	int res;
2025
2026	lock_watch_table();
2027
2028	res = filt_fsevent(kn, `0`);
2029	if (res) {
2030	*kev = kn->kn_kevent;
2031	if (kev->flags & EV_CLEAR) {
2032	kn->kn_data = `0`;
2033	kn->kn_fflags = `0`;
2034	}
2035	}
2036
2037	unlock_watch_table();
2038	return res;
2039	}
2040
2041	SECURITY_READ_ONLY_EARLY(struct filterops) fsevent_filtops = {
2042	.f_isfd = `1`,
2043	.f_attach = NULL,
2044	.f_detach = filt_fsevent_detach,
2045	.f_event = filt_fsevent,
2046	.f_touch = filt_fsevent_touch,
2047	.f_process = filt_fsevent_process,
2048	};
2049
2050	static int
2051	fseventsf_kqfilter(__unused struct fileproc fp, __unused struct* knote *kn,
2052	__unused struct kevent_internal_s *kev, __unused vfs_context_t ctx)
2053	{
2054	fsevent_handle fseh = (struct* fsevent_handle *)fp->f_fglob->fg_data;
2055	int res;
2056
2057	kn->kn_hook = (void*)fseh;
2058	kn->kn_hookid = `1`;
2059	kn->kn_filtid = EVFILTID_FSEVENT;
2060
2061	lock_watch_table();
2062
2063	KNOTE_ATTACH(&fseh->knotes, kn);
2064
2065	/ check to see if it is fired already /
2066	res = filt_fsevent(kn, `0`);
2067
2068	unlock_watch_table();
2069
2070	return res;
2071	}
2072
2073
2074	static int
2075	fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2076	{
2077	int counter = `0`;
2078	fsevent_handle fseh = (struct* fsevent_handle *)fp->f_fglob->fg_data;
2079
2080	// if there are people still waiting, sleep for 10ms to
2081	// let them clean up and get out of there. however we
2082	// also don't want to get stuck forever so if they don't
2083	// exit after 5 seconds we're tearing things down anyway.
2084	while(fseh->watcher->blockers && counter++ < `500`) {
2085	// issue wakeup in case anyone is blocked waiting for an event
2086	// do this each time we wakeup in case the blocker missed
2087	// the wakeup due to the unprotected test of WATCHER_CLOSING
2088	// and decision to tsleep in fmod_watch... this bit of
2089	// latency is a decent tradeoff against not having to
2090	// take and drop a lock in fmod_watch
2091	lock_watch_table();
2092	fsevents_wakeup(fseh->watcher);
2093	unlock_watch_table();
2094
2095	tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", `1`);
2096	}
2097
2098	return `0`;
2099	}
2100
2101
2102	static int
2103	fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2104	{
2105	if (!kauth_cred_issuser(kauth_cred_get())) {
2106	return EPERM;
2107	}
2108
2109	return `0`;
2110	}
2111
2112	static int
2113	fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2114	{
2115	return `0`;
2116	}
2117
2118	static int
2119	fseventsread(__unused dev_t dev, __unused struct uio uio, __unused int* ioflag)
2120	{
2121	return EIO;
2122	}
2123
2124
2125	static int
2126	parse_buffer_and_add_events(const char buffer, int* bufsize, vfs_context_t ctx, long *remainder)
2127	{
2128	const fse_info finfo, dest_finfo;
2129	const char path, ptr, dest_path, event_start=buffer;
2130	int path_len, type, dest_path_len, err = `0`;
2131
2132
2133	ptr = buffer;
2134	while ((ptr+sizeof(int)+sizeof(fse_info)+`1`) < buffer+bufsize) {
2135	type = (const* int *)ptr;
2136	if (type < `0` \|\| type >= FSE_MAX_EVENTS) {
2137	err = EINVAL;
2138	break;
2139	}
2140
2141	ptr += sizeof(int);
2142
2143	finfo = (const fse_info *)ptr;
2144	ptr += sizeof(fse_info);
2145
2146	path = ptr;
2147	while(ptr < buffer+bufsize && *ptr != `'\0'`) {
2148	ptr++;
2149	}
2150
2151	if (ptr >= buffer+bufsize) {
2152	break;
2153	}
2154
2155	ptr++; // advance over the trailing '\0'
2156
2157	path_len = ptr - path;
2158
2159	if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
2160	event_start = ptr; // record where the next event starts
2161
2162	err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
2163	if (err) {
2164	break;
2165	}
2166	continue;
2167	}
2168
2169	//
2170	// if we're here we have to slurp up the destination finfo
2171	// and path so that we can pass them to the add_fsevent()
2172	// call. basically it's a copy of the above code.
2173	//
2174	dest_finfo = (const fse_info *)ptr;
2175	ptr += sizeof(fse_info);
2176
2177	dest_path = ptr;
2178	while(ptr < buffer+bufsize && *ptr != `'\0'`) {
2179	ptr++;
2180	}
2181
2182	if (ptr >= buffer+bufsize) {
2183	break;
2184	}
2185
2186	ptr++; // advance over the trailing '\0'
2187	event_start = ptr; // record where the next event starts
2188
2189	dest_path_len = ptr - dest_path;
2190	//
2191	// If the destination inode number is non-zero, generate a rename
2192	// with both source and destination FSE_ARG_FINFO. Otherwise generate
2193	// a rename with only one FSE_ARG_FINFO. If you need to inject an
2194	// exchange with an inode of zero, just make that inode (and its path)
2195	// come in as the first one, not the second.
2196	//
2197	if (dest_finfo->ino) {
2198	err = add_fsevent(type, ctx,
2199	FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2200	FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
2201	FSE_ARG_DONE);
2202	} else {
2203	err = add_fsevent(type, ctx,
2204	FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2205	FSE_ARG_STRING, dest_path_len, dest_path,
2206	FSE_ARG_DONE);
2207	}
2208
2209	if (err) {
2210	break;
2211	}
2212
2213	}
2214
2215	// if the last event wasn't complete, set the remainder
2216	// to be the last event start boundary.
2217	//
2218	remainder = (long*)((buffer+bufsize) - event_start);
2219
2220	return err;
2221	}
2222
2223
//
// Note: this buffer size can not ever be less than
//       2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
//       because that is the max size for a single event.
//       I made it 4k to be a "nice" size.  making it
//       smaller is not a good idea.
//
#define WRITE_BUFFER_SIZE  4096
// staging buffer for fseventswrite(); allocated on first write
char *write_buffer=NULL;
2233
2234	static int
2235	fseventswrite(__unused dev_t dev, struct uio uio, __unused int* ioflag)
2236	{
2237	int error=`0`, count;
2238	vfs_context_t ctx = vfs_context_current();
2239	long offset=`0`, remainder;
2240
2241	lck_mtx_lock(&event_writer_lock);
2242
2243	if (write_buffer == NULL) {
2244	if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE, VM_KERN_MEMORY_FILE)) {
2245	lck_mtx_unlock(&event_writer_lock);
2246	return ENOMEM;
2247	}
2248	}
2249
2250	//
2251	// this loop copies in and processes the events written.
2252	// it takes care to copy in reasonable size chunks and
2253	// process them. if there is an event that spans a chunk
2254	// boundary we're careful to copy those bytes down to the
2255	// beginning of the buffer and read the next chunk in just
2256	// after it.
2257	//
2258	while(uio_resid(uio)) {
2259	if (uio_resid(uio) > (WRITE_BUFFER_SIZE-offset)) {
2260	count = WRITE_BUFFER_SIZE - offset;
2261	} else {
2262	count = uio_resid(uio);
2263	}
2264
2265	error = uiomove(write_buffer+offset, count, uio);
2266	if (error) {
2267	break;
2268	}
2269
2270	// printf("fsevents: write: copied in %d bytes (offset: %ld)\n", count, offset);
2271	error = parse_buffer_and_add_events(write_buffer, offset+count, ctx, &remainder);
2272	if (error) {
2273	break;
2274	}
2275
2276	//
2277	// if there's any remainder, copy it down to the beginning
2278	// of the buffer so that it will get processed the next time
2279	// through the loop. note that the remainder always starts
2280	// at an event boundary.
2281	//
2282	if (remainder != `0`) {
2283	// printf("fsevents: write: an event spanned a %d byte boundary. remainder: %ld\n",
2284	// WRITE_BUFFER_SIZE, remainder);
2285	memmove(write_buffer, (write_buffer+count+offset) - remainder, remainder);
2286	offset = remainder;
2287	} else {
2288	offset = `0`;
2289	}
2290	}
2291
2292	lck_mtx_unlock(&event_writer_lock);
2293
2294	return error;
2295	}
2296
2297
2298	static const struct fileops fsevents_fops = {
2299	.fo_type = DTYPE_FSEVENTS,
2300	.fo_read = fseventsf_read,
2301	.fo_write = fseventsf_write,
2302	.fo_ioctl = fseventsf_ioctl,
2303	.fo_select = fseventsf_select,
2304	.fo_close = fseventsf_close,
2305	.fo_kqfilter = fseventsf_kqfilter,
2306	.fo_drain = fseventsf_drain,
2307	};
2308
2309	typedef struct fsevent_clone_args32 {
2310	user32_addr_t event_list;
2311	int32_t num_events;
2312	int32_t event_queue_depth;
2313	user32_addr_t fd;
2314	} fsevent_clone_args32;
2315
2316	typedef struct fsevent_clone_args64 {
2317	user64_addr_t event_list;
2318	int32_t num_events;
2319	int32_t event_queue_depth;
2320	user64_addr_t fd;
2321	} fsevent_clone_args64;
2322
/*
 * Clone ioctl commands: same group and number, distinguished only by
 * the argument structure each one is encoded with.
 */
#define FSEVENTS_CLONE_32  _IOW('s', 1, fsevent_clone_args32)
#define FSEVENTS_CLONE_64  _IOW('s', 1, fsevent_clone_args64)
2325
2326	static int
2327	fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
2328	{
2329	struct fileproc *f;
2330	int fd, error;
2331	fsevent_handle *fseh = NULL;
2332	fsevent_clone_args64 *fse_clone_args, _fse_clone;
2333	int8_t *event_list;
2334	int is64bit = proc_is64bit(p);
2335
2336	switch (cmd) {
2337	case FSEVENTS_CLONE_32: {
2338	if (is64bit) {
2339	return EINVAL;
2340	}
2341	fsevent_clone_args32 args32 = (fsevent_clone_args32 )data;
2342
2343	fse_clone_args = &_fse_clone;
2344	memset(fse_clone_args, `0`, sizeof(fsevent_clone_args64));
2345
2346	fse_clone_args->event_list = CAST_USER_ADDR_T(args32->event_list);
2347	fse_clone_args->num_events = args32->num_events;
2348	fse_clone_args->event_queue_depth = args32->event_queue_depth;
2349	fse_clone_args->fd = CAST_USER_ADDR_T(args32->fd);
2350	goto handle_clone;
2351	}
2352
2353	case FSEVENTS_CLONE_64:
2354	if (!is64bit) {
2355	return EINVAL;
2356	}
2357	fse_clone_args = (fsevent_clone_args64 *)data;
2358
2359	handle_clone:
2360	if (fse_clone_args->num_events < `0` \|\| fse_clone_args->num_events > `4096`) {
2361	return EINVAL;
2362	}
2363
2364	MALLOC(fseh, fsevent_handle , sizeof*(fsevent_handle),
2365	M_TEMP, M_WAITOK);
2366	if (fseh == NULL) {
2367	return ENOMEM;
2368	}
2369	memset(fseh, `0`, sizeof(fsevent_handle));
2370
2371	klist_init(&fseh->knotes);
2372
2373	MALLOC(event_list, int8_t *,
2374	fse_clone_args->num_events * sizeof(int8_t),
2375	M_TEMP, M_WAITOK);
2376	if (event_list == NULL) {
2377	FREE(fseh, M_TEMP);
2378	return ENOMEM;
2379	}
2380
2381	error = copyin(fse_clone_args->event_list,
2382	(void *)event_list,
2383	fse_clone_args->num_events * sizeof(int8_t));
2384	if (error) {
2385	FREE(event_list, M_TEMP);
2386	FREE(fseh, M_TEMP);
2387	return error;
2388	}
2389
2390	error = add_watcher(event_list,
2391	fse_clone_args->num_events,
2392	fse_clone_args->event_queue_depth,
2393	&fseh->watcher,
2394	fseh);
2395	if (error) {
2396	FREE(event_list, M_TEMP);
2397	FREE(fseh, M_TEMP);
2398	return error;
2399	}
2400
2401	fseh->watcher->fseh = fseh;
2402
2403	error = falloc(p, &f, &fd, vfs_context_current());
2404	if (error) {
2405	remove_watcher(fseh->watcher);
2406	FREE(event_list, M_TEMP);
2407	FREE(fseh, M_TEMP);
2408	return (error);
2409	}
2410	proc_fdlock(p);
2411	f->f_fglob->fg_flag = FREAD \| FWRITE;
2412	f->f_fglob->fg_ops = &fsevents_fops;
2413	f->f_fglob->fg_data = (caddr_t) fseh;
2414	proc_fdunlock(p);
2415	error = copyout((void )&fd, fse_clone_args->fd, sizeof*(int32_t));
2416	if (error != `0`) {
2417	fp_free(p, fd, f);
2418	} else {
2419	proc_fdlock(p);
2420	procfdtbl_releasefd(p, fd, NULL);
2421	fp_drop(p, fd, f, `1`);
2422	proc_fdunlock(p);
2423	}
2424	break;
2425
2426	default:
2427	error = EINVAL;
2428	break;
2429	}
2430
2431	return error;
2432	}
2433
2434	static void
2435	fsevents_wakeup(fs_event_watcher *watcher)
2436	{
2437	selwakeup(&watcher->fseh->si);
2438	KNOTE(&watcher->fseh->knotes, NOTE_WRITE\|NOTE_NONE);
2439	wakeup((caddr_t)watcher);
2440	}
2441
2442
2443	/*
2444	* A struct describing which functions will get invoked for certain
2445	* actions.
2446	*/
2447	static struct cdevsw fsevents_cdevsw =
2448	{
2449	fseventsopen, / open /
2450	fseventsclose, / close /
2451	fseventsread, / read /
2452	fseventswrite, / write /
2453	fseventsioctl, / ioctl /
2454	(stop_fcn_t )&nulldev, /* stop /
2455	(reset_fcn_t )&nulldev, /* reset /
2456	NULL, / tty's /
2457	eno_select, / select /
2458	eno_mmap, / mmap /
2459	eno_strat, / strategy /
2460	eno_getc, / getc /
2461	eno_putc, / putc /
2462	`0` / type /
2463	};
2464
2465
2466	/*
2467	* Called to initialize our device,
2468	* and to register ourselves with devfs
2469	*/
2470
2471	void
2472	fsevents_init(void)
2473	{
2474	int ret;
2475
2476	if (fsevents_installed) {
2477	return;
2478	}
2479
2480	fsevents_installed = `1`;
2481
2482	ret = cdevsw_add(-`1`, &fsevents_cdevsw);
2483	if (ret < `0`) {
2484	fsevents_installed = `0`;
2485	return;
2486	}
2487
2488	devfs_make_node(makedev (ret, `0`), DEVFS_CHAR,
2489	UID_ROOT, GID_WHEEL, `0644`, "fsevents", `0`);
2490
2491	fsevents_internal_init();
2492	}
2493
2494
2495	char *
2496	get_pathbuff(void)
2497	{
2498	char *path;
2499
2500	MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2501	return path;
2502	}
2503
2504	void
2505	release_pathbuff(char *path)
2506	{
2507
2508	if (path == NULL) {
2509	return;
2510	}
2511	FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2512	}
2513
2514	int
2515	get_fse_info(struct vnode vp, fse_info fse, __unused vfs_context_t ctx)
2516	{
2517	struct vnode_attr va;
2518
2519	VATTR_INIT(&va);
2520	VATTR_WANTED(&va, va_fsid);
2521	VATTR_WANTED(&va, va_fileid);
2522	VATTR_WANTED(&va, va_mode);
2523	VATTR_WANTED(&va, va_uid);
2524	VATTR_WANTED(&va, va_gid);
2525	if (vp->v_flag & VISHARDLINK) {
2526	if (vp->v_type == VDIR) {
2527	VATTR_WANTED(&va, va_dirlinkcount);
2528	} else {
2529	VATTR_WANTED(&va, va_nlink);
2530	}
2531	}
2532
2533	if (vnode_getattr(vp, &va, vfs_context_kernel()) != `0`) {
2534	memset(fse, `0`, sizeof(fse_info));
2535	return -`1`;
2536	}
2537
2538	return vnode_get_fse_info_from_vap(vp, fse, &va);
2539	}
2540
2541	int
2542	vnode_get_fse_info_from_vap(vnode_t vp, fse_info fse, struct* vnode_attr *vap)
2543	{
2544	fse->ino = (ino64_t)vap->va_fileid;
2545	fse->dev = (dev_t)vap->va_fsid;
2546	fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) \| vap->va_mode;
2547	fse->uid = (uid_t)vap->va_uid;
2548	fse->gid = (gid_t)vap->va_gid;
2549	if (vp->v_flag & VISHARDLINK) {
2550	fse->mode \|= FSE_MODE_HLINK;
2551	if (vp->v_type == VDIR) {
2552	fse->nlink = (uint64_t)vap->va_dirlinkcount;
2553	} else {
2554	fse->nlink = (uint64_t)vap->va_nlink;
2555	}
2556	}
2557
2558	return `0`;
2559	}
2560
2561	void
2562	create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap)
2563	{
2564	int fsevent_type=FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic
2565	char pathbuf[MAXPATHLEN];
2566	fse_info fse;
2567
2568
2569	if (kevents & VNODE_EVENT_DELETE) {
2570	fsevent_type = FSE_DELETE;
2571	} else if (kevents & (VNODE_EVENT_EXTEND\|VNODE_EVENT_WRITE)) {
2572	fsevent_type = FSE_CONTENT_MODIFIED;
2573	} else if (kevents & VNODE_EVENT_LINK) {
2574	fsevent_type = FSE_CREATE_FILE;
2575	} else if (kevents & VNODE_EVENT_RENAME) {
2576	fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info;
2577	} else if (kevents & (VNODE_EVENT_FILE_CREATED\|VNODE_EVENT_FILE_REMOVED\|VNODE_EVENT_DIR_CREATED\|VNODE_EVENT_DIR_REMOVED)) {
2578	fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it
2579	} else { // a catch all for VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else
2580	fsevent_type = FSE_STAT_CHANGED;
2581	}
2582
2583	// printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)");
2584
2585	fse.dev = vap->va_fsid;
2586	fse.ino = vap->va_fileid;
2587	fse.mode = vnode_vttoif(vnode_vtype(vp)) \| (uint32_t)vap->va_mode;
2588	if (vp->v_flag & VISHARDLINK) {
2589	fse.mode \|= FSE_MODE_HLINK;
2590	if (vp->v_type == VDIR) {
2591	fse.nlink = vap->va_dirlinkcount;
2592	} else {
2593	fse.nlink = vap->va_nlink;
2594	}
2595	}
2596
2597	if (vp->v_type == VDIR) {
2598	fse.mode \|= FSE_REMOTE_DIR_EVENT;
2599	}
2600
2601
2602	fse.uid = vap->va_uid;
2603	fse.gid = vap->va_gid;
2604
2605	len = sizeof(pathbuf);
2606	if (vn_getpath(vp, pathbuf, &len) == `0`) {
2607	add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE);
2608	}
2609	return;
2610	}
2611
2612	#else /* CONFIG_FSE */
2613
2614	#include <sys/fsevents.h>
2615
2616	/*
2617	* The get_pathbuff and release_pathbuff routines are used in places not
2618	* related to fsevents, and it's a handy abstraction, so define trivial
2619	* versions that don't cache a pool of buffers. This way, we don't have
2620	* to conditionalize the callers, and they still get the advantage of the
2621	* pool of buffers if CONFIG_FSE is turned on.
2622	*/
2623	char *
2624	get_pathbuff(void)
2625	{
2626	char *path;
2627	MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2628	return path;
2629	}
2630
2631	void
2632	release_pathbuff(char *path)
2633	{
2634	FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2635	}
2636
2637	int
2638	add_fsevent(__unused int type, __unused vfs_context_t ctx, ...)
2639	{
2640	return `0`;
2641	}
2642
2643	int need_fsevent(__unused int type, __unused vnode_t vp)
2644	{
2645	return `0`;
2646	}
2647
2648	#endif /* CONFIG_FSE */
2649

Browse the source code of xnu/bsd/vfs/vfs_fsevents.c