/*
 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <stdarg.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/event.h> // for kqueue related stuff
#include <sys/fsevents.h>

#if CONFIG_FSE
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/vnode_internal.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/uio.h>
#include <kern/kalloc.h>
#include <sys/dirent.h>
#include <sys/attr.h>
#include <sys/sysctl.h>
#include <sys/ubc.h>
#include <machine/cons.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/devfs/devfs.h>
#include <sys/filio.h>
#include <kern/locks.h>
#include <libkern/OSAtomic.h>
#include <kern/zalloc.h>
#include <mach/mach_time.h>
#include <kern/thread_call.h>
#include <kern/clock.h>
#include <IOKit/IOBSD.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <pexpert/pexpert.h>
#include <libkern/section_keywords.h>

typedef struct kfs_event {
	LIST_ENTRY(kfs_event) kevent_list;
	uint64_t abstime;   // when this event happened (mach_absolute_time())
	int16_t type;       // type code of this event
	uint16_t flags;     // per-event flags
	int32_t refcount;   // number of clients referencing this
	pid_t pid;
	int32_t spare;

	union {
		struct regular_event {
			// This must match the layout of fse_info
			// exactly, except that the "nlink" field is
			// not included here.  See copy_out_kfse()
			// for all of the sordid details, and also
			// the _Static_assert() statements below.
			ino64_t ino;
			dev_t dev;
			int32_t mode;
			uid_t uid;
			uint32_t document_id;
			struct kfs_event *dest;   // if this is a two-file op
			const char *str;
			uint16_t len;
		} regular_event;

		struct {
			ino64_t src_ino;
			ino64_t dst_ino;
			uint64_t docid;
			dev_t dev;
		} docid_event;

		struct {
			uint32_t version;
			dev_t dev;
			ino64_t ino;
			uint64_t origin_id;
			uint64_t age;
			uint32_t use_state;
			uint32_t urgency;
			uint64_t size;
		} activity_event;

		struct {
			audit_token_t audit_token;
			const char *str;
			uint16_t len;
		} access_granted_event;
	};
} kfs_event;

_Static_assert(offsetof(struct regular_event, ino) == offsetof(fse_info, ino),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, dev) == offsetof(fse_info, dev),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, mode) == offsetof(fse_info, mode),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, uid) == offsetof(fse_info, uid),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, document_id) == offsetof(fse_info, document_id),
    "kfs_event and fse_info out-of-sync");

#define KFSE_INFO_COPYSIZE      offsetof(fse_info, nlink)
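
/*
 * Illustrative sketch (not part of the driver logic): because struct
 * regular_event mirrors the leading fields of fse_info, copy_out_kfse()
 * can hand the whole payload to fill_buff() in one shot instead of
 * copying field by field, e.g.:
 *
 *   error = fill_buff(FSE_ARG_FINFO, KFSE_INFO_COPYSIZE,
 *       &kfse->regular_event, evbuff, &evbuff_idx, sizeof(evbuff), uio);
 *
 * KFSE_INFO_COPYSIZE stops just short of fse_info's "nlink" field, which
 * regular_event does not carry; the _Static_asserts above keep the two
 * layouts from drifting apart.
 */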

// flags for the flags field
#define KFSE_COMBINED_EVENTS            0x0001
#define KFSE_CONTAINS_DROPPED_EVENTS    0x0002
#define KFSE_ON_LIST                    0x0004
#define KFSE_BEING_CREATED              0x0008

LIST_HEAD(kfse_list, kfs_event) kfse_list_head = LIST_HEAD_INITIALIZER(x);
int num_events_outstanding = 0;
int num_pending_rename = 0;


struct fsevent_handle;

typedef struct fs_event_watcher {
	int8_t *event_list;            // the events we're interested in
	int32_t num_events;
	dev_t *devices_not_to_watch;   // report events from devices not in this list
	uint32_t num_devices;
	int32_t flags;
	kfs_event **event_queue;
	int32_t eventq_size;           // number of event pointers in queue
	int32_t num_readers;
	int32_t rd;                    // read index into the event_queue
	int32_t wr;                    // write index into the event_queue
	int32_t blockers;
	int32_t my_id;
	uint32_t num_dropped;
	uint64_t max_event_id;
	struct fsevent_handle *fseh;
	pid_t pid;
	char proc_name[(2 * MAXCOMLEN) + 1];
} fs_event_watcher;

// fs_event_watcher flags
#define WATCHER_DROPPED_EVENTS          0x0001
#define WATCHER_CLOSING                 0x0002
#define WATCHER_WANTS_COMPACT_EVENTS    0x0004
#define WATCHER_WANTS_EXTENDED_INFO     0x0008
#define WATCHER_APPLE_SYSTEM_SERVICE    0x0010   // fseventsd, coreservicesd, mds, revisiond

#define MAX_WATCHERS  8
static fs_event_watcher *watcher_table[MAX_WATCHERS];

#define DEFAULT_MAX_KFS_EVENTS  4096
static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;

// we allocate kfs_event structures out of this zone
static zone_t event_zone;
static int fs_event_init = 0;

//
// this array records whether anyone is interested in a
// particular type of event.  if no one is, we bail out
// early from the event delivery
//
static int16_t fs_event_type_watchers[FSE_MAX_EVENTS];

// the device currently being unmounted:
static dev_t fsevent_unmount_dev = 0;
// how many ACKs are still outstanding:
static int fsevent_unmount_ack_count = 0;

static int  watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse);
static void fsevents_wakeup(fs_event_watcher *watcher);

//
// Locks
//
static LCK_ATTR_DECLARE(fsevent_lock_attr, 0, 0);
static LCK_GRP_DECLARE(fsevent_mutex_group, "fsevent-mutex");
static LCK_GRP_DECLARE(fsevent_rw_group, "fsevent-rw");

static LCK_RW_DECLARE_ATTR(event_handling_lock, // handles locking for event manipulation and recycling
    &fsevent_rw_group, &fsevent_lock_attr);
static LCK_MTX_DECLARE_ATTR(watch_table_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);
static LCK_MTX_DECLARE_ATTR(event_buf_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);
static LCK_MTX_DECLARE_ATTR(event_writer_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);


/* Explicitly declare qsort so compiler doesn't complain */
__private_extern__ void qsort(
	void * array,
	size_t nmembers,
	size_t member_size,
	int (*)(const void *, const void *));

static int
is_ignored_directory(const char *path)
{
	if (!path) {
		return 0;
	}

#define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
	if (IS_TLD("/.Spotlight-V100/") ||
	    IS_TLD("/.MobileBackups/") ||
	    IS_TLD("/Backups.backupdb/")) {
		return 1;
	}
#undef IS_TLD

	return 0;
}
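
/*
 * Example (illustrative): a path such as
 * "/Volumes/Backup/Backups.backupdb/MyMac/2021-01-01/..." contains the
 * "/Backups.backupdb/" component checked above, so it is reported as an
 * ignored directory; the same applies to Spotlight index and mobile
 * backup trees.
 */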

static void
fsevents_internal_init(void)
{
	int i;

	if (fs_event_init++ != 0) {
		return;
	}

	for (i = 0; i < FSE_MAX_EVENTS; i++) {
		fs_event_type_watchers[i] = 0;
	}

	memset(watcher_table, 0, sizeof(watcher_table));

	PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));

	event_zone = zone_create_ext("fs-event-buf", sizeof(kfs_event),
	    ZC_NOGC | ZC_NOCALLOUT, ZONE_ID_ANY, ^(zone_t z) {
		// mark the zone as exhaustible so that it will not
		// ever grow beyond what we initially filled it with
		zone_set_exhaustible(z, max_kfs_events, /* exhausts */ true);
	});

	zone_fill_initially(event_zone, max_kfs_events);
}
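
/*
 * Note (summary of behavior implemented below): because the zone is
 * exhaustible and pre-filled with max_kfs_events entries, a
 * zalloc_noblock() in add_fsevent() can return NULL once every entry is
 * in flight.  When that happens, add_fsevent() flags all watchers with
 * WATCHER_DROPPED_EVENTS and returns ENOSPC rather than blocking.
 */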

static void
lock_watch_table(void)
{
	lck_mtx_lock(&watch_table_lock);
}

static void
unlock_watch_table(void)
{
	lck_mtx_unlock(&watch_table_lock);
}

static void
lock_fs_event_list(void)
{
	lck_mtx_lock(&event_buf_lock);
}

static void
unlock_fs_event_list(void)
{
	lck_mtx_unlock(&event_buf_lock);
}

// forward prototype
static void release_event_ref(kfs_event *kfse);

static boolean_t
watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
{
	unsigned int i;

	// if devices_not_to_watch is NULL then we care about all
	// events from all devices
	if (watcher->devices_not_to_watch == NULL) {
		return true;
	}

	for (i = 0; i < watcher->num_devices; i++) {
		if (dev == watcher->devices_not_to_watch[i]) {
			// found a match! that means we do not
			// want events from this device.
			return false;
		}
	}

	// if we're here it's not in the devices_not_to_watch[]
	// list so that means we do care about it
	return true;
}


int
need_fsevent(int type, vnode_t vp)
{
	if (type >= 0 && type < FSE_MAX_EVENTS && fs_event_type_watchers[type] == 0) {
		return 0;
	}

	// events in /dev aren't really interesting...
	if (vp->v_tag == VT_DEVFS) {
		return 0;
	}

	return 1;
}


#define is_throw_away(x)  ((x) == FSE_STAT_CHANGED || (x) == FSE_CONTENT_MODIFIED)


int num_dropped = 0;

static struct timeval last_print;

//
// These variables are used to track coalescing multiple identical
// events for the same vnode/pathname.  If we get the same event
// type and same vnode/pathname as the previous event, we just drop
// the event since it's superfluous.  This improves some micro-
// benchmarks considerably and actually has a real-world impact on
// tests like a Finder copy where multiple stat-changed events can
// get coalesced.
//
static int last_event_type = -1;
static void *last_ptr = NULL;
static char last_str[MAXPATHLEN];
static int last_nlen = 0;
static int last_vid = -1;
static uint64_t last_coalesced_time = 0;
static void *last_event_ptr = NULL;
static pid_t last_pid = -1;
int last_coalesced = 0;
static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 };
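
/*
 * Worked example (illustrative): if a process writes the same file in a
 * tight loop, the second and later FSE_CONTENT_MODIFIED events arrive
 * with the same type, the same vnode id, and the same pid within one
 * second of the previous event, so add_fsevent() drops them and only
 * bumps last_coalesced.  A different pid, a different vnode/path, or a
 * gap of more than a second starts a new event.
 */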

#define MAX_HARDLINK_NOTIFICATIONS 128

static inline void
kfse_init(kfs_event *kfse, int type, uint64_t time, proc_t p)
{
	memset(kfse, 0, sizeof(*kfse));
	kfse->refcount = 1;
	kfse->type = (int16_t)type;
	kfse->abstime = time;
	kfse->pid = proc_getpid(p);

	OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags);
}
375
376int
377add_fsevent(int type, vfs_context_t ctx, ...)
378{
379 struct proc *p = vfs_context_proc(ctx);
380 int i, arg_type, ret;
381 kfs_event *kfse, *kfse_dest = NULL, *cur;
382 fs_event_watcher *watcher;
383 va_list ap;
384 int error = 0, did_alloc = 0;
385 int64_t orig_linkcount = -1;
386 dev_t dev = 0;
387 uint64_t now, elapsed;
388 uint64_t orig_linkid = 0, next_linkid = 0;
389 uint64_t link_parentid = 0;
390 char *pathbuff = NULL, *path_override = NULL;
391 char *link_name = NULL;
392 vnode_t link_vp = NULL;
393 int pathbuff_len = 0;
394 uthread_t ut = get_bsdthread_info(current_thread());
395 bool do_all_links = true;
396 bool do_cache_reset = false;
397
398 if (type == FSE_CONTENT_MODIFIED_NO_HLINK) {
399 do_all_links = false;
400 type = FSE_CONTENT_MODIFIED;
401 }
402
403
404restart:
405 va_start(ap, ctx);
406
407 // ignore bogus event types..
408 if (type < 0 || type >= FSE_MAX_EVENTS) {
409 return EINVAL;
410 }
411
412 // if no one cares about this type of event, bail out
413 if (fs_event_type_watchers[type] == 0) {
414 va_end(ap);
415
416 return 0;
417 }
418
419 now = mach_absolute_time();
420
421 // find a free event and snag it for our use
422 // NOTE: do not do anything that would block until
423 // the lock is dropped.
424 lock_fs_event_list();
425
426 //
427 // check if this event is identical to the previous one...
428 // (as long as it's not an event type that can never be the
429 // same as a previous event)
430 //
431 if (path_override == NULL &&
432 type != FSE_CREATE_FILE &&
433 type != FSE_DELETE &&
434 type != FSE_RENAME &&
435 type != FSE_EXCHANGE &&
436 type != FSE_CHOWN &&
437 type != FSE_DOCID_CHANGED &&
438 type != FSE_DOCID_CREATED &&
439 type != FSE_CLONE &&
440 type != FSE_ACTIVITY &&
441 // don't coalesce FSE_ACCESS_GRANTED because it could
442 // have been granted to a different process.
443 type != FSE_ACCESS_GRANTED) {
444 void *ptr = NULL;
445 int vid = 0, was_str = 0, nlen = 0;
446
447 for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
448 switch (arg_type) {
449 case FSE_ARG_VNODE: {
450 ptr = va_arg(ap, void *);
451 vid = vnode_vid(vp: (struct vnode *)ptr);
452 last_str[0] = '\0';
453 break;
454 }
455 case FSE_ARG_STRING: {
456 nlen = va_arg(ap, int32_t);
457 ptr = va_arg(ap, void *);
458 was_str = 1;
459 break;
460 }
461 }
462 if (ptr != NULL) {
463 break;
464 }
465 }
466
467 if (sTimebaseInfo.denom == 0) {
468 (void) clock_timebase_info(info: &sTimebaseInfo);
469 }
470
471 elapsed = (now - last_coalesced_time);
472 if (sTimebaseInfo.denom != sTimebaseInfo.numer) {
473 if (sTimebaseInfo.denom == 1) {
474 elapsed *= sTimebaseInfo.numer;
475 } else {
476 // this could overflow... the worst that will happen is that we'll
477 // send (or not send) an extra event so I'm not going to worry about
478 // doing the math right like dtrace_abs_to_nano() does.
479 elapsed = (elapsed * sTimebaseInfo.numer) / (uint64_t)sTimebaseInfo.denom;
480 }
481 }
482
483 if (type == last_event_type
484 && (elapsed < 1000000000)
485 && (last_pid == proc_getpid(p))
486 &&
487 ((vid && vid == last_vid && last_ptr == ptr)
488 ||
489 (last_str[0] && last_nlen == nlen && ptr && strcmp(s1: last_str, s2: ptr) == 0))
490 ) {
491 last_coalesced++;
492 unlock_fs_event_list();
493 va_end(ap);
494
495 return 0;
496 } else {
497 last_ptr = ptr;
498 if (ptr && was_str) {
499 strlcpy(dst: last_str, src: ptr, n: sizeof(last_str));
500 }
501 last_nlen = nlen;
502 last_vid = vid;
503 last_event_type = type;
504 last_coalesced_time = now;
505 last_pid = proc_getpid(p);
506 }
507 }
508 va_start(ap, ctx);
509
510
511 kfse = zalloc_noblock(zone: event_zone);
512 if (kfse && (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE)) {
513 kfse_dest = zalloc_noblock(zone: event_zone);
514 if (kfse_dest == NULL) {
515 did_alloc = 1;
516 zfree(event_zone, kfse);
517 kfse = NULL;
518 }
519 }
520
521
522 if (kfse == NULL) { // yikes! no free events
523 unlock_fs_event_list();
524 lock_watch_table();
525
526 for (i = 0; i < MAX_WATCHERS; i++) {
527 watcher = watcher_table[i];
528 if (watcher == NULL) {
529 continue;
530 }
531
532 watcher->flags |= WATCHER_DROPPED_EVENTS;
533 fsevents_wakeup(watcher);
534 }
535 unlock_watch_table();
536
537 {
538 struct timeval current_tv;
539
540 num_dropped++;
541
			// only print a message at most once every 10 seconds
			microuptime(&current_tv);
			if ((current_tv.tv_sec - last_print.tv_sec) > 10) {
545 int ii;
546 void *junkptr = zalloc_noblock(zone: event_zone), *listhead = kfse_list_head.lh_first;
547
548 printf("add_fsevent: event queue is full! dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding);
549 printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename);
550 printf("add_fsevent: zalloc sez: %p\n", junkptr);
551 printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]);
552 lock_watch_table();
553 for (ii = 0; ii < MAX_WATCHERS; ii++) {
554 if (watcher_table[ii] == NULL) {
555 continue;
556 }
557
558 printf("add_fsevent: watcher %s %p: rd %4d wr %4d q_size %4d flags 0x%x\n",
559 watcher_table[ii]->proc_name,
560 watcher_table[ii],
561 watcher_table[ii]->rd, watcher_table[ii]->wr,
562 watcher_table[ii]->eventq_size, watcher_table[ii]->flags);
563 }
564 unlock_watch_table();
565
566 last_print = current_tv;
567 if (junkptr) {
568 zfree(event_zone, junkptr);
569 }
570 }
571 }
572
573 if (pathbuff) {
574 release_pathbuff(path: pathbuff);
575 pathbuff = NULL;
576 }
577 return ENOSPC;
578 }
579
580 kfse_init(kfse, type, time: now, p);
581 last_event_ptr = kfse;
582 if (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE) {
583 kfse_init(kfse: kfse_dest, type, time: now, p);
584 kfse->regular_event.dest = kfse_dest;
585 }
586
587 num_events_outstanding++;
588 if (kfse->type == FSE_RENAME) {
589 num_pending_rename++;
590 }
591 LIST_INSERT_HEAD(&kfse_list_head, kfse, kevent_list);
592 OSBitOrAtomic16(KFSE_ON_LIST, address: &kfse->flags);
593
	if (kfse->refcount < 1) {
		panic("add_fsevent: line %d: kfse refcount %d but should be at least 1", __LINE__, kfse->refcount);
	}
597
598 unlock_fs_event_list(); // at this point it's safe to unlock
599
600 //
601 // now process the arguments passed in and copy them into
602 // the kfse
603 //
604
605 cur = kfse;
606
607 if (type == FSE_DOCID_CREATED || type == FSE_DOCID_CHANGED) {
608 //
609 // These events are special and not like the other events.
610 // They only have a dev_t, src inode #, dest inode #, and
611 // a doc-id (va_arg'd to us in that order). If we don't
612 // get one of them, then the error-check filler will
613 // catch it.
614 //
615 do_all_links = false;
616 arg_type = va_arg(ap, int32_t);
617 if (arg_type == FSE_ARG_DEV) {
618 cur->docid_event.dev = (dev_t)(va_arg(ap, dev_t));
619 }
620
621 arg_type = va_arg(ap, int32_t);
622 if (arg_type == FSE_ARG_INO) {
623 cur->docid_event.src_ino =
624 (ino64_t)(va_arg(ap, ino64_t));
625 }
626
627 arg_type = va_arg(ap, int32_t);
628 if (arg_type == FSE_ARG_INO) {
629 cur->docid_event.dst_ino =
630 (ino64_t)(va_arg(ap, ino64_t));
631 }
632
633 arg_type = va_arg(ap, int32_t);
634 if (arg_type == FSE_ARG_INT32) {
635 cur->docid_event.docid =
636 (uint64_t)va_arg(ap, uint32_t);
637 } else if (arg_type == FSE_ARG_INT64) {
638 cur->docid_event.docid =
639 (uint64_t)va_arg(ap, uint64_t);
640 }
641
642 goto done_with_args;
643 }
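
	/*
	 * Illustrative caller sketch (variable names are hypothetical): a
	 * document-ID event is expected to be queued with exactly the
	 * argument order consumed above, e.g.:
	 *
	 *   add_fsevent(FSE_DOCID_CHANGED, ctx,
	 *       FSE_ARG_DEV, dev,
	 *       FSE_ARG_INO, src_ino,
	 *       FSE_ARG_INO, dst_ino,
	 *       FSE_ARG_INT64, (uint64_t)docid,
	 *       FSE_ARG_DONE);
	 */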
644
645 if (type == FSE_ACTIVITY) {
646 do_all_links = false;
647
648 arg_type = va_arg(ap, int32_t);
649 if (arg_type == FSE_ARG_INT32) {
650 cur->activity_event.version = (uint32_t)(va_arg(ap, uint32_t));
651 }
652
653 arg_type = va_arg(ap, int32_t);
654 if (arg_type == FSE_ARG_DEV) {
655 cur->activity_event.dev = (dev_t)(va_arg(ap, dev_t));
656 }
657
658 arg_type = va_arg(ap, int32_t);
659 if (arg_type == FSE_ARG_INO) {
660 cur->activity_event.ino = (ino64_t)(va_arg(ap, ino64_t));
661 }
662
663 arg_type = va_arg(ap, int32_t);
664 if (arg_type == FSE_ARG_INT64) {
665 cur->activity_event.origin_id = (uint64_t)(va_arg(ap, uint64_t));
666 }
667
668 arg_type = va_arg(ap, int32_t);
669 if (arg_type == FSE_ARG_INT64) {
670 cur->activity_event.age = (uint64_t)(va_arg(ap, uint64_t));
671 }
672
673 arg_type = va_arg(ap, int32_t);
674 if (arg_type == FSE_ARG_INT32) {
675 cur->activity_event.use_state = (uint32_t)(va_arg(ap, uint32_t));
676 }
677
678 arg_type = va_arg(ap, int32_t);
679 if (arg_type == FSE_ARG_INT32) {
680 cur->activity_event.urgency = (uint32_t)(va_arg(ap, uint32_t));
681 }
682
683 arg_type = va_arg(ap, int32_t);
684 if (arg_type == FSE_ARG_INT64) {
685 cur->activity_event.size = (uint64_t)(va_arg(ap, uint64_t));
686 }
687
688 goto done_with_args;
689 }
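
	/*
	 * Illustrative caller sketch (values are hypothetical): FSE_ACTIVITY
	 * consumes its arguments in the fixed order parsed above -- version,
	 * dev, inode, origin id, age, use state, urgency, size:
	 *
	 *   add_fsevent(FSE_ACTIVITY, ctx,
	 *       FSE_ARG_INT32, version, FSE_ARG_DEV, dev, FSE_ARG_INO, ino,
	 *       FSE_ARG_INT64, origin_id, FSE_ARG_INT64, age,
	 *       FSE_ARG_INT32, use_state, FSE_ARG_INT32, urgency,
	 *       FSE_ARG_INT64, size, FSE_ARG_DONE);
	 */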
690#if CONFIG_FSE_ACCESS_GRANTED
691 if (type == FSE_ACCESS_GRANTED) {
		//
		// This one is also different.  We get a path string
		// and (maybe) an audit token.  If we don't get the
		// audit token, we extract it from the vfs_context_t.
		//
697 audit_token_t *atokenp = NULL;
698 vnode_t vp = NULL;
699 char *path_str = NULL;
700 size_t path_strlen = 0;
701 void *arg;
702 int32_t len32;
703
704 do_all_links = false;
705
706 while ((arg_type = va_arg(ap, int32_t)) != FSE_ARG_DONE) {
707 switch (arg_type) {
708 case FSE_ARG_STRING:
709 len32 = va_arg(ap, int32_t);
710 arg = va_arg(ap, char *);
711 if (path_str == NULL) {
712 path_str = arg;
713 path_strlen = len32;
714 }
715 break;
716
717 case FSE_ARG_PATH:
718 arg = va_arg(ap, char *);
719 if (path_str == NULL) {
720 path_str = arg;
721 }
722 break;
723
724 case FSE_ARG_VNODE:
725 arg = va_arg(ap, vnode_t);
726 if (vp == NULL) {
727 vp = arg;
728 }
729 break;
730
731 case FSE_ARG_AUDIT_TOKEN:
732 arg = va_arg(ap, audit_token_t *);
733 if (atokenp == NULL) {
734 atokenp = arg;
735 }
736 break;
737
738 default:
739 printf("add_fsevent: FSE_ACCESS_GRANTED unknown type %d\n", arg_type);
740 // just skip one 32-bit word and hope we
741 // sync up...
742 (void)va_arg(ap, int32_t);
743 }
744 }
745
746 if (atokenp != NULL) {
747 memcpy(&cur->access_granted_event.audit_token,
748 atokenp,
749 sizeof(cur->access_granted_event.audit_token));
750 } else if (vfs_context_copy_audit_token(ctx,
751 &cur->access_granted_event.audit_token) != 0) {
752 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
753 &cur->flags);
754 goto done_with_args;
755 }
756
757 //
758 // If we got FSE_ARG_STRING, the length includes the
759 // terminating NUL. If we got FSE_ARG_PATH, all we
760 // got was the string pointer, so get the length and
761 // adjust. If we didn't get either, then the caller
762 // needs to have provided us with a vnode, and with
763 // that we can get the path.
764 //
765 if (path_str != NULL) {
766 if (path_strlen == 0) {
767 path_strlen = strlen(path_str) + 1;
768 }
769 } else if (vp != NULL) {
770 pathbuff = get_pathbuff();
771 pathbuff_len = MAXPATHLEN;
772 pathbuff[0] = '\0';
773 if (vn_getpath_no_firmlink(vp, pathbuff,
774 &pathbuff_len) == 0) {
775 path_str = pathbuff;
776 path_strlen = pathbuff_len;
777 }
778 }
779
780 if (path_str != NULL) {
781 assert(path_strlen <= INT16_MAX);
782 cur->access_granted_event.str =
783 vfs_addname(path_str, (uint32_t)path_strlen, 0, 0);
784 if (path_str == pathbuff) {
785 release_pathbuff(pathbuff);
786 pathbuff = NULL;
787 }
788 }
789 if (cur->access_granted_event.str == NULL) {
790 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
791 &cur->flags);
792 }
793
794 goto done_with_args;
795 }
796#endif
797 if (type == FSE_UNMOUNT_PENDING) {
798 // Just a dev_t
799 // We use the same fields as the regular event, but we
800 // don't have all of the data.
801 do_all_links = false;
802
803 arg_type = va_arg(ap, int32_t);
804 if (arg_type == FSE_ARG_DEV) {
805 cur->regular_event.dev = (dev_t)(va_arg(ap, dev_t));
806 }
807
808 cur->regular_event.dest = NULL;
809 cur->regular_event.str = NULL;
810 cur->regular_event.len = 0;
811
812 goto done_with_args;
813 }
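
	/*
	 * Illustrative caller sketch: an unmount-pending notification only
	 * carries the device, e.g.
	 *
	 *   add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
	 */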
814
815 for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
816 switch (arg_type) {
817 case FSE_ARG_VNODE: {
818 // this expands out into multiple arguments to the client
819 struct vnode *vp;
820 struct vnode_attr va;
821
822 if (kfse->regular_event.str != NULL) {
823 cur = kfse_dest;
824 }
825
826 vp = va_arg(ap, struct vnode *);
827 if (vp == NULL) {
828 panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!",
829 cur->type);
830 }
831
832 VATTR_INIT(&va);
833 VATTR_WANTED(&va, va_fsid);
834 VATTR_WANTED(&va, va_fileid);
835 VATTR_WANTED(&va, va_mode);
836 VATTR_WANTED(&va, va_uid);
837 VATTR_WANTED(&va, va_document_id);
838 VATTR_WANTED(&va, va_nlink);
839 if ((ret = vnode_getattr(vp, vap: &va, ctx: vfs_context_kernel())) != 0) {
840 // printf("add_fsevent: failed to getattr on vp %p (%d)\n", cur->fref.vp, ret);
841 cur->regular_event.str = NULL;
842 error = EINVAL;
843 goto clean_up;
844 }
845
846 cur->regular_event.dev = dev = (dev_t)va.va_fsid;
847 cur->regular_event.ino = (ino64_t)va.va_fileid;
848 cur->regular_event.mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode;
849 cur->regular_event.uid = va.va_uid;
850 cur->regular_event.document_id = va.va_document_id;
851 if (vp->v_flag & VISHARDLINK) {
852 cur->regular_event.mode |= FSE_MODE_HLINK;
853 if ((vp->v_type == VDIR && va.va_dirlinkcount == 0) || (vp->v_type == VREG && va.va_nlink == 0)) {
854 cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
855 }
856 if (orig_linkid == 0) {
857 orig_linkid = cur->regular_event.ino;
858 orig_linkcount = MIN(va.va_nlink, MAX_HARDLINK_NOTIFICATIONS);
859 link_vp = vp;
860 if (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID && !link_name) {
861 VATTR_INIT(&va);
862 VATTR_WANTED(&va, va_parentid);
863 VATTR_WANTED(&va, va_name);
864 link_name = zalloc(view: ZV_NAMEI);
865 va.va_name = link_name;
866 if ((ret = vnode_getattr(vp, vap: &va, ctx: vfs_context_kernel()) != 0) ||
867 !(VATTR_IS_SUPPORTED(&va, va_name)) ||
868 !(VATTR_IS_SUPPORTED(&va, va_parentid))) {
869 zfree(ZV_NAMEI, link_name);
870 link_name = NULL;
871 }
872 if (link_name) {
873 link_parentid = va.va_parentid;
874 }
875 va.va_name = NULL;
876 }
877 }
878 }
879
880 // if we haven't gotten the path yet, get it.
881 if (pathbuff == NULL && path_override == NULL) {
882 pathbuff = get_pathbuff();
883 pathbuff_len = MAXPATHLEN;
884
885 pathbuff[0] = '\0';
886 if ((ret = vn_getpath_no_firmlink(vp, pathbuf: pathbuff, len: &pathbuff_len)) != 0 || pathbuff[0] == '\0') {
887 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
888 address: &cur->flags);
889
890 do {
891 if (vp->v_parent != NULL) {
892 vp = vp->v_parent;
893 } else if (vp->v_mount) {
894 strlcpy(dst: pathbuff, src: vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN);
895 break;
896 } else {
897 vp = NULL;
898 }
899
900 if (vp == NULL) {
901 break;
902 }
903
904 pathbuff_len = MAXPATHLEN;
905 ret = vn_getpath_no_firmlink(vp, pathbuf: pathbuff, len: &pathbuff_len);
906 } while (ret == ENOSPC);
907
908 if (ret != 0 || vp == NULL) {
909 error = ENOENT;
910 goto clean_up;
911 }
912 }
913 } else if (path_override) {
914 pathbuff = path_override;
915 pathbuff_len = (int)strlen(s: path_override) + 1;
916 } else {
917 strlcpy(dst: pathbuff, src: "NOPATH", MAXPATHLEN);
918 pathbuff_len = (int)strlen(s: pathbuff) + 1;
919 }
920
921 // store the path by adding it to the global string table
922 cur->regular_event.len = (u_int16_t)pathbuff_len;
923 cur->regular_event.str =
924 vfs_addname(name: pathbuff, len: pathbuff_len, nc_hash: 0, flags: 0);
925 if (cur->regular_event.str == NULL ||
926 cur->regular_event.str[0] == '\0') {
927 panic("add_fsevent: was not able to add path %s to event %p.", pathbuff, cur);
928 }
929
930 if (pathbuff != path_override) {
931 release_pathbuff(path: pathbuff);
932 }
933 pathbuff = NULL;
934
935 break;
936 }
937
938 case FSE_ARG_FINFO: {
939 fse_info *fse;
940
941 fse = va_arg(ap, fse_info *);
942
943 cur->regular_event.dev = dev = (dev_t)fse->dev;
944 cur->regular_event.ino = (ino64_t)fse->ino;
945 cur->regular_event.mode = (int32_t)fse->mode;
946 cur->regular_event.uid = (uid_t)fse->uid;
947 cur->regular_event.document_id = (uint32_t)fse->document_id;
948 // if it's a hard-link and this is the last link, flag it
949 if (fse->mode & FSE_MODE_HLINK) {
950 if (fse->nlink == 0) {
951 cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
952 }
953 if (orig_linkid == 0) {
954 orig_linkid = cur->regular_event.ino;
955 orig_linkcount = MIN(fse->nlink, MAX_HARDLINK_NOTIFICATIONS);
956 }
957 }
958 if (cur->regular_event.mode & FSE_TRUNCATED_PATH) {
959 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
960 address: &cur->flags);
961 cur->regular_event.mode &= ~FSE_TRUNCATED_PATH;
962 }
963 break;
964 }
965
966 case FSE_ARG_STRING:
967 if (kfse->regular_event.str != NULL) {
968 cur = kfse_dest;
969 }
970
971 cur->regular_event.len =
972 (int16_t)(va_arg(ap, int32_t) & 0x7fff);
973 if (cur->regular_event.len >= 1) {
974 cur->regular_event.str =
975 vfs_addname(va_arg(ap, char *),
976 len: cur->regular_event.len, nc_hash: 0, flags: 0);
977 } else {
978 printf("add_fsevent: funny looking string length: %d\n", (int)cur->regular_event.len);
979 cur->regular_event.len = 2;
980 cur->regular_event.str = vfs_addname(name: "/",
981 len: cur->regular_event.len, nc_hash: 0, flags: 0);
982 }
983 if (cur->regular_event.str[0] == 0) {
984 printf("add_fsevent: bogus looking string (len %d)\n", cur->regular_event.len);
985 }
986 break;
987
988 case FSE_ARG_INT32: {
989 uint32_t ival = (uint32_t)va_arg(ap, int32_t);
990 kfse->regular_event.uid = ival;
991 break;
992 }
993
994 default:
995 printf("add_fsevent: unknown type %d\n", arg_type);
996 // just skip one 32-bit word and hope we sync up...
997 (void)va_arg(ap, int32_t);
998 }
999 }
1000
1001done_with_args:
1002 va_end(ap);
1003
1004 // XXX Memory barrier here?
1005 if (kfse_dest) {
1006 OSBitAndAtomic16(mask: ~KFSE_BEING_CREATED, address: &kfse_dest->flags);
1007 }
1008 OSBitAndAtomic16(mask: ~KFSE_BEING_CREATED, address: &kfse->flags);
1009
	//
	// now we have to go and let everyone who is interested in
	// this type of event know that it happened
	//
1014 lock_watch_table();
1015
1016 for (i = 0; i < MAX_WATCHERS; i++) {
1017 watcher = watcher_table[i];
1018 if (watcher == NULL) {
1019 continue;
1020 }
1021
1022 if (type < watcher->num_events
1023 && watcher->event_list[type] == FSE_REPORT
1024 && watcher_cares_about_dev(watcher, dev)) {
1025 if (watcher_add_event(watcher, kfse) != 0) {
1026 watcher->num_dropped++;
1027 continue;
1028 }
1029 }
1030
1031 // if (kfse->refcount < 1) {
1032 // panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
1033 // }
1034 }
1035
1036 unlock_watch_table();
1037
1038clean_up:
1039
1040 if (pathbuff) {
1041 release_pathbuff(path: pathbuff);
1042 pathbuff = NULL;
1043 }
1044 // replicate events for sibling hardlinks
1045 if (do_all_links &&
1046 (kfse->regular_event.mode & FSE_MODE_HLINK) &&
1047 !(kfse->regular_event.mode & FSE_MODE_LAST_HLINK) &&
1048 (type == FSE_STAT_CHANGED ||
1049 type == FSE_CONTENT_MODIFIED ||
1050 type == FSE_FINDER_INFO_CHANGED ||
1051 type == FSE_XATTR_MODIFIED)) {
1052 if (orig_linkcount > 0 && orig_linkid != 0) {
1053#ifndef APFSIOC_NEXT_LINK
1054#define APFSIOC_NEXT_LINK _IOWR('J', 10, uint64_t)
1055#endif
1056 if (path_override == NULL) {
1057 path_override = get_pathbuff();
1058 }
1059 if (next_linkid == 0) {
1060 next_linkid = orig_linkid;
1061 }
1062
1063 if (link_vp) {
1064 mount_t mp = NULL;
1065 vnode_t mnt_rootvp = NULL;
1066 int iret = -1;
1067
1068 mp = vnode_mount(vp: link_vp);
1069 if (mp) {
1070 iret = VFS_ROOT(mp, &mnt_rootvp, vfs_context_kernel());
1071 }
1072
1073 if (iret == 0 && mnt_rootvp) {
1074 iret = VNOP_IOCTL(vp: mnt_rootvp, APFSIOC_NEXT_LINK, data: (char *)&next_linkid, fflag: (int)0, ctx: vfs_context_kernel());
1075 vnode_put(vp: mnt_rootvp);
1076 }
1077
1078 int32_t fsid0;
1079 int path_override_len = MAXPATHLEN;
1080
1081 // continue resolving hardlink paths if there is a valid next_linkid retrieved
1082 // file systems not supporting APFSIOC_NEXT_LINK will skip replicating events for sibling hardlinks
1083 if (iret == 0 && next_linkid != 0) {
1084 fsid0 = link_vp->v_mount->mnt_vfsstat.f_fsid.val[0];
1085 ut->uu_flag |= UT_KERN_RAGE_VNODES;
1086 if (!do_cache_reset) {
1087 do_cache_reset = true;
1088 }
1089 if ((iret = fsgetpath_internal(ctx, fsid0, next_linkid, MAXPATHLEN, path_override, FSOPT_NOFIRMLINKPATH, &path_override_len)) == 0) {
1090 orig_linkcount--;
1091 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1092
1093 if (orig_linkcount >= 0) {
1094 release_event_ref(kfse);
1095 goto restart;
1096 }
1097 } else {
1098 // failed to get override path
1099 // encountered a broken link or the linkid has been deleted before retrieving the path
1100 orig_linkcount--;
1101 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1102
1103 if (orig_linkcount >= 0) {
1104 goto clean_up;
1105 }
1106 }
1107 }
1108 }
1109 }
1110 }
1111
1112 if (link_name) {
1113 /*
1114 * If we call fsgetpath on all the links, it will set the link origin cache
1115 * to the last link that the path was obtained for.
		 * To restore the original link id cache in APFS we need to issue a
1117 * lookup on the original directory + name for the link.
1118 */
1119 if (do_cache_reset) {
1120 vnode_t dvp = NULLVP;
1121
1122 if ((ret = VFS_VGET(link_vp->v_mount, (ino64_t)link_parentid, &dvp, vfs_context_kernel())) == 0) {
1123 vnode_t lvp = NULLVP;
1124
1125 ret = vnode_lookupat(path: link_name, flags: 0, vpp: &lvp, ctx, start_dvp: dvp);
1126 if (!ret) {
1127 vnode_put(vp: lvp);
1128 lvp = NULLVP;
1129 }
1130 vnode_put(vp: dvp);
1131 dvp = NULLVP;
1132 }
1133 ret = 0;
1134 }
1135 zfree(ZV_NAMEI, link_name);
1136 link_name = NULL;
1137 }
1138
1139 if (path_override) {
1140 release_pathbuff(path: path_override);
1141 path_override = NULL;
1142 }
1143
1144 release_event_ref(kfse);
1145
1146 return error;
1147}
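
/*
 * Typical usage sketch (illustrative; names are hypothetical): most
 * callers hand add_fsevent() a vnode and let it derive the path and file
 * info itself, while two-file operations pass a string/fse_info pair for
 * each side so that both the source and destination kfs_events get
 * populated:
 *
 *   add_fsevent(FSE_CREATE_FILE, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
 *
 *   add_fsevent(FSE_RENAME, ctx,
 *       FSE_ARG_STRING, from_len, from_path, FSE_ARG_FINFO, &from_finfo,
 *       FSE_ARG_STRING, to_len, to_path, FSE_ARG_FINFO, &to_finfo,
 *       FSE_ARG_DONE);
 */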
1148
1149int
1150test_fse_access_granted(vnode_t vp, unsigned long type, vfs_context_t ctx)
1151{
1152 audit_token_t atoken;
1153 char *pathbuff;
1154 int error, pathbuff_len;
1155
1156 if (type == 0) {
1157 return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1158 FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1159 }
1160
1161 if (type == 1) {
1162 error = vfs_context_copy_audit_token(ctx, token: &atoken);
1163 if (error) {
1164 return error;
1165 }
1166 return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1167 FSE_ARG_VNODE, vp, FSE_ARG_AUDIT_TOKEN, &atoken,
1168 FSE_ARG_DONE);
1169 }
1170
1171 if (type == 2 || type == 3) {
1172 pathbuff = get_pathbuff();
1173 pathbuff_len = MAXPATHLEN;
1174 pathbuff[0] = '\0';
1175 error = vn_getpath_no_firmlink(vp, pathbuf: pathbuff, len: &pathbuff_len);
1176 if (error) {
1177 release_pathbuff(path: pathbuff);
1178 return error;
1179 }
1180 if (type == 2) {
1181 error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1182 FSE_ARG_STRING, pathbuff_len, pathbuff,
1183 FSE_ARG_DONE);
1184 } else {
1185 error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1186 FSE_ARG_PATH, pathbuff, FSE_ARG_DONE);
1187 }
1188 release_pathbuff(path: pathbuff);
1189 return error;
1190 }
1191
1192 return ENOTSUP;
1193}
1194
1195static void
1196release_event_ref(kfs_event *kfse)
1197{
1198 int old_refcount;
1199 kfs_event *dest = NULL;
1200 const char *path_str = NULL, *dest_path_str = NULL;
1201
1202 lock_fs_event_list();
1203
1204 old_refcount = OSAddAtomic(-1, &kfse->refcount);
1205 if (old_refcount > 1) {
1206 unlock_fs_event_list();
1207 return;
1208 }
1209
1210 if (last_event_ptr == kfse) {
1211 last_event_ptr = NULL;
1212 last_event_type = -1;
1213 last_coalesced_time = 0;
1214 }
1215
1216 if (kfse->refcount < 0) {
1217 panic("release_event_ref: bogus kfse refcount %d", kfse->refcount);
1218 }
1219
1220 assert(kfse->refcount == 0);
1221 assert(kfse->type != FSE_INVALID);
1222
1223 //
1224 // Get pointers to all the things so we can free without
1225 // holding any locks.
1226 //
1227 if (kfse->type != FSE_DOCID_CREATED &&
1228 kfse->type != FSE_DOCID_CHANGED &&
1229 kfse->type != FSE_ACTIVITY) {
1230 path_str = kfse->regular_event.str;
1231
1232 dest = kfse->regular_event.dest;
1233 if (dest != NULL) {
1234 assert(dest->type != FSE_INVALID);
1235 if (OSAddAtomic(-1,
1236 &kfse->regular_event.dest->refcount) == 1) {
1237 dest_path_str = dest->regular_event.str;
1238 } else {
1239 dest = NULL;
1240 }
1241 }
1242 }
1243
1244 if (dest != NULL) {
1245 if (dest->flags & KFSE_ON_LIST) {
1246 num_events_outstanding--;
1247 LIST_REMOVE(dest, kevent_list);
1248 }
1249 }
1250
1251 if (kfse->flags & KFSE_ON_LIST) {
1252 num_events_outstanding--;
1253 LIST_REMOVE(kfse, kevent_list);
1254 if (kfse->type == FSE_RENAME) {
1255 num_pending_rename--;
1256 }
1257 }
1258
1259 unlock_fs_event_list();
1260
1261 zfree(event_zone, kfse);
1262 if (dest != NULL) {
1263 zfree(event_zone, dest);
1264 }
1265
1266 if (path_str != NULL) {
1267 vfs_removename(name: path_str);
1268 }
1269 if (dest_path_str != NULL) {
1270 vfs_removename(name: dest_path_str);
1271 }
1272}
1273
1274#define FSEVENTS_WATCHER_ENTITLEMENT \
1275 "com.apple.private.vfs.fsevents-watcher"
1276
1277#define FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT \
1278 "com.apple.private.vfs.fsevents-activity-watcher"
1279
//
// We restrict this for two reasons:
//
// 1- So that naive processes don't get this firehose by default.
//
// 2- Because this event, when delivered to watchers, includes the
//    audit token of the process granted the access, and we don't
//    want to leak that to random watchers.
//
#define FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT \
	"com.apple.private.vfs.fsevents-access-granted-watcher"
1291
static bool
watcher_is_entitled(task_t task)
{
	//
	// We consider a process to be entitled to watch /dev/fsevents
	// if it has any of FSEVENTS_WATCHER_ENTITLEMENT,
	// FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT, or
	// FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT.
	//
	return !!(IOTaskHasEntitlement(task, FSEVENTS_WATCHER_ENTITLEMENT) ||
	       IOTaskHasEntitlement(task,
	       FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT) ||
	       IOTaskHasEntitlement(task,
	       FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT));
}
1306#if CONFIG_FSE_ACCESS_GRANTED
1307static bool
1308watcher_is_entitled_for_access_granted(task_t task)
1309{
1310 return !!IOTaskHasEntitlement(task,
1311 FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT);
1312}
1313#endif
1314static bool
1315watcher_is_entitled_for_activity(task_t task)
1316{
1317 return !!IOTaskHasEntitlement(task,
1318 FSEVENTS_ACTIVITY_WATCHER_ENTITLEMENT);
1319}
1320
1321static int
1322add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out, void *fseh)
1323{
1324 int i;
1325 fs_event_watcher *watcher;
1326
1327 if (eventq_size <= 0 || eventq_size > 100 * max_kfs_events) {
1328 eventq_size = max_kfs_events;
1329 }
1330 if (num_events > FSE_ACTIVITY &&
1331 event_list[FSE_ACTIVITY] != FSE_IGNORE &&
1332 !watcher_is_entitled_for_activity(task: current_task())) {
1333 event_list[FSE_ACTIVITY] = FSE_IGNORE;
1334 }
1335#if CONFIG_FSE_ACCESS_GRANTED
1336 // If the watcher wants FSE_ACCESS_GRANTED, ensure it has the
1337 // correct entitlement. If not, just silently drop that event.
1338 if (num_events > FSE_ACCESS_GRANTED &&
1339 event_list[FSE_ACCESS_GRANTED] != FSE_IGNORE &&
1340 !watcher_is_entitled_for_access_granted(current_task())) {
1341 event_list[FSE_ACCESS_GRANTED] = FSE_IGNORE;
1342 }
1343#endif
1344 // Note: the event_queue follows the fs_event_watcher struct
1345 // in memory so we only have to do one allocation
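	// Rough memory layout of that single allocation (illustrative):
	//
	//   +-------------------+--------------------------------------+
	//   | fs_event_watcher  | kfs_event *event_queue[eventq_size]  |
	//   +-------------------+--------------------------------------+
	//   ^ watcher            ^ watcher->event_queue == &watcher[1]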
1346 watcher = kalloc_type(fs_event_watcher, kfs_event *, eventq_size, Z_WAITOK);
1347 if (watcher == NULL) {
1348 return ENOMEM;
1349 }
1350
1351 watcher->event_list = event_list;
1352 watcher->num_events = num_events;
1353 watcher->devices_not_to_watch = NULL;
1354 watcher->num_devices = 0;
1355 watcher->flags = 0;
1356 watcher->event_queue = (kfs_event **)&watcher[1];
1357 watcher->eventq_size = eventq_size;
1358 watcher->rd = 0;
1359 watcher->wr = 0;
1360 watcher->blockers = 0;
1361 watcher->num_readers = 0;
1362 watcher->max_event_id = 0;
1363 watcher->fseh = fseh;
1364 watcher->pid = proc_selfpid();
1365 proc_selfname(buf: watcher->proc_name, size: sizeof(watcher->proc_name));
1366
1367 watcher->num_dropped = 0; // XXXdbg - debugging
1368
1369 if (watcher_is_entitled(task: current_task())) {
1370 watcher->flags |= WATCHER_APPLE_SYSTEM_SERVICE;
1371 } else {
1372 printf("fsevents: watcher %s (pid: %d) - Using /dev/fsevents directly is unsupported. Migrate to FSEventsFramework\n",
1373 watcher->proc_name, watcher->pid);
1374 }
1375
1376 lock_watch_table();
1377
1378 // find a slot for the new watcher
1379 for (i = 0; i < MAX_WATCHERS; i++) {
1380 if (watcher_table[i] == NULL) {
1381 watcher->my_id = i;
1382 watcher_table[i] = watcher;
1383 break;
1384 }
1385 }
1386
1387 if (i >= MAX_WATCHERS) {
1388 printf("fsevents: too many watchers!\n");
1389 unlock_watch_table();
1390 kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1391 return ENOSPC;
1392 }
1393
1394 // now update the global list of who's interested in
1395 // events of a particular type...
1396 for (i = 0; i < num_events; i++) {
1397 if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1398 fs_event_type_watchers[i]++;
1399 }
1400 }
1401
1402 unlock_watch_table();
1403
1404 *watcher_out = watcher;
1405
1406 return 0;
1407}
1408
1409
1410
1411static void
1412remove_watcher(fs_event_watcher *target)
1413{
1414 int i, j, counter = 0;
1415 fs_event_watcher *watcher;
1416 kfs_event *kfse;
1417
1418 lock_watch_table();
1419
1420 for (j = 0; j < MAX_WATCHERS; j++) {
1421 watcher = watcher_table[j];
1422 if (watcher != target) {
1423 continue;
1424 }
1425
1426 watcher_table[j] = NULL;
1427
1428 for (i = 0; i < watcher->num_events; i++) {
1429 if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1430 fs_event_type_watchers[i]--;
1431 }
1432 }
1433
1434 if (watcher->flags & WATCHER_CLOSING) {
1435 unlock_watch_table();
1436 return;
1437 }
1438
1439 // printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags);
1440 watcher->flags |= WATCHER_CLOSING;
1441 OSAddAtomic(1, &watcher->num_readers);
1442
1443 unlock_watch_table();
1444
1445 while (watcher->num_readers > 1 && counter++ < 5000) {
1446 lock_watch_table();
1447 fsevents_wakeup(watcher); // in case they're asleep
1448 unlock_watch_table();
1449
1450 tsleep(chan: watcher, PRIBIO, wmesg: "fsevents-close", timo: 1);
1451 }
1452 if (counter++ >= 5000) {
1453 // printf("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1454 panic("fsevents: close: still have readers! (%d)", watcher->num_readers);
1455 }
1456
1457 // drain the event_queue
1458
1459 lck_rw_lock_exclusive(lck: &event_handling_lock);
1460 while (watcher->rd != watcher->wr) {
1461 kfse = watcher->event_queue[watcher->rd];
1462 watcher->event_queue[watcher->rd] = NULL;
1463 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1464 OSSynchronizeIO();
1465 if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1466 release_event_ref(kfse);
1467 }
1468 }
1469 lck_rw_unlock_exclusive(lck: &event_handling_lock);
1470
1471 kfree_data(watcher->event_list, watcher->num_events * sizeof(int8_t));
1472 kfree_data(watcher->devices_not_to_watch, watcher->num_devices * sizeof(dev_t));
1473 kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1474 return;
1475 }
1476
1477 unlock_watch_table();
1478}
1479
1480
1481#define EVENT_DELAY_IN_MS 10
1482static thread_call_t event_delivery_timer = NULL;
1483static int timer_set = 0;
1484
1485
1486static void
1487delayed_event_delivery(__unused void *param0, __unused void *param1)
1488{
1489 int i;
1490
1491 lock_watch_table();
1492
1493 for (i = 0; i < MAX_WATCHERS; i++) {
1494 if (watcher_table[i] != NULL && watcher_table[i]->rd != watcher_table[i]->wr) {
1495 fsevents_wakeup(watcher: watcher_table[i]);
1496 }
1497 }
1498
1499 timer_set = 0;
1500
1501 unlock_watch_table();
1502}
1503
1504
1505//
1506// The watch table must be locked before calling this function.
1507//
1508static void
1509schedule_event_wakeup(void)
1510{
1511 uint64_t deadline;
1512
1513 if (event_delivery_timer == NULL) {
1514 event_delivery_timer = thread_call_allocate(func: (thread_call_func_t)delayed_event_delivery, NULL);
1515 }
1516
1517 clock_interval_to_deadline(EVENT_DELAY_IN_MS, scale_factor: 1000 * 1000, result: &deadline);
1518
1519 thread_call_enter_delayed(call: event_delivery_timer, deadline);
1520 timer_set = 1;
1521}
1522
1523
1524
1525#define MAX_NUM_PENDING 16
1526
1527//
1528// NOTE: the watch table must be locked before calling
1529// this routine.
1530//
1531static int
1532watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse)
1533{
1534 if (kfse->abstime > watcher->max_event_id) {
1535 watcher->max_event_id = kfse->abstime;
1536 }
1537
1538 if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) {
1539 watcher->flags |= WATCHER_DROPPED_EVENTS;
1540 fsevents_wakeup(watcher);
1541 return ENOSPC;
1542 }
1543
1544 OSAddAtomic(1, &kfse->refcount);
1545 watcher->event_queue[watcher->wr] = kfse;
1546 OSSynchronizeIO();
1547 watcher->wr = (watcher->wr + 1) % watcher->eventq_size;
1548
1549 //
1550 // wake up the watcher if there are more than MAX_NUM_PENDING events.
1551 // otherwise schedule a timer (if one isn't already set) which will
1552 // send any pending events if no more are received in the next
1553 // EVENT_DELAY_IN_MS milli-seconds.
1554 //
1555 int32_t num_pending = 0;
1556 if (watcher->rd < watcher->wr) {
1557 num_pending = watcher->wr - watcher->rd;
1558 }
1559
1560 if (watcher->rd > watcher->wr) {
1561 num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
1562 }
1563
1564 if (num_pending > (watcher->eventq_size * 3 / 4) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
1565 /* Non-Apple Service is falling behind, start dropping events for this process */
1566 lck_rw_lock_exclusive(lck: &event_handling_lock);
1567 while (watcher->rd != watcher->wr) {
1568 kfse = watcher->event_queue[watcher->rd];
1569 watcher->event_queue[watcher->rd] = NULL;
1570 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1571 OSSynchronizeIO();
1572 if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1573 release_event_ref(kfse);
1574 }
1575 }
1576 watcher->flags |= WATCHER_DROPPED_EVENTS;
1577 lck_rw_unlock_exclusive(lck: &event_handling_lock);
1578
1579 printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
1580 watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
1581 watcher->eventq_size, watcher->flags);
1582
1583 fsevents_wakeup(watcher);
1584 } else if (num_pending > MAX_NUM_PENDING) {
1585 fsevents_wakeup(watcher);
1586 } else if (timer_set == 0) {
1587 schedule_event_wakeup();
1588 }
1589
1590 return 0;
1591}
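
/*
 * Worked example (illustrative): with eventq_size == 8, rd == 6 and
 * wr == 2, the queue has wrapped, so num_pending is computed as
 * wr + eventq_size - rd == 2 + 8 - 6 == 4.  The queue is declared full
 * one slot early -- when (wr + 1) % eventq_size == rd -- so one slot is
 * always left empty to distinguish "full" from "empty".
 */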
1592
1593static int
1594fill_buff(uint16_t type, int32_t size, const void *data,
1595 char *buff, int32_t *_buff_idx, int32_t buff_sz,
1596 struct uio *uio)
1597{
1598 int32_t amt, error = 0, buff_idx = *_buff_idx;
1599 uint16_t tmp;
1600
1601 //
1602 // the +1 on the size is to guarantee that the main data
1603 // copy loop will always copy at least 1 byte
1604 //
1605 if ((buff_sz - buff_idx) <= (int)(2 * sizeof(uint16_t) + 1)) {
1606 if (buff_idx > uio_resid(a_uio: uio)) {
1607 error = ENOSPC;
1608 goto get_out;
1609 }
1610
1611 error = uiomove(cp: buff, n: buff_idx, uio);
1612 if (error) {
1613 goto get_out;
1614 }
1615 buff_idx = 0;
1616 }
1617
1618 // copy out the header (type & size)
1619 memcpy(dst: &buff[buff_idx], src: &type, n: sizeof(uint16_t));
1620 buff_idx += sizeof(uint16_t);
1621
1622 tmp = size & 0xffff;
1623 memcpy(dst: &buff[buff_idx], src: &tmp, n: sizeof(uint16_t));
1624 buff_idx += sizeof(uint16_t);
1625
1626 // now copy the body of the data, flushing along the way
1627 // if the buffer fills up.
1628 //
1629 while (size > 0) {
1630 amt = (size < (buff_sz - buff_idx)) ? size : (buff_sz - buff_idx);
1631 memcpy(dst: &buff[buff_idx], src: data, n: amt);
1632
1633 size -= amt;
1634 buff_idx += amt;
1635 data = (const char *)data + amt;
1636 if (size > (buff_sz - buff_idx)) {
1637 if (buff_idx > uio_resid(a_uio: uio)) {
1638 error = ENOSPC;
1639 goto get_out;
1640 }
1641 error = uiomove(cp: buff, n: buff_idx, uio);
1642 if (error) {
1643 goto get_out;
1644 }
1645 buff_idx = 0;
1646 }
1647
1648 if (amt == 0) { // just in case...
1649 break;
1650 }
1651 }
1652
1653get_out:
1654 *_buff_idx = buff_idx;
1655
1656 return error;
1657}
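
/*
 * Output format sketch (as produced by fill_buff() and copy_out_kfse()):
 * each event copied to the reader starts with a 32-bit type and a 32-bit
 * pid, followed by a sequence of arguments, each encoded as
 *
 *   uint16_t arg_type;   // FSE_ARG_DEV, FSE_ARG_INO, FSE_ARG_STRING, ...
 *   uint16_t arg_len;    // payload length in bytes (low 16 bits)
 *   uint8_t  payload[arg_len];
 *
 * and terminated by a bare FSE_ARG_DONE uint16_t with no length or payload.
 */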
1658
1659
1660static int copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio) __attribute__((noinline));
1661
1662static int
1663copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)
1664{
1665 int error;
1666 uint16_t tmp16;
1667 int32_t type;
1668 kfs_event *cur;
1669 char evbuff[512];
1670 int evbuff_idx = 0;
1671
1672 if (kfse->type == FSE_INVALID) {
1673 panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d)", kfse, kfse->refcount);
1674 }
1675
1676 if (kfse->flags & KFSE_BEING_CREATED) {
1677 return 0;
1678 }
1679
1680 if (((kfse->type == FSE_RENAME) || (kfse->type == FSE_CLONE)) &&
1681 kfse->regular_event.dest == NULL) {
1682 //
1683 // This can happen if an event gets recycled but we had a
1684 // pointer to it in our event queue. The event is the
1685 // destination of a rename or clone which we'll process
1686 // separately (that is, another kfse points to this one
1687 // so it's ok to skip this guy because we'll process it
1688 // when we process the other one)
1689 error = 0;
1690 goto get_out;
1691 }
1692
1693 if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
1694 type = (kfse->type & 0xfff);
1695
1696 if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1697 type |= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
1698 } else if (kfse->flags & KFSE_COMBINED_EVENTS) {
1699 type |= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
1700 }
1701 } else {
1702 type = (int32_t)kfse->type;
1703 }
1704
1705 // copy out the type of the event
1706 memcpy(dst: evbuff, src: &type, n: sizeof(int32_t));
1707 evbuff_idx += sizeof(int32_t);
1708
1709 // copy out the pid of the person that generated the event
1710 memcpy(dst: &evbuff[evbuff_idx], src: &kfse->pid, n: sizeof(pid_t));
1711 evbuff_idx += sizeof(pid_t);
1712
1713 cur = kfse;
1714
1715copy_again:
1716
1717 if (kfse->type == FSE_DOCID_CHANGED ||
1718 kfse->type == FSE_DOCID_CREATED) {
1719 dev_t dev = cur->docid_event.dev;
1720 ino64_t src_ino = cur->docid_event.src_ino;
1721 ino64_t dst_ino = cur->docid_event.dst_ino;
1722 uint64_t docid = cur->docid_event.docid;
1723
1724 error = fill_buff(FSE_ARG_DEV, size: sizeof(dev_t), data: &dev, buff: evbuff,
1725 buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1726 if (error != 0) {
1727 goto get_out;
1728 }
1729
1730 error = fill_buff(FSE_ARG_INO, size: sizeof(ino64_t), data: &src_ino,
1731 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1732 if (error != 0) {
1733 goto get_out;
1734 }
1735
1736 error = fill_buff(FSE_ARG_INO, size: sizeof(ino64_t), data: &dst_ino,
1737 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1738 if (error != 0) {
1739 goto get_out;
1740 }
1741
1742 error = fill_buff(FSE_ARG_INT64, size: sizeof(uint64_t), data: &docid,
1743 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1744 if (error != 0) {
1745 goto get_out;
1746 }
1747
1748 goto done;
1749 }
1750
1751 if (kfse->type == FSE_UNMOUNT_PENDING) {
1752 dev_t dev = cur->regular_event.dev;
1753
1754 error = fill_buff(FSE_ARG_DEV, size: sizeof(dev_t), data: &dev,
1755 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1756 if (error != 0) {
1757 goto get_out;
1758 }
1759
1760 goto done;
1761 }
1762
1763 if (kfse->type == FSE_ACTIVITY) {
1764 error = fill_buff(FSE_ARG_INT32, size: sizeof(cur->activity_event.version), data: &cur->activity_event.version,
1765 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1766 if (error != 0) {
1767 goto get_out;
1768 }
1769 error = fill_buff(FSE_ARG_DEV, size: sizeof(cur->activity_event.dev), data: &cur->activity_event.dev, buff: evbuff,
1770 buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1771 if (error != 0) {
1772 goto get_out;
1773 }
1774
1775 error = fill_buff(FSE_ARG_INO, size: sizeof(cur->activity_event.ino), data: &cur->activity_event.ino,
1776 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1777 if (error != 0) {
1778 goto get_out;
1779 }
1780
1781 error = fill_buff(FSE_ARG_INT64, size: sizeof(cur->activity_event.origin_id), data: &cur->activity_event.origin_id,
1782 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1783 if (error != 0) {
1784 goto get_out;
1785 }
1786
1787 error = fill_buff(FSE_ARG_INT64, size: sizeof(cur->activity_event.age), data: &cur->activity_event.age,
1788 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1789 if (error != 0) {
1790 goto get_out;
1791 }
1792
1793 error = fill_buff(FSE_ARG_INT32, size: sizeof(cur->activity_event.use_state), data: &cur->activity_event.use_state,
1794 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1795 if (error != 0) {
1796 goto get_out;
1797 }
1798
1799 error = fill_buff(FSE_ARG_INT32, size: sizeof(cur->activity_event.urgency), data: &cur->activity_event.urgency,
1800 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1801 if (error != 0) {
1802 goto get_out;
1803 }
1804
1805 error = fill_buff(FSE_ARG_INT64, size: sizeof(cur->activity_event.size), data: &cur->activity_event.size,
1806 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1807 if (error != 0) {
1808 goto get_out;
1809 }
1810
1811 goto done;
1812 }
1813#if CONFIG_FSE_ACCESS_GRANTED
1814 if (kfse->type == FSE_ACCESS_GRANTED) {
1815 //
1816 // KFSE_CONTAINS_DROPPED_EVENTS will be set if either
1817 // the path or audit token are bogus; don't copy out
1818 // either in that case.
1819 //
1820 if (cur->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1821 goto done;
1822 }
1823 error = fill_buff(FSE_ARG_STRING,
1824 cur->access_granted_event.len,
1825 cur->access_granted_event.str,
1826 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1827 if (error != 0) {
1828 goto get_out;
1829 }
1830 error = fill_buff(FSE_ARG_AUDIT_TOKEN,
1831 sizeof(cur->access_granted_event.audit_token),
1832 &cur->access_granted_event.audit_token,
1833 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1834 if (error != 0) {
1835 goto get_out;
1836 }
1837
1838 goto done;
1839 }
1840#endif
1841 if (cur->regular_event.str == NULL ||
1842 cur->regular_event.str[0] == '\0') {
1843 printf("copy_out_kfse:2: empty/short path (%s)\n",
1844 cur->regular_event.str);
1845 error = fill_buff(FSE_ARG_STRING, size: 2, data: "/", buff: evbuff, buff_idx: &evbuff_idx,
1846 buff_sz: sizeof(evbuff), uio);
1847 } else {
1848 error = fill_buff(FSE_ARG_STRING, size: cur->regular_event.len,
1849 data: cur->regular_event.str, buff: evbuff, buff_idx: &evbuff_idx,
1850 buff_sz: sizeof(evbuff), uio);
1851 }
1852 if (error != 0) {
1853 goto get_out;
1854 }
1855
1856 if (cur->regular_event.dev == 0 && cur->regular_event.ino == 0) {
1857 // this happens when a rename event happens and the
1858 // destination of the rename did not previously exist.
1859 // it thus has no other file info so skip copying out
1860 // the stuff below since it isn't initialized
1861 goto done;
1862 }
1863
1864 if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
1865 // We rely on the layout of the "regular_event"
1866 // structure being the same as fse_info in order
1867 // to speed up this copy. The nlink field in
1868 // fse_info is not included.
1869 error = fill_buff(FSE_ARG_FINFO, KFSE_INFO_COPYSIZE,
1870 data: &cur->regular_event, buff: evbuff, buff_idx: &evbuff_idx,
1871 buff_sz: sizeof(evbuff), uio);
1872 if (error != 0) {
1873 goto get_out;
1874 }
1875 } else {
1876 error = fill_buff(FSE_ARG_DEV, size: sizeof(dev_t),
1877 data: &cur->regular_event.dev, buff: evbuff, buff_idx: &evbuff_idx,
1878 buff_sz: sizeof(evbuff), uio);
1879 if (error != 0) {
1880 goto get_out;
1881 }
1882
1883 error = fill_buff(FSE_ARG_INO, size: sizeof(ino64_t),
1884 data: &cur->regular_event.ino, buff: evbuff, buff_idx: &evbuff_idx,
1885 buff_sz: sizeof(evbuff), uio);
1886 if (error != 0) {
1887 goto get_out;
1888 }
1889
1890 error = fill_buff(FSE_ARG_MODE, size: sizeof(int32_t),
1891 data: &cur->regular_event.mode, buff: evbuff, buff_idx: &evbuff_idx,
1892 buff_sz: sizeof(evbuff), uio);
1893 if (error != 0) {
1894 goto get_out;
1895 }
1896
1897 error = fill_buff(FSE_ARG_UID, size: sizeof(uid_t),
1898 data: &cur->regular_event.uid, buff: evbuff, buff_idx: &evbuff_idx,
1899 buff_sz: sizeof(evbuff), uio);
1900 if (error != 0) {
1901 goto get_out;
1902 }
1903
1904 error = fill_buff(FSE_ARG_GID, size: sizeof(gid_t),
1905 data: &cur->regular_event.document_id, buff: evbuff, buff_idx: &evbuff_idx,
1906 buff_sz: sizeof(evbuff), uio);
1907 if (error != 0) {
1908 goto get_out;
1909 }
1910 }
1911
1912 if (cur->regular_event.dest) {
1913 cur = cur->regular_event.dest;
1914 goto copy_again;
1915 }
1916
1917done:
1918 // very last thing: the time stamp
1919 error = fill_buff(FSE_ARG_INT64, size: sizeof(uint64_t), data: &cur->abstime,
1920 buff: evbuff, buff_idx: &evbuff_idx, buff_sz: sizeof(evbuff), uio);
1921 if (error != 0) {
1922 goto get_out;
1923 }
1924
1925 // check if the FSE_ARG_DONE will fit
1926 if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
1927 if (evbuff_idx > uio_resid(uio)) {
1928 error = ENOSPC;
1929 goto get_out;
1930 }
1931 error = uiomove(evbuff, evbuff_idx, uio);
1932 if (error) {
1933 goto get_out;
1934 }
1935 evbuff_idx = 0;
1936 }
1937
1938 tmp16 = FSE_ARG_DONE;
1939 memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
1940 evbuff_idx += sizeof(uint16_t);
1941
1942 // flush any remaining data in the buffer (and hopefully
1943 // in most cases this is the only uiomove we'll do)
1944 if (evbuff_idx > uio_resid(uio)) {
1945 error = ENOSPC;
1946 } else {
1947 error = uiomove(evbuff, evbuff_idx, uio);
1948 }
1949
1950get_out:
1951
1952 return error;
1953}
1954
1955
1956
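/*
 * fmod_watch() drains a watcher's ring buffer into the caller's uio.
 * As a rough summary pieced together from copy_out_kfse() above and
 * the FSE_EVENTS_DROPPED record assembled below (a sketch, not a
 * formal spec): each record begins with an int32 event type and an
 * int32 pid, followed by FSE_ARG_* items emitted via fill_buff(),
 * and ends with a bare uint16 FSE_ARG_DONE tag.
 */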
1957static int
1958fmod_watch(fs_event_watcher *watcher, struct uio *uio)
1959{
1960 int error = 0;
1961 user_ssize_t last_full_event_resid;
1962 kfs_event *kfse;
1963 uint16_t tmp16;
1964 int skipped;
1965
1966 last_full_event_resid = uio_resid(uio);
1967
1968 // need at least 2048 bytes of space (maxpathlen + 1 event buf)
1969 if (uio_resid(uio) < 2048 || watcher == NULL) {
1970 return EINVAL;
1971 }
1972
1973 if (watcher->flags & WATCHER_CLOSING) {
1974 return 0;
1975 }
1976
1977 if (OSAddAtomic(1, &watcher->num_readers) != 0) {
1978 // don't allow multiple threads to read from the fd at the same time
1979 OSAddAtomic(-1, &watcher->num_readers);
1980 return EAGAIN;
1981 }
1982
1983restart_watch:
1984 if (watcher->rd == watcher->wr) {
1985 if (watcher->flags & WATCHER_CLOSING) {
1986 OSAddAtomic(-1, &watcher->num_readers);
1987 return 0;
1988 }
1989 OSAddAtomic(1, &watcher->blockers);
1990
1991 // there's nothing to do, go to sleep
1992 error = tsleep((caddr_t)watcher, PUSER | PCATCH, "fsevents_empty", 0);
1993
1994 OSAddAtomic(-1, &watcher->blockers);
1995
1996 if (error != 0 || (watcher->flags & WATCHER_CLOSING)) {
1997 OSAddAtomic(-1, &watcher->num_readers);
1998 return error;
1999 }
2000 }
2001
2002 // if we dropped events, return that as an event first
2003 if (watcher->flags & WATCHER_DROPPED_EVENTS) {
2004 int32_t val = FSE_EVENTS_DROPPED;
2005
2006 error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
2007 if (error == 0) {
2008 val = 0; // a fake pid
2009 error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
2010
2011 tmp16 = FSE_ARG_DONE; // makes it a consistent msg
2012 error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
2013
2014 last_full_event_resid = uio_resid(uio);
2015 }
2016
2017 if (error) {
2018 OSAddAtomic(-1, &watcher->num_readers);
2019 return error;
2020 }
2021
2022 watcher->flags &= ~WATCHER_DROPPED_EVENTS;
2023 }
2024
2025 skipped = 0;
2026
2027 lck_rw_lock_shared(&event_handling_lock);
2028 while (uio_resid(uio) > 0 && watcher->rd != watcher->wr) {
2029 if (watcher->flags & WATCHER_CLOSING) {
2030 break;
2031 }
2032
2033 //
2034 // check if the event is something of interest to us
2035 // (since it may have been recycled/reused and changed
2036 // its type or which device it is for)
2037 //
2038 kfse = watcher->event_queue[watcher->rd];
2039 if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
2040 break;
2041 }
2042
2043 if (watcher->event_list[kfse->type] == FSE_REPORT) {
2044 if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) &&
2045 kfse->type != FSE_DOCID_CREATED &&
2046 kfse->type != FSE_DOCID_CHANGED &&
2047 kfse->type != FSE_ACTIVITY &&
2048 is_ignored_directory(kfse->regular_event.str)) {
2049 // If this is not an Apple System Service, skip specified directories
2050 // radar://12034844
2051 error = 0;
2052 skipped = 1;
2053 } else {
2054 skipped = 0;
2055 if (last_event_ptr == kfse) {
2056 last_event_ptr = NULL;
2057 last_event_type = -1;
2058 last_coalesced_time = 0;
2059 }
2060 error = copy_out_kfse(watcher, kfse, uio);
2061 if (error != 0) {
2062 // if an event won't fit or we encountered an error
2063 // while copying it out, back up to the last full
2064 // event and bail out. if the error was ENOENT
2065 // we can continue regular processing; otherwise
2066 // unlock things and return.
2067 uio_setresid(uio, last_full_event_resid);
2068 if (error != ENOENT) {
2069 lck_rw_unlock_shared(&event_handling_lock);
2070 error = 0;
2071 goto get_out;
2072 }
2073 }
2074
2075 last_full_event_resid = uio_resid(uio);
2076 }
2077 }
2078
2079 watcher->event_queue[watcher->rd] = NULL;
2080 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
2081 OSSynchronizeIO();
2082 release_event_ref(kfse);
2083 }
2084 lck_rw_unlock_shared(&event_handling_lock);
2085
2086 if (skipped && error == 0) {
2087 goto restart_watch;
2088 }
2089
2090get_out:
2091 OSAddAtomic(-1, &watcher->num_readers);
2092
2093 return error;
2094}
2095
2096
2097//
2098// Shoo watchers away from a volume that's about to be unmounted
2099// (so that it can be cleanly unmounted).
2100//
2101void
2102fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
2103{
2104#if !defined(XNU_TARGET_OS_OSX)
2105 dev_t dev = mp->mnt_vfsstat.f_fsid.val[0];
2106 int error, waitcount = 0;
2107 struct timespec ts = {.tv_sec = 1, .tv_nsec = 0};
2108
2109 // wait for any other pending unmounts to complete
2110 lock_watch_table();
2111 while (fsevent_unmount_dev != 0) {
2112 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
2113 if (error == EWOULDBLOCK) {
2114 error = 0;
2115 }
2116 if (!error && (++waitcount >= 10)) {
2117 error = EWOULDBLOCK;
2118 printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
2119 }
2120 if (error) {
2121 // there's a problem, bail out
2122 unlock_watch_table();
2123 return;
2124 }
2125 }
2126 if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == 0) {
2127 // nobody watching for unmount pending events
2128 unlock_watch_table();
2129 return;
2130 }
2131 // this is now the current unmount pending
2132 fsevent_unmount_dev = dev;
2133 fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
2134 unlock_watch_table();
2135
2136 // send an event to notify the watchers that they need to get off the mount
2137 error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
2138
2139 // wait for acknowledgment(s) (give up if it takes too long)
2140 lock_watch_table();
2141 waitcount = 0;
2142 while (fsevent_unmount_dev == dev) {
2143 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
2144 if (error == EWOULDBLOCK) {
2145 error = 0;
2146 }
2147 if (!error && (++waitcount >= 10)) {
2148 error = EWOULDBLOCK;
2149 printf("unmount pending ack timeout for dev %d\n", dev);
2150 }
2151 if (error) {
2152 // there's a problem, bail out
2153 if (fsevent_unmount_dev == dev) {
2154 fsevent_unmount_dev = 0;
2155 fsevent_unmount_ack_count = 0;
2156 }
2157 wakeup((caddr_t)&fsevent_unmount_dev);
2158 break;
2159 }
2160 }
2161 unlock_watch_table();
2162#endif /* ! XNU_TARGET_OS_OSX */
2163}
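
/*
 * The unmount-pending handshake, pieced together from the code above
 * and the FSEVENTS_UNMOUNT_PENDING_ACK ioctl below (a sketch of the
 * intended flow):
 *
 *   1. fsevent_unmount() records the device in fsevent_unmount_dev and
 *      posts FSE_UNMOUNT_PENDING with FSE_ARG_DEV.
 *   2. Each watcher registered for FSE_UNMOUNT_PENDING is expected to
 *      release its references on the volume and then issue the
 *      FSEVENTS_UNMOUNT_PENDING_ACK ioctl with that dev_t.
 *   3. When fsevent_unmount_ack_count drops to zero (or the roughly
 *      ten second timeout above expires), the unmount proceeds.
 */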
2164
2165
2166//
2167// /dev/fsevents device code
2168//
2169static int fsevents_installed = 0;
2170
2171typedef struct fsevent_handle {
2172 UInt32 flags;
2173 SInt32 active;
2174 fs_event_watcher *watcher;
2175 struct klist knotes;
2176 struct selinfo si;
2177} fsevent_handle;
2178
2179#define FSEH_CLOSING 0x0001
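
/*
 * One fsevent_handle is allocated per cloned /dev/fsevents descriptor
 * (see the FSEVENTS_CLONE_* handling in fseventsioctl() below) and is
 * torn down in fseventsf_close().  "active" counts in-flight ioctls so
 * that close can wait them out; FSEH_CLOSING tells late arrivals to
 * bail out early.
 */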
2180
2181static int
2182fseventsf_read(struct fileproc *fp, struct uio *uio,
2183 __unused int flags, __unused vfs_context_t ctx)
2184{
2185 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2186 int error;
2187
2188 error = fmod_watch(fseh->watcher, uio);
2189
2190 return error;
2191}
2192
2193
2194#pragma pack(push, 4)
2195typedef struct fsevent_dev_filter_args32 {
2196 uint32_t num_devices;
2197 user32_addr_t devices;
2198} fsevent_dev_filter_args32;
2199typedef struct fsevent_dev_filter_args64 {
2200 uint32_t num_devices;
2201 user64_addr_t devices;
2202} fsevent_dev_filter_args64;
2203#pragma pack(pop)
2204
2205#define FSEVENTS_DEVICE_FILTER_32 _IOW('s', 100, fsevent_dev_filter_args32)
2206#define FSEVENTS_DEVICE_FILTER_64 _IOW('s', 100, fsevent_dev_filter_args64)
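
/*
 * A rough sketch of how a client might set a device filter, i.e. the
 * list of devices it does NOT want events for ("watcher_fd" is assumed
 * to be a cloned /dev/fsevents descriptor and dev1/dev2 are example
 * values; a real client would use the matching definitions from
 * <sys/fsevents.h>):
 *
 *     dev_t ignore[] = { dev1, dev2 };
 *     fsevent_dev_filter_args64 args = {
 *         .num_devices = 2,
 *         .devices = (user64_addr_t)(uintptr_t)ignore,
 *     };
 *     ioctl(watcher_fd, FSEVENTS_DEVICE_FILTER_64, &args);
 *
 * Passing num_devices == 0 clears the filter; more than 256 devices is
 * rejected with EINVAL (see below).
 */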
2207
2208static int
2209fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
2210{
2211 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2212 int ret = 0;
2213 fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
2214
2215 OSAddAtomic(1, &fseh->active);
2216 if (fseh->flags & FSEH_CLOSING) {
2217 OSAddAtomic(-1, &fseh->active);
2218 return 0;
2219 }
2220
2221 switch (cmd) {
2222 case FIONBIO:
2223 case FIOASYNC:
2224 break;
2225
2226 case FSEVENTS_WANT_COMPACT_EVENTS: {
2227 fseh->watcher->flags |= WATCHER_WANTS_COMPACT_EVENTS;
2228 break;
2229 }
2230
2231 case FSEVENTS_WANT_EXTENDED_INFO: {
2232 fseh->watcher->flags |= WATCHER_WANTS_EXTENDED_INFO;
2233 break;
2234 }
2235
2236 case FSEVENTS_GET_CURRENT_ID: {
2237 *(uint64_t *)data = fseh->watcher->max_event_id;
2238 ret = 0;
2239 break;
2240 }
2241
2242 case FSEVENTS_DEVICE_FILTER_32: {
2243 if (proc_is64bit(vfs_context_proc(ctx))) {
2244 ret = EINVAL;
2245 break;
2246 }
2247 fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
2248
2249 devfilt_args = &_devfilt_args;
2250 memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
2251 devfilt_args->num_devices = devfilt_args32->num_devices;
2252 devfilt_args->devices = CAST_USER_ADDR_T(devfilt_args32->devices);
2253 goto handle_dev_filter;
2254 }
2255
2256 case FSEVENTS_DEVICE_FILTER_64:
2257 if (!proc_is64bit(vfs_context_proc(ctx))) {
2258 ret = EINVAL;
2259 break;
2260 }
2261 devfilt_args = (fsevent_dev_filter_args64 *)data;
2262
2263handle_dev_filter:
2264 {
2265 int new_num_devices, old_num_devices = 0;
2266 dev_t *devices_not_to_watch, *tmp = NULL;
2267
2268 if (devfilt_args->num_devices > 256) {
2269 ret = EINVAL;
2270 break;
2271 }
2272
2273 new_num_devices = devfilt_args->num_devices;
2274 if (new_num_devices == 0) {
2275 lock_watch_table();
2276
2277 tmp = fseh->watcher->devices_not_to_watch;
2278 fseh->watcher->devices_not_to_watch = NULL;
2279 old_num_devices = fseh->watcher->num_devices;
2280 fseh->watcher->num_devices = new_num_devices;
2281
2282 unlock_watch_table();
2283 kfree_data(tmp, old_num_devices * sizeof(dev_t));
2284 break;
2285 }
2286
2287 devices_not_to_watch = kalloc_data(new_num_devices * sizeof(dev_t), Z_WAITOK);
2288 if (devices_not_to_watch == NULL) {
2289 ret = ENOMEM;
2290 break;
2291 }
2292
2293 ret = copyin((user_addr_t)devfilt_args->devices,
2294 (void *)devices_not_to_watch,
2295 new_num_devices * sizeof(dev_t));
2296 if (ret) {
2297 kfree_data(devices_not_to_watch, new_num_devices * sizeof(dev_t));
2298 break;
2299 }
2300
2301 lock_watch_table();
2302 old_num_devices = fseh->watcher->num_devices;
2303 fseh->watcher->num_devices = new_num_devices;
2304 tmp = fseh->watcher->devices_not_to_watch;
2305 fseh->watcher->devices_not_to_watch = devices_not_to_watch;
2306 unlock_watch_table();
2307
2308 kfree_data(tmp, old_num_devices * sizeof(dev_t));
2309
2310 break;
2311 }
2312
2313 case FSEVENTS_UNMOUNT_PENDING_ACK: {
2314 lock_watch_table();
2315 dev_t dev = *(dev_t *)data;
2316 if (fsevent_unmount_dev == dev) {
2317 if (--fsevent_unmount_ack_count <= 0) {
2318 fsevent_unmount_dev = 0;
2319 wakeup((caddr_t)&fsevent_unmount_dev);
2320 }
2321 } else {
2322 printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
2323 ret = EINVAL;
2324 }
2325 unlock_watch_table();
2326 break;
2327 }
2328
2329 default:
2330 ret = EINVAL;
2331 break;
2332 }
2333
2334 OSAddAtomic(-1, &fseh->active);
2335 return ret;
2336}
2337
2338
2339static int
2340fseventsf_select(struct fileproc *fp, int which, __unused void *wql, vfs_context_t ctx)
2341{
2342 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2343 int ready = 0;
2344
2345 if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) {
2346 return 0;
2347 }
2348
2349
2350 // if there's nothing in the queue, we're not ready
2351 if (fseh->watcher->rd != fseh->watcher->wr) {
2352 ready = 1;
2353 }
2354
2355 if (!ready) {
2356 lock_watch_table();
2357 selrecord(vfs_context_proc(ctx), &fseh->si, wql);
2358 unlock_watch_table();
2359 }
2360
2361 return ready;
2362}
2363
2364
2365#if NOTUSED
2366static int
2367fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
2368{
2369 return ENOTSUP;
2370}
2371#endif
2372
2373static int
2374fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
2375{
2376 fsevent_handle *fseh = (struct fsevent_handle *)fg_get_data(fg);
2377 fs_event_watcher *watcher;
2378
2379 OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
2380 while (OSAddAtomic(0, &fseh->active) > 0) {
2381 tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", 1);
2382 }
2383
2384 watcher = fseh->watcher;
2385 fg_set_data(fg, NULL);
2386 fseh->watcher = NULL;
2387
2388 remove_watcher(watcher);
2389 selthreadclear(&fseh->si);
2390 kfree_type(fsevent_handle, fseh);
2391
2392 return 0;
2393}
2394
2395static void
2396filt_fsevent_detach(struct knote *kn)
2397{
2398 fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2399
2400 lock_watch_table();
2401
2402 KNOTE_DETACH(&fseh->knotes, kn);
2403
2404 unlock_watch_table();
2405}
2406
2407/*
2408 * Determine whether this knote should be active
2409 *
2410 * This is kind of subtle.
2411 * --First, notice if the vnode has been revoked: if so, override the hint
2412 * --EVFILT_READ knotes are checked no matter what the hint is
2413 * --Other knotes activate based on hint.
2414 * --If hint is revoke, set special flags and activate
2415 */
2416static int
2417filt_fsevent_common(struct knote *kn, struct kevent_qos_s *kev, long hint)
2418{
2419 fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2420 int activate = 0;
2421 int32_t rd, wr, amt;
2422 int64_t data = 0;
2423
2424 if (NOTE_REVOKE == hint) {
2425 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2426 activate = 1;
2427 }
2428
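 // how many events are queued, accounting for ring-buffer wraparound
 // (rd > wr means the write index has already wrapped past the end)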
2429 rd = fseh->watcher->rd;
2430 wr = fseh->watcher->wr;
2431 if (rd <= wr) {
2432 amt = wr - rd;
2433 } else {
2434 amt = fseh->watcher->eventq_size - (rd - wr);
2435 }
2436
2437 switch (kn->kn_filter) {
2438 case EVFILT_READ:
2439 data = amt;
2440 activate = (data != 0);
2441 break;
2442 case EVFILT_VNODE:
2443 /* Check events this note matches against the hint */
2444 if (kn->kn_sfflags & hint) {
2445 kn->kn_fflags |= (uint32_t)hint; /* Set which event occurred */
2446 }
2447 if (kn->kn_fflags != 0) {
2448 activate = 1;
2449 }
2450 break;
2451 default:
2452 // nothing to do...
2453 break;
2454 }
2455
2456 if (activate && kev) {
2457 knote_fill_kevent(kn, kev, data);
2458 }
2459 return activate;
2460}
2461
2462static int
2463filt_fsevent(struct knote *kn, long hint)
2464{
2465 return filt_fsevent_common(kn, NULL, hint);
2466}
2467
2468static int
2469filt_fsevent_touch(struct knote *kn, struct kevent_qos_s *kev)
2470{
2471 int res;
2472
2473 lock_watch_table();
2474
2475 /* accept new fflags/data as saved */
2476 kn->kn_sfflags = kev->fflags;
2477 kn->kn_sdata = kev->data;
2478
2479 /* restrict the current results to the (smaller?) set of new interest */
2480 /*
2481 * For compatibility with previous implementations, we leave kn_fflags
2482 * as they were before.
2483 */
2484 //kn->kn_fflags &= kev->fflags;
2485
2486 /* determine if the filter is now fired */
2487 res = filt_fsevent_common(kn, NULL, 0);
2488
2489 unlock_watch_table();
2490
2491 return res;
2492}
2493
2494static int
2495filt_fsevent_process(struct knote *kn, struct kevent_qos_s *kev)
2496{
2497 int res;
2498
2499 lock_watch_table();
2500
2501 res = filt_fsevent_common(kn, kev, 0);
2502
2503 unlock_watch_table();
2504
2505 return res;
2506}
2507
2508SECURITY_READ_ONLY_EARLY(struct filterops) fsevent_filtops = {
2509 .f_isfd = 1,
2510 .f_attach = NULL,
2511 .f_detach = filt_fsevent_detach,
2512 .f_event = filt_fsevent,
2513 .f_touch = filt_fsevent_touch,
2514 .f_process = filt_fsevent_process,
2515};
2516
2517static int
2518fseventsf_kqfilter(struct fileproc *fp, struct knote *kn,
2519 __unused struct kevent_qos_s *kev)
2520{
2521 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2522 int res;
2523
2524 kn->kn_filtid = EVFILTID_FSEVENT;
2525 knote_kn_hook_set_raw(kn, (void *) fseh);
2526
2527 lock_watch_table();
2528
2529 KNOTE_ATTACH(&fseh->knotes, kn);
2530
2531 /* check to see if it is fired already */
2532 res = filt_fsevent_common(kn, NULL, 0);
2533
2534 unlock_watch_table();
2535
2536 return res;
2537}
2538
2539
2540static int
2541fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2542{
2543 int counter = 0;
2544 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2545
2546 // if there are people still waiting, sleep for 10ms to
2547 // let them clean up and get out of there. however we
2548 // also don't want to get stuck forever so if they don't
2549 // exit after 5 seconds we're tearing things down anyway.
2550 while (fseh->watcher->blockers && counter++ < 500) {
2551 // issue wakeup in case anyone is blocked waiting for an event
2552 // do this each time we wakeup in case the blocker missed
2553 // the wakeup due to the unprotected test of WATCHER_CLOSING
2554 // and decision to tsleep in fmod_watch... this bit of
2555 // latency is a decent tradeoff against not having to
2556 // take and drop a lock in fmod_watch
2557 lock_watch_table();
2558 fsevents_wakeup(fseh->watcher);
2559 unlock_watch_table();
2560
2561 tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1);
2562 }
2563
2564 return 0;
2565}
2566
2567
2568static int
2569fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2570{
2571 if (!kauth_cred_issuser(kauth_cred_get())) {
2572 return EPERM;
2573 }
2574
2575 return 0;
2576}
2577
2578static int
2579fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2580{
2581 return 0;
2582}
2583
2584static int
2585fseventsread(__unused dev_t dev, __unused struct uio *uio, __unused int ioflag)
2586{
2587 return EIO;
2588}
2589
2590
2591static int
2592parse_buffer_and_add_events(const char *buffer, size_t bufsize, vfs_context_t ctx, size_t *remainder)
2593{
2594 const fse_info *finfo, *dest_finfo;
2595 const char *path, *ptr, *dest_path, *event_start = buffer;
2596 size_t path_len, dest_path_len;
2597 int type, err = 0;
2598
2599
2600 ptr = buffer;
2601 while ((ptr + sizeof(int) + sizeof(fse_info) + 1) < buffer + bufsize) {
2602 type = *(const int *)ptr;
2603 if (type < 0 || type == FSE_ACCESS_GRANTED || type == FSE_ACTIVITY ||
2604 type >= FSE_MAX_EVENTS) {
2605 err = EINVAL;
2606 break;
2607 }
2608
2609 ptr += sizeof(int);
2610
2611 finfo = (const fse_info *)ptr;
2612 ptr += sizeof(fse_info);
2613
2614 path = ptr;
2615 while (ptr < buffer + bufsize && *ptr != '\0') {
2616 ptr++;
2617 }
2618
2619 if (ptr >= buffer + bufsize) {
2620 break;
2621 }
2622
2623 ptr++; // advance over the trailing '\0'
2624
2625 path_len = ptr - path;
2626
2627 if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
2628 event_start = ptr; // record where the next event starts
2629
2630 err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
2631 if (err) {
2632 break;
2633 }
2634 continue;
2635 }
2636
2637 //
2638 // if we're here we have to slurp up the destination finfo
2639 // and path so that we can pass them to the add_fsevent()
2640 // call. basically it's a copy of the above code.
2641 //
2642 dest_finfo = (const fse_info *)ptr;
2643 ptr += sizeof(fse_info);
2644
2645 dest_path = ptr;
2646 while (ptr < buffer + bufsize && *ptr != '\0') {
2647 ptr++;
2648 }
2649
2650 if (ptr >= buffer + bufsize) {
2651 break;
2652 }
2653
2654 ptr++; // advance over the trailing '\0'
2655 event_start = ptr; // record where the next event starts
2656
2657 dest_path_len = ptr - dest_path;
2658 //
2659 // If the destination inode number is non-zero, generate a rename
2660 // with both source and destination FSE_ARG_FINFO. Otherwise generate
2661 // a rename with only one FSE_ARG_FINFO. If you need to inject an
2662 // exchange with an inode of zero, just make that inode (and its path)
2663 // come in as the first one, not the second.
2664 //
2665 if (dest_finfo->ino) {
2666 err = add_fsevent(type, ctx,
2667 FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2668 FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
2669 FSE_ARG_DONE);
2670 } else {
2671 err = add_fsevent(type, ctx,
2672 FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2673 FSE_ARG_STRING, dest_path_len, dest_path,
2674 FSE_ARG_DONE);
2675 }
2676
2677 if (err) {
2678 break;
2679 }
2680 }
2681
2682 // if the last event wasn't complete, the remainder is everything
2683 // from the start of that partial event to the end of the buffer.
2684 //
2685 *remainder = (long)((buffer + bufsize) - event_start);
2686
2687 return err;
2688}
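
/*
 * For reference, the buffer parsed above is a packed stream of records
 * laid out as follows (derived from the parsing code, not a formal
 * spec):
 *
 *     int32     type        (FSE_ACCESS_GRANTED and FSE_ACTIVITY rejected)
 *     fse_info  finfo
 *     char      path[]      (NUL terminated)
 *   and, only for FSE_RENAME / FSE_EXCHANGE / FSE_CLONE:
 *     fse_info  dest_finfo
 *     char      dest_path[] (NUL terminated)
 */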
2689
2690
2691//
2692// Note: this buffer size can never be less than
2693// 2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
2694// because that is the max size of a single event.
2695// It is 4k as a convenient size; making it
2696// smaller is not a good idea.
2697//
2698#define WRITE_BUFFER_SIZE 4096
2699static char *write_buffer = NULL;
2700
2701static int
2702fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
2703{
2704 int error = 0;
2705 size_t count, offset = 0, remainder = 0;
2706 vfs_context_t ctx = vfs_context_current();
2707
2708 lck_mtx_lock(&event_writer_lock);
2709
2710 if (write_buffer == NULL) {
2711 write_buffer = zalloc_permanent(WRITE_BUFFER_SIZE, ZALIGN_64);
2712 }
2713
2714 //
2715 // this loop copies in and processes the events written.
2716 // it takes care to copy in reasonable size chunks and
2717 // process them. if there is an event that spans a chunk
2718 // boundary we're careful to copy those bytes down to the
2719 // beginning of the buffer and read the next chunk in just
2720 // after it.
2721 //
2722 while (uio_resid(uio)) {
2723 count = MIN(WRITE_BUFFER_SIZE - offset, (size_t)uio_resid(uio));
2724
2725 error = uiomove(write_buffer + offset, (int)count, uio);
2726 if (error) {
2727 break;
2728 }
2729
2730 error = parse_buffer_and_add_events(write_buffer, offset + count, ctx, &remainder);
2731 if (error) {
2732 break;
2733 }
2734
2735 //
2736 // if there's any remainder, copy it down to the beginning
2737 // of the buffer so that it will get processed the next time
2738 // through the loop. note that the remainder always starts
2739 // at an event boundary.
2740 //
2741 memmove(write_buffer, (write_buffer + count + offset) - remainder, remainder);
2742 offset = remainder;
2743 }
2744
2745 lck_mtx_unlock(&event_writer_lock);
2746
2747 return error;
2748}
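
/*
 * Writes to /dev/fsevents inject synthetic events using the format
 * parsed by parse_buffer_and_add_events() above.  FSE_ACCESS_GRANTED
 * and FSE_ACTIVITY events cannot be injected this way (the parser
 * rejects them), and opening the device requires super-user
 * credentials (see fseventsopen() above).
 */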
2749
2750
2751static const struct fileops fsevents_fops = {
2752 .fo_type = DTYPE_FSEVENTS,
2753 .fo_read = fseventsf_read,
2754 .fo_write = fo_no_write,
2755 .fo_ioctl = fseventsf_ioctl,
2756 .fo_select = fseventsf_select,
2757 .fo_close = fseventsf_close,
2758 .fo_kqfilter = fseventsf_kqfilter,
2759 .fo_drain = fseventsf_drain,
2760};
2761
2762typedef struct fsevent_clone_args32 {
2763 user32_addr_t event_list;
2764 int32_t num_events;
2765 int32_t event_queue_depth;
2766 user32_addr_t fd;
2767} fsevent_clone_args32;
2768
2769typedef struct fsevent_clone_args64 {
2770 user64_addr_t event_list;
2771 int32_t num_events;
2772 int32_t event_queue_depth;
2773 user64_addr_t fd;
2774} fsevent_clone_args64;
2775
2776#define FSEVENTS_CLONE_32 _IOW('s', 1, fsevent_clone_args32)
2777#define FSEVENTS_CLONE_64 _IOW('s', 1, fsevent_clone_args64)
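
/*
 * A rough sketch of the userspace flow this ioctl supports (the
 * unsuffixed names come from <sys/fsevents.h>; treat this as an
 * illustration rather than a reference, and note the queue depth of
 * 1024 is just an example):
 *
 *     int dev_fd = open("/dev/fsevents", O_RDONLY);   // root only
 *     int8_t events[FSE_MAX_EVENTS];
 *     // mark each event type FSE_REPORT or FSE_IGNORE ...
 *     int32_t watcher_fd = -1;
 *     fsevent_clone_args args = {
 *         .event_list = events,
 *         .num_events = FSE_MAX_EVENTS,
 *         .event_queue_depth = 1024,
 *         .fd = &watcher_fd,
 *     };
 *     ioctl(dev_fd, FSEVENTS_CLONE, &args);
 *     // fsevents are then read() from watcher_fd
 */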
2778
2779static int
2780fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
2781{
2782 struct fileproc *f;
2783 int fd, error;
2784 fsevent_handle *fseh = NULL;
2785 fsevent_clone_args64 *fse_clone_args, _fse_clone;
2786 int8_t *event_list;
2787 int is64bit = proc_is64bit(p);
2788
2789 switch (cmd) {
2790 case FSEVENTS_CLONE_32: {
2791 if (is64bit) {
2792 return EINVAL;
2793 }
2794 fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
2795
2796 fse_clone_args = &_fse_clone;
2797 memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
2798
2799 fse_clone_args->event_list = CAST_USER_ADDR_T(args32->event_list);
2800 fse_clone_args->num_events = args32->num_events;
2801 fse_clone_args->event_queue_depth = args32->event_queue_depth;
2802 fse_clone_args->fd = CAST_USER_ADDR_T(args32->fd);
2803 goto handle_clone;
2804 }
2805
2806 case FSEVENTS_CLONE_64:
2807 if (!is64bit) {
2808 return EINVAL;
2809 }
2810 fse_clone_args = (fsevent_clone_args64 *)data;
2811
2812handle_clone:
2813 if (fse_clone_args->num_events <= 0 || fse_clone_args->num_events > 4096) {
2814 return EINVAL;
2815 }
2816
2817 fseh = kalloc_type(fsevent_handle, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2818
2819 klist_init(&fseh->knotes);
2820
2821 event_list = kalloc_data(fse_clone_args->num_events * sizeof(int8_t), Z_WAITOK);
2822 if (event_list == NULL) {
2823 kfree_type(fsevent_handle, fseh);
2824 return ENOMEM;
2825 }
2826
2827 error = copyin((user_addr_t)fse_clone_args->event_list,
2828 (void *)event_list,
2829 fse_clone_args->num_events * sizeof(int8_t));
2830 if (error) {
2831 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2832 kfree_type(fsevent_handle, fseh);
2833 return error;
2834 }
2835
2836 /*
2837 * Lock down the user's "fd" result buffer so it's safe
2838 * to hold locks while we copy it out.
2839 */
2840 error = vslock((user_addr_t)fse_clone_args->fd,
2841 sizeof(int32_t));
2842 if (error) {
2843 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2844 kfree_type(fsevent_handle, fseh);
2845 return error;
2846 }
2847
2848 error = add_watcher(event_list,
2849 fse_clone_args->num_events,
2850 fse_clone_args->event_queue_depth,
2851 &fseh->watcher,
2852 fseh);
2853 if (error) {
2854 vsunlock((user_addr_t)fse_clone_args->fd,
2855 sizeof(int32_t), 0);
2856 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2857 kfree_type(fsevent_handle, fseh);
2858 return error;
2859 }
2860
2861 fseh->watcher->fseh = fseh;
2862
2863 error = falloc(p, &f, &fd);
2864 if (error) {
2865 remove_watcher(fseh->watcher);
2866 vsunlock((user_addr_t)fse_clone_args->fd,
2867 sizeof(int32_t), 0);
2868 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2869 kfree_type(fsevent_handle, fseh);
2870 return error;
2871 }
2872 proc_fdlock(p);
2873 f->fp_glob->fg_flag = FREAD | FWRITE;
2874 f->fp_glob->fg_ops = &fsevents_fops;
2875 fp_set_data(f, fseh);
2876
2877 /*
2878 * We can safely hold the proc_fdlock across this copyout()
2879 * because of the vslock() call above. The vslock() call
2880 * also ensures that we will never get an error, so assert
2881 * this.
2882 */
2883 error = copyout((void *)&fd, (user_addr_t)fse_clone_args->fd, sizeof(int32_t));
2884 assert(error == 0);
2885
2886 procfdtbl_releasefd(p, fd, NULL);
2887 fp_drop(p, fd, f, 1);
2888 proc_fdunlock(p);
2889
2890 vsunlock((user_addr_t)fse_clone_args->fd,
2891 sizeof(int32_t), 1);
2892 break;
2893
2894 default:
2895 error = EINVAL;
2896 break;
2897 }
2898
2899 return error;
2900}
2901
2902static void
2903fsevents_wakeup(fs_event_watcher *watcher)
2904{
2905 selwakeup(&watcher->fseh->si);
2906 KNOTE(&watcher->fseh->knotes, NOTE_WRITE | NOTE_NONE);
2907 wakeup((caddr_t)watcher);
2908}
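
/*
 * Note: a wakeup pokes all three ways a client may be waiting:
 * select()/poll() via selwakeup(), kqueue via KNOTE(), and readers
 * blocked in fmod_watch() via wakeup() on the watcher itself.
 */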
2909
2910
2911/*
2912 * A struct describing which functions will get invoked for certain
2913 * actions.
2914 */
2915static const struct cdevsw fsevents_cdevsw =
2916{
2917 .d_open = fseventsopen,
2918 .d_close = fseventsclose,
2919 .d_read = fseventsread,
2920 .d_write = fseventswrite,
2921 .d_ioctl = fseventsioctl,
2922 .d_stop = eno_stop,
2923 .d_reset = eno_reset,
2924 .d_select = eno_select,
2925 .d_mmap = eno_mmap,
2926 .d_strategy = eno_strat,
2927 .d_reserved_1 = eno_getc,
2928 .d_reserved_2 = eno_putc,
2929};
2930
2931
2932/*
2933 * Called to initialize our device,
2934 * and to register ourselves with devfs
2935 */
2936
2937void
2938fsevents_init(void)
2939{
2940 int ret;
2941
2942 if (fsevents_installed) {
2943 return;
2944 }
2945
2946 fsevents_installed = 1;
2947
2948 ret = cdevsw_add(-1, &fsevents_cdevsw);
2949 if (ret < 0) {
2950 fsevents_installed = 0;
2951 return;
2952 }
2953
2954 devfs_make_node(makedev(ret, 0), DEVFS_CHAR,
2955 UID_ROOT, GID_WHEEL, 0644, "fsevents");
2956
2957 fsevents_internal_init();
2958}
2959
2960
2961char *
2962get_pathbuff(void)
2963{
2964 return zalloc(ZV_NAMEI);
2965}
2966
2967void
2968release_pathbuff(char *path)
2969{
2970 if (path == NULL) {
2971 return;
2972 }
2973 zfree(ZV_NAMEI, path);
2974}
2975
2976int
2977get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx)
2978{
2979 struct vnode_attr va;
2980
2981 VATTR_INIT(&va);
2982 VATTR_WANTED(&va, va_fsid);
2983 va.va_vaflags |= VA_REALFSID;
2984 VATTR_WANTED(&va, va_fileid);
2985 VATTR_WANTED(&va, va_mode);
2986 VATTR_WANTED(&va, va_uid);
2987 VATTR_WANTED(&va, va_document_id);
2988 if (vp->v_flag & VISHARDLINK) {
2989 if (vp->v_type == VDIR) {
2990 VATTR_WANTED(&va, va_dirlinkcount);
2991 } else {
2992 VATTR_WANTED(&va, va_nlink);
2993 }
2994 }
2995
2996 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
2997 memset(fse, 0, sizeof(fse_info));
2998 return -1;
2999 }
3000
3001 return vnode_get_fse_info_from_vap(vp, fse, &va);
3002}
3003
3004int
3005vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap)
3006{
3007 fse->ino = (ino64_t)vap->va_fileid;
3008 fse->dev = (dev_t)vap->va_fsid;
3009 fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | vap->va_mode;
3010 fse->uid = (uid_t)vap->va_uid;
3011 fse->document_id = (uint32_t)vap->va_document_id;
3012 if (vp->v_flag & VISHARDLINK) {
3013 fse->mode |= FSE_MODE_HLINK;
3014 if (vp->v_type == VDIR) {
3015 fse->nlink = (uint64_t)vap->va_dirlinkcount;
3016 } else {
3017 fse->nlink = (uint64_t)vap->va_nlink;
3018 }
3019 }
3020
3021 return 0;
3022}
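
/*
 * Typical usage by a filesystem generating an event (a minimal sketch;
 * "vp", "path", and "ctx" stand for whatever vnode, path buffer, and
 * context the caller already has in hand):
 *
 *     fse_info finfo;
 *     if (need_fsevent(FSE_CONTENT_MODIFIED, vp) &&
 *         get_fse_info(vp, &finfo, ctx) == 0) {
 *         add_fsevent(FSE_CONTENT_MODIFIED, ctx,
 *             FSE_ARG_STRING, (int)strlen(path) + 1, path,
 *             FSE_ARG_FINFO, &finfo, FSE_ARG_DONE);
 *     }
 */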
3023
3024void
3025create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap)
3026{
3027 int fsevent_type = FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic
3028 char pathbuf[MAXPATHLEN];
3029 fse_info fse;
3030
3031
3032 if (kevents & VNODE_EVENT_DELETE) {
3033 fsevent_type = FSE_DELETE;
3034 } else if (kevents & (VNODE_EVENT_EXTEND | VNODE_EVENT_WRITE)) {
3035 fsevent_type = FSE_CONTENT_MODIFIED;
3036 } else if (kevents & VNODE_EVENT_LINK) {
3037 fsevent_type = FSE_CREATE_FILE;
3038 } else if (kevents & VNODE_EVENT_RENAME) {
3039 fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info;
3040 } else if (kevents & (VNODE_EVENT_FILE_CREATED | VNODE_EVENT_FILE_REMOVED | VNODE_EVENT_DIR_CREATED | VNODE_EVENT_DIR_REMOVED)) {
3041 fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it
3042 } else { // a catch all for VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else
3043 fsevent_type = FSE_STAT_CHANGED;
3044 }
3045
3046 // printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)");
3047
3048 fse.dev = vap->va_fsid;
3049 fse.ino = vap->va_fileid;
3050 fse.mode = vnode_vttoif(vnode_vtype(vp)) | (uint32_t)vap->va_mode;
3051 if (vp->v_flag & VISHARDLINK) {
3052 fse.mode |= FSE_MODE_HLINK;
3053 if (vp->v_type == VDIR) {
3054 fse.nlink = vap->va_dirlinkcount;
3055 } else {
3056 fse.nlink = vap->va_nlink;
3057 }
3058 }
3059
3060 if (vp->v_type == VDIR) {
3061 fse.mode |= FSE_REMOTE_DIR_EVENT;
3062 }
3063
3064
3065 fse.uid = vap->va_uid;
3066 fse.document_id = vap->va_document_id;
3067
3068 len = sizeof(pathbuf);
3069 if (vn_getpath_no_firmlink(vp, pathbuf, &len) == 0) {
3070 add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE);
3071 }
3072 return;
3073}
3074
3075#else /* CONFIG_FSE */
3076
3077#include <sys/fsevents.h>
3078
3079/*
3080 * The get_pathbuff and release_pathbuff routines are used in places not
3081 * related to fsevents, and it's a handy abstraction, so define trivial
3082 * versions that don't cache a pool of buffers. This way, we don't have
3083 * to conditionalize the callers, and they still get the advantage of the
3084 * pool of buffers if CONFIG_FSE is turned on.
3085 */
3086char *
3087get_pathbuff(void)
3088{
3089 return zalloc(ZV_NAMEI);
3090}
3091
3092void
3093release_pathbuff(char *path)
3094{
3095 zfree(ZV_NAMEI, path);
3096}
3097
3098int
3099add_fsevent(__unused int type, __unused vfs_context_t ctx, ...)
3100{
3101 return 0;
3102}
3103
3104int
3105need_fsevent(__unused int type, __unused vnode_t vp)
3106{
3107 return 0;
3108}
3109
3110#endif /* CONFIG_FSE */
3111