kern_lockf.c source code [xnu/bsd/kern/kern_lockf.c]

1	/*
2	* Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* Copyright (c) 1982, 1986, 1989, 1993
30	* The Regents of the University of California. All rights reserved.
31	*
32	* This code is derived from software contributed to Berkeley by
33	* Scooter Morris at Genentech Inc.
34	*
35	* Redistribution and use in source and binary forms, with or without
36	* modification, are permitted provided that the following conditions
37	* are met:
38	* 1. Redistributions of source code must retain the above copyright
39	* notice, this list of conditions and the following disclaimer.
40	* 2. Redistributions in binary form must reproduce the above copyright
41	* notice, this list of conditions and the following disclaimer in the
42	* documentation and/or other materials provided with the distribution.
43	* 4. Neither the name of the University nor the names of its contributors
44	* may be used to endorse or promote products derived from this software
45	* without specific prior written permission.
46	*
47	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57	* SUCH DAMAGE.
58	*
59	* @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
60	*/
61
62	#include <sys/cdefs.h>
63	#include <sys/param.h>
64	#include <sys/systm.h>
65	#include <sys/kernel.h>
66	#include <sys/lock.h>
67	#include <sys/mount.h>
68	#include <sys/proc.h>
69	#include <sys/signalvar.h>
70	#include <sys/unistd.h>
71	#include <sys/user.h>
72	#include <sys/vnode.h>
73	#include <sys/vnode_internal.h>
74	#include <sys/vnode_if.h>
75	#include <sys/malloc.h>
76	#include <sys/fcntl.h>
77	#include <sys/lockf.h>
78	#include <sys/sdt.h>
79	#include <kern/policy_internal.h>
80
81	#include <sys/file_internal.h>
82
83	/*
84	* This variable controls the maximum number of processes that will
85	* be checked in doing deadlock detection.
86	*/
87	static int maxlockdepth = MAXDEPTH;
88
89	#if (DEVELOPMENT \|\| DEBUG)
90	#define LOCKF_DEBUGGING 1
91	#endif
92
#ifdef LOCKF_DEBUGGING
#include <sys/sysctl.h>
void lf_print(const char *tag, struct lockf *lock);
void lf_printlist(const char *tag, struct lockf *lock);

/* Bits of lockf_debug; each one enables a class of LOCKF_DEBUG output. */
#define	LF_DBG_LOCKOP	(1 << 0)	/* setlk, getlk, clearlk */
#define	LF_DBG_LIST	(1 << 1)	/* split, coalesce */
#define	LF_DBG_IMPINH	(1 << 2)	/* importance inheritance */
#define	LF_DBG_TRACE	(1 << 3)	/* errors, exit */

static int	lockf_debug = 0;	/* was 2, could be 3 ;-) */
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");

/*
 * If there is no mask bit selector, or there is one, and the selector is
 * set, then output the debugging diagnostic.
 */
#define LOCKF_DEBUG(mask, ...)					\
	do {							\
		if( !(mask) || ((mask) & lockf_debug)) {	\
			printf(__VA_ARGS__);			\
		}						\
	} while(0)
#else	/* !LOCKF_DEBUGGING */
/* Without LOCKF_DEBUGGING the macro expands to nothing; arguments are not evaluated. */
#define LOCKF_DEBUG(mask, ...)		/* mask */
#endif	/* !LOCKF_DEBUGGING */
119
120	MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
121
/*
 * NOLOCKF is fully parenthesized so it expands safely inside any
 * expression (e.g. as the operand of sizeof or a postfix operator).
 */
#define NOLOCKF	((struct lockf *)0)	/* null lock pointer / end of lock list */
#define SELF	0x1			/* lf_findoverlap: only locks whose lf_id matches ours */
#define OTHERS	0x2			/* lf_findoverlap: only locks whose lf_id differs from ours */
#define OFF_MAX	0x7fffffffffffffffULL	/* max off_t */
126
/*
 * Overlapping lock states
 *
 * Result of lf_findoverlap(): how an existing lock on the list ("overlap")
 * relates to the lock being examined ("lock").  OVERLAP_NONE must stay 0
 * because callers test the result for truth.
 */
typedef enum {
	OVERLAP_NONE = 0,		/* no overlap */
	OVERLAP_EQUALS_LOCK,		/* overlap == lock */
	OVERLAP_CONTAINS_LOCK,		/* overlap contains lock */
	OVERLAP_CONTAINED_BY_LOCK,	/* lock contains overlap */
	OVERLAP_STARTS_BEFORE_LOCK,	/* overlap starts before lock */
	OVERLAP_ENDS_AFTER_LOCK		/* overlap ends after lock */
} overlap_t;
138
139	static int lf_clearlock(struct lockf *);
140	static overlap_t lf_findoverlap(struct lockf *,
141	struct lockf , int, struct* lockf *, struct lockf );
142	static struct lockf lf_getblock(struct* lockf *, pid_t);
143	static int lf_getlock(struct lockf , struct* flock *, pid_t);
144	static int lf_setlock(struct lockf , struct* timespec *);
145	static int lf_split(struct lockf , struct* lockf *);
146	static void lf_wakelock(struct lockf *, boolean_t);
147	#if IMPORTANCE_INHERITANCE
148	static void lf_hold_assertion(task_t, struct lockf *);
149	static void lf_jump_to_queue_head(struct lockf , struct* lockf *);
150	static void lf_drop_assertion(struct lockf *);
151	static void lf_boost_blocking_proc(struct lockf , struct* lockf *);
152	static void lf_adjust_assertion(struct lockf *block);
153	#endif /* IMPORTANCE_INHERITANCE */
154
155	/*
156	* lf_advlock
157	*
158	* Description: Advisory record locking support
159	*
160	* Parameters: ap Argument pointer to a vnop_advlock_args
161	* argument descriptor structure for the
162	* lock operation to be attempted.
163	*
164	* Returns: 0 Success
165	* EOVERFLOW
166	* EINVAL
167	* ENOLCK Number of locked regions exceeds limit
168	* lf_setlock:EAGAIN
169	* lf_setlock:EDEADLK
170	* lf_setlock:EINTR
171	* lf_setlock:ENOLCK
172	* lf_setlock:ETIMEDOUT
173	* lf_clearlock:ENOLCK
174	* vnode_size:???
175	*
176	* Notes: We return ENOLCK when we run out of memory to support locks; as
177	* such, there is no specific expectation limit other than the
178	* amount of available resources.
179	*/
180	int
181	lf_advlock(struct vnop_advlock_args *ap)
182	{
183	struct vnode *vp = ap->a_vp;
184	struct flock *fl = ap->a_fl;
185	vfs_context_t context = ap->a_context;
186	struct lockf *lock;
187	off_t start, end, oadd;
188	u_quad_t size;
189	int error;
190	struct lockf **head = &vp->v_lockf;
191
192	/ XXX HFS may need a !vnode_isreg(vp) EISDIR error here /
193
194	/*
195	* Avoid the common case of unlocking when inode has no locks.
196	*/
197	if (head == (struct* lockf *)`0`) {
198	if (ap->a_op != F_SETLK) {
199	fl->l_type = F_UNLCK;
200	LOCKF_DEBUG(LF_DBG_TRACE,
201	"lf_advlock: '%s' unlock without lock\n",
202	vfs_context_proc(context)->p_comm);
203	return (`0`);
204	}
205	}
206
207	/*
208	* Convert the flock structure into a start and end.
209	*/
210	switch (fl->l_whence) {
211
212	case SEEK_SET:
213	case SEEK_CUR:
214	/*
215	* Caller is responsible for adding any necessary offset
216	* when SEEK_CUR is used.
217	*/
218	start = fl->l_start;
219	break;
220
221	case SEEK_END:
222
223	/*
224	* It's OK to cast the u_quad_t to and off_t here, since they
225	* are the same storage size, and the value of the returned
226	* contents will never overflow into the sign bit. We need to
227	* do this because we will use size to force range checks.
228	*/
229	if ((error = vnode_size(vp, (off_t *)&size, context))) {
230	LOCKF_DEBUG(LF_DBG_TRACE,
231	"lf_advlock: vnode_getattr failed: %d\n", error);
232	return (error);
233	}
234
235	if (size > OFF_MAX \|\|
236	(fl->l_start > `0` &&
237	size > (u_quad_t)(OFF_MAX - fl->l_start)))
238	return (EOVERFLOW);
239	start = size + fl->l_start;
240	break;
241
242	default:
243	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n",
244	fl->l_whence);
245	return (EINVAL);
246	}
247	if (start < `0`) {
248	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n",
249	start);
250	return (EINVAL);
251	}
252	if (fl->l_len < `0`) {
253	if (start == `0`) {
254	LOCKF_DEBUG(LF_DBG_TRACE,
255	"lf_advlock: len < 0 & start == 0\n");
256	return (EINVAL);
257	}
258	end = start - `1`;
259	start += fl->l_len;
260	if (start < `0`) {
261	LOCKF_DEBUG(LF_DBG_TRACE,
262	"lf_advlock: start < 0 (%qd)\n", start);
263	return (EINVAL);
264	}
265	} else if (fl->l_len == `0`)
266	end = -`1`;
267	else {
268	oadd = fl->l_len - `1`;
269	if (oadd > (off_t)(OFF_MAX - start)) {
270	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n");
271	return (EOVERFLOW);
272	}
273	end = start + oadd;
274	}
275	/*
276	* Create the lockf structure
277	*/
278	MALLOC(lock, struct lockf , sizeof* *lock, M_LOCKF, M_WAITOK);
279	if (lock == NULL)
280	return (ENOLCK);
281	lock->lf_start = start;
282	lock->lf_end = end;
283	lock->lf_id = ap->a_id;
284	lock->lf_vnode = vp;
285	lock->lf_type = fl->l_type;
286	lock->lf_head = head;
287	lock->lf_next = (struct lockf *)`0`;
288	TAILQ_INIT(&lock->lf_blkhd);
289	lock->lf_flags = ap->a_flags;
290	#if IMPORTANCE_INHERITANCE
291	lock->lf_boosted = LF_NOT_BOOSTED;
292	#endif
293	if (ap->a_flags & F_POSIX)
294	lock->lf_owner = (struct proc *)lock->lf_id;
295	else
296	lock->lf_owner = NULL;
297
298	if (ap->a_flags & F_FLOCK)
299	lock->lf_flags \|= F_WAKE1_SAFE;
300
301	lck_mtx_lock(&vp->v_lock); / protect the lockf list /
302	/*
303	* Do the requested operation.
304	*/
305	switch(ap->a_op) {
306	case F_SETLK:
307	/*
308	* For F_OFD_* locks, lf_id is the fileglob.
309	* Record an "lf_owner" iff this is a confined fd
310	* i.e. it cannot escape this process and will be
311	* F_UNLCKed before the owner exits. (This is
312	* the implicit guarantee needed to ensure lf_owner
313	* remains a valid reference here.)
314	*/
315	if (ap->a_flags & F_OFD_LOCK) {
316	struct fileglob fg = (void* *)lock->lf_id;
317	if (fg->fg_lflags & FG_CONFINED)
318	lock->lf_owner = current_proc();
319	}
320	error = lf_setlock(lock, ap->a_timeout);
321	break;
322
323	case F_UNLCK:
324	error = lf_clearlock(lock);
325	FREE(lock, M_LOCKF);
326	break;
327
328	case F_GETLK:
329	error = lf_getlock(lock, fl, -`1`);
330	FREE(lock, M_LOCKF);
331	break;
332
333	case F_GETLKPID:
334	error = lf_getlock(lock, fl, fl->l_pid);
335	FREE(lock, M_LOCKF);
336	break;
337
338	default:
339	FREE(lock, M_LOCKF);
340	error = EINVAL;
341	break;
342	}
343	lck_mtx_unlock(&vp->v_lock); / done manipulating the list /
344
345	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error);
346	return (error);
347	}
348
349	/*
350	* Empty the queue of msleeping requests for a lock on the given vnode.
351	* Called with the vnode already locked. Used for forced unmount, where
352	* a flock(2) invoker sleeping on a blocked lock holds an iocount reference
353	* that prevents the vnode from ever being drained. Force unmounting wins.
354	*/
355	void
356	lf_abort_advlocks(vnode_t vp)
357	{
358	struct lockf *lock;
359
360	if ((lock = vp->v_lockf) == NULL)
361	return;
362
363	lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
364
365	if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
366	struct lockf *tlock;
367
368	TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
369	/*
370	* Setting this flag should cause all
371	* currently blocked F_SETLK request to
372	* return to userland with an errno.
373	*/
374	tlock->lf_flags \|= F_ABORT;
375	}
376	lf_wakelock(lock, TRUE);
377	}
378	}
379
380	/*
381	* Take any lock attempts which are currently blocked by a given lock ("from")
382	* and mark them as blocked by a different lock ("to"). Used in the case
383	* where a byte range currently occupied by "from" is to be occupied by "to."
384	*/
385	static void
386	lf_move_blocked(struct lockf to, struct* lockf *from)
387	{
388	struct lockf *tlock;
389
390	TAILQ_FOREACH(tlock, &from->lf_blkhd, lf_block) {
391	tlock->lf_next = to;
392	}
393
394	TAILQ_CONCAT(&to->lf_blkhd, &from->lf_blkhd, lf_block);
395	}
396
397	/*
398	* lf_coalesce_adjacent
399	*
400	* Description: Helper function: when setting a lock, coalesce adjacent
401	* locks. Needed because adjacent locks are not overlapping,
402	* but POSIX requires that they be coalesced.
403	*
404	* Parameters: lock The new lock which may be adjacent
405	* to already locked regions, and which
406	* should therefore be coalesced with them
407	*
408	* Returns: <void>
409	*/
410	static void
411	lf_coalesce_adjacent(struct lockf *lock)
412	{
413	struct lockf **lf = lock->lf_head;
414
415	while (*lf != NOLOCKF) {
416	/ reject locks that obviously could not be coalesced /
417	if ((*lf == lock) \|\|
418	((*lf)->lf_id != lock->lf_id) \|\|
419	((*lf)->lf_type != lock->lf_type)) {
420	lf = &(*lf)->lf_next;
421	continue;
422	}
423
424	/*
425	* NOTE: Assumes that if two locks are adjacent on the number line
426	* and belong to the same owner, then they are adjacent on the list.
427	*/
428	if ((*lf)->lf_end != -`1` &&
429	((*lf)->lf_end + `1`) == lock->lf_start) {
430	struct lockf adjacent = lf;
431
432	LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n");
433	lock->lf_start = (*lf)->lf_start;
434	*lf = lock;
435	lf = &(*lf)->lf_next;
436
437	lf_move_blocked(lock, adjacent);
438
439	FREE(adjacent, M_LOCKF);
440	continue;
441	}
442	/ If the lock starts adjacent to us, we can coalesce it /
443	if (lock->lf_end != -`1` &&
444	(lock->lf_end + `1`) == (*lf)->lf_start) {
445	struct lockf adjacent = lf;
446
447	LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n");
448	lock->lf_end = (*lf)->lf_end;
449	lock->lf_next = (*lf)->lf_next;
450	lf = &lock->lf_next;
451
452	lf_move_blocked(lock, adjacent);
453
454	FREE(adjacent, M_LOCKF);
455	continue;
456	}
457
458	/ no matching conditions; go on to next lock /
459	lf = &(*lf)->lf_next;
460	}
461	}
462
463	/*
464	* lf_setlock
465	*
466	* Description: Set a byte-range lock.
467	*
468	* Parameters: lock The lock structure describing the lock
469	* to be set; allocated by the caller, it
470	* will be linked into the lock list if
471	* the set is successful, and freed if the
472	* set is unsuccessful.
473	*
474	* timeout Timeout specified in the case of
475	* SETLKWTIMEOUT.
476	*
477	* Returns: 0 Success
478	* EAGAIN
479	* EDEADLK
480	* lf_split:ENOLCK
481	* lf_clearlock:ENOLCK
482	* msleep:EINTR
483	* msleep:ETIMEDOUT
484	*
485	* Notes: We add the lock to the provisional lock list. We do not
486	* coalesce at this time; this has implications for other lock
487	* requestors in the blocker search mechanism.
488	*/
489	static int
490	lf_setlock(struct lockf lock, struct* timespec *timeout)
491	{
492	struct lockf *block;
493	struct lockf **head = lock->lf_head;
494	struct lockf *prev, overlap, *ltmp;
495	static char lockstr[] = "lockf";
496	int priority, needtolink, error;
497	struct vnode *vp = lock->lf_vnode;
498	overlap_t ovcase;
499
500	#ifdef LOCKF_DEBUGGING
501	if (lockf_debug & LF_DBG_LOCKOP) {
502	lf_print("lf_setlock", lock);
503	lf_printlist("lf_setlock(in)", lock);
504	}
505	#endif /* LOCKF_DEBUGGING */
506
507	/*
508	* Set the priority
509	*/
510	priority = PLOCK;
511	if (lock->lf_type == F_WRLCK)
512	priority += `4`;
513	priority \|= PCATCH;
514	/*
515	* Scan lock list for this file looking for locks that would block us.
516	*/
517	while ((block = lf_getblock(lock, -`1`))) {
518	/*
519	* Free the structure and return if nonblocking.
520	*/
521	if ((lock->lf_flags & F_WAIT) == `0`) {
522	DTRACE_FSINFO(advlock__nowait, vnode_t, vp);
523	FREE(lock, M_LOCKF);
524	return (EAGAIN);
525	}
526
527	/*
528	* We are blocked. Since flock style locks cover
529	* the whole file, there is no chance for deadlock.
530	*
531	* OFD byte-range locks currently do NOT support
532	* deadlock detection.
533	*
534	* For POSIX byte-range locks we must check for deadlock.
535	*
536	* Deadlock detection is done by looking through the
537	* wait channels to see if there are any cycles that
538	* involve us. MAXDEPTH is set just to make sure we
539	* do not go off into neverland.
540	*/
541	if ((lock->lf_flags & F_POSIX) &&
542	(block->lf_flags & F_POSIX)) {
543	struct proc wproc, bproc;
544	struct uthread *ut;
545	struct lockf *waitblock;
546	int i = `0`;
547
548	/ The block is waiting on something /
549	wproc = block->lf_owner;
550	proc_lock(wproc);
551	TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
552	/*
553	* While the thread is asleep (uu_wchan != 0)
554	* in this code (uu_wmesg == lockstr)
555	* and we have not exceeded the maximum cycle
556	* depth (i < maxlockdepth), then check for a
557	* cycle to see if the lock is blocked behind
558	* someone blocked behind us.
559	*/
560	while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
561	ut->uu_wmesg == lockstr &&
562	(i++ < maxlockdepth)) {
563	waitblock = (struct lockf *)ut->uu_wchan;
564	/*
565	* Get the lock blocking the lock
566	* which would block us, and make
567	* certain it hasn't come unblocked
568	* (been granted, e.g. between the time
569	* we called lf_getblock, and the time
570	* we successfully acquired the
571	* proc_lock).
572	*/
573	waitblock = waitblock->lf_next;
574	if (waitblock == NULL)
575	break;
576
577	/*
578	* Make sure it's an advisory range
579	* lock and not any other kind of lock;
580	* if we mix lock types, it's our own
581	* fault.
582	*/
583	if ((waitblock->lf_flags & F_POSIX) == `0`)
584	break;
585
586	/*
587	* If the owner of the lock that's
588	* blocking a lock that's blocking us
589	* getting the requested lock, then we
590	* would deadlock, so error out.
591	*/
592	bproc = waitblock->lf_owner;
593	if (bproc == lock->lf_owner) {
594	proc_unlock(wproc);
595	FREE(lock, M_LOCKF);
596	return (EDEADLK);
597	}
598	}
599	}
600	proc_unlock(wproc);
601	}
602
603	/*
604	* For flock type locks, we must first remove
605	* any shared locks that we hold before we sleep
606	* waiting for an exclusive lock.
607	*/
608	if ((lock->lf_flags & F_FLOCK) &&
609	lock->lf_type == F_WRLCK) {
610	lock->lf_type = F_UNLCK;
611	if ((error = lf_clearlock(lock)) != `0`) {
612	FREE(lock, M_LOCKF);
613	return (error);
614	}
615	lock->lf_type = F_WRLCK;
616	}
617	/*
618	* Add our lock to the blocked list and sleep until we're free.
619	* Remember who blocked us (for deadlock detection).
620	*/
621	lock->lf_next = block;
622	TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
623
624	if ( !(lock->lf_flags & F_FLOCK))
625	block->lf_flags &= ~F_WAKE1_SAFE;
626
627	#if IMPORTANCE_INHERITANCE
628	/*
629	* Importance donation is done only for cases where the
630	* owning task can be unambiguously determined.
631	*
632	* POSIX type locks are not inherited by child processes;
633	* we maintain a 1:1 mapping between a lock and its owning
634	* process.
635	*
636	* Flock type locks are inherited across fork() and there is
637	* no 1:1 mapping in the general case. However, the fileglobs
638	* used by OFD locks may be confined to the process that
639	* created them, and thus have an "owner", in which case
640	* we also attempt importance donation.
641	*/
642	if ((lock->lf_flags & block->lf_flags & F_POSIX) != `0`)
643	lf_boost_blocking_proc(lock, block);
644	else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) &&
645	lock->lf_owner != block->lf_owner &&
646	NULL != lock->lf_owner && NULL != block->lf_owner)
647	lf_boost_blocking_proc(lock, block);
648	#endif /* IMPORTANCE_INHERITANCE */
649
650	#ifdef LOCKF_DEBUGGING
651	if (lockf_debug & LF_DBG_LOCKOP) {
652	lf_print("lf_setlock: blocking on", block);
653	lf_printlist("lf_setlock(block)", block);
654	}
655	#endif /* LOCKF_DEBUGGING */
656	DTRACE_FSINFO(advlock__wait, vnode_t, vp);
657
658	error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
659
660	if (error == `0` && (lock->lf_flags & F_ABORT) != `0`)
661	error = EBADF;
662
663	if (lock->lf_next) {
664	/*
665	* lf_wakelock() always sets wakelock->lf_next to
666	* NULL before a wakeup; so we've been woken early
667	* - perhaps by a debugger, signal or other event.
668	*
669	* Remove 'lock' from the block list (avoids double-add
670	* in the spurious case, which would create a cycle)
671	*/
672	TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
673	#if IMPORTANCE_INHERITANCE
674	/*
675	* Adjust the boost on lf_next.
676	*/
677	lf_adjust_assertion(lock->lf_next);
678	#endif /* IMPORTANCE_INHERITANCE */
679	lock->lf_next = NULL;
680
681	if (error == `0`) {
682	/*
683	* If this was a spurious wakeup, retry
684	*/
685	printf("%s: spurious wakeup, retrying lock\n",
686	__func__);
687	continue;
688	}
689	}
690
691	if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
692	if ((block = lf_getblock(lock, -`1`)) != NULL)
693	lf_move_blocked(block, lock);
694	}
695
696	if (error) {
697	if (!TAILQ_EMPTY(&lock->lf_blkhd))
698	lf_wakelock(lock, TRUE);
699	FREE(lock, M_LOCKF);
700	/ Return ETIMEDOUT if timeout occoured. /
701	if (error == EWOULDBLOCK) {
702	error = ETIMEDOUT;
703	}
704	return (error);
705	}
706	}
707
708	/*
709	* No blocks!! Add the lock. Note that we will
710	* downgrade or upgrade any overlapping locks this
711	* process already owns.
712	*
713	* Skip over locks owned by other processes.
714	* Handle any locks that overlap and are owned by ourselves.
715	*/
716	prev = head;
717	block = *head;
718	needtolink = `1`;
719	for (;;) {
720	ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
721	if (ovcase)
722	block = overlap->lf_next;
723	/*
724	* Six cases:
725	* 0) no overlap
726	* 1) overlap == lock
727	* 2) overlap contains lock
728	* 3) lock contains overlap
729	* 4) overlap starts before lock
730	* 5) overlap ends after lock
731	*/
732	switch (ovcase) {
733	case OVERLAP_NONE:
734	if (needtolink) {
735	*prev = lock;
736	lock->lf_next = overlap;
737	}
738	break;
739
740	case OVERLAP_EQUALS_LOCK:
741	/*
742	* If downgrading lock, others may be
743	* able to acquire it.
744	*/
745	if (lock->lf_type == F_RDLCK &&
746	overlap->lf_type == F_WRLCK)
747	lf_wakelock(overlap, TRUE);
748	overlap->lf_type = lock->lf_type;
749	FREE(lock, M_LOCKF);
750	lock = overlap; / for lf_coalesce_adjacent() /
751	break;
752
753	case OVERLAP_CONTAINS_LOCK:
754	/*
755	* Check for common starting point and different types.
756	*/
757	if (overlap->lf_type == lock->lf_type) {
758	FREE(lock, M_LOCKF);
759	lock = overlap; / for lf_coalesce_adjacent() /
760	break;
761	}
762	if (overlap->lf_start == lock->lf_start) {
763	*prev = lock;
764	lock->lf_next = overlap;
765	overlap->lf_start = lock->lf_end + `1`;
766	} else {
767	/*
768	* If we can't split the lock, we can't
769	* grant it. Claim a system limit for the
770	* resource shortage.
771	*/
772	if (lf_split(overlap, lock)) {
773	FREE(lock, M_LOCKF);
774	return (ENOLCK);
775	}
776	}
777	lf_wakelock(overlap, TRUE);
778	break;
779
780	case OVERLAP_CONTAINED_BY_LOCK:
781	/*
782	* If downgrading lock, others may be able to
783	* acquire it, otherwise take the list.
784	*/
785	if (lock->lf_type == F_RDLCK &&
786	overlap->lf_type == F_WRLCK) {
787	lf_wakelock(overlap, TRUE);
788	} else {
789	while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
790	ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
791	TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
792	lf_block);
793	TAILQ_INSERT_TAIL(&lock->lf_blkhd,
794	ltmp, lf_block);
795	ltmp->lf_next = lock;
796	}
797	}
798	/*
799	* Add the new lock if necessary and delete the overlap.
800	*/
801	if (needtolink) {
802	*prev = lock;
803	lock->lf_next = overlap->lf_next;
804	prev = &lock->lf_next;
805	needtolink = `0`;
806	} else
807	*prev = overlap->lf_next;
808	FREE(overlap, M_LOCKF);
809	continue;
810
811	case OVERLAP_STARTS_BEFORE_LOCK:
812	/*
813	* Add lock after overlap on the list.
814	*/
815	lock->lf_next = overlap->lf_next;
816	overlap->lf_next = lock;
817	overlap->lf_end = lock->lf_start - `1`;
818	prev = &lock->lf_next;
819	lf_wakelock(overlap, TRUE);
820	needtolink = `0`;
821	continue;
822
823	case OVERLAP_ENDS_AFTER_LOCK:
824	/*
825	* Add the new lock before overlap.
826	*/
827	if (needtolink) {
828	*prev = lock;
829	lock->lf_next = overlap;
830	}
831	overlap->lf_start = lock->lf_end + `1`;
832	lf_wakelock(overlap, TRUE);
833	break;
834	}
835	break;
836	}
837	/ Coalesce adjacent locks with identical attributes /
838	lf_coalesce_adjacent(lock);
839	#ifdef LOCKF_DEBUGGING
840	if (lockf_debug & LF_DBG_LOCKOP) {
841	lf_print("lf_setlock: got the lock", lock);
842	lf_printlist("lf_setlock(out)", lock);
843	}
844	#endif /* LOCKF_DEBUGGING */
845	return (`0`);
846	}
847
848
849	/*
850	* lf_clearlock
851	*
852	* Description: Remove a byte-range lock on an vnode. Generally, find the
853	* lock (or an overlap to that lock) and remove it (or shrink
854	* it), then wakeup anyone we can.
855	*
856	* Parameters: unlock The lock to clear
857	*
858	* Returns: 0 Success
859	* lf_split:ENOLCK
860	*
861	* Notes: A caller may unlock all the locks owned by the caller by
862	* specifying the entire file range; locks owned by other
863	* callers are not effected by this operation.
864	*/
865	static int
866	lf_clearlock(struct lockf *unlock)
867	{
868	struct lockf **head = unlock->lf_head;
869	struct lockf lf = head;
870	struct lockf overlap, *prev;
871	overlap_t ovcase;
872
873	if (lf == NOLOCKF)
874	return (`0`);
875	#ifdef LOCKF_DEBUGGING
876	if (unlock->lf_type != F_UNLCK)
877	panic("lf_clearlock: bad type");
878	if (lockf_debug & LF_DBG_LOCKOP)
879	lf_print("lf_clearlock", unlock);
880	#endif /* LOCKF_DEBUGGING */
881	prev = head;
882	while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
883	/*
884	* Wakeup the list of locks to be retried.
885	*/
886	lf_wakelock(overlap, FALSE);
887	#if IMPORTANCE_INHERITANCE
888	if (overlap->lf_boosted == LF_BOOSTED) {
889	lf_drop_assertion(overlap);
890	}
891	#endif /* IMPORTANCE_INHERITANCE */
892
893	switch (ovcase) {
894	case OVERLAP_NONE: / satisfy compiler enum/switch /
895	break;
896
897	case OVERLAP_EQUALS_LOCK:
898	*prev = overlap->lf_next;
899	FREE(overlap, M_LOCKF);
900	break;
901
902	case OVERLAP_CONTAINS_LOCK: / split it /
903	if (overlap->lf_start == unlock->lf_start) {
904	overlap->lf_start = unlock->lf_end + `1`;
905	break;
906	}
907	/*
908	* If we can't split the lock, we can't grant it.
909	* Claim a system limit for the resource shortage.
910	*/
911	if (lf_split(overlap, unlock))
912	return (ENOLCK);
913	overlap->lf_next = unlock->lf_next;
914	break;
915
916	case OVERLAP_CONTAINED_BY_LOCK:
917	*prev = overlap->lf_next;
918	lf = overlap->lf_next;
919	FREE(overlap, M_LOCKF);
920	continue;
921
922	case OVERLAP_STARTS_BEFORE_LOCK:
923	overlap->lf_end = unlock->lf_start - `1`;
924	prev = &overlap->lf_next;
925	lf = overlap->lf_next;
926	continue;
927
928	case OVERLAP_ENDS_AFTER_LOCK:
929	overlap->lf_start = unlock->lf_end + `1`;
930	break;
931	}
932	break;
933	}
934	#ifdef LOCKF_DEBUGGING
935	if (lockf_debug & LF_DBG_LOCKOP)
936	lf_printlist("lf_clearlock", unlock);
937	#endif /* LOCKF_DEBUGGING */
938	return (`0`);
939	}
940
941
942	/*
943	* lf_getlock
944	*
945	* Description: Check whether there is a blocking lock, and if so return
946	* its process identifier into the lock being requested.
947	*
948	* Parameters: lock Pointer to lock to test for blocks
949	* fl Pointer to flock structure to receive
950	* the blocking lock information, if a
951	* blocking lock is found.
952	* matchpid -1, or pid value to match in lookup.
953	*
954	* Returns: 0 Success
955	*
956	* Implicit Returns:
957	* *fl Contents modified to reflect the
958	* blocking lock, if one is found; not
959	* modified otherwise
960	*
961	* Notes: fl->l_pid will be (-1) for file locks and will only be set to
962	* the blocking process ID for advisory record locks.
963	*/
964	static int
965	lf_getlock(struct lockf lock, struct* flock *fl, pid_t matchpid)
966	{
967	struct lockf *block;
968
969	#ifdef LOCKF_DEBUGGING
970	if (lockf_debug & LF_DBG_LOCKOP)
971	lf_print("lf_getlock", lock);
972	#endif /* LOCKF_DEBUGGING */
973
974	if ((block = lf_getblock(lock, matchpid))) {
975	fl->l_type = block->lf_type;
976	fl->l_whence = SEEK_SET;
977	fl->l_start = block->lf_start;
978	if (block->lf_end == -`1`)
979	fl->l_len = `0`;
980	else
981	fl->l_len = block->lf_end - block->lf_start + `1`;
982	if (NULL != block->lf_owner) {
983	/*
984	* lf_owner is only non-NULL when the lock
985	* "owner" can be unambiguously determined
986	*/
987	fl->l_pid = proc_pid(block->lf_owner);
988	} else
989	fl->l_pid = -`1`;
990	} else {
991	fl->l_type = F_UNLCK;
992	}
993	return (`0`);
994	}
995
996	/*
997	* lf_getblock
998	*
999	* Description: Walk the list of locks for an inode and return the first
1000	* blocking lock. A lock is considered blocking if we are not
1001	* the lock owner; otherwise, we are permitted to upgrade or
1002	* downgrade it, and it's not considered blocking.
1003	*
1004	* Parameters: lock The lock for which we are interested
1005	* in obtaining the blocking lock, if any
1006	* matchpid -1, or pid value to match in lookup.
1007	*
1008	* Returns: NOLOCKF No blocking lock exists
1009	* !NOLOCKF The address of the blocking lock's
1010	* struct lockf.
1011	*/
1012	static struct lockf *
1013	lf_getblock(struct lockf *lock, pid_t matchpid)
1014	{
1015	struct lockf *prev, overlap, lf = (lock->lf_head);
1016
1017	for (prev = lock->lf_head;
1018	lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != OVERLAP_NONE;
1019	lf = overlap->lf_next) {
1020	/*
1021	* Found an overlap.
1022	*
1023	* If we're matching pids, and it's a record lock,
1024	* or it's an OFD lock on a process-confined fd,
1025	* but the pid doesn't match, then keep on looking ..
1026	*/
1027	if (matchpid != -`1` &&
1028	(overlap->lf_flags & (F_POSIX\|F_OFD_LOCK)) != `0` &&
1029	proc_pid(overlap->lf_owner) != matchpid)
1030	continue;
1031
1032	/*
1033	* does it block us?
1034	*/
1035	if ((lock->lf_type == F_WRLCK \|\| overlap->lf_type == F_WRLCK))
1036	return (overlap);
1037	}
1038	return (NOLOCKF);
1039	}
1040
1041
1042	/*
1043	* lf_findoverlap
1044	*
1045	* Description: Walk the list of locks to find an overlapping lock (if any).
1046	*
1047	* Parameters: lf First lock on lock list
1048	* lock The lock we are checking for an overlap
1049	* check Check type
1050	* prev pointer to pointer pointer to contain
1051	* address of pointer to previous lock
1052	* pointer to overlapping lock, if overlap
1053	* overlap pointer to pointer to contain address
1054	* of overlapping lock
1055	*
1056	* Returns: OVERLAP_NONE
1057	* OVERLAP_EQUALS_LOCK
1058	* OVERLAP_CONTAINS_LOCK
1059	* OVERLAP_CONTAINED_BY_LOCK
1060	* OVERLAP_STARTS_BEFORE_LOCK
1061	* OVERLAP_ENDS_AFTER_LOCK
1062	*
1063	* Implicit Returns:
1064	* *prev The address of the next pointer in the
1065	* lock previous to the overlapping lock;
1066	* this is generally used to relink the
1067	* lock list, avoiding a second iteration.
1068	* *overlap The pointer to the overlapping lock
1069	* itself; this is used to return data in
1070	* the check == OTHERS case, and for the
1071	* caller to modify the overlapping lock,
1072	* in the check == SELF case
1073	*
1074	* Note: This returns only the FIRST overlapping lock. There may be
1075	* more than one. lf_getlock will return the first blocking lock,
1076	* while lf_setlock will iterate over all overlapping locks to
1077	*
1078	* The check parameter can be SELF, meaning we are looking for
1079	* overlapping locks owned by us, or it can be OTHERS, meaning
1080	* we are looking for overlapping locks owned by someone else so
1081	* we can report a blocking lock on an F_GETLK request.
1082	*
1083	* The value of overlap and prev are modified, even if there is
1084	* no overlapping lock found; always check the return code.
1085	*/
1086	static overlap_t
1087	lf_findoverlap(struct lockf lf, struct* lockf lock, int* type,
1088	struct lockf *prev, struct lockf overlap)
1089	{
1090	off_t start, end;
1091	int found_self = `0`;
1092
1093	*overlap = lf;
1094	if (lf == NOLOCKF)
1095	return (`0`);
1096	#ifdef LOCKF_DEBUGGING
1097	if (lockf_debug & LF_DBG_LIST)
1098	lf_print("lf_findoverlap: looking for overlap in", lock);
1099	#endif /* LOCKF_DEBUGGING */
1100	start = lock->lf_start;
1101	end = lock->lf_end;
1102	while (lf != NOLOCKF) {
1103	if (((type & SELF) && lf->lf_id != lock->lf_id) \|\|
1104	((type & OTHERS) && lf->lf_id == lock->lf_id)) {
1105	/*
1106	* Locks belonging to one process are adjacent on the
1107	* list, so if we've found any locks belonging to us,
1108	* and we're now seeing something else, then we've
1109	* examined all "self" locks. Note that bailing out
1110	* here is quite important; for coalescing, we assume
1111	* numerically adjacent locks from the same owner to
1112	* be adjacent on the list.
1113	*/
1114	if ((type & SELF) && found_self) {
1115	return OVERLAP_NONE;
1116	}
1117
1118	*prev = &lf->lf_next;
1119	*overlap = lf = lf->lf_next;
1120	continue;
1121	}
1122
1123	if ((type & SELF)) {
1124	found_self = `1`;
1125	}
1126
1127	#ifdef LOCKF_DEBUGGING
1128	if (lockf_debug & LF_DBG_LIST)
1129	lf_print("\tchecking", lf);
1130	#endif /* LOCKF_DEBUGGING */
1131	/*
1132	* OK, check for overlap
1133	*/
1134	if ((lf->lf_end != -`1` && start > lf->lf_end) \|\|
1135	(end != -`1` && lf->lf_start > end)) {
1136	/ Case 0 /
1137	LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n");
1138
1139	/*
1140	* NOTE: assumes that locks for the same process are
1141	* nonintersecting and ordered.
1142	*/
1143	if ((type & SELF) && end != -`1` && lf->lf_start > end)
1144	return (OVERLAP_NONE);
1145	*prev = &lf->lf_next;
1146	*overlap = lf = lf->lf_next;
1147	continue;
1148	}
1149	if ((lf->lf_start == start) && (lf->lf_end == end)) {
1150	LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n");
1151	return (OVERLAP_EQUALS_LOCK);
1152	}
1153	if ((lf->lf_start <= start) &&
1154	(end != -`1`) &&
1155	((lf->lf_end >= end) \|\| (lf->lf_end == -`1`))) {
1156	LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n");
1157	return (OVERLAP_CONTAINS_LOCK);
1158	}
1159	if (start <= lf->lf_start &&
1160	(end == -`1` \|\|
1161	(lf->lf_end != -`1` && end >= lf->lf_end))) {
1162	LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n");
1163	return (OVERLAP_CONTAINED_BY_LOCK);
1164	}
1165	if ((lf->lf_start < start) &&
1166	((lf->lf_end >= start) \|\| (lf->lf_end == -`1`))) {
1167	LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n");
1168	return (OVERLAP_STARTS_BEFORE_LOCK);
1169	}
1170	if ((lf->lf_start > start) &&
1171	(end != -`1`) &&
1172	((lf->lf_end > end) \|\| (lf->lf_end == -`1`))) {
1173	LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n");
1174	return (OVERLAP_ENDS_AFTER_LOCK);
1175	}
1176	panic("lf_findoverlap: default");
1177	}
1178	return (OVERLAP_NONE);
1179	}
1180
1181
1182	/*
1183	* lf_split
1184	*
1185	* Description: Split a lock and a contained region into two or three locks
1186	* as necessary.
1187	*
1188	* Parameters: lock1 Lock to split
1189	* lock2 Overlapping lock region requiring the
1190	* split (upgrade/downgrade/unlock)
1191	*
1192	* Returns: 0 Success
1193	* ENOLCK No memory for new lock
1194	*
1195	* Implicit Returns:
1196	* *lock1 Modified original lock
1197	* *lock2 Overlapping lock (inserted into list)
1198	* (new lock) Potential new lock inserted into list
1199	* if split results in 3 locks
1200	*
1201	* Notes: This operation can only fail if the split would result in three
1202	* locks, and there is insufficient memory to allocate the third
1203	* lock; in that case, neither of the locks will be modified.
1204	*/
1205	static int
1206	lf_split(struct lockf lock1, struct* lockf *lock2)
1207	{
1208	struct lockf *splitlock;
1209
1210	#ifdef LOCKF_DEBUGGING
1211	if (lockf_debug & LF_DBG_LIST) {
1212	lf_print("lf_split", lock1);
1213	lf_print("splitting from", lock2);
1214	}
1215	#endif /* LOCKF_DEBUGGING */
1216	/*
1217	* Check to see if splitting into only two pieces.
1218	*/
1219	if (lock1->lf_start == lock2->lf_start) {
1220	lock1->lf_start = lock2->lf_end + `1`;
1221	lock2->lf_next = lock1;
1222	return (`0`);
1223	}
1224	if (lock1->lf_end == lock2->lf_end) {
1225	lock1->lf_end = lock2->lf_start - `1`;
1226	lock2->lf_next = lock1->lf_next;
1227	lock1->lf_next = lock2;
1228	return (`0`);
1229	}
1230	/*
1231	* Make a new lock consisting of the last part of
1232	* the encompassing lock
1233	*/
1234	MALLOC(splitlock, struct lockf , sizeof* *splitlock, M_LOCKF, M_WAITOK);
1235	if (splitlock == NULL)
1236	return (ENOLCK);
1237	bcopy(lock1, splitlock, sizeof *splitlock);
1238	splitlock->lf_start = lock2->lf_end + `1`;
1239	TAILQ_INIT(&splitlock->lf_blkhd);
1240	lock1->lf_end = lock2->lf_start - `1`;
1241	/*
1242	* OK, now link it in
1243	*/
1244	splitlock->lf_next = lock1->lf_next;
1245	lock2->lf_next = splitlock;
1246	lock1->lf_next = lock2;
1247
1248	return (`0`);
1249	}
1250
1251
1252	/*
1253	* lf_wakelock
1254	*
1255	* Wakeup a blocklist in the case of a downgrade or unlock, since others
1256	* waiting on the lock may now be able to acquire it.
1257	*
1258	* Parameters: listhead Lock list head on which waiters may
1259	* have pending locks
1260	*
1261	* Returns: <void>
1262	*
1263	* Notes: This function iterates a list of locks and wakes all waiters,
1264	* rather than only waiters for the contended regions. Because
1265	* of this, for heavily contended files, this can result in a
1266	* "thundering herd" situation. Refactoring the code could make
1267	* this operation more efficient, if heavy contention ever results
1268	* in a real-world performance problem.
1269	*/
1270	static void
1271	lf_wakelock(struct lockf *listhead, boolean_t force_all)
1272	{
1273	struct lockf *wakelock;
1274	boolean_t wake_all = TRUE;
1275
1276	if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE))
1277	wake_all = FALSE;
1278
1279	while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
1280	wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
1281	TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
1282
1283	wakelock->lf_next = NOLOCKF;
1284	#ifdef LOCKF_DEBUGGING
1285	if (lockf_debug & LF_DBG_LOCKOP)
1286	lf_print("lf_wakelock: awakening", wakelock);
1287	#endif /* LOCKF_DEBUGGING */
1288	if (wake_all == FALSE) {
1289	/*
1290	* If there are items on the list head block list,
1291	* move them to the wakelock list instead, and then
1292	* correct their lf_next pointers.
1293	*/
1294	if (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
1295	TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);
1296
1297	struct lockf *tlock;
1298
1299	TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
1300	if (TAILQ_NEXT(tlock, lf_block) == tlock) {
1301	/ See rdar://10887303 /
1302	panic("cycle in wakelock list");
1303	}
1304	tlock->lf_next = wakelock;
1305	}
1306	}
1307	}
1308	wakeup(wakelock);
1309
1310	if (wake_all == FALSE)
1311	break;
1312	}
1313	}
1314
1315
1316	#ifdef LOCKF_DEBUGGING
/* Pid of the process recorded in the lf_owner field of lock 'lf'. */
#define GET_LF_OWNER_PID(lf)	(proc_pid((lf)->lf_owner))
1318
1319	/*
1320	* lf_print DEBUG
1321	*
1322	* Print out a lock; lock information is prefixed by the string in 'tag'
1323	*
1324	* Parameters: tag A string tag for debugging
1325	* lock The lock whose information should be
1326	* displayed
1327	*
1328	* Returns: <void>
1329	*/
1330	void
1331	lf_print(const char tag, struct* lockf *lock)
1332	{
1333	printf("%s: lock %p for ", tag, (void *)lock);
1334	if (lock->lf_flags & F_POSIX)
1335	printf("proc %p (owner %d)",
1336	lock->lf_id, GET_LF_OWNER_PID(lock));
1337	else if (lock->lf_flags & F_OFD_LOCK)
1338	printf("fg %p (owner %d)",
1339	lock->lf_id, GET_LF_OWNER_PID(lock));
1340	else
1341	printf("id %p", (void *)lock->lf_id);
1342	if (lock->lf_vnode != `0`)
1343	printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx",
1344	lock->lf_vnode,
1345	lock->lf_type == F_RDLCK ? "shared" :
1346	lock->lf_type == F_WRLCK ? "exclusive" :
1347	lock->lf_type == F_UNLCK ? "unlock" : "unknown",
1348	(intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
1349	else
1350	printf(" %s, start 0x%016llx, end 0x%016llx",
1351	lock->lf_type == F_RDLCK ? "shared" :
1352	lock->lf_type == F_WRLCK ? "exclusive" :
1353	lock->lf_type == F_UNLCK ? "unlock" : "unknown",
1354	(intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
1355	if (!TAILQ_EMPTY(&lock->lf_blkhd))
1356	printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
1357	else
1358	printf("\n");
1359	}
1360
1361
1362	/*
1363	* lf_printlist DEBUG
1364	*
1365	* Print out a lock list for the vnode associated with 'lock'; lock information
1366	* is prefixed by the string in 'tag'
1367	*
1368	* Parameters: tag A string tag for debugging
1369	* lock The lock whose vnode's lock list should
1370	* be displayed
1371	*
1372	* Returns: <void>
1373	*/
1374	void
1375	lf_printlist(const char tag, struct* lockf *lock)
1376	{
1377	struct lockf lf, blk;
1378
1379	if (lock->lf_vnode == `0`)
1380	return;
1381
1382	printf("%s: Lock list for vno %p:\n",
1383	tag, lock->lf_vnode);
1384	for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
1385	printf("\tlock %p for ",(void *)lf);
1386	if (lf->lf_flags & F_POSIX)
1387	printf("proc %p (owner %d)",
1388	lf->lf_id, GET_LF_OWNER_PID(lf));
1389	else if (lf->lf_flags & F_OFD_LOCK)
1390	printf("fg %p (owner %d)",
1391	lf->lf_id, GET_LF_OWNER_PID(lf));
1392	else
1393	printf("id %p", (void *)lf->lf_id);
1394	printf(", %s, start 0x%016llx, end 0x%016llx",
1395	lf->lf_type == F_RDLCK ? "shared" :
1396	lf->lf_type == F_WRLCK ? "exclusive" :
1397	lf->lf_type == F_UNLCK ? "unlock" :
1398	"unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
1399	TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
1400	printf("\n\t\tlock request %p for ", (void *)blk);
1401	if (blk->lf_flags & F_POSIX)
1402	printf("proc %p (owner %d)",
1403	blk->lf_id, GET_LF_OWNER_PID(blk));
1404	else if (blk->lf_flags & F_OFD_LOCK)
1405	printf("fg %p (owner %d)",
1406	blk->lf_id, GET_LF_OWNER_PID(blk));
1407	else
1408	printf("id %p", (void *)blk->lf_id);
1409	printf(", %s, start 0x%016llx, end 0x%016llx",
1410	blk->lf_type == F_RDLCK ? "shared" :
1411	blk->lf_type == F_WRLCK ? "exclusive" :
1412	blk->lf_type == F_UNLCK ? "unlock" :
1413	"unknown", (intmax_t)blk->lf_start,
1414	(intmax_t)blk->lf_end);
1415	if (!TAILQ_EMPTY(&blk->lf_blkhd))
1416	panic("lf_printlist: bad list");
1417	}
1418	printf("\n");
1419	}
1420	}
1421	#endif /* LOCKF_DEBUGGING */
1422
1423	#if IMPORTANCE_INHERITANCE
1424
1425	/*
1426	* lf_hold_assertion
1427	*
1428	* Call task importance hold assertion on the owner of the lock.
1429	*
1430	* Parameters: block_task Owner of the lock blocking
1431	* current thread.
1432	*
1433	* block lock on which the current thread
1434	* is blocking on.
1435	*
1436	* Returns: <void>
1437	*
1438	* Notes: The task reference on block_task is not needed to be hold since
1439	* the current thread has vnode lock and block_task has a file
1440	* lock, thus removing file lock in exit requires block_task to
1441	* grab the vnode lock.
1442	*/
1443	static void
1444	lf_hold_assertion(task_t block_task, struct lockf *block)
1445	{
1446	if (task_importance_hold_file_lock_assertion(block_task, `1`) == `0`) {
1447	block->lf_boosted = LF_BOOSTED;
1448	LOCKF_DEBUG(LF_DBG_IMPINH,
1449	"lf: importance hold file lock assert on pid %d lock %p\n",
1450	proc_pid(block->lf_owner), block);
1451	}
1452	}
1453
1454
1455	/*
1456	* lf_jump_to_queue_head
1457	*
1458	* Jump the lock from the tail of the block queue to the head of
1459	* the queue.
1460	*
1461	* Parameters: block lockf struct containing the
1462	* block queue.
1463	* lock lockf struct to be jumped to the
1464	* front.
1465	*
1466	* Returns: <void>
1467	*/
1468	static void
1469	lf_jump_to_queue_head(struct lockf block, struct* lockf *lock)
1470	{
1471	/ Move the lock to the head of the block queue. /
1472	TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block);
1473	TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block);
1474	}
1475
1476
1477	/*
1478	* lf_drop_assertion
1479	*
1480	* Drops the task hold assertion.
1481	*
1482	* Parameters: block lockf struct holding the assertion.
1483	*
1484	* Returns: <void>
1485	*/
1486	static void
1487	lf_drop_assertion(struct lockf *block)
1488	{
1489	LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n",
1490	proc_pid(block->lf_owner), block);
1491
1492	task_t current_task = proc_task(block->lf_owner);
1493	task_importance_drop_file_lock_assertion(current_task, `1`);
1494	block->lf_boosted = LF_NOT_BOOSTED;
1495	}
1496
1497	/*
1498	* lf_adjust_assertion
1499	*
1500	* Adjusts importance assertion of file lock. Goes through
1501	* all the blocking locks and checks if the file lock needs
1502	* to be boosted anymore.
1503	*
1504	* Parameters: block lockf structure which needs to be adjusted.
1505	*
1506	* Returns: <void>
1507	*/
1508	static void
1509	lf_adjust_assertion(struct lockf *block)
1510	{
1511	boolean_t drop_boost = TRUE;
1512	struct lockf *next;
1513
1514	/ Return if the lock is not boosted /
1515	if (block->lf_boosted == LF_NOT_BOOSTED) {
1516	return;
1517	}
1518
1519	TAILQ_FOREACH(next, &block->lf_blkhd, lf_block) {
1520	/ Check if block and next are same type of locks /
1521	if (((block->lf_flags & next->lf_flags & F_POSIX) != `0`) \|\|
1522	((block->lf_flags & next->lf_flags & F_OFD_LOCK) &&
1523	(block->lf_owner != next->lf_owner) &&
1524	(NULL != block->lf_owner && NULL != next->lf_owner))) {
1525
1526	/ Check if next would be boosting block /
1527	if (task_is_importance_donor(proc_task(next->lf_owner)) &&
1528	task_is_importance_receiver_type(proc_task(block->lf_owner))) {
1529	/ Found a lock boosting block /
1530	drop_boost = FALSE;
1531	break;
1532	}
1533	}
1534	}
1535
1536	if (drop_boost) {
1537	lf_drop_assertion(block);
1538	}
1539	}
1540
1541	static void
1542	lf_boost_blocking_proc(struct lockf lock, struct* lockf *block)
1543	{
1544	task_t ltask = proc_task(lock->lf_owner);
1545	task_t btask = proc_task(block->lf_owner);
1546
1547	/*
1548	* Check if ltask can donate importance. The
1549	* check of imp_donor bit is done without holding
1550	* any lock. The value may change after you read it,
1551	* but it is ok to boost a task while someone else is
1552	* unboosting you.
1553	*
1554	* TODO: Support live inheritance on file locks.
1555	*/
1556	if (task_is_importance_donor(ltask)) {
1557	LOCKF_DEBUG(LF_DBG_IMPINH,
1558	"lf: %d: attempt to boost pid %d that holds lock %p\n",
1559	proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block);
1560
1561	if (block->lf_boosted != LF_BOOSTED &&
1562	task_is_importance_receiver_type(btask)) {
1563	lf_hold_assertion(btask, block);
1564	}
1565	lf_jump_to_queue_head(block, lock);
1566	}
1567	}
1568	#endif /* IMPORTANCE_INHERITANCE */
1569

Browse the source code of xnu/bsd/kern/kern_lockf.c