vnode_pager.c source code [xnu/bsd/vm/vnode_pager.c]

1	/*
2	* Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* Mach Operating System
30	* Copyright (c) 1987 Carnegie-Mellon University
31	* All rights reserved. The CMU software License Agreement specifies
32	* the terms and conditions for use and redistribution.
33	*/
34	/*
35	* File: vnode_pager.c
36	*
37	* "Swap" pager that pages to/from vnodes. Also
38	* handles demand paging from files.
39	*
40	*/
41
42	#include <mach/boolean.h>
43	#include <sys/param.h>
44	#include <sys/systm.h>
45	#include <sys/user.h>
46	#include <sys/proc.h>
47	#include <sys/kauth.h>
48	#include <sys/buf.h>
49	#include <sys/uio.h>
50	#include <sys/vnode_internal.h>
51	#include <sys/namei.h>
52	#include <sys/mount_internal.h> /* needs internal due to fhandle_t */
53	#include <sys/ubc_internal.h>
54	#include <sys/lock.h>
55	#include <sys/disk.h> /* For DKIOC calls */
56
57	#include <mach/mach_types.h>
58	#include <mach/memory_object_types.h>
59	#include <mach/vm_map.h>
60	#include <mach/mach_vm.h>
61	#include <mach/upl.h>
62	#include <mach/sdt.h>
63
64	#include <vm/vm_map.h>
65	#include <vm/vm_kern.h>
66	#include <kern/zalloc.h>
67	#include <libkern/libkern.h>
68
69	#include <vm/vnode_pager.h>
70	#include <vm/vm_pageout.h>
71
72	#include <kern/assert.h>
73	#include <sys/kdebug.h>
74	#include <nfs/nfs.h>
75
76	#include <vm/vm_protos.h>
77
78	#include <sys/kdebug_triage.h>
79	#include <vfs/vfs_disk_conditioner.h>
80
81	void
82	vnode_pager_throttle(void)
83	{
84	if (current_uthread()->uu_lowpri_window) {
85	throttle_lowpri_io(sleep_amount: `1`);
86	}
87	}
88
89	boolean_t
90	vnode_pager_isSSD(vnode_t vp)
91	{
92	return disk_conditioner_mount_is_ssd(vp->v_mount);
93	}
94
#if FBDP_DEBUG_OBJECT_NO_PAGER
/*
 * vnode_pager_forced_unmount:
 *	Returns true when "vp" has a mount (vnode_mount() is non-NULL) and
 *	vfs_isforce() is non-zero for that mount; false otherwise.
 */
bool
vnode_pager_forced_unmount(vnode_t vp)
{
	mount_t mp = vnode_mount(vp);

	return (mp != NULL) && vfs_isforce(mp);
}
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */
107
#if CONFIG_IOSCHED
/*
 * vnode_pager_issue_reprioritize_io:
 *	Issue a DKIOCSETTIER ioctl on "devvp" for a single extent.
 *
 *	devvp		vnode the ioctls are issued against
 *	blkno		start of the extent, in units of the block size that
 *			DKIOCGETBLOCKSIZE reports for devvp
 *	len		stored unchanged in the extent's length field
 *	priority	requested tier; truncated to 8 bits to fit
 *			dk_set_tier_t.tier
 *
 *	Best effort: nothing is issued if the block size query fails, and
 *	the result of DKIOCSETTIER is not reported to the caller.
 */
void
vnode_pager_issue_reprioritize_io(struct vnode *devvp, uint64_t blkno, uint32_t len, int priority)
{
	u_int32_t       blocksize = 0;
	dk_extent_t     extent;
	dk_set_tier_t   set_tier;

	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blocksize, 0, vfs_context_kernel()) != 0) {
		return;
	}

	memset(&extent, 0, sizeof(extent));
	memset(&set_tier, 0, sizeof(set_tier));

	/* the ioctl takes a byte offset, so scale the block number */
	extent.offset = blkno * (u_int64_t) blocksize;
	extent.length = len;

	set_tier.extents = &extent;
	set_tier.extentsCount = 1;
	set_tier.tier = (uint8_t)priority;

	/* failure here is deliberately ignored: there is nobody to report it to */
	(void)VNOP_IOCTL(devvp, DKIOCSETTIER, (caddr_t)&set_tier, 0, vfs_context_kernel());
}
#endif
136
137	void
138	vnode_pager_was_dirtied(
139	struct vnode *vp,
140	vm_object_offset_t s_offset,
141	vm_object_offset_t e_offset)
142	{
143	cluster_update_state(vp, s_offset, e_offset, TRUE);
144	}
145
146	uint32_t
147	vnode_pager_isinuse(struct vnode *vp)
148	{
149	if (vp->v_usecount > vp->v_kusecount) {
150	return `1`;
151	}
152	return `0`;
153	}
154
/*
 * vnode_pager_return_throttle_io_limit:
 *	Thin wrapper: passes "vp" and "limit" straight through to
 *	cluster_throttle_io_limit() and returns its result.
 *
 *	vp	vnode to query
 *	limit	handed unchanged to cluster_throttle_io_limit(); this
 *		function neither reads nor writes through it itself
 */
uint32_t
vnode_pager_return_throttle_io_limit(struct vnode *vp, uint32_t *limit)
{
	return cluster_throttle_io_limit(vp, limit);
}
160
161	vm_object_offset_t
162	vnode_pager_get_filesize(struct vnode *vp)
163	{
164	return (vm_object_offset_t) ubc_getsize(vp);
165	}
166
167	extern int safe_getpath(struct vnode dvp, char* leafname, char* path, int* _len, int *truncated_path);
168
169	kern_return_t
170	vnode_pager_get_name(
171	struct vnode *vp,
172	char *pathname,
173	vm_size_t pathname_len,
174	char *filename,
175	vm_size_t filename_len,
176	boolean_t *truncated_path_p)
177	{
178	*truncated_path_p = FALSE;
179	if (pathname != NULL) {
180	/ get the path name /
181	safe_getpath(dvp: vp, NULL,
182	path: pathname, len: (int) pathname_len,
183	truncated_path: truncated_path_p);
184	}
185	if ((pathname == NULL \|\| *truncated_path_p) &&
186	filename != NULL) {
187	/ get the file name /
188	const char *name;
189
190	name = vnode_getname_printable(vp);
191	strlcpy(dst: filename, src: name, n: (size_t) filename_len);
192	vnode_putname_printable(name);
193	}
194	return KERN_SUCCESS;
195	}
196
197	kern_return_t
198	vnode_pager_get_mtime(
199	struct vnode *vp,
200	struct timespec *current_mtime,
201	struct timespec *cs_mtime)
202	{
203	vnode_mtime(vp, current_mtime, vfs_context_current());
204	if (cs_mtime != NULL) {
205	ubc_get_cs_mtime(vp, cs_mtime);
206	}
207	return KERN_SUCCESS;
208	}
209
210	kern_return_t
211	vnode_pager_get_cs_blobs(
212	struct vnode *vp,
213	void **blobs)
214	{
215	*blobs = ubc_get_cs_blobs(vp);
216	return KERN_SUCCESS;
217	}
218
219	/*
220	* vnode_trim:
221	* Used to call the DKIOCUNMAP ioctl on the underlying disk device for the specified vnode.
222	* Trims the region at offset bytes into the file, for length bytes.
223	*
224	* Care must be taken to ensure that the vnode is sufficiently reference counted at the time this
225	* function is called; no iocounts or usecounts are taken on the vnode.
226	* This function is non-idempotent in error cases; We cannot un-discard the blocks if only some of them
227	* are successfully discarded.
228	*/
229	u_int32_t
230	vnode_trim(
231	struct vnode *vp,
232	off_t offset,
233	size_t length)
234	{
235	daddr64_t io_blockno; / Block number corresponding to the start of the extent /
236	size_t io_bytecount; / Number of bytes in current extent for the specified range /
237	size_t trimmed = `0`;
238	off_t current_offset = offset;
239	size_t remaining_length = length;
240	int error = `0`;
241	u_int32_t blocksize = `0`;
242	struct vnode *devvp;
243	dk_extent_t extent;
244	dk_unmap_t unmap;
245
246
247	/ Get the underlying device vnode /
248	devvp = vp->v_mount->mnt_devvp;
249
250	/ Figure out the underlying device block size /
251	error = VNOP_IOCTL(vp: devvp, DKIOCGETBLOCKSIZE, data: (caddr_t)&blocksize, fflag: `0`, ctx: vfs_context_kernel());
252	if (error) {
253	goto trim_exit;
254	}
255
256	/*
257	* We may not get the entire range from offset -> offset+length in a single
258	* extent from the blockmap call. Keep looping/going until we are sure we've hit
259	* the whole range or if we encounter an error.
260	*/
261	while (trimmed < length) {
262	/*
263	* VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
264	* specified offset. It returns blocks in contiguous chunks, so if the logical range is
265	* broken into multiple extents, it must be called multiple times, increasing the offset
266	* in each call to ensure that the entire range is covered.
267	*/
268	error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
269	&io_blockno, &io_bytecount, NULL, VNODE_READ \| VNODE_BLOCKMAP_NO_TRACK, NULL);
270
271	if (error) {
272	goto trim_exit;
273	}
274	/*
275	* We have a contiguous run. Prepare & issue the ioctl for the device.
276	* the DKIOCUNMAP ioctl takes offset in bytes from the start of the device.
277	*/
278	memset(s: &extent, c: `0`, n: sizeof(dk_extent_t));
279	memset(s: &unmap, c: `0`, n: sizeof(dk_unmap_t));
280	extent.offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
281	extent.length = io_bytecount;
282	unmap.extents = &extent;
283	unmap.extentsCount = `1`;
284	error = VNOP_IOCTL(vp: devvp, DKIOCUNMAP, data: (caddr_t)&unmap, fflag: `0`, ctx: vfs_context_kernel());
285
286	if (error) {
287	goto trim_exit;
288	}
289	remaining_length = remaining_length - io_bytecount;
290	trimmed = trimmed + io_bytecount;
291	current_offset = current_offset + io_bytecount;
292	}
293	trim_exit:
294
295	return error;
296	}
297
298	pager_return_t
299	vnode_pageout(struct vnode *vp,
300	upl_t upl,
301	upl_offset_t upl_offset,
302	vm_object_offset_t f_offset,
303	upl_size_t size,
304	int flags,
305	int *errorp)
306	{
307	int result = PAGER_SUCCESS;
308	int error = `0`;
309	int error_ret = `0`;
310	daddr64_t blkno;
311	int isize;
312	int pg_index;
313	int base_index;
314	upl_offset_t offset;
315	upl_page_info_t *pl;
316	vfs_context_t ctx = vfs_context_current(); / pager context /
317
318	isize = (int)size;
319
320	/*
321	* This call is non-blocking and does not ever fail but it can
322	* only be made when there is other explicit synchronization
323	* with reclaiming of the vnode which, in this path, is provided
324	* by the paging in progress counter.
325	*
326	* In addition, this may also be entered via explicit ubc_msync
327	* calls or vm_swapfile_io where the existing iocount provides
328	* the necessary synchronization. Ideally we would not take an
329	* additional iocount here in the cases where an explcit iocount
330	* has already been taken but this call doesn't cause a deadlock
331	* as other forms of vnode_get* might if this thread has already
332	* taken an iocount.
333	*/
334	error = vnode_getalways_from_pager(vp);
335	if (error != `0`) {
336	/ This can't happen /
337	panic("vnode_getalways returned %d for vp %p", error, vp);
338	}
339
340	if (isize <= `0`) {
341	result = PAGER_ERROR;
342	error_ret = EINVAL;
343	goto out;
344	}
345
346	if (UBCINFOEXISTS(vp) == `0`) {
347	result = PAGER_ERROR;
348	error_ret = EINVAL;
349
350	if (upl && !(flags & UPL_NOCOMMIT)) {
351	ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
352	}
353	goto out;
354	}
355	if (!(flags & UPL_VNODE_PAGER)) {
356	/*
357	* This is a pageout from the default pager,
358	* just go ahead and call vnop_pageout since
359	* it has already sorted out the dirty ranges
360	*/
361	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
362	(MACHDBG_CODE(DBG_MACH_VM, `1`)) \| DBG_FUNC_START,
363	size, `1`, `0`, `0`, `0`);
364
365	if ((error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
366	(size_t)size, flags, ctx))) {
367	result = PAGER_ERROR;
368	}
369
370	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
371	(MACHDBG_CODE(DBG_MACH_VM, `1`)) \| DBG_FUNC_END,
372	size, `1`, `0`, `0`, `0`);
373
374	goto out;
375	}
376	if (upl == NULL) {
377	int request_flags;
378
379	if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_PAGEOUTV2) {
380	/*
381	* filesystem has requested the new form of VNOP_PAGEOUT for file
382	* backed objects... we will not grab the UPL befofe calling VNOP_PAGEOUT...
383	* it is the fileystem's responsibility to grab the range we're denoting
384	* via 'f_offset' and 'size' into a UPL... this allows the filesystem to first
385	* take any locks it needs, before effectively locking the pages into a UPL...
386	*/
387	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
388	(MACHDBG_CODE(DBG_MACH_VM, `1`)) \| DBG_FUNC_START,
389	size, (int)f_offset, `0`, `0`, `0`);
390
391	if ((error_ret = VNOP_PAGEOUT(vp, NULL, upl_offset, (off_t)f_offset,
392	size, flags, ctx))) {
393	result = PAGER_ERROR;
394	}
395	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
396	(MACHDBG_CODE(DBG_MACH_VM, `1`)) \| DBG_FUNC_END,
397	size, `0`, `0`, `0`, `0`);
398
399	goto out;
400	}
401	if (flags & UPL_MSYNC) {
402	request_flags = UPL_UBC_MSYNC \| UPL_RET_ONLY_DIRTY;
403	} else {
404	request_flags = UPL_UBC_PAGEOUT \| UPL_RET_ONLY_DIRTY;
405	}
406
407	if (ubc_create_upl_kernel(vp, f_offset, size, &upl, &pl, request_flags, VM_KERN_MEMORY_FILE) != KERN_SUCCESS) {
408	result = PAGER_ERROR;
409	error_ret = EINVAL;
410	goto out;
411	}
412	upl_offset = `0`;
413	} else {
414	pl = ubc_upl_pageinfo(upl);
415	}
416
417	/*
418	* Ignore any non-present pages at the end of the
419	* UPL so that we aren't looking at a upl that
420	* may already have been freed by the preceeding
421	* aborts/completions.
422	*/
423	base_index = upl_offset / PAGE_SIZE;
424
425	for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) {
426	if (upl_page_present(upl: pl, index: --pg_index)) {
427	break;
428	}
429	if (pg_index == base_index) {
430	/*
431	* no pages were returned, so release
432	* our hold on the upl and leave
433	*/
434	if (!(flags & UPL_NOCOMMIT)) {
435	ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY);
436	}
437
438	goto out;
439	}
440	}
441	isize = ((pg_index + `1`) - base_index) * PAGE_SIZE;
442
443	/*
444	* we come here for pageouts to 'real' files and
445	* for msyncs... the upl may not contain any
446	* dirty pages.. it's our responsibility to sort
447	* through it and find the 'runs' of dirty pages
448	* to call VNOP_PAGEOUT on...
449	*/
450
451	if (ubc_getsize(vp) == `0`) {
452	/*
453	* if the file has been effectively deleted, then
454	* we need to go through the UPL and invalidate any
455	* buffer headers we might have that reference any
456	* of it's pages
457	*/
458	for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
459	if (vp->v_tag == VT_NFS) {
460	/ check with nfs if page is OK to drop /
461	error = nfs_buf_page_inval(vp, (off_t)f_offset);
462	} else {
463	blkno = ubc_offtoblk(vp, (off_t)f_offset);
464	error = buf_invalblkno(vp, lblkno: blkno, flags: `0`);
465	}
466	if (error) {
467	if (!(flags & UPL_NOCOMMIT)) {
468	ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
469	}
470	if (error_ret == `0`) {
471	error_ret = error;
472	}
473	result = PAGER_ERROR;
474	} else if (!(flags & UPL_NOCOMMIT)) {
475	ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
476	}
477	f_offset += PAGE_SIZE;
478	}
479	goto out;
480	}
481
482	offset = upl_offset;
483	pg_index = base_index;
484
485	while (isize) {
486	int xsize;
487	int num_of_pages;
488
489	if (!upl_page_present(upl: pl, index: pg_index)) {
490	/*
491	* we asked for RET_ONLY_DIRTY, so it's possible
492	* to get back empty slots in the UPL
493	* just skip over them
494	*/
495	f_offset += PAGE_SIZE;
496	offset += PAGE_SIZE;
497	isize -= PAGE_SIZE;
498	pg_index++;
499
500	continue;
501	}
502	if (!upl_dirty_page(upl: pl, index: pg_index)) {
503	/*
504	* if the page is not dirty and reached here it is
505	* marked precious or it is due to invalidation in
506	* memory_object_lock request as part of truncation
507	* We also get here from vm_object_terminate()
508	* So all you need to do in these
509	* cases is to invalidate incore buffer if it is there
510	* Note we must not sleep here if the buffer is busy - that is
511	* a lock inversion which causes deadlock.
512	*/
513	if (vp->v_tag == VT_NFS) {
514	/ check with nfs if page is OK to drop /
515	error = nfs_buf_page_inval(vp, (off_t)f_offset);
516	} else {
517	blkno = ubc_offtoblk(vp, (off_t)f_offset);
518	error = buf_invalblkno(vp, lblkno: blkno, flags: `0`);
519	}
520	if (error) {
521	if (!(flags & UPL_NOCOMMIT)) {
522	ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
523	}
524	if (error_ret == `0`) {
525	error_ret = error;
526	}
527	result = PAGER_ERROR;
528	} else if (!(flags & UPL_NOCOMMIT)) {
529	ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
530	}
531	f_offset += PAGE_SIZE;
532	offset += PAGE_SIZE;
533	isize -= PAGE_SIZE;
534	pg_index++;
535
536	continue;
537	}
538	num_of_pages = `1`;
539	xsize = isize - PAGE_SIZE;
540
541	while (xsize) {
542	if (!upl_dirty_page(upl: pl, index: pg_index + num_of_pages)) {
543	break;
544	}
545	num_of_pages++;
546	xsize -= PAGE_SIZE;
547	}
548	xsize = num_of_pages * PAGE_SIZE;
549
550	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
551	(MACHDBG_CODE(DBG_MACH_VM, `1`)) \| DBG_FUNC_START,
552	xsize, (int)f_offset, `0`, `0`, `0`);
553
554	if ((error = VNOP_PAGEOUT(vp, upl, offset, (off_t)f_offset,
555	xsize, flags, ctx))) {
556	if (error_ret == `0`) {
557	error_ret = error;
558	}
559	result = PAGER_ERROR;
560	}
561	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
562	(MACHDBG_CODE(DBG_MACH_VM, `1`)) \| DBG_FUNC_END,
563	xsize, `0`, `0`, `0`, `0`);
564
565	f_offset += xsize;
566	offset += xsize;
567	isize -= xsize;
568	pg_index += num_of_pages;
569	}
570	out:
571	vnode_put_from_pager(vp);
572
573	if (errorp) {
574	*errorp = error_ret;
575	}
576
577	return result;
578	}
579
580
581	pager_return_t
582	vnode_pagein(
583	struct vnode *vp,
584	upl_t upl,
585	upl_offset_t upl_offset,
586	vm_object_offset_t f_offset,
587	upl_size_t size,
588	int flags,
589	int *errorp)
590	{
591	upl_page_info_t *pl;
592	int result = PAGER_SUCCESS;
593	int error = `0`;
594	int pages_in_upl;
595	int start_pg;
596	int last_pg;
597	int first_pg;
598	int xsize;
599	int must_commit = `1`;
600	int ignore_valid_page_check = `0`;
601
602	if (flags & UPL_NOCOMMIT) {
603	must_commit = `0`;
604	}
605
606	if (flags & UPL_IGNORE_VALID_PAGE_CHECK) {
607	ignore_valid_page_check = `1`;
608	}
609
610	/*
611	* This call is non-blocking and does not ever fail but it can
612	* only be made when there is other explicit synchronization
613	* with reclaiming of the vnode which, in this path, is provided
614	* by the paging in progress counter.
615	*
616	* In addition, this may also be entered via vm_swapfile_io
617	* where the existing iocount provides the necessary synchronization.
618	* Ideally we would not take an additional iocount here in the cases
619	* where an explcit iocount has already been taken but this call
620	* doesn't cause a deadlock as other forms of vnode_get* might if
621	* this thread has already taken an iocount.
622	*/
623	error = vnode_getalways_from_pager(vp);
624	if (error != `0`) {
625	/ This can't happen /
626	panic("vnode_getalways returned %d for vp %p", error, vp);
627	}
628
629	if (UBCINFOEXISTS(vp) == `0`) {
630	result = PAGER_ERROR;
631	error = PAGER_ERROR;
632
633	if (upl && must_commit) {
634	ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY \| UPL_ABORT_ERROR);
635	}
636
637	ktriage_record(thread_id: thread_tid(thread: current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_VNODEPAGEIN_NO_UBCINFO), arg: `0` / arg /);
638	goto out;
639	}
640	if (upl == (upl_t)NULL) {
641	flags &= ~UPL_NOCOMMIT;
642
643	if (size > MAX_UPL_SIZE_BYTES) {
644	result = PAGER_ERROR;
645	error = PAGER_ERROR;
646	goto out;
647	}
648	if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_PAGEINV2) {
649	/*
650	* filesystem has requested the new form of VNOP_PAGEIN for file
651	* backed objects... we will not grab the UPL befofe calling VNOP_PAGEIN...
652	* it is the fileystem's responsibility to grab the range we're denoting
653	* via 'f_offset' and 'size' into a UPL... this allows the filesystem to first
654	* take any locks it needs, before effectively locking the pages into a UPL...
655	* so we pass a NULL into the filesystem instead of a UPL pointer... the 'upl_offset'
656	* is used to identify the "must have" page in the extent... the filesystem is free
657	* to clip the extent to better fit the underlying FS blocksize if it desires as
658	* long as it continues to include the "must have" page... 'f_offset' + 'upl_offset'
659	* identifies that page
660	*/
661	if ((error = VNOP_PAGEIN(vp, NULL, upl_offset, (off_t)f_offset,
662	size, flags, vfs_context_current()))) {
663	set_thread_pagein_error(current_thread(), error);
664	result = PAGER_ERROR;
665	error = PAGER_ERROR;
666	ktriage_record(thread_id: thread_tid(thread: current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_VNODEPAGEIN_FSPAGEIN_FAIL), arg: `0` / arg /);
667	}
668	goto out;
669	}
670	ubc_create_upl_kernel(vp, f_offset, size, &upl, &pl, UPL_UBC_PAGEIN \| UPL_RET_ONLY_ABSENT, VM_KERN_MEMORY_FILE);
671
672	if (upl == (upl_t)NULL) {
673	result = PAGER_ABSENT;
674	error = PAGER_ABSENT;
675	ktriage_record(thread_id: thread_tid(thread: current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_VNODEPAGEIN_NO_UPL), arg: `0` / arg /);
676	goto out;
677	}
678	ubc_upl_range_needed(upl, upl_offset / PAGE_SIZE, `1`);
679
680	upl_offset = `0`;
681	first_pg = `0`;
682
683	/*
684	* if we get here, we've created the upl and
685	* are responsible for commiting/aborting it
686	* regardless of what the caller has passed in
687	*/
688	must_commit = `1`;
689	} else {
690	pl = ubc_upl_pageinfo(upl);
691	first_pg = upl_offset / PAGE_SIZE;
692	}
693	pages_in_upl = size / PAGE_SIZE;
694	DTRACE_VM2(pgpgin, int, pages_in_upl, (uint64_t *), NULL);
695
696	/*
697	* before we start marching forward, we must make sure we end on
698	* a present page, otherwise we will be working with a freed
699	* upl
700	*/
701	for (last_pg = pages_in_upl - `1`; last_pg >= first_pg; last_pg--) {
702	if (upl_page_present(upl: pl, index: last_pg)) {
703	break;
704	}
705	if (last_pg == first_pg) {
706	/*
707	* empty UPL, no pages are present
708	*/
709	if (must_commit) {
710	ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
711	}
712	goto out;
713	}
714	}
715	pages_in_upl = last_pg + `1`;
716	last_pg = first_pg;
717
718	while (last_pg < pages_in_upl) {
719	/*
720	* skip over missing pages...
721	*/
722	for (; last_pg < pages_in_upl; last_pg++) {
723	if (upl_page_present(upl: pl, index: last_pg)) {
724	break;
725	}
726	}
727
728	if (ignore_valid_page_check == `1`) {
729	start_pg = last_pg;
730	} else {
731	/*
732	* skip over 'valid' pages... we don't want to issue I/O for these
733	*/
734	for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
735	if (!upl_valid_page(upl: pl, index: last_pg)) {
736	break;
737	}
738	}
739	}
740
741	if (last_pg > start_pg) {
742	/*
743	* we've found a range of valid pages
744	* if we've got COMMIT responsibility
745	* commit this range of pages back to the
746	* cache unchanged
747	*/
748	xsize = (last_pg - start_pg) * PAGE_SIZE;
749
750	if (must_commit) {
751	ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, xsize, UPL_ABORT_FREE_ON_EMPTY);
752	}
753	}
754	if (last_pg == pages_in_upl) {
755	/*
756	* we're done... all pages that were present
757	* have either had I/O issued on them or
758	* were aborted unchanged...
759	*/
760	break;
761	}
762
763	if (!upl_page_present(upl: pl, index: last_pg)) {
764	/*
765	* we found a range of valid pages
766	* terminated by a missing page...
767	* bump index to the next page and continue on
768	*/
769	last_pg++;
770	continue;
771	}
772	/*
773	* scan from the found invalid page looking for a valid
774	* or non-present page before the end of the upl is reached, if we
775	* find one, then it will be the last page of the request to
776	* 'cluster_io'
777	*/
778	for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
779	if ((!ignore_valid_page_check && upl_valid_page(upl: pl, index: last_pg)) \|\| !upl_page_present(upl: pl, index: last_pg)) {
780	break;
781	}
782	}
783	if (last_pg > start_pg) {
784	int xoff;
785	xsize = (last_pg - start_pg) * PAGE_SIZE;
786	xoff = start_pg * PAGE_SIZE;
787
788	if ((error = VNOP_PAGEIN(vp, upl, (upl_offset_t) xoff,
789	(off_t)f_offset + xoff,
790	xsize, flags, vfs_context_current()))) {
791	/*
792	* Usually this UPL will be aborted/committed by the lower cluster layer.
793	*
794	* a) In the case of decmpfs, however, we may return an error (EAGAIN) to avoid
795	* a deadlock with another thread already inflating the file.
796	*
797	* b) In the case of content protection, EPERM is a valid error and we should respect it.
798	*
799	* In those cases, we must take care of our UPL at this layer itself.
800	*/
801	if (must_commit) {
802	if (error == EAGAIN) {
803	ubc_upl_abort_range(upl, (upl_offset_t) xoff, xsize, UPL_ABORT_FREE_ON_EMPTY \| UPL_ABORT_RESTART);
804	}
805	if (error == EPERM) {
806	ubc_upl_abort_range(upl, (upl_offset_t) xoff, xsize, UPL_ABORT_FREE_ON_EMPTY \| UPL_ABORT_ERROR);
807	}
808	}
809	set_thread_pagein_error(current_thread(), error);
810	result = PAGER_ERROR;
811	error = PAGER_ERROR;
812	ktriage_record(thread_id: thread_tid(thread: current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_VNODEPAGEIN_FSPAGEIN_FAIL), arg: `0` / arg /);
813	}
814	}
815	}
816	out:
817	vnode_put_from_pager(vp);
818
819	if (errorp) {
820	*errorp = result;
821	}
822
823	return error;
824	}
825

Browse the source code of xnu/bsd/vm/vnode_pager.c