/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/ledger.h>
#include <kern/thread.h>
#include <kern/ipc_kobject.h>
#include <os/refcnt.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <sys/kdebug_triage.h>

/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

int
get_map_nentries(
	vm_map_t map)
{
	return map->hdr.nentries;
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	.memory_object_reference = vnode_pager_reference,
	.memory_object_deallocate = vnode_pager_deallocate,
	.memory_object_init = vnode_pager_init,
	.memory_object_terminate = vnode_pager_terminate,
	.memory_object_data_request = vnode_pager_data_request,
	.memory_object_data_return = vnode_pager_data_return,
	.memory_object_data_initialize = vnode_pager_data_initialize,
	.memory_object_map = vnode_pager_map,
	.memory_object_last_unmap = vnode_pager_last_unmap,
	.memory_object_backing_object = NULL,
	.memory_object_pager_name = "vnode pager"
};

typedef struct vnode_pager {
	/* mandatory generic header */
	struct memory_object vn_pgr_hdr;

	/* pager-specific */
#if MEMORY_OBJECT_HAS_REFCOUNT
#define vn_pgr_hdr_ref  vn_pgr_hdr.mo_ref
#else
	os_ref_atomic_t vn_pgr_hdr_ref;
#endif
	struct vnode    *vnode_handle;  /* vnode handle */
} *vnode_pager_t;


kern_return_t
vnode_pager_cluster_read( /* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

void
vnode_pager_cluster_write( /* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create( /* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup( /* forward */
	memory_object_t);

struct vnode *
vnode_pager_lookup_vnode( /* forward */
	memory_object_t);

ZONE_DEFINE_TYPE(vnode_pager_zone, "vnode pager structures",
    struct vnode_pager, ZC_NOENCRYPT);

#define VNODE_PAGER_NULL        ((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT   1


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL       0xffffffff
#define PAGER_INIT      0x00000001
#define PAGER_PAGEIN    0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL) == LEVEL) {printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

extern int proc_resetpcontrol(int);


extern int uiomove64(addr64_t, int, void *);
#define MAX_RUN 32

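/*
 * memory_object_control_uiomove:
 *
 * Copy data between the resident pages of the VM object backing "control"
 * and the given uio, batching up to MAX_RUN pages at a time.  A page that
 * is busy or being cleaned ends the current run (or is waited on if the
 * run is empty), and a hole in the cache terminates the copy so the caller
 * can fall back to the regular I/O path.  If "mark_dirty" is set, each page
 * touched is marked dirty and any code-signing validation is reset; if
 * "take_reference" is set, the pages are moved to the tail of the inactive
 * queue to approximate LRU behavior.
 */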
int
memory_object_control_uiomove(
	memory_object_control_t control,
	memory_object_offset_t  offset,
	void                    *uio,
	int                     start_offset,
	int                     io_requested,
	int                     mark_dirty,
	int                     take_reference)
{
	vm_object_t             object;
	vm_page_t               dst_page;
	int                     xsize;
	int                     retval = 0;
	int                     cur_run;
	int                     cur_needed;
	int                     i;
	int                     orig_offset;
	vm_page_t               page_run[MAX_RUN];
	int                     dirty_count;    /* keeps track of number of pages dirtied as part of this uiomove */

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return 0;
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->vo_copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	dirty_count = 0;
	while (io_requested && retval == 0) {
		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN) {
			cur_needed = MAX_RUN;
		}

		for (cur_run = 0; cur_run < cur_needed;) {
			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
				break;
			}


			if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run) {
					break;
				}
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			if (dst_page->vmp_laundry) {
				vm_pageout_steal_laundry(dst_page, FALSE);
			}

			if (mark_dirty) {
				if (dst_page->vmp_dirty == FALSE) {
					dirty_count++;
				}
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->vmp_cs_validated &&
				    !dst_page->vmp_cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->vmp_cs_validated = VMP_CS_ALL_FALSE;

					VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);

					pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
				}
			}
			dst_page->vmp_busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0) {
			/*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
			break;
		}
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested) {
				xsize = io_requested;
			}

			if ((retval = uiomove64((addr64_t)(((addr64_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)) << PAGE_SHIFT) + start_offset), xsize, uio))) {
				break;
			}

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {
			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++) {
				vm_page_lru(page_run[i]);
			}

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 *
			 */
			if (dst_page->vmp_clustered) {
				VM_PAGE_CONSUME_CLUSTERED(dst_page);
			}

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);
	return retval;
}


/*
 * vnode_pager_setup:
 *
 * Create and return a vnode pager (memory object) for the given vnode.
 */
memory_object_t
vnode_pager_setup(
	struct vnode    *vp,
	__unused memory_object_t pager)
{
	vnode_pager_t   vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL) {
		panic("vnode_pager_setup: vnode_object_create() failed");
	}
	return (memory_object_t)vnode_object;
}

/*
 * vnode_pager_init:
 *
 * Attach the pager to its memory object control port and set the
 * object's caching attributes.
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
#if !DEBUG
    __unused
#endif
    memory_object_cluster_size_t pg_size)
{
	vnode_pager_t   vnode_object;
	kern_return_t   kr;
	memory_object_attr_info_data_t  attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->vn_pgr_hdr.mo_control = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
		control,
		MEMORY_OBJECT_ATTRIBUTE_INFO,
		(memory_object_info_t) &attributes,
		MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS) {
		panic("vnode_pager_init: memory_object_change_attributes() failed");
	}

	return KERN_SUCCESS;
}

/*
 * vnode_pager_data_return:
 *
 * Push the given range of the memory object back out to its vnode.
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t         mem_obj,
	memory_object_offset_t  offset,
	memory_object_cluster_size_t    data_cnt,
	memory_object_offset_t  *resid_offset,
	int                     *io_error,
	__unused boolean_t      dirty,
	__unused boolean_t      kernel_copy,
	int                     upl_flags)
{
	vnode_pager_t   vnode_object;

	assertf(page_aligned(offset), "offset 0x%llx\n", offset);

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t   data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

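/*
 * vnode_pager_dirtied:
 *
 * Notify the vnode layer that the given byte range of the memory
 * object has been modified.
 */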
void
vnode_pager_dirtied(
	memory_object_t         mem_obj,
	vm_object_offset_t      s_offset,
	vm_object_offset_t      e_offset)
{
	vnode_pager_t   vnode_object;

	if (mem_obj && mem_obj->mo_pager_ops == &vnode_pager_ops) {
		vnode_object = vnode_pager_lookup(mem_obj);
		vnode_pager_was_dirtied(vnode_object->vnode_handle, s_offset, e_offset);
	}
}

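/*
 * vnode_pager_get_isinuse:
 *
 * Report whether the backing vnode is currently in use.
 */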
kern_return_t
vnode_pager_get_isinuse(
	memory_object_t mem_obj,
	uint32_t        *isinuse)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*isinuse = 1;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

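/*
 * vnode_pager_get_throttle_io_limit:
 *
 * Return the I/O throttling limit for the device backing this
 * pager's vnode.
 */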
kern_return_t
vnode_pager_get_throttle_io_limit(
	memory_object_t mem_obj,
	uint32_t        *limit)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	(void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
	return KERN_SUCCESS;
}

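/*
 * vnode_pager_get_isSSD:
 *
 * Report whether the backing vnode lives on solid-state storage.
 */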
kern_return_t
vnode_pager_get_isSSD(
	memory_object_t mem_obj,
	boolean_t       *isSSD)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

#if FBDP_DEBUG_OBJECT_NO_PAGER
kern_return_t
vnode_pager_get_forced_unmount(
	memory_object_t mem_obj,
	bool            *forced_unmount)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*forced_unmount = vnode_pager_forced_unmount(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

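/*
 * vnode_pager_get_object_size:
 *
 * Return the current size of the backing file, in bytes.
 */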
kern_return_t
vnode_pager_get_object_size(
	memory_object_t mem_obj,
	memory_object_offset_t  *length)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

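/*
 * vnode_pager_get_object_name:
 *
 * Retrieve the path and file name of the backing vnode, noting
 * whether the path had to be truncated.
 */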
kern_return_t
vnode_pager_get_object_name(
	memory_object_t         mem_obj,
	char                    *pathname,
	vm_size_t               pathname_len,
	char                    *filename,
	vm_size_t               filename_len,
	boolean_t               *truncated_path_p)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_name(vnode_object->vnode_handle,
	           pathname,
	           pathname_len,
	           filename,
	           filename_len,
	           truncated_path_p);
}

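/*
 * vnode_pager_get_object_mtime:
 *
 * Return the modification time of the backing vnode, along with the
 * modification time recorded for code-signing purposes.
 */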
kern_return_t
vnode_pager_get_object_mtime(
	memory_object_t mem_obj,
	struct timespec *mtime,
	struct timespec *cs_mtime)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_mtime(vnode_object->vnode_handle,
	           mtime,
	           cs_mtime);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t mem_obj,
	memory_object_offset_t  offset,
	int             optype)
{
	vnode_pager_t   vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap(vnode_object->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */

/*
 * vnode_pager_data_request:
 *
 * Page in data for the given offset: ask the VM layer for a suitable
 * cluster around the faulting offset, then read it in from the vnode.
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t         mem_obj,
	memory_object_offset_t  offset,
	__unused memory_object_cluster_size_t   length,
	__unused vm_prot_t      desired_access,
	memory_object_fault_info_t      fault_info)
{
	vnode_pager_t           vnode_object;
	memory_object_offset_t  base_offset;
	vm_size_t               size;
	uint32_t                io_streaming = 0;

	assertf(page_aligned(offset), "offset 0x%llx\n", offset);

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER_BYTES;
	base_offset = offset;

	if (memory_object_cluster_size(vnode_object->vn_pgr_hdr.mo_control,
	    &base_offset, &size, &io_streaming,
	    fault_info) != KERN_SUCCESS) {
		size = PAGE_SIZE;
	}

	assert(offset >= base_offset &&
	    offset < base_offset + size);

	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 * vnode_pager_reference:
 *
 * Take an additional reference on the pager.
 */
void
vnode_pager_reference(
	memory_object_t         mem_obj)
{
	vnode_pager_t   vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	os_ref_retain_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
}

/*
 * vnode_pager_deallocate:
 *
 * Release a reference on the pager; on the last release, drop the
 * vnode reference and free the pager structure.
 */
void
vnode_pager_deallocate(
	memory_object_t         mem_obj)
{
	vnode_pager_t   vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (os_ref_release_raw(&vnode_object->vn_pgr_hdr_ref, NULL) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
}

/*
 * vnode_pager_terminate:
 *
 * No termination work is needed for vnode pagers; teardown happens
 * when the last reference is dropped in vnode_pager_deallocate().
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return KERN_SUCCESS;
}

/*
 * vnode_pager_map:
 *
 * Called when the memory object is mapped; inform the UBC layer via
 * ubc_map() so it can account for the mapping.
 */
kern_return_t
vnode_pager_map(
	memory_object_t         mem_obj,
	vm_prot_t               prot)
{
	vnode_pager_t           vnode_object;
	int                     ret;
	kern_return_t           kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

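/*
 * vnode_pager_last_unmap:
 *
 * Called when the last mapping of the memory object goes away;
 * inform the UBC layer via ubc_unmap().
 */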
kern_return_t
vnode_pager_last_unmap(
	memory_object_t         mem_obj)
{
	vnode_pager_t   vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}



/*
 * vnode_pager_cluster_write:
 *
 * Push the given range of the object out to the vnode.  For msync
 * requests the range is pushed in chunks of at most
 * MAX_UPL_TRANSFER_BYTES; otherwise a single cluster aligned around
 * the target offset is paged out.
 */
void
vnode_pager_cluster_write(
	vnode_pager_t           vnode_object,
	vm_object_offset_t      offset,
	vm_size_t               cnt,
	vm_object_offset_t      *resid_offset,
	int                     *io_error,
	int                     upl_flags)
{
	vm_size_t       size;
	int             errno;

	if (upl_flags & UPL_MSYNC) {
		upl_flags |= UPL_VNODE_PAGER;

		if ((upl_flags & UPL_IOSYNC) && io_error) {
			upl_flags |= UPL_KEEPCACHED;
		}

		while (cnt) {
			size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */

			assert((upl_size_t) size == size);
			vnode_pageout(vnode_object->vnode_handle,
			    NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

			if ((upl_flags & UPL_KEEPCACHED)) {
				if ((*io_error = errno)) {
					break;
				}
			}
			cnt -= size;
			offset += size;
		}
		if (resid_offset) {
			*resid_offset = offset;
		}
	} else {
		vm_object_offset_t      vnode_size;
		vm_object_offset_t      base_offset;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = MAX_UPL_TRANSFER_BYTES;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size) {
				size = round_page(((vm_size_t)(vnode_size - base_offset)));
			}
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		assert((upl_size_t) size == size);
		vnode_pageout(vnode_object->vnode_handle,
		    NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
		    (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
	}
}


/*
 * vnode_pager_cluster_read:
 *
 * Read the given cluster in from the vnode.  If the vnode layer reports
 * the data as absent, create and abort a UPL over the range to clean up
 * any pages set up for the read, record a triage event, and return
 * KERN_FAILURE so the vm_fault() path can handle the error.
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t           vnode_object,
	vm_object_offset_t      base_offset,
	vm_object_offset_t      offset,
	uint32_t                io_streaming,
	vm_size_t               cnt)
{
	int             local_error = 0;
	int             kret;
	int             flags = 0;

	assert(!(cnt & PAGE_MASK));

	if (io_streaming) {
		flags |= UPL_IOSTREAMING;
	}

	assert((upl_size_t) cnt == cnt);
	kret = vnode_pagein(vnode_object->vnode_handle,
	    (upl_t) NULL,
	    (upl_offset_t) (offset - base_offset),
	    base_offset,
	    (upl_size_t) cnt,
	    flags,
	    &local_error);
/*
 * if(kret == PAGER_ABSENT) {
 * Need to work out the defs here, 1 corresponds to PAGER_ABSENT
 * defined in bsd/vm/vm_pager.h However, we should not be including
 * that file here it is a layering violation.
 */
	if (kret == 1) {
		int     uplflags;
		upl_t   upl = NULL;
		unsigned int    count = 0;
		kern_return_t   kr;

		uplflags = (UPL_NO_SYNC |
		    UPL_CLEAN_IN_PLACE |
		    UPL_SET_INTERNAL);
		count = 0;
		assert((upl_size_t) cnt == cnt);
		kr = memory_object_upl_request(vnode_object->vn_pgr_hdr.mo_control,
		    base_offset, (upl_size_t) cnt,
		    &upl, NULL, &count, uplflags, VM_KERN_MEMORY_NONE);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example). Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_VNODEPAGER_CLREAD_NO_UPL), 0 /* arg */);
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}

/*
 * vnode_object_create:
 *
 * Allocate and initialize a vnode pager structure for the given vnode.
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	vnode_pager_t   vnode_object;

	vnode_object = zalloc_flags(vnode_pager_zone, Z_WAITOK | Z_NOFAIL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter. We need to make sure that
	 * vm_map does not see this object as a named entry port. So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->vn_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->vn_pgr_hdr.mo_pager_ops = &vnode_pager_ops;
	vnode_object->vn_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;

	os_ref_init_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
	vnode_object->vnode_handle = vp;

	return vnode_object;
}

/*
 * vnode_pager_lookup:
 *
 * Convert a memory object into its vnode pager, asserting that the
 * object really is a vnode pager.
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t  name)
{
	vnode_pager_t   vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops);
	return vnode_object;
}


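/*
 * vnode_pager_lookup_vnode:
 *
 * Return the vnode behind a memory object, or NULL if the object is
 * not a vnode pager.
 */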
struct vnode *
vnode_pager_lookup_vnode(
	memory_object_t  name)
{
	vnode_pager_t   vnode_object;
	vnode_object = (vnode_pager_t)name;
	if (vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops) {
		return vnode_object->vnode_handle;
	} else {
		return NULL;
	}
}

/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr(vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);

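/*
 * fill_procregioninfo:
 *
 * Fill in proc_regioninfo_internal for the region containing (or
 * following) the given address in the task's map, including the backing
 * vnode and vid when the region is file-backed.  When footprint
 * reporting is in effect and the address is just past the last region,
 * a fake region summarizing purgeable/ledger-tagged memory owned by the
 * task is reported instead.  Returns 1 if a region was filled in,
 * 0 otherwise.
 */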
int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{
	vm_map_t map;
	vm_map_offset_t address = (vm_map_offset_t)arg;
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	vm_map_offset_t         start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;
	boolean_t do_region_footprint;
	int       effective_page_shift, effective_page_size;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL) {
		task_unlock(task);
		return 0;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);

	vm_map_reference(map);
	task_unlock(task);

	do_region_footprint = task_self_region_footprint();

	vm_map_lock_read(map);

	start = address;

	if (!vm_map_lookup_entry_allow_pgz(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			if (do_region_footprint &&
			    address == tmp_entry->vme_end) {
				ledger_amount_t ledger_resident;
				ledger_amount_t ledger_compressed;

				/*
				 * This request is right after the last valid
				 * memory region; instead of reporting the
				 * end of the address space, report a fake
				 * memory region to account for non-volatile
				 * purgeable and/or ledger-tagged memory
				 * owned by this task.
				 */
				task_ledgers_footprint(task->ledger,
				    &ledger_resident,
				    &ledger_compressed);
				if (ledger_resident + ledger_compressed == 0) {
					/* nothing to report */
					vm_map_unlock_read(map);
					vm_map_deallocate(map);
					return 0;
				}

				/* provide fake region for purgeable */
				pinfo->pri_offset = address;
				pinfo->pri_protection = VM_PROT_DEFAULT;
				pinfo->pri_max_protection = VM_PROT_DEFAULT;
				pinfo->pri_inheritance = VM_INHERIT_NONE;
				pinfo->pri_behavior = VM_BEHAVIOR_DEFAULT;
				pinfo->pri_user_wired_count = 0;
				pinfo->pri_user_tag = -1;
				pinfo->pri_pages_resident =
				    (uint32_t) (ledger_resident / effective_page_size);
				pinfo->pri_pages_shared_now_private = 0;
				pinfo->pri_pages_swapped_out =
				    (uint32_t) (ledger_compressed / effective_page_size);
				pinfo->pri_pages_dirtied =
				    (uint32_t) (ledger_resident / effective_page_size);
				pinfo->pri_ref_count = 1;
				pinfo->pri_shadow_depth = 0;
				pinfo->pri_share_mode = SM_PRIVATE;
				pinfo->pri_private_pages_resident =
				    (uint32_t) (ledger_resident / effective_page_size);
				pinfo->pri_shared_pages_resident = 0;
				pinfo->pri_obj_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				pinfo->pri_address = address;
				pinfo->pri_size =
				    (uint64_t) (ledger_resident + ledger_compressed);
				pinfo->pri_depth = 0;

				vm_map_unlock_read(map);
				vm_map_deallocate(map);
				return 1;
			}
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return 0;
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = VME_OFFSET(entry);
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = VME_ALIAS(entry);

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared) {
			pinfo->pri_flags |= PROC_REGION_SHARED;
		}
	}


	extended.protection = entry->protection;
	extended.user_tag = VME_ALIAS(entry);
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended, TRUE, VM_REGION_EXTENDED_INFO_COUNT);

	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) {
		extended.share_mode = SM_PRIVATE;
	}

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return 1;
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return 1;
}

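/*
 * fill_procregioninfo_onlymappedvnodes:
 *
 * Like fill_procregioninfo(), but starting at the given address, walk
 * forward until a vnode-backed mapping is found and report only that
 * region.  Returns 1 if such a mapping was found, 0 otherwise.
 */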
int
fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{
	vm_map_t map;
	vm_map_offset_t address = (vm_map_offset_t)arg;
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL) {
		task_unlock(task);
		return 0;
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry_allow_pgz(map, address, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return 0;
		}
	} else {
		entry = tmp_entry;
	}

	while (entry != vm_map_to_entry(map)) {
		*vnodeaddr = 0;
		*vid = 0;

		if (entry->is_sub_map == 0) {
			if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
				pinfo->pri_offset = VME_OFFSET(entry);
				pinfo->pri_protection = entry->protection;
				pinfo->pri_max_protection = entry->max_protection;
				pinfo->pri_inheritance = entry->inheritance;
				pinfo->pri_behavior = entry->behavior;
				pinfo->pri_user_wired_count = entry->user_wired_count;
				pinfo->pri_user_tag = VME_ALIAS(entry);

				if (entry->is_shared) {
					pinfo->pri_flags |= PROC_REGION_SHARED;
				}

				pinfo->pri_pages_resident = 0;
				pinfo->pri_pages_shared_now_private = 0;
				pinfo->pri_pages_swapped_out = 0;
				pinfo->pri_pages_dirtied = 0;
				pinfo->pri_ref_count = 0;
				pinfo->pri_shadow_depth = 0;
				pinfo->pri_share_mode = 0;

				pinfo->pri_private_pages_resident = 0;
				pinfo->pri_shared_pages_resident = 0;
				pinfo->pri_obj_id = 0;

				pinfo->pri_address = (uint64_t)entry->vme_start;
				pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
				pinfo->pri_depth = 0;

				vm_map_unlock_read(map);
				vm_map_deallocate(map);
				return 1;
			}
		}

		/* Keep searching for a vnode-backed mapping */
		entry = entry->vme_next;
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return 0;
}

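/*
 * find_region_details:
 *
 * Starting at the given offset, find the first vnode-backed mapping in
 * the task's map and return its vnode, vid, start address, and length.
 * Returns 1 on success, 0 if no such mapping exists.
 */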
int
find_region_details(task_t task, vm_map_offset_t offset,
    uintptr_t *vnodeaddr, uint32_t *vid,
    uint64_t *start, uint64_t *len)
{
	vm_map_t map;
	vm_map_entry_t tmp_entry, entry;
	int rc = 0;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL) {
		task_unlock(task);
		return 0;
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry_allow_pgz(map, offset, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			rc = 0;
			goto ret;
		}
	} else {
		entry = tmp_entry;
	}

	while (entry != vm_map_to_entry(map)) {
		*vnodeaddr = 0;
		*vid = 0;
		*start = 0;
		*len = 0;

		if (entry->is_sub_map == 0) {
			if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
				*start = entry->vme_start;
				*len = entry->vme_end - entry->vme_start;
				rc = 1;
				goto ret;
			}
		}

		entry = entry->vme_next;
	}

ret:
	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return rc;
}

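/*
 * fill_vnodeinfoforaddr:
 *
 * Walk to the bottom of the entry's shadow chain and, if the backing
 * object is paged by the vnode pager, return its vnode address and vid.
 * Returns 1 on success, 0 otherwise.
 */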
static int
fill_vnodeinfoforaddr(
	vm_map_entry_t  entry,
	uintptr_t       *vnodeaddr,
	uint32_t        *vid)
{
	vm_object_t     top_object, object;
	memory_object_t memory_object;
	memory_object_pager_ops_t pager_ops;
	kern_return_t   kr;
	int             shadow_depth;


	if (entry->is_sub_map) {
		return 0;
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = VME_OBJECT(entry);
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			    object->shadow != VM_OBJECT_NULL;
			    object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return 0;
	} else if (object->internal) {
		vm_object_unlock(object);
		return 0;
	} else if (!object->pager_ready ||
	    object->terminating ||
	    !object->alive ||
	    object->pager == NULL) {
		vm_object_unlock(object);
		return 0;
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return 0;
			}
		} else {
			vm_object_unlock(object);
			return 0;
		}
	}
	vm_object_unlock(object);
	return 1;
}

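/*
 * vnode_pager_get_object_vnode:
 *
 * Return the vnode address and vid for the given vnode pager memory
 * object.
 */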
kern_return_t
vnode_pager_get_object_vnode(
	memory_object_t mem_obj,
	uintptr_t       *vnodeaddr,
	uint32_t        *vid)
{
	vnode_pager_t   vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return KERN_SUCCESS;
	}

	return KERN_FAILURE;
}

#if CONFIG_IOSCHED
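/*
 * vnode_pager_get_object_devvp:
 *
 * Return the device vnode of the mount backing this pager's vnode
 * (used when I/O scheduling is configured).
 */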
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t mem_obj,
	uintptr_t       *devvp)
{
	struct vnode    *vp;
	uint32_t        vid;

	if (vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS) {
		return KERN_FAILURE;
	}
	*devvp = (uintptr_t)vnode_mountdevvp(vp);
	if (*devvp) {
		return KERN_SUCCESS;
	}
	return KERN_FAILURE;
}
#endif

/*
 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
 * object locked, otherwise return NULL with nothing locked.
 */

vm_object_t
find_vnode_object(
	vm_map_entry_t  entry
	)
{
	vm_object_t                     top_object, object;
	memory_object_t                 memory_object;
	memory_object_pager_ops_t       pager_ops;

	if (!entry->is_sub_map) {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = VME_OBJECT(entry);

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object &&
			    !object->internal &&
			    object->pager_ready &&
			    !object->terminating &&
			    object->alive &&
			    object->pager != NULL) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return NULL.
				 */

				if (pager_ops == &vnode_pager_ops) {
					return object;  /* we return with the object locked */
				}
			}

			vm_object_unlock(object);
		}
	}

	return VM_OBJECT_NULL;
}
