1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/errno.h>
30
31#include <mach/mach_types.h>
32#include <mach/mach_traps.h>
33#include <mach/host_priv.h>
34#include <mach/kern_return.h>
35#include <mach/memory_object_control.h>
36#include <mach/memory_object_types.h>
37#include <mach/port.h>
38#include <mach/policy.h>
39#include <mach/upl.h>
40#include <mach/thread_act.h>
41
42#include <kern/assert.h>
43#include <kern/host.h>
44#include <kern/ledger.h>
45#include <kern/thread.h>
46#include <kern/ipc_kobject.h>
47
48#include <ipc/ipc_port.h>
49#include <ipc/ipc_space.h>
50
51#include <vm/vm_map.h>
52#include <vm/vm_pageout.h>
53#include <vm/memory_object.h>
54#include <vm/vm_pageout.h>
55#include <vm/vm_protos.h>
56#include <vm/vm_purgeable_internal.h>
57
58
/* BSD VM COMPONENT INTERFACES */
/* Returns the entry count recorded in the map's header (defined below). */
int
get_map_nentries(
 vm_map_t);

/*
 * NOTE(review): no definition of get_map_start() is visible in this file;
 * mach_get_vm_start() is what is defined below. Confirm whether this
 * prototype is still needed.
 */
vm_offset_t
get_map_start(
 vm_map_t);

/*
 * NOTE(review): no definition of get_map_end() is visible in this file;
 * mach_get_vm_end() is what is defined below. Confirm whether this
 * prototype is still needed.
 */
vm_offset_t
get_map_end(
 vm_map_t);
71
72/*
73 *
74 */
75int
76get_map_nentries(
77 vm_map_t map)
78{
79 return(map->hdr.nentries);
80}
81
82mach_vm_offset_t
83mach_get_vm_start(vm_map_t map)
84{
85 return( vm_map_first_entry(map)->vme_start);
86}
87
88mach_vm_offset_t
89mach_get_vm_end(vm_map_t map)
90{
91 return( vm_map_last_entry(map)->vme_end);
92}
93
94/*
95 * BSD VNODE PAGER
96 */
97
/*
 * Operations table for the vnode pager.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of struct memory_object_pager_ops (declared elsewhere).
 *
 * Code in this file also uses the address of this table as an identity tag:
 * a memory object whose mo_pager_ops points here is treated as a vnode
 * pager (see vnode_pager_lookup() and the vnode_pager_get_*() accessors).
 */
const struct memory_object_pager_ops vnode_pager_ops = {
 vnode_pager_reference,
 vnode_pager_deallocate,
 vnode_pager_init,
 vnode_pager_terminate,
 vnode_pager_data_request,
 vnode_pager_data_return,
 vnode_pager_data_initialize,
 vnode_pager_data_unlock,
 vnode_pager_synchronize,
 vnode_pager_map,
 vnode_pager_last_unmap,
 NULL, /* data_reclaim: not provided by this pager */
 "vnode pager" /* NOTE(review): presumably the pager's name string - confirm against the struct */
};
113
/*
 * Per-vnode pager object.  Instances are allocated from vnode_pager_zone by
 * vnode_object_create() and freed by vnode_pager_deallocate().
 */
typedef struct vnode_pager {
 /*
  * mandatory generic header; it is the first member, and
  * vnode_pager_lookup() relies on that when it casts a memory_object_t
  * straight to a vnode_pager_t
  */
 struct memory_object vn_pgr_hdr;

 /* pager-specific */
 /*
  * starts at 1 in vnode_object_create(); adjusted atomically by
  * vnode_pager_reference() / vnode_pager_deallocate(), which frees the
  * structure when the count drops to 0
  */
 unsigned int ref_count; /* reference count */
 /* may be NULL: vnode_pager_deallocate() and vnode_pager_get_object_vnode() both check */
 struct vnode *vnode_handle; /* vnode handle */
} *vnode_pager_t;
122
123
/*
 * Page-in helper.  Judging from the definition later in this file the
 * arguments are: pager, base offset of the cluster, offset actually
 * requested, io_streaming hint, cluster size in bytes.
 */
kern_return_t
vnode_pager_cluster_read( /* forward */
 vnode_pager_t,
 vm_object_offset_t,
 vm_object_offset_t,
 uint32_t,
 vm_size_t);

/*
 * Page-out helper.  Judging from the definition later in this file the
 * arguments are: pager, offset, byte count, optional out resid offset,
 * optional out I/O error, UPL flags.
 */
void
vnode_pager_cluster_write( /* forward */
 vnode_pager_t,
 vm_object_offset_t,
 vm_size_t,
 vm_object_offset_t *,
 int *,
 int);


/* Allocates and initializes a vnode_pager for the given vnode. */
vnode_pager_t
vnode_object_create( /* forward */
 struct vnode *);

/* Converts a memory_object_t into its vnode_pager_t (asserts it is a vnode pager). */
vnode_pager_t
vnode_pager_lookup( /* forward */
 memory_object_t);

/* Returns the vnode behind a memory object, or NULL if it is not a vnode pager. */
struct vnode *
vnode_pager_lookup_vnode( /* forward */
 memory_object_t);
153
/* Zone that struct vnode_pager instances come from; created in vnode_pager_bootstrap(). */
zone_t vnode_pager_zone;


/* Null value of type vnode_pager_t. */
#define VNODE_PAGER_NULL ((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
/* NOTE(review): in the visible code this is only mentioned in a commented-out line of vnode_pager_init(). */
#define CLUSTER_SHIFT 1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
/* vnode_pager_bootstrap() passes MAX_VNODE * sizeof(struct vnode_pager) to zinit() when creating the zone. */
#define MAX_VNODE 10000
164
165
#if DEBUG
/* Bit mask of PAGER_* categories whose debug output is enabled. */
int pagerdebug=0;

#define PAGER_ALL		0xffffffff
#define PAGER_INIT		0x00000001
#define PAGER_PAGEIN		0x00000002

/*
 * Print the parenthesized printf argument list A when every bit of LEVEL is
 * set in pagerdebug.
 *
 * Wrapped in do { } while (0) so that "PAGER_DEBUG(...);" is exactly one
 * statement and stays safe inside an unbraced if/else; LEVEL is
 * parenthesized so that an expression argument cannot change the precedence
 * of the mask test.
 */
#define PAGER_DEBUG(LEVEL, A)						\
	do {								\
		if ((pagerdebug & (LEVEL)) == (LEVEL)) {		\
			printf A;					\
		}							\
	} while (0)
#else
/* Debug output compiled out; still expands to a single well-formed statement. */
#define PAGER_DEBUG(LEVEL, A)	do { } while (0)
#endif
177
/* NOTE(review): not referenced anywhere in the visible part of this file. */
extern int proc_resetpcontrol(int);


/*
 * Used by memory_object_control_uiomove(), which passes a physical address,
 * a byte count and its opaque uio pointer, and treats a non-zero return as
 * an error.
 */
extern int uiomove64(addr64_t, int, void *);
/* Maximum number of pages memory_object_control_uiomove() gathers per batch. */
#define MAX_RUN 32
183
/*
 * memory_object_control_uiomove:
 *
 * Move data between the caller's uio and pages that are already resident in
 * the VM object behind "control", by calling uiomove64() on each page's
 * physical address.  Pages are processed in batches of at most MAX_RUN:
 * each batch is collected and marked busy with the object locked, copied
 * with the object unlocked, and then woken up with the object locked again.
 *
 * Parameters:
 *  control         identifies the VM object
 *  offset          object offset of the first page; advanced by one page for
 *                  every page collected
 *  uio             opaque pointer handed through to uiomove64()
 *  start_offset    byte offset inside the first page; 0 for later pages
 *  io_requested    number of bytes still to be moved
 *  mark_dirty      non-zero: mark every collected page dirty (and drop
 *                  code-signing validation where needed) before copying
 *  take_reference  non-zero: allow the pages to be passed to vm_page_lru()
 *
 * Returns 0 when there is no VM object, when mark_dirty is set but the object
 * has a copy object, or when no uiomove64() call failed; otherwise the first
 * non-zero value returned by uiomove64().  The loop also stops, still
 * returning 0, at the first page that is not resident.  The number of bytes
 * moved is not returned (presumably the caller reads it from the uio -
 * TODO confirm).
 */
int
memory_object_control_uiomove(
 memory_object_control_t control,
 memory_object_offset_t offset,
 void * uio,
 int start_offset,
 int io_requested,
 int mark_dirty,
 int take_reference)
{
 vm_object_t object;
 vm_page_t dst_page;
 int xsize;
 int retval = 0;
 int cur_run;
 int cur_needed;
 int i;
 int orig_offset;
 vm_page_t page_run[MAX_RUN];
 int dirty_count; /* keeps track of number of pages dirtied as part of this uiomove */

 object = memory_object_control_to_vm_object(control);
 if (object == VM_OBJECT_NULL) {
 return (0);
 }
 assert(!object->internal);

 vm_object_lock(object);

 if (mark_dirty && object->copy != VM_OBJECT_NULL) {
 /*
 * We can't modify the pages without honoring
 * copy-on-write obligations first, so fall off
 * this optimized path and fall back to the regular
 * path.
 */
 vm_object_unlock(object);
 return 0;
 }
 /* remembered so the LRU decision below can tell whether the request started on a page boundary */
 orig_offset = start_offset;

 dirty_count = 0;
 while (io_requested && retval == 0) {

 /* number of pages spanned by the remaining request, capped at MAX_RUN */
 cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

 if (cur_needed > MAX_RUN)
 cur_needed = MAX_RUN;

 /* collect a run of consecutive resident pages, marking each one busy */
 for (cur_run = 0; cur_run < cur_needed; ) {

 if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
 break;


 if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
 /*
 * someone else is playing with the page... if we've
 * already collected pages into this run, go ahead
 * and process them now: we can't block on this
 * page while holding other pages in the BUSY state.
 * Otherwise (nothing collected yet) sleep on the page
 * and retry the lookup.
 */
 if (cur_run)
 break;
 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
 continue;
 }
 if (dst_page->vmp_laundry)
 vm_pageout_steal_laundry(dst_page, FALSE);

 if (mark_dirty) {
 /* only pages that were clean until now count towards dirty_count */
 if (dst_page->vmp_dirty == FALSE)
 dirty_count++;
 SET_PAGE_DIRTY(dst_page, FALSE);
 if (dst_page->vmp_cs_validated &&
 !dst_page->vmp_cs_tainted) {
 /*
 * CODE SIGNING:
 * We're modifying a code-signed
 * page: force revalidate
 */
 dst_page->vmp_cs_validated = FALSE;

 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);

 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
 }
 }
 dst_page->vmp_busy = TRUE;

 page_run[cur_run++] = dst_page;

 offset += PAGE_SIZE_64;
 }
 if (cur_run == 0)
 /*
 * we hit a 'hole' in the cache or
 * a page we don't want to try to handle,
 * so bail at this point
 * we'll unlock the object below
 */
 break;
 /* the copy runs with the object unlocked; the pages in the run were marked busy above */
 vm_object_unlock(object);

 for (i = 0; i < cur_run; i++) {

 dst_page = page_run[i];

 /* bytes to move for this page: the rest of the page, or whatever is left of the request */
 if ((xsize = PAGE_SIZE - start_offset) > io_requested)
 xsize = io_requested;

 if ( (retval = uiomove64((addr64_t)(((addr64_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)) << PAGE_SHIFT) + start_offset), xsize, uio)) )
 break;

 io_requested -= xsize;
 /* only the first page of the request starts mid-page */
 start_offset = 0;
 }
 vm_object_lock(object);

 /*
 * if we have more than 1 page to work on
 * in the current run, or the original request
 * started at offset 0 of the page, or we're
 * processing multiple batches, we will move
 * the pages to the tail of the inactive queue
 * to implement an LRU for read/write accesses
 *
 * the check for orig_offset == 0 is there to
 * mitigate the cost of small (< page_size) requests
 * to the same page (this way we only move it once)
 */
 if (take_reference && (cur_run > 1 || orig_offset == 0)) {

 vm_page_lockspin_queues();

 for (i = 0; i < cur_run; i++)
 vm_page_lru(page_run[i]);

 vm_page_unlock_queues();
 }
 /* wake up every page of the run, including any that were not copied because uiomove64() failed */
 for (i = 0; i < cur_run; i++) {
 dst_page = page_run[i];

 /*
 * someone is explicitly referencing this page...
 * update clustered and speculative state
 *
 */
 if (dst_page->vmp_clustered)
 VM_PAGE_CONSUME_CLUSTERED(dst_page);

 PAGE_WAKEUP_DONE(dst_page);
 }
 /* from the second batch on, the orig_offset == 0 test above always passes */
 orig_offset = 0;
 }
 /* report the newly dirtied bytes as deferred logical writes against the backing vnode */
 if (object->pager)
 task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
 vm_object_unlock(object);
 return (retval);
}
345
346
347/*
348 *
349 */
350void
351vnode_pager_bootstrap(void)
352{
353 vm_size_t size;
354
355 size = (vm_size_t) sizeof(struct vnode_pager);
356 vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
357 PAGE_SIZE, "vnode pager structures");
358 zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
359 zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
360
361
362#if CONFIG_CODE_DECRYPTION
363 apple_protect_pager_bootstrap();
364#endif /* CONFIG_CODE_DECRYPTION */
365 swapfile_pager_bootstrap();
366#if __arm64__
367 fourk_pager_bootstrap();
368#endif /* __arm64__ */
369 shared_region_pager_bootstrap();
370
371 return;
372}
373
374/*
375 *
376 */
377memory_object_t
378vnode_pager_setup(
379 struct vnode *vp,
380 __unused memory_object_t pager)
381{
382 vnode_pager_t vnode_object;
383
384 vnode_object = vnode_object_create(vp);
385 if (vnode_object == VNODE_PAGER_NULL)
386 panic("vnode_pager_setup: vnode_object_create() failed");
387 return((memory_object_t)vnode_object);
388}
389
390/*
391 *
392 */
393kern_return_t
394vnode_pager_init(memory_object_t mem_obj,
395 memory_object_control_t control,
396#if !DEBUG
397 __unused
398#endif
399 memory_object_cluster_size_t pg_size)
400{
401 vnode_pager_t vnode_object;
402 kern_return_t kr;
403 memory_object_attr_info_data_t attributes;
404
405
406 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
407
408 if (control == MEMORY_OBJECT_CONTROL_NULL)
409 return KERN_INVALID_ARGUMENT;
410
411 vnode_object = vnode_pager_lookup(mem_obj);
412
413 memory_object_control_reference(control);
414
415 vnode_object->vn_pgr_hdr.mo_control = control;
416
417 attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
418 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
419 attributes.cluster_size = (1 << (PAGE_SHIFT));
420 attributes.may_cache_object = TRUE;
421 attributes.temporary = TRUE;
422
423 kr = memory_object_change_attributes(
424 control,
425 MEMORY_OBJECT_ATTRIBUTE_INFO,
426 (memory_object_info_t) &attributes,
427 MEMORY_OBJECT_ATTR_INFO_COUNT);
428 if (kr != KERN_SUCCESS)
429 panic("vnode_pager_init: memory_object_change_attributes() failed");
430
431 return(KERN_SUCCESS);
432}
433
434/*
435 *
436 */
437kern_return_t
438vnode_pager_data_return(
439 memory_object_t mem_obj,
440 memory_object_offset_t offset,
441 memory_object_cluster_size_t data_cnt,
442 memory_object_offset_t *resid_offset,
443 int *io_error,
444 __unused boolean_t dirty,
445 __unused boolean_t kernel_copy,
446 int upl_flags)
447{
448 vnode_pager_t vnode_object;
449
450 vnode_object = vnode_pager_lookup(mem_obj);
451
452 vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
453
454 return KERN_SUCCESS;
455}
456
457kern_return_t
458vnode_pager_data_initialize(
459 __unused memory_object_t mem_obj,
460 __unused memory_object_offset_t offset,
461 __unused memory_object_cluster_size_t data_cnt)
462{
463 panic("vnode_pager_data_initialize");
464 return KERN_FAILURE;
465}
466
467kern_return_t
468vnode_pager_data_unlock(
469 __unused memory_object_t mem_obj,
470 __unused memory_object_offset_t offset,
471 __unused memory_object_size_t size,
472 __unused vm_prot_t desired_access)
473{
474 return KERN_FAILURE;
475}
476
477void
478vnode_pager_dirtied(
479 memory_object_t mem_obj,
480 vm_object_offset_t s_offset,
481 vm_object_offset_t e_offset)
482{
483 vnode_pager_t vnode_object;
484
485 if (mem_obj && mem_obj->mo_pager_ops == &vnode_pager_ops) {
486
487 vnode_object = vnode_pager_lookup(mem_obj);
488 vnode_pager_was_dirtied(vnode_object->vnode_handle, s_offset, e_offset);
489 }
490}
491
492kern_return_t
493vnode_pager_get_isinuse(
494 memory_object_t mem_obj,
495 uint32_t *isinuse)
496{
497 vnode_pager_t vnode_object;
498
499 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
500 *isinuse = 1;
501 return KERN_INVALID_ARGUMENT;
502 }
503
504 vnode_object = vnode_pager_lookup(mem_obj);
505
506 *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
507 return KERN_SUCCESS;
508}
509
510kern_return_t
511vnode_pager_get_throttle_io_limit(
512 memory_object_t mem_obj,
513 uint32_t *limit)
514{
515 vnode_pager_t vnode_object;
516
517 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
518 return KERN_INVALID_ARGUMENT;
519
520 vnode_object = vnode_pager_lookup(mem_obj);
521
522 (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
523 return KERN_SUCCESS;
524}
525
526kern_return_t
527vnode_pager_get_isSSD(
528 memory_object_t mem_obj,
529 boolean_t *isSSD)
530{
531 vnode_pager_t vnode_object;
532
533 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
534 return KERN_INVALID_ARGUMENT;
535
536 vnode_object = vnode_pager_lookup(mem_obj);
537
538 *isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
539 return KERN_SUCCESS;
540}
541
542kern_return_t
543vnode_pager_get_object_size(
544 memory_object_t mem_obj,
545 memory_object_offset_t *length)
546{
547 vnode_pager_t vnode_object;
548
549 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
550 *length = 0;
551 return KERN_INVALID_ARGUMENT;
552 }
553
554 vnode_object = vnode_pager_lookup(mem_obj);
555
556 *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
557 return KERN_SUCCESS;
558}
559
560kern_return_t
561vnode_pager_get_object_name(
562 memory_object_t mem_obj,
563 char *pathname,
564 vm_size_t pathname_len,
565 char *filename,
566 vm_size_t filename_len,
567 boolean_t *truncated_path_p)
568{
569 vnode_pager_t vnode_object;
570
571 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
572 return KERN_INVALID_ARGUMENT;
573 }
574
575 vnode_object = vnode_pager_lookup(mem_obj);
576
577 return vnode_pager_get_name(vnode_object->vnode_handle,
578 pathname,
579 pathname_len,
580 filename,
581 filename_len,
582 truncated_path_p);
583}
584
585kern_return_t
586vnode_pager_get_object_mtime(
587 memory_object_t mem_obj,
588 struct timespec *mtime,
589 struct timespec *cs_mtime)
590{
591 vnode_pager_t vnode_object;
592
593 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
594 return KERN_INVALID_ARGUMENT;
595 }
596
597 vnode_object = vnode_pager_lookup(mem_obj);
598
599 return vnode_pager_get_mtime(vnode_object->vnode_handle,
600 mtime,
601 cs_mtime);
602}
603
#if CHECK_CS_VALIDATION_BITMAP
/*
 * vnode_pager_cs_check_validation_bitmap:
 *
 * Forward "offset" and "optype" to ubc_cs_check_validation_bitmap() for
 * the vnode behind "mem_obj" and return its result.  Returns
 * KERN_INVALID_ARGUMENT when mem_obj is null or is not a vnode pager.
 */
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	int			optype)
{
	vnode_pager_t	pager;

	if (mem_obj == MEMORY_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	pager = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap(pager->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */
622
623/*
624 *
625 */
626kern_return_t
627vnode_pager_data_request(
628 memory_object_t mem_obj,
629 memory_object_offset_t offset,
630 __unused memory_object_cluster_size_t length,
631 __unused vm_prot_t desired_access,
632 memory_object_fault_info_t fault_info)
633{
634 vnode_pager_t vnode_object;
635 memory_object_offset_t base_offset;
636 vm_size_t size;
637 uint32_t io_streaming = 0;
638
639 vnode_object = vnode_pager_lookup(mem_obj);
640
641 size = MAX_UPL_TRANSFER_BYTES;
642 base_offset = offset;
643
644 if (memory_object_cluster_size(vnode_object->vn_pgr_hdr.mo_control,
645 &base_offset, &size, &io_streaming,
646 fault_info) != KERN_SUCCESS)
647 size = PAGE_SIZE;
648
649 assert(offset >= base_offset &&
650 offset < base_offset + size);
651
652 return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
653}
654
655/*
656 *
657 */
658void
659vnode_pager_reference(
660 memory_object_t mem_obj)
661{
662 vnode_pager_t vnode_object;
663 unsigned int new_ref_count;
664
665 vnode_object = vnode_pager_lookup(mem_obj);
666 new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
667 assert(new_ref_count > 1);
668}
669
670/*
671 *
672 */
673void
674vnode_pager_deallocate(
675 memory_object_t mem_obj)
676{
677 vnode_pager_t vnode_object;
678
679 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
680
681 vnode_object = vnode_pager_lookup(mem_obj);
682
683 if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
684 if (vnode_object->vnode_handle != NULL) {
685 vnode_pager_vrele(vnode_object->vnode_handle);
686 }
687 zfree(vnode_pager_zone, vnode_object);
688 }
689 return;
690}
691
692/*
693 *
694 */
695kern_return_t
696vnode_pager_terminate(
697#if !DEBUG
698 __unused
699#endif
700 memory_object_t mem_obj)
701{
702 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
703
704 return(KERN_SUCCESS);
705}
706
707/*
708 *
709 */
710kern_return_t
711vnode_pager_synchronize(
712 __unused memory_object_t mem_obj,
713 __unused memory_object_offset_t offset,
714 __unused memory_object_size_t length,
715 __unused vm_sync_t sync_flags)
716{
717 panic("vnode_pager_synchronize: memory_object_synchronize no longer supported\n");
718 return (KERN_FAILURE);
719}
720
721/*
722 *
723 */
724kern_return_t
725vnode_pager_map(
726 memory_object_t mem_obj,
727 vm_prot_t prot)
728{
729 vnode_pager_t vnode_object;
730 int ret;
731 kern_return_t kr;
732
733 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
734
735 vnode_object = vnode_pager_lookup(mem_obj);
736
737 ret = ubc_map(vnode_object->vnode_handle, prot);
738
739 if (ret != 0) {
740 kr = KERN_FAILURE;
741 } else {
742 kr = KERN_SUCCESS;
743 }
744
745 return kr;
746}
747
748kern_return_t
749vnode_pager_last_unmap(
750 memory_object_t mem_obj)
751{
752 vnode_pager_t vnode_object;
753
754 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
755
756 vnode_object = vnode_pager_lookup(mem_obj);
757
758 ubc_unmap(vnode_object->vnode_handle);
759 return KERN_SUCCESS;
760}
761
762
763
764/*
765 *
766 */
767void
768vnode_pager_cluster_write(
769 vnode_pager_t vnode_object,
770 vm_object_offset_t offset,
771 vm_size_t cnt,
772 vm_object_offset_t * resid_offset,
773 int * io_error,
774 int upl_flags)
775{
776 vm_size_t size;
777 int errno;
778
779 if (upl_flags & UPL_MSYNC) {
780
781 upl_flags |= UPL_VNODE_PAGER;
782
783 if ( (upl_flags & UPL_IOSYNC) && io_error)
784 upl_flags |= UPL_KEEPCACHED;
785
786 while (cnt) {
787 size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */
788
789 assert((upl_size_t) size == size);
790 vnode_pageout(vnode_object->vnode_handle,
791 NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
792
793 if ( (upl_flags & UPL_KEEPCACHED) ) {
794 if ( (*io_error = errno) )
795 break;
796 }
797 cnt -= size;
798 offset += size;
799 }
800 if (resid_offset)
801 *resid_offset = offset;
802
803 } else {
804 vm_object_offset_t vnode_size;
805 vm_object_offset_t base_offset;
806
807 /*
808 * this is the pageout path
809 */
810 vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
811
812 if (vnode_size > (offset + PAGE_SIZE)) {
813 /*
814 * preset the maximum size of the cluster
815 * and put us on a nice cluster boundary...
816 * and then clip the size to insure we
817 * don't request past the end of the underlying file
818 */
819 size = MAX_UPL_TRANSFER_BYTES;
820 base_offset = offset & ~((signed)(size - 1));
821
822 if ((base_offset + size) > vnode_size)
823 size = round_page(((vm_size_t)(vnode_size - base_offset)));
824 } else {
825 /*
826 * we've been requested to page out a page beyond the current
827 * end of the 'file'... don't try to cluster in this case...
828 * we still need to send this page through because it might
829 * be marked precious and the underlying filesystem may need
830 * to do something with it (besides page it out)...
831 */
832 base_offset = offset;
833 size = PAGE_SIZE;
834 }
835 assert((upl_size_t) size == size);
836 vnode_pageout(vnode_object->vnode_handle,
837 NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
838 (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
839 }
840}
841
842
843/*
844 *
845 */
846kern_return_t
847vnode_pager_cluster_read(
848 vnode_pager_t vnode_object,
849 vm_object_offset_t base_offset,
850 vm_object_offset_t offset,
851 uint32_t io_streaming,
852 vm_size_t cnt)
853{
854 int local_error = 0;
855 int kret;
856 int flags = 0;
857
858 assert(! (cnt & PAGE_MASK));
859
860 if (io_streaming)
861 flags |= UPL_IOSTREAMING;
862
863 assert((upl_size_t) cnt == cnt);
864 kret = vnode_pagein(vnode_object->vnode_handle,
865 (upl_t) NULL,
866 (upl_offset_t) (offset - base_offset),
867 base_offset,
868 (upl_size_t) cnt,
869 flags,
870 &local_error);
871/*
872 if(kret == PAGER_ABSENT) {
873 Need to work out the defs here, 1 corresponds to PAGER_ABSENT
874 defined in bsd/vm/vm_pager.h However, we should not be including
875 that file here it is a layering violation.
876*/
877 if (kret == 1) {
878 int uplflags;
879 upl_t upl = NULL;
880 unsigned int count = 0;
881 kern_return_t kr;
882
883 uplflags = (UPL_NO_SYNC |
884 UPL_CLEAN_IN_PLACE |
885 UPL_SET_INTERNAL);
886 count = 0;
887 assert((upl_size_t) cnt == cnt);
888 kr = memory_object_upl_request(vnode_object->vn_pgr_hdr.mo_control,
889 base_offset, (upl_size_t) cnt,
890 &upl, NULL, &count, uplflags, VM_KERN_MEMORY_NONE);
891 if (kr == KERN_SUCCESS) {
892 upl_abort(upl, 0);
893 upl_deallocate(upl);
894 } else {
895 /*
896 * We couldn't gather the page list, probably
897 * because the memory object doesn't have a link
898 * to a VM object anymore (forced unmount, for
899 * example). Just return an error to the vm_fault()
900 * path and let it handle it.
901 */
902 }
903
904 return KERN_FAILURE;
905 }
906
907 return KERN_SUCCESS;
908
909}
910
911/*
912 *
913 */
914vnode_pager_t
915vnode_object_create(
916 struct vnode *vp)
917{
918 vnode_pager_t vnode_object;
919
920 vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
921 if (vnode_object == VNODE_PAGER_NULL)
922 return(VNODE_PAGER_NULL);
923
924 /*
925 * The vm_map call takes both named entry ports and raw memory
926 * objects in the same parameter. We need to make sure that
927 * vm_map does not see this object as a named entry port. So,
928 * we reserve the first word in the object for a fake ip_kotype
929 * setting - that will tell vm_map to use it as a memory object.
930 */
931 vnode_object->vn_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
932 vnode_object->vn_pgr_hdr.mo_pager_ops = &vnode_pager_ops;
933 vnode_object->vn_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
934
935 vnode_object->ref_count = 1;
936 vnode_object->vnode_handle = vp;
937
938 return(vnode_object);
939}
940
941/*
942 *
943 */
944vnode_pager_t
945vnode_pager_lookup(
946 memory_object_t name)
947{
948 vnode_pager_t vnode_object;
949
950 vnode_object = (vnode_pager_t)name;
951 assert(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops);
952 return (vnode_object);
953}
954
955
956struct vnode *
957vnode_pager_lookup_vnode(
958 memory_object_t name)
959{
960 vnode_pager_t vnode_object;
961 vnode_object = (vnode_pager_t)name;
962 if(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops)
963 return (vnode_object->vnode_handle);
964 else
965 return NULL;
966}
967
968/*********************** proc_info implementation *************/
969
970#include <sys/bsdtask_info.h>
971
/*
 * Finds the vnode backing a map entry: returns 1 and fills *vnodeaddr and
 * *vid when the entry is backed by a vnode pager, 0 otherwise.
 */
static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
973
974
975int
976fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
977{
978
979 vm_map_t map;
980 vm_map_offset_t address = (vm_map_offset_t )arg;
981 vm_map_entry_t tmp_entry;
982 vm_map_entry_t entry;
983 vm_map_offset_t start;
984 vm_region_extended_info_data_t extended;
985 vm_region_top_info_data_t top;
986 boolean_t do_region_footprint;
987
988 task_lock(task);
989 map = task->map;
990 if (map == VM_MAP_NULL)
991 {
992 task_unlock(task);
993 return(0);
994 }
995 vm_map_reference(map);
996 task_unlock(task);
997
998 do_region_footprint = task_self_region_footprint();
999
1000 vm_map_lock_read(map);
1001
1002 start = address;
1003
1004 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1005 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1006 if (do_region_footprint &&
1007 address == tmp_entry->vme_end) {
1008 ledger_amount_t nonvol, nonvol_compressed;
1009
1010 /*
1011 * This request is right after the last valid
1012 * memory region; instead of reporting the
1013 * end of the address space, report a fake
1014 * memory region to account for non-volatile
1015 * purgeable memory owned by this task.
1016 */
1017
1018 ledger_get_balance(
1019 task->ledger,
1020 task_ledgers.purgeable_nonvolatile,
1021 &nonvol);
1022 ledger_get_balance(
1023 task->ledger,
1024 task_ledgers.purgeable_nonvolatile_compressed,
1025 &nonvol_compressed);
1026 if (nonvol + nonvol_compressed == 0) {
1027 /* nothing to report */
1028 vm_map_unlock_read(map);
1029 vm_map_deallocate(map);
1030 return 0;
1031 }
1032 /* provide fake region for purgeable */
1033 pinfo->pri_offset = address;
1034 pinfo->pri_protection = VM_PROT_DEFAULT;
1035 pinfo->pri_max_protection = VM_PROT_DEFAULT;
1036 pinfo->pri_inheritance = VM_INHERIT_NONE;
1037 pinfo->pri_behavior = VM_BEHAVIOR_DEFAULT;
1038 pinfo->pri_user_wired_count = 0;
1039 pinfo->pri_user_tag = -1;
1040 pinfo->pri_pages_resident =
1041 (uint32_t) (nonvol / PAGE_SIZE);
1042 pinfo->pri_pages_shared_now_private = 0;
1043 pinfo->pri_pages_swapped_out =
1044 (uint32_t) (nonvol_compressed / PAGE_SIZE);
1045 pinfo->pri_pages_dirtied =
1046 (uint32_t) (nonvol / PAGE_SIZE);
1047 pinfo->pri_ref_count = 1;
1048 pinfo->pri_shadow_depth = 0;
1049 pinfo->pri_share_mode = SM_PRIVATE;
1050 pinfo->pri_private_pages_resident =
1051 (uint32_t) (nonvol / PAGE_SIZE);
1052 pinfo->pri_shared_pages_resident = 0;
1053 pinfo->pri_obj_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
1054 pinfo->pri_address = address;
1055 pinfo->pri_size =
1056 (uint64_t) (nonvol + nonvol_compressed);
1057 pinfo->pri_depth = 0;
1058
1059 vm_map_unlock_read(map);
1060 vm_map_deallocate(map);
1061 return 1;
1062 }
1063 vm_map_unlock_read(map);
1064 vm_map_deallocate(map);
1065 return 0;
1066 }
1067 } else {
1068 entry = tmp_entry;
1069 }
1070
1071 start = entry->vme_start;
1072
1073 pinfo->pri_offset = VME_OFFSET(entry);
1074 pinfo->pri_protection = entry->protection;
1075 pinfo->pri_max_protection = entry->max_protection;
1076 pinfo->pri_inheritance = entry->inheritance;
1077 pinfo->pri_behavior = entry->behavior;
1078 pinfo->pri_user_wired_count = entry->user_wired_count;
1079 pinfo->pri_user_tag = VME_ALIAS(entry);
1080
1081 if (entry->is_sub_map) {
1082 pinfo->pri_flags |= PROC_REGION_SUBMAP;
1083 } else {
1084 if (entry->is_shared)
1085 pinfo->pri_flags |= PROC_REGION_SHARED;
1086 }
1087
1088
1089 extended.protection = entry->protection;
1090 extended.user_tag = VME_ALIAS(entry);
1091 extended.pages_resident = 0;
1092 extended.pages_swapped_out = 0;
1093 extended.pages_shared_now_private = 0;
1094 extended.pages_dirtied = 0;
1095 extended.external_pager = 0;
1096 extended.shadow_depth = 0;
1097
1098 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended, TRUE, VM_REGION_EXTENDED_INFO_COUNT);
1099
1100 if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
1101 extended.share_mode = SM_PRIVATE;
1102
1103 top.private_pages_resident = 0;
1104 top.shared_pages_resident = 0;
1105 vm_map_region_top_walk(entry, &top);
1106
1107
1108 pinfo->pri_pages_resident = extended.pages_resident;
1109 pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1110 pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1111 pinfo->pri_pages_dirtied = extended.pages_dirtied;
1112 pinfo->pri_ref_count = extended.ref_count;
1113 pinfo->pri_shadow_depth = extended.shadow_depth;
1114 pinfo->pri_share_mode = extended.share_mode;
1115
1116 pinfo->pri_private_pages_resident = top.private_pages_resident;
1117 pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1118 pinfo->pri_obj_id = top.obj_id;
1119
1120 pinfo->pri_address = (uint64_t)start;
1121 pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1122 pinfo->pri_depth = 0;
1123
1124 if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1125 *vnodeaddr = (uintptr_t)0;
1126
1127 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
1128 vm_map_unlock_read(map);
1129 vm_map_deallocate(map);
1130 return(1);
1131 }
1132 }
1133
1134 vm_map_unlock_read(map);
1135 vm_map_deallocate(map);
1136 return(1);
1137}
1138
1139int
1140fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1141{
1142
1143 vm_map_t map;
1144 vm_map_offset_t address = (vm_map_offset_t )arg;
1145 vm_map_entry_t tmp_entry;
1146 vm_map_entry_t entry;
1147
1148 task_lock(task);
1149 map = task->map;
1150 if (map == VM_MAP_NULL)
1151 {
1152 task_unlock(task);
1153 return(0);
1154 }
1155 vm_map_reference(map);
1156 task_unlock(task);
1157
1158 vm_map_lock_read(map);
1159
1160 if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
1161 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1162 vm_map_unlock_read(map);
1163 vm_map_deallocate(map);
1164 return(0);
1165 }
1166 } else {
1167 entry = tmp_entry;
1168 }
1169
1170 while (entry != vm_map_to_entry(map)) {
1171 *vnodeaddr = 0;
1172 *vid = 0;
1173
1174 if (entry->is_sub_map == 0) {
1175 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
1176
1177 pinfo->pri_offset = VME_OFFSET(entry);
1178 pinfo->pri_protection = entry->protection;
1179 pinfo->pri_max_protection = entry->max_protection;
1180 pinfo->pri_inheritance = entry->inheritance;
1181 pinfo->pri_behavior = entry->behavior;
1182 pinfo->pri_user_wired_count = entry->user_wired_count;
1183 pinfo->pri_user_tag = VME_ALIAS(entry);
1184
1185 if (entry->is_shared)
1186 pinfo->pri_flags |= PROC_REGION_SHARED;
1187
1188 pinfo->pri_pages_resident = 0;
1189 pinfo->pri_pages_shared_now_private = 0;
1190 pinfo->pri_pages_swapped_out = 0;
1191 pinfo->pri_pages_dirtied = 0;
1192 pinfo->pri_ref_count = 0;
1193 pinfo->pri_shadow_depth = 0;
1194 pinfo->pri_share_mode = 0;
1195
1196 pinfo->pri_private_pages_resident = 0;
1197 pinfo->pri_shared_pages_resident = 0;
1198 pinfo->pri_obj_id = 0;
1199
1200 pinfo->pri_address = (uint64_t)entry->vme_start;
1201 pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
1202 pinfo->pri_depth = 0;
1203
1204 vm_map_unlock_read(map);
1205 vm_map_deallocate(map);
1206 return(1);
1207 }
1208 }
1209
1210 /* Keep searching for a vnode-backed mapping */
1211 entry = entry->vme_next;
1212 }
1213
1214 vm_map_unlock_read(map);
1215 vm_map_deallocate(map);
1216 return(0);
1217}
1218
1219static int
1220fill_vnodeinfoforaddr(
1221 vm_map_entry_t entry,
1222 uintptr_t * vnodeaddr,
1223 uint32_t * vid)
1224{
1225 vm_object_t top_object, object;
1226 memory_object_t memory_object;
1227 memory_object_pager_ops_t pager_ops;
1228 kern_return_t kr;
1229 int shadow_depth;
1230
1231
1232 if (entry->is_sub_map) {
1233 return(0);
1234 } else {
1235 /*
1236 * The last object in the shadow chain has the
1237 * relevant pager information.
1238 */
1239 top_object = VME_OBJECT(entry);
1240 if (top_object == VM_OBJECT_NULL) {
1241 object = VM_OBJECT_NULL;
1242 shadow_depth = 0;
1243 } else {
1244 vm_object_lock(top_object);
1245 for (object = top_object, shadow_depth = 0;
1246 object->shadow != VM_OBJECT_NULL;
1247 object = object->shadow, shadow_depth++) {
1248 vm_object_lock(object->shadow);
1249 vm_object_unlock(object);
1250 }
1251 }
1252 }
1253
1254 if (object == VM_OBJECT_NULL) {
1255 return(0);
1256 } else if (object->internal) {
1257 vm_object_unlock(object);
1258 return(0);
1259 } else if (! object->pager_ready ||
1260 object->terminating ||
1261 ! object->alive) {
1262 vm_object_unlock(object);
1263 return(0);
1264 } else {
1265 memory_object = object->pager;
1266 pager_ops = memory_object->mo_pager_ops;
1267 if (pager_ops == &vnode_pager_ops) {
1268 kr = vnode_pager_get_object_vnode(
1269 memory_object,
1270 vnodeaddr, vid);
1271 if (kr != KERN_SUCCESS) {
1272 vm_object_unlock(object);
1273 return(0);
1274 }
1275 } else {
1276 vm_object_unlock(object);
1277 return(0);
1278 }
1279 }
1280 vm_object_unlock(object);
1281 return(1);
1282}
1283
1284kern_return_t
1285vnode_pager_get_object_vnode (
1286 memory_object_t mem_obj,
1287 uintptr_t * vnodeaddr,
1288 uint32_t * vid)
1289{
1290 vnode_pager_t vnode_object;
1291
1292 vnode_object = vnode_pager_lookup(mem_obj);
1293 if (vnode_object->vnode_handle) {
1294 *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1295 *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1296
1297 return(KERN_SUCCESS);
1298 }
1299
1300 return(KERN_FAILURE);
1301}
1302
#if CONFIG_IOSCHED
/*
 * vnode_pager_get_object_devvp:
 *
 * Store in *devvp the value that vnode_mountdevvp() returns for the vnode
 * behind the vnode pager "mem_obj".
 *
 * Returns KERN_SUCCESS when that value is non-zero.  Returns KERN_FAILURE
 * when the pager has no vnode (*devvp is left untouched) or when
 * vnode_mountdevvp() yields 0 (*devvp is then 0).
 */
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t		mem_obj,
	uintptr_t		*devvp)
{
	/*
	 * Receive the vnode address in a real uintptr_t and convert it
	 * afterwards, rather than letting the callee store through a
	 * "struct vnode **" cast to "uintptr_t *" (an access through an
	 * incompatible pointer type).
	 */
	uintptr_t	vnodeaddr = 0;
	uint32_t	vid = 0;

	if (vnode_pager_get_object_vnode(mem_obj, &vnodeaddr, &vid) != KERN_SUCCESS) {
		return KERN_FAILURE;
	}

	*devvp = (uintptr_t) vnode_mountdevvp((struct vnode *) vnodeaddr);
	if (*devvp) {
		return KERN_SUCCESS;
	}
	return KERN_FAILURE;
}
#endif
1320
1321/*
1322 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
1323 * object locked, otherwise return NULL with nothing locked.
1324 */
1325
1326vm_object_t
1327find_vnode_object(
1328 vm_map_entry_t entry
1329)
1330{
1331 vm_object_t top_object, object;
1332 memory_object_t memory_object;
1333 memory_object_pager_ops_t pager_ops;
1334
1335 if (!entry->is_sub_map) {
1336
1337 /*
1338 * The last object in the shadow chain has the
1339 * relevant pager information.
1340 */
1341
1342 top_object = VME_OBJECT(entry);
1343
1344 if (top_object) {
1345 vm_object_lock(top_object);
1346
1347 for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1348 vm_object_lock(object->shadow);
1349 vm_object_unlock(object);
1350 }
1351
1352 if (object && !object->internal && object->pager_ready && !object->terminating &&
1353 object->alive) {
1354 memory_object = object->pager;
1355 pager_ops = memory_object->mo_pager_ops;
1356
1357 /*
1358 * If this object points to the vnode_pager_ops, then we found what we're
1359 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
1360 * vnode and so we fall through to the bottom and return NULL.
1361 */
1362
1363 if (pager_ops == &vnode_pager_ops)
1364 return object; /* we return with the object locked */
1365 }
1366
1367 vm_object_unlock(object);
1368 }
1369
1370 }
1371
1372 return(VM_OBJECT_NULL);
1373}
1374