1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/memory_object.c
60 * Author: Michael Wayne Young
61 *
62 * External memory management interface control functions.
63 */
64
65/*
66 * Interface dependencies:
67 */
68
69#include <mach/std_types.h> /* For pointer_t */
70#include <mach/mach_types.h>
71
72#include <mach/mig.h>
73#include <mach/kern_return.h>
74#include <mach/memory_object.h>
75#include <mach/memory_object_control.h>
76#include <mach/host_priv_server.h>
77#include <mach/boolean.h>
78#include <mach/vm_prot.h>
79#include <mach/message.h>
80
81/*
82 * Implementation dependencies:
83 */
84#include <string.h> /* For memcpy() */
85
86#include <kern/host.h>
87#include <kern/thread.h> /* For current_thread() */
88#include <kern/ipc_mig.h>
89#include <kern/misc_protos.h>
90
91#include <vm/vm_object.h>
92#include <vm/vm_fault.h>
93#include <vm/memory_object.h>
94#include <vm/vm_page.h>
95#include <vm/vm_pageout.h>
96#include <vm/pmap.h> /* For pmap_clear_modify */
97#include <vm/vm_kern.h> /* For kernel_map, vm_move */
98#include <vm/vm_map.h> /* For vm_map_pageable */
99#include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */
100#include <vm/vm_shared_region.h>
101
102#include <vm/vm_external.h>
103
104#include <vm/vm_protos.h>
105
106memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
107LCK_MTX_DECLARE(memory_manager_default_lock, &vm_object_lck_grp);
108
109
110/*
111 * Routine: memory_object_should_return_page
112 *
113 * Description:
114 * Determine whether the given page should be returned,
115 * based on the page's state and on the given return policy.
116 *
117 * We should return the page if one of the following is true:
118 *
119 * 1. Page is dirty and should_return is not RETURN_NONE.
120 * 2. Page is precious and should_return is RETURN_ALL.
121 * 3. Should_return is RETURN_ANYTHING.
122 *
123 * As a side effect, m->vmp_dirty will be made consistent
124 * with pmap_is_modified(m), if should_return is not
125 * MEMORY_OBJECT_RETURN_NONE.
126 */
127
128#define memory_object_should_return_page(m, should_return) \
129 (should_return != MEMORY_OBJECT_RETURN_NONE && \
130 (((m)->vmp_dirty || ((m)->vmp_dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))) || \
131 ((m)->vmp_precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
132 (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
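
/*
 * Worked example (illustration only, not compiled): with
 * should_return == MEMORY_OBJECT_RETURN_DIRTY, a page whose vmp_dirty
 * bit is clear but whose pmap modify bit is set is still returned,
 * because the macro refreshes vmp_dirty from pmap_is_modified() first:
 *
 *	if (memory_object_should_return_page(m, MEMORY_OBJECT_RETURN_DIRTY)) {
 *		// reached for dirty (or pmap-modified) pages;
 *		// a clean precious page would need RETURN_ALL instead
 *	}
 */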
133
134typedef int memory_object_lock_result_t;
135
136#define MEMORY_OBJECT_LOCK_RESULT_DONE 0
137#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
138#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 2
139#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE 3
140
141memory_object_lock_result_t memory_object_lock_page(
142 vm_page_t m,
143 memory_object_return_t should_return,
144 boolean_t should_flush,
145 vm_prot_t prot);
146
147/*
148 * Routine: memory_object_lock_page
149 *
150 * Description:
151 * Perform the appropriate lock operations on the
152 * given page. See the description of
153 * "memory_object_lock_request" for the meanings
154 * of the arguments.
155 *
156 * Returns an indication that the operation
157 * completed, blocked, or that the page must
158 * be cleaned.
159 */
160memory_object_lock_result_t
161memory_object_lock_page(
162 vm_page_t m,
163 memory_object_return_t should_return,
164 boolean_t should_flush,
165 vm_prot_t prot)
166{
167 if (prot == VM_PROT_NO_CHANGE_LEGACY) {
168 prot = VM_PROT_NO_CHANGE;
169 }
170
171 if (m->vmp_busy || m->vmp_cleaning) {
172 return MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK;
173 }
174
175 if (m->vmp_laundry) {
		vm_pageout_steal_laundry(m, FALSE);
177 }
178
179 /*
180 * Don't worry about pages for which the kernel
181 * does not have any data.
182 */
183 if (m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_restart) {
184 if (VMP_ERROR_GET(m) && should_flush && !VM_PAGE_WIRED(m)) {
185 /*
186 * dump the page, pager wants us to
187 * clean it up and there is no
188 * relevant data to return
189 */
190 return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
191 }
192 return MEMORY_OBJECT_LOCK_RESULT_DONE;
193 }
194 assert(!m->vmp_fictitious);
195
196 if (VM_PAGE_WIRED(m)) {
197 /*
198 * The page is wired... just clean or return the page if needed.
199 * Wired pages don't get flushed or disconnected from the pmap.
200 */
201 if (memory_object_should_return_page(m, should_return)) {
202 return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
203 }
204
205 return MEMORY_OBJECT_LOCK_RESULT_DONE;
206 }
207
208 if (should_flush) {
209 /*
210 * must do the pmap_disconnect before determining the
211 * need to return the page... otherwise it's possible
212 * for the page to go from the clean to the dirty state
213 * after we've made our decision
214 */
		if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
216 SET_PAGE_DIRTY(m, FALSE);
217 }
218 } else {
219 /*
220 * If we are decreasing permission, do it now;
221 * let the fault handler take care of increases
222 * (pmap_page_protect may not increase protection).
223 */
224 if (prot != VM_PROT_NO_CHANGE) {
			pmap_page_protect(VM_PAGE_GET_PHYS_PAGE(m), VM_PROT_ALL & ~prot);
226 }
227 }
228 /*
229 * Handle returning dirty or precious pages
230 */
231 if (memory_object_should_return_page(m, should_return)) {
232 /*
	 * we used to do a pmap_disconnect here in support
234 * of memory_object_lock_request, but that routine
235 * no longer requires this... in any event, in
236 * our world, it would turn into a big noop since
237 * we don't lock the page in any way and as soon
238 * as we drop the object lock, the page can be
239 * faulted back into an address space
240 *
241 * if (!should_flush)
242 * pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
243 */
244 return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
245 }
246
247 /*
248 * Handle flushing clean pages
249 */
250 if (should_flush) {
251 return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
252 }
253
254 /*
	 * we used to deactivate clean pages at this point,
256 * but we do not believe that an msync should change
257 * the 'age' of a page in the cache... here is the
258 * original comment and code concerning this...
259 *
260 * XXX Make clean but not flush a paging hint,
261 * and deactivate the pages. This is a hack
262 * because it overloads flush/clean with
263 * implementation-dependent meaning. This only
264 * happens to pages that are already clean.
265 *
266 * if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
267 * return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
268 */
269
270 return MEMORY_OBJECT_LOCK_RESULT_DONE;
271}
272
273
274
275/*
276 * Routine: memory_object_lock_request [user interface]
277 *
278 * Description:
279 * Control use of the data associated with the given
280 * memory object. For each page in the given range,
281 * perform the following operations, in order:
282 * 1) restrict access to the page (disallow
283 * forms specified by "prot");
284 * 2) return data to the manager (if "should_return"
285 * is RETURN_DIRTY and the page is dirty, or
286 * "should_return" is RETURN_ALL and the page
287 * is either dirty or precious); and,
288 * 3) flush the cached copy (if "should_flush"
289 * is asserted).
290 * The set of pages is defined by a starting offset
291 * ("offset") and size ("size"). Only pages with the
292 * same page alignment as the starting offset are
293 * considered.
294 *
295 * A single acknowledgement is sent (to the "reply_to"
296 * port) when these actions are complete. If successful,
297 * the naked send right for reply_to is consumed.
298 */
299
300kern_return_t
301memory_object_lock_request(
302 memory_object_control_t control,
303 memory_object_offset_t offset,
304 memory_object_size_t size,
305 memory_object_offset_t * resid_offset,
306 int * io_errno,
307 memory_object_return_t should_return,
308 int flags,
309 vm_prot_t prot)
310{
311 vm_object_t object;
312
313 if (prot == VM_PROT_NO_CHANGE_LEGACY) {
314 prot = VM_PROT_NO_CHANGE;
315 }
316
317 /*
318 * Check for bogus arguments.
319 */
320 object = memory_object_control_to_vm_object(control);
321 if (object == VM_OBJECT_NULL) {
322 return KERN_INVALID_ARGUMENT;
323 }
324
325 if ((prot & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) != 0 && prot != VM_PROT_NO_CHANGE) {
326 return KERN_INVALID_ARGUMENT;
327 }
328
	size = round_page_64(size);
330
331 /*
332 * Lock the object, and acquire a paging reference to
333 * prevent the memory_object reference from being released.
334 */
335 vm_object_lock(object);
336 vm_object_paging_begin(object);
337
338 if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
339 if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->vo_copy) {
340 flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
341 flags |= MEMORY_OBJECT_DATA_FLUSH;
342 }
343 }
344 offset -= object->paging_offset;
345
346 if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
347 vm_object_reap_pages(object, REAP_DATA_FLUSH);
348 } else {
		(void)vm_object_update(object, offset, size, resid_offset,
		    io_errno, should_return, flags, prot);
351 }
352
353 vm_object_paging_end(object);
354 vm_object_unlock(object);
355
356 return KERN_SUCCESS;
357}
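
/*
 * Usage sketch (hypothetical caller, for illustration only; assumes a
 * valid "control" obtained for the object): return any dirty pages in
 * the first megabyte to the pager and flush the cached copies, leaving
 * page protections untouched.
 *
 *	kern_return_t kr;
 *
 *	kr = memory_object_lock_request(control,
 *	    0,				// offset
 *	    1024 * 1024,		// size
 *	    NULL, NULL,			// resid_offset, io_errno
 *	    MEMORY_OBJECT_RETURN_DIRTY,
 *	    MEMORY_OBJECT_DATA_FLUSH,
 *	    VM_PROT_NO_CHANGE);
 */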
358
359/*
360 * Routine: memory_object_destroy [user interface]
361 * Purpose:
362 * Shut down a memory object, despite the
363 * presence of address map (or other) references
364 * to the vm_object.
365 */
366kern_return_t
367memory_object_destroy(
368 memory_object_control_t control,
369 vm_object_destroy_reason_t reason)
370{
371 vm_object_t object;
372
373 object = memory_object_control_to_vm_object(control);
374 if (object == VM_OBJECT_NULL) {
375 return KERN_INVALID_ARGUMENT;
376 }
377
378 return vm_object_destroy(object, reason);
379}
380
381/*
382 * Routine: vm_object_sync
383 *
384 * Kernel internal function to synch out pages in a given
385 * range within an object to its memory manager. Much the
386 * same as memory_object_lock_request but page protection
387 * is not changed.
388 *
 * If the should_flush and should_return flags are true, pages
 * are flushed; that is, dirty and precious pages are written to
 * the memory manager and then discarded. If should_return
 * is false, only precious pages are returned to the memory
 * manager.
 *
 * If should_flush is false and should_return is true, the memory
 * manager's copy of the pages is updated. If should_return
 * is also false, only the precious pages are updated. This
 * last option is of limited utility.
399 *
400 * Returns:
401 * FALSE if no pages were returned to the pager
402 * TRUE otherwise.
403 */
404
405boolean_t
406vm_object_sync(
407 vm_object_t object,
408 vm_object_offset_t offset,
409 vm_object_size_t size,
410 boolean_t should_flush,
411 boolean_t should_return,
412 boolean_t should_iosync)
413{
414 boolean_t rv;
415 int flags;
416
417 /*
418 * Lock the object, and acquire a paging reference to
419 * prevent the memory_object and control ports from
420 * being destroyed.
421 */
422 vm_object_lock(object);
423 vm_object_paging_begin(object);
424
425 if (should_flush) {
426 flags = MEMORY_OBJECT_DATA_FLUSH;
427 /*
428 * This flush is from an msync(), not a truncate(), so the
429 * contents of the file are not affected.
		 * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know
431 * that the data is not changed and that there's no need to
432 * push the old contents to a copy object.
433 */
434 flags |= MEMORY_OBJECT_DATA_NO_CHANGE;
435 } else {
436 flags = 0;
437 }
438
439 if (should_iosync) {
440 flags |= MEMORY_OBJECT_IO_SYNC;
441 }
442
	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
	    (should_return) ?
	    MEMORY_OBJECT_RETURN_ALL :
	    MEMORY_OBJECT_RETURN_NONE,
	    flags,
	    VM_PROT_NO_CHANGE);
449
450
451 vm_object_paging_end(object);
452 vm_object_unlock(object);
453 return rv;
454}
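
/*
 * Usage sketch (kernel-internal, illustration only; assumes the caller
 * already holds a reference on "object"): push dirty and precious pages
 * in a range out to the pager and wait for the I/O, much as an
 * msync(..., MS_SYNC) on a mapped file would.
 *
 *	boolean_t pages_returned;
 *
 *	pages_returned = vm_object_sync(object,
 *	    offset, size,
 *	    TRUE,	// should_flush: discard cached copies afterwards
 *	    TRUE,	// should_return: write dirty/precious data back
 *	    TRUE);	// should_iosync: wait for the pageout I/O
 */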
455
456
457
458#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync) \
459MACRO_BEGIN \
460 \
461 int upl_flags; \
462 memory_object_t pager; \
463 \
464 if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) { \
465 vm_object_paging_begin(object); \
466 vm_object_unlock(object); \
467 \
468 if (iosync) \
469 upl_flags = UPL_MSYNC | UPL_IOSYNC; \
470 else \
471 upl_flags = UPL_MSYNC; \
472 \
473 (void) memory_object_data_return(pager, \
474 po, \
475 (memory_object_cluster_size_t)data_cnt, \
476 ro, \
477 ioerr, \
478 FALSE, \
479 FALSE, \
480 upl_flags); \
481 \
482 vm_object_lock(object); \
483 vm_object_paging_end(object); \
484 } \
485MACRO_END
486
487extern struct vnode *
488vnode_pager_lookup_vnode(memory_object_t);
489
490static int
491vm_object_update_extent(
492 vm_object_t object,
493 vm_object_offset_t offset,
494 vm_object_offset_t offset_end,
495 vm_object_offset_t *offset_resid,
496 int *io_errno,
497 boolean_t should_flush,
498 memory_object_return_t should_return,
499 boolean_t should_iosync,
500 vm_prot_t prot)
501{
502 vm_page_t m;
503 int retval = 0;
504 vm_object_offset_t paging_offset = 0;
505 vm_object_offset_t next_offset = offset;
506 memory_object_lock_result_t page_lock_result;
507 memory_object_cluster_size_t data_cnt = 0;
508 struct vm_page_delayed_work dw_array;
509 struct vm_page_delayed_work *dwp, *dwp_start;
510 bool dwp_finish_ctx = TRUE;
511 int dw_count;
512 int dw_limit;
513 int dirty_count;
514
515 dwp_start = dwp = NULL;
516 dw_count = 0;
517 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
518 dwp_start = vm_page_delayed_work_get_ctx();
519 if (dwp_start == NULL) {
520 dwp_start = &dw_array;
521 dw_limit = 1;
522 dwp_finish_ctx = FALSE;
523 }
524 dwp = dwp_start;
525
526 dirty_count = 0;
527
528 for (;
529 offset < offset_end && object->resident_page_count;
530 offset += PAGE_SIZE_64) {
531 /*
532 * Limit the number of pages to be cleaned at once to a contiguous
533 * run, or at most MAX_UPL_TRANSFER_BYTES
534 */
535 if (data_cnt) {
536 if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) {
537 if (dw_count) {
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
539 dwp = dwp_start;
540 dw_count = 0;
541 }
542 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
543 paging_offset, offset_resid, io_errno, should_iosync);
544 data_cnt = 0;
545 }
546 }
547 while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
548 dwp->dw_mask = 0;
549
550 page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
551
552 if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
553 /*
554 * End of a run of dirty/precious pages.
555 */
556 if (dw_count) {
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
558 dwp = dwp_start;
559 dw_count = 0;
560 }
561 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
562 paging_offset, offset_resid, io_errno, should_iosync);
563 /*
564 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
565 * allow the state of page 'm' to change... we need to re-lookup
566 * the current offset
567 */
568 data_cnt = 0;
569 continue;
570 }
571
572 switch (page_lock_result) {
573 case MEMORY_OBJECT_LOCK_RESULT_DONE:
574 break;
575
576 case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
577 if (m->vmp_dirty == TRUE) {
578 dirty_count++;
579 }
580 dwp->dw_mask |= DW_vm_page_free;
581 break;
582
583 case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
584 PAGE_SLEEP(object, m, THREAD_UNINT);
585 continue;
586
587 case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
588 if (data_cnt == 0) {
589 paging_offset = offset;
590 }
591
592 data_cnt += PAGE_SIZE;
593 next_offset = offset + PAGE_SIZE_64;
594
595 /*
596 * wired pages shouldn't be flushed and
597 * since they aren't on any queue,
598 * no need to remove them
599 */
600 if (!VM_PAGE_WIRED(m)) {
601 if (should_flush) {
602 /*
603 * add additional state for the flush
604 */
605 m->vmp_free_when_done = TRUE;
606 }
607 /*
				 * we used to remove the page from the queues at this
609 * point, but we do not believe that an msync
610 * should cause the 'age' of a page to be changed
611 *
612 * else
613 * dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
614 */
615 }
616 retval = 1;
617 break;
618 }
619 if (dwp->dw_mask) {
620 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
621
622 if (dw_count >= dw_limit) {
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
624 dwp = dwp_start;
625 dw_count = 0;
626 }
627 }
628 break;
629 }
630 }
631
632 if (object->pager) {
		task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED, vnode_pager_lookup_vnode(object->pager));
634 }
635 /*
636 * We have completed the scan for applicable pages.
637 * Clean any pages that have been saved.
638 */
639 if (dw_count) {
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
641 }
642
643 if (data_cnt) {
644 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
645 paging_offset, offset_resid, io_errno, should_iosync);
646 }
647
648 if (dwp_start && dwp_finish_ctx) {
		vm_page_delayed_work_finish_ctx(dwp_start);
650 dwp_start = dwp = NULL;
651 }
652
653 return retval;
654}
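
/*
 * Note on the batching above (illustration only): pages needing work are
 * queued into the delayed-work array and handed to
 * vm_page_do_delayed_work() when the array fills (dw_limit) or when a
 * contiguous run of pages to be returned ends, so the pageout requests
 * issued through LIST_REQ_PAGEOUT_PAGES cover the largest possible
 * contiguous clusters (up to MAX_UPL_TRANSFER_BYTES).
 */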
655
656
657
658/*
659 * Routine: vm_object_update
660 * Description:
 *	Work function for memory_object_lock_request() and vm_object_sync().
662 *
663 * Called with object locked and paging ref taken.
664 */
665kern_return_t
666vm_object_update(
667 vm_object_t object,
668 vm_object_offset_t offset,
669 vm_object_size_t size,
670 vm_object_offset_t *resid_offset,
671 int *io_errno,
672 memory_object_return_t should_return,
673 int flags,
674 vm_prot_t protection)
675{
676 vm_object_t copy_object = VM_OBJECT_NULL;
677 boolean_t data_returned = FALSE;
678 boolean_t update_cow;
679 boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
680 boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
681 vm_fault_return_t result;
682 int num_of_extents;
683 int n;
684#define MAX_EXTENTS 8
685#define EXTENT_SIZE (1024 * 1024 * 256)
686#define RESIDENT_LIMIT (1024 * 32)
687 struct extent {
688 vm_object_offset_t e_base;
689 vm_object_offset_t e_min;
690 vm_object_offset_t e_max;
691 } extents[MAX_EXTENTS];
692
693 /*
694 * To avoid blocking while scanning for pages, save
695 * dirty pages to be cleaned all at once.
696 *
697 * XXXO A similar strategy could be used to limit the
698 * number of times that a scan must be restarted for
699 * other reasons. Those pages that would require blocking
700 * could be temporarily collected in another list, or
701 * their offsets could be recorded in a small array.
702 */
703
704 /*
705 * XXX NOTE: May want to consider converting this to a page list
706 * XXX vm_map_copy interface. Need to understand object
707 * XXX coalescing implications before doing so.
708 */
709
710 update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
711 && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
712 !(flags & MEMORY_OBJECT_DATA_PURGE)))
713 || (flags & MEMORY_OBJECT_COPY_SYNC);
714
715 if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
716 int collisions = 0;
717
718 while ((copy_object = object->vo_copy) != VM_OBJECT_NULL) {
719 /*
720 * need to do a try here since we're swimming upstream
721 * against the normal lock ordering... however, we need
722 * to hold the object stable until we gain control of the
723 * copy object so we have to be careful how we approach this
724 */
725 if (vm_object_lock_try(copy_object)) {
726 /*
727 * we 'won' the lock on the copy object...
728 * no need to hold the object lock any longer...
729 * take a real reference on the copy object because
730 * we're going to call vm_fault_page on it which may
731 * under certain conditions drop the lock and the paging
732 * reference we're about to take... the reference
733 * will keep the copy object from going away if that happens
734 */
735 vm_object_unlock(object);
736 vm_object_reference_locked(copy_object);
737 break;
738 }
739 vm_object_unlock(object);
740
741 collisions++;
742 mutex_pause(collisions);
743
744 vm_object_lock(object);
745 }
746 }
747 if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
748 vm_object_offset_t i;
749 vm_object_size_t copy_size;
750 vm_object_offset_t copy_offset;
751 vm_prot_t prot;
752 vm_page_t page;
753 vm_page_t top_page;
754 kern_return_t error = 0;
755 struct vm_object_fault_info fault_info = {};
756
757 if (copy_object != VM_OBJECT_NULL) {
758 /*
759 * translate offset with respect to shadow's offset
760 */
761 copy_offset = (offset >= copy_object->vo_shadow_offset) ?
762 (offset - copy_object->vo_shadow_offset) : 0;
763
764 if (copy_offset > copy_object->vo_size) {
765 copy_offset = copy_object->vo_size;
766 }
767
768 /*
769 * clip size with respect to shadow offset
770 */
771 if (offset >= copy_object->vo_shadow_offset) {
772 copy_size = size;
773 } else if (size >= copy_object->vo_shadow_offset - offset) {
774 copy_size = (size - (copy_object->vo_shadow_offset - offset));
775 } else {
776 copy_size = 0;
777 }
778
779 if (copy_offset + copy_size > copy_object->vo_size) {
780 if (copy_object->vo_size >= copy_offset) {
781 copy_size = copy_object->vo_size - copy_offset;
782 } else {
783 copy_size = 0;
784 }
785 }
786 copy_size += copy_offset;
787 } else {
788 copy_object = object;
789
790 copy_size = offset + size;
791 copy_offset = offset;
792 }
793 fault_info.interruptible = THREAD_UNINT;
794 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
795 fault_info.lo_offset = copy_offset;
796 fault_info.hi_offset = copy_size;
797 fault_info.stealth = TRUE;
798 assert(fault_info.cs_bypass == FALSE);
799 assert(fault_info.csm_associated == FALSE);
800
801 vm_object_paging_begin(copy_object);
802
803 for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
804RETRY_COW_OF_LOCK_REQUEST:
805 fault_info.cluster_size = (vm_size_t) (copy_size - i);
806 assert(fault_info.cluster_size == copy_size - i);
807
808 prot = VM_PROT_WRITE | VM_PROT_READ;
809 page = VM_PAGE_NULL;
			result = vm_fault_page(copy_object, i,
			    VM_PROT_WRITE | VM_PROT_READ,
			    FALSE,
			    FALSE, /* page not looked up */
			    &prot,
			    &page,
			    &top_page,
			    (int *)0,
			    &error,
			    FALSE,
			    &fault_info);
821
822 switch (result) {
823 case VM_FAULT_SUCCESS:
824 if (top_page) {
825 vm_fault_cleanup(
826 VM_PAGE_OBJECT(page), top_page);
827 vm_object_lock(copy_object);
828 vm_object_paging_begin(copy_object);
829 }
830 if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
831 vm_page_lockspin_queues();
832
833 if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
834 vm_page_deactivate(page);
835 }
836 vm_page_unlock_queues();
837 }
838 PAGE_WAKEUP_DONE(page);
839 break;
840 case VM_FAULT_RETRY:
841 prot = VM_PROT_WRITE | VM_PROT_READ;
842 vm_object_lock(copy_object);
843 vm_object_paging_begin(copy_object);
844 goto RETRY_COW_OF_LOCK_REQUEST;
845 case VM_FAULT_INTERRUPTED:
846 prot = VM_PROT_WRITE | VM_PROT_READ;
847 vm_object_lock(copy_object);
848 vm_object_paging_begin(copy_object);
849 goto RETRY_COW_OF_LOCK_REQUEST;
850 case VM_FAULT_MEMORY_SHORTAGE:
851 VM_PAGE_WAIT();
852 prot = VM_PROT_WRITE | VM_PROT_READ;
853 vm_object_lock(copy_object);
854 vm_object_paging_begin(copy_object);
855 goto RETRY_COW_OF_LOCK_REQUEST;
856 case VM_FAULT_SUCCESS_NO_VM_PAGE:
857 /* success but no VM page: fail */
858 vm_object_paging_end(copy_object);
859 vm_object_unlock(copy_object);
860 OS_FALLTHROUGH;
861 case VM_FAULT_MEMORY_ERROR:
862 if (object != copy_object) {
					vm_object_deallocate(copy_object);
864 }
865 vm_object_lock(object);
866 goto BYPASS_COW_COPYIN;
867 default:
868 panic("vm_object_update: unexpected error 0x%x"
869 " from vm_fault_page()\n", result);
870 }
871 }
872 vm_object_paging_end(copy_object);
873 }
874 if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
875 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
876 vm_object_unlock(copy_object);
			vm_object_deallocate(copy_object);
878 vm_object_lock(object);
879 }
880 return KERN_SUCCESS;
881 }
882 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
883 if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
884 vm_object_lock_assert_exclusive(copy_object);
			VM_OBJECT_SET_SHADOW_SEVERED(copy_object, TRUE);
			VM_OBJECT_SET_SHADOWED(copy_object, FALSE);
887 copy_object->shadow = NULL;
888 /*
889 * delete the ref the COW was holding on the target object
890 */
891 vm_object_deallocate(object);
892 }
893 vm_object_unlock(copy_object);
		vm_object_deallocate(copy_object);
895 vm_object_lock(object);
896 }
897BYPASS_COW_COPYIN:
898
899 /*
900 * when we have a really large range to check relative
901 * to the number of actual resident pages, we'd like
902 * to use the resident page list to drive our checks
903 * however, the object lock will get dropped while processing
904 * the page which means the resident queue can change which
905 * means we can't walk the queue as we process the pages
906 * we also want to do the processing in offset order to allow
907 * 'runs' of pages to be collected if we're being told to
908 * flush to disk... the resident page queue is NOT ordered.
909 *
910 * a temporary solution (until we figure out how to deal with
911 * large address spaces more generically) is to pre-flight
912 * the resident page queue (if it's small enough) and develop
913 * a collection of extents (that encompass actual resident pages)
914 * to visit. This will at least allow us to deal with some of the
915 * more pathological cases in a more efficient manner. The current
916 * worst case (a single resident page at the end of an extremely large
 * range) can take minutes to complete for ranges in the terabyte
918 * category... since this routine is called when truncating a file,
919 * and we currently support files up to 16 Tbytes in size, this
920 * is not a theoretical problem
921 */
922
923 if ((object->resident_page_count < RESIDENT_LIMIT) &&
924 (atop_64(size) > (unsigned)(object->resident_page_count / (8 * MAX_EXTENTS)))) {
925 vm_page_t next;
926 vm_object_offset_t start;
927 vm_object_offset_t end;
928 vm_object_size_t e_mask;
929 vm_page_t m;
930
931 start = offset;
932 end = offset + size;
933 num_of_extents = 0;
934 e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
935
936 m = (vm_page_t) vm_page_queue_first(&object->memq);
937
938 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t) m)) {
939 next = (vm_page_t) vm_page_queue_next(&m->vmp_listq);
940
941 if ((m->vmp_offset >= start) && (m->vmp_offset < end)) {
942 /*
943 * this is a page we're interested in
944 * try to fit it into a current extent
945 */
946 for (n = 0; n < num_of_extents; n++) {
947 if ((m->vmp_offset & e_mask) == extents[n].e_base) {
948 /*
949 * use (PAGE_SIZE - 1) to determine the
950 * max offset so that we don't wrap if
951 * we're at the last page of the space
952 */
953 if (m->vmp_offset < extents[n].e_min) {
954 extents[n].e_min = m->vmp_offset;
955 } else if ((m->vmp_offset + (PAGE_SIZE - 1)) > extents[n].e_max) {
956 extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1);
957 }
958 break;
959 }
960 }
961 if (n == num_of_extents) {
962 /*
963 * didn't find a current extent that can encompass
964 * this page
965 */
966 if (n < MAX_EXTENTS) {
967 /*
968 * if we still have room,
969 * create a new extent
970 */
971 extents[n].e_base = m->vmp_offset & e_mask;
972 extents[n].e_min = m->vmp_offset;
973 extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1);
974
975 num_of_extents++;
976 } else {
977 /*
978 * no room to create a new extent...
979 * fall back to a single extent based
980 * on the min and max page offsets
981 * we find in the range we're interested in...
982 * first, look through the extent list and
983 * develop the overall min and max for the
984 * pages we've looked at up to this point
985 */
986 for (n = 1; n < num_of_extents; n++) {
987 if (extents[n].e_min < extents[0].e_min) {
988 extents[0].e_min = extents[n].e_min;
989 }
990 if (extents[n].e_max > extents[0].e_max) {
991 extents[0].e_max = extents[n].e_max;
992 }
993 }
994 /*
995 * now setup to run through the remaining pages
996 * to determine the overall min and max
997 * offset for the specified range
998 */
999 extents[0].e_base = 0;
1000 e_mask = 0;
1001 num_of_extents = 1;
1002
1003 /*
1004 * by continuing, we'll reprocess the
1005 * page that forced us to abandon trying
1006 * to develop multiple extents
1007 */
1008 continue;
1009 }
1010 }
1011 }
1012 m = next;
1013 }
1014 } else {
1015 extents[0].e_min = offset;
1016 extents[0].e_max = offset + (size - 1);
1017
1018 num_of_extents = 1;
1019 }
1020 for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
		    should_flush, should_return, should_iosync, protection)) {
1023 data_returned = TRUE;
1024 }
1025 }
1026 return data_returned;
1027}
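
/*
 * Extent bucketing example for the pre-flight pass above (illustration
 * only): with EXTENT_SIZE of 256MB, a resident page at offset 0x2345F000
 * lands in the extent whose e_base is
 * (0x2345F000 & ~(vm_object_size_t)(EXTENT_SIZE - 1)) == 0x20000000;
 * that extent's e_min/e_max then grow to cover only the resident pages
 * seen in that 256MB window, so vm_object_update_extent() never walks
 * the untouched gaps of a sparse, very large object.
 */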
1028
1029
1030static kern_return_t
1031vm_object_set_attributes_common(
1032 vm_object_t object,
1033 boolean_t may_cache,
1034 memory_object_copy_strategy_t copy_strategy)
1035{
1036 boolean_t object_became_ready;
1037
1038 if (object == VM_OBJECT_NULL) {
1039 return KERN_INVALID_ARGUMENT;
1040 }
1041
1042 /*
1043 * Verify the attributes of importance
1044 */
1045
1046 switch (copy_strategy) {
1047 case MEMORY_OBJECT_COPY_NONE:
1048 case MEMORY_OBJECT_COPY_DELAY:
1049 case MEMORY_OBJECT_COPY_DELAY_FORK:
1050 break;
1051 default:
1052 return KERN_INVALID_ARGUMENT;
1053 }
1054
1055 if (may_cache) {
1056 may_cache = TRUE;
1057 }
1058
1059 vm_object_lock(object);
1060
1061 /*
1062 * Copy the attributes
1063 */
1064 assert(!object->internal);
1065 object_became_ready = !object->pager_ready;
1066 object->copy_strategy = copy_strategy;
	VM_OBJECT_SET_CAN_PERSIST(object, may_cache);
1068
1069 /*
1070 * Wake up anyone waiting for the ready attribute
1071 * to become asserted.
1072 */
1073
1074 if (object_became_ready) {
1075 VM_OBJECT_SET_PAGER_READY(object, TRUE);
1076 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1077 }
1078
1079 vm_object_unlock(object);
1080
1081 return KERN_SUCCESS;
1082}
1083
1084
1085/*
1086 * Set the memory object attribute as provided.
1087 *
1088 * XXX This routine cannot be completed until the vm_msync, clean
1089 * in place, and cluster work is completed. See ifdef notyet
1090 * below and note that vm_object_set_attributes_common()
1091 * may have to be expanded.
1092 */
1093kern_return_t
1094memory_object_change_attributes(
1095 memory_object_control_t control,
1096 memory_object_flavor_t flavor,
1097 memory_object_info_t attributes,
1098 mach_msg_type_number_t count)
1099{
1100 vm_object_t object;
1101 kern_return_t result = KERN_SUCCESS;
1102 boolean_t may_cache;
1103 boolean_t invalidate;
1104 memory_object_copy_strategy_t copy_strategy;
1105
1106 object = memory_object_control_to_vm_object(control);
1107 if (object == VM_OBJECT_NULL) {
1108 return KERN_INVALID_ARGUMENT;
1109 }
1110
1111 vm_object_lock(object);
1112
1113 may_cache = object->can_persist;
1114 copy_strategy = object->copy_strategy;
1115#if notyet
1116 invalidate = object->invalidate;
1117#endif
1118 vm_object_unlock(object);
1119
1120 switch (flavor) {
1121 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1122 {
1123 old_memory_object_behave_info_t behave;
1124
1125 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1126 result = KERN_INVALID_ARGUMENT;
1127 break;
1128 }
1129
1130 behave = (old_memory_object_behave_info_t) attributes;
1131
1132 invalidate = behave->invalidate;
1133 copy_strategy = behave->copy_strategy;
1134
1135 break;
1136 }
1137
1138 case MEMORY_OBJECT_BEHAVIOR_INFO:
1139 {
1140 memory_object_behave_info_t behave;
1141
1142 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1143 result = KERN_INVALID_ARGUMENT;
1144 break;
1145 }
1146
1147 behave = (memory_object_behave_info_t) attributes;
1148
1149 invalidate = behave->invalidate;
1150 copy_strategy = behave->copy_strategy;
1151 break;
1152 }
1153
1154 case MEMORY_OBJECT_PERFORMANCE_INFO:
1155 {
1156 memory_object_perf_info_t perf;
1157
1158 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1159 result = KERN_INVALID_ARGUMENT;
1160 break;
1161 }
1162
1163 perf = (memory_object_perf_info_t) attributes;
1164
1165 may_cache = perf->may_cache;
1166
1167 break;
1168 }
1169
1170 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1171 {
1172 old_memory_object_attr_info_t attr;
1173
1174 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1175 result = KERN_INVALID_ARGUMENT;
1176 break;
1177 }
1178
1179 attr = (old_memory_object_attr_info_t) attributes;
1180
1181 may_cache = attr->may_cache;
1182 copy_strategy = attr->copy_strategy;
1183
1184 break;
1185 }
1186
1187 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1188 {
1189 memory_object_attr_info_t attr;
1190
1191 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1192 result = KERN_INVALID_ARGUMENT;
1193 break;
1194 }
1195
1196 attr = (memory_object_attr_info_t) attributes;
1197
1198 copy_strategy = attr->copy_strategy;
1199 may_cache = attr->may_cache_object;
1200
1201 break;
1202 }
1203
1204 default:
1205 result = KERN_INVALID_ARGUMENT;
1206 break;
1207 }
1208
1209 if (result != KERN_SUCCESS) {
1210 return result;
1211 }
1212
1213 if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1214 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1215 }
1216
1217 /*
1218 * XXX may_cache may become a tri-valued variable to handle
1219 * XXX uncache if not in use.
1220 */
1221 return vm_object_set_attributes_common(object,
1222 may_cache,
1223 copy_strategy);
1224}
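
/*
 * Usage sketch (hypothetical, illustration only; the
 * memory_object_perf_info_data_t layout is assumed from
 * <mach/memory_object_types.h>): let the object's pages persist in the
 * cache once all references are gone.
 *
 *	memory_object_perf_info_data_t perf;
 *
 *	perf.cluster_size = PAGE_SIZE;
 *	perf.may_cache = TRUE;
 *	(void) memory_object_change_attributes(control,
 *	    MEMORY_OBJECT_PERFORMANCE_INFO,
 *	    (memory_object_info_t) &perf,
 *	    MEMORY_OBJECT_PERF_INFO_COUNT);
 */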
1225
1226kern_return_t
1227memory_object_iopl_request(
1228 ipc_port_t port,
1229 memory_object_offset_t offset,
1230 upl_size_t *upl_size,
1231 upl_t *upl_ptr,
1232 upl_page_info_array_t user_page_list,
1233 unsigned int *page_list_count,
1234 upl_control_flags_t *flags,
1235 vm_tag_t tag)
1236{
1237 vm_object_t object;
1238 kern_return_t ret;
1239 upl_control_flags_t caller_flags;
1240 vm_named_entry_t named_entry;
1241
1242 caller_flags = *flags;
1243
1244 if (caller_flags & ~UPL_VALID_FLAGS) {
1245 /*
1246 * For forward compatibility's sake,
1247 * reject any unknown flag.
1248 */
1249 return KERN_INVALID_VALUE;
1250 }
1251
1252 named_entry = mach_memory_entry_from_port(port);
1253 if (named_entry != NULL) {
1254 /* a few checks to make sure user is obeying rules */
1255 if (*upl_size == 0) {
1256 if (offset >= named_entry->size) {
1257 return KERN_INVALID_RIGHT;
1258 }
1259 *upl_size = (upl_size_t)(named_entry->size - offset);
1260 if (*upl_size != named_entry->size - offset) {
1261 return KERN_INVALID_ARGUMENT;
1262 }
1263 }
1264 if (caller_flags & UPL_COPYOUT_FROM) {
1265 if ((named_entry->protection & VM_PROT_READ)
1266 != VM_PROT_READ) {
1267 return KERN_INVALID_RIGHT;
1268 }
1269 } else {
1270 if ((named_entry->protection &
1271 (VM_PROT_READ | VM_PROT_WRITE))
1272 != (VM_PROT_READ | VM_PROT_WRITE)) {
1273 return KERN_INVALID_RIGHT;
1274 }
1275 }
1276 if (named_entry->size < (offset + *upl_size)) {
1277 return KERN_INVALID_ARGUMENT;
1278 }
1279
1280 /* the callers parameter offset is defined to be the */
1281 /* offset from beginning of named entry offset in object */
1282 offset = offset + named_entry->offset;
1283 offset += named_entry->data_offset;
1284
1285 if (named_entry->is_sub_map ||
1286 named_entry->is_copy) {
1287 return KERN_INVALID_ARGUMENT;
1288 }
1289 if (!named_entry->is_object) {
1290 return KERN_INVALID_ARGUMENT;
1291 }
1292
1293 named_entry_lock(named_entry);
1294
1295 object = vm_named_entry_to_vm_object(named_entry);
1296 assert(object != VM_OBJECT_NULL);
1297 vm_object_reference(object);
1298 named_entry_unlock(named_entry);
1299 } else {
1300 return KERN_INVALID_ARGUMENT;
1301 }
1302 if (object == VM_OBJECT_NULL) {
1303 return KERN_INVALID_ARGUMENT;
1304 }
1305
1306 if (!object->private) {
1307 if (object->phys_contiguous) {
1308 *flags = UPL_PHYS_CONTIG;
1309 } else {
1310 *flags = 0;
1311 }
1312 } else {
1313 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1314 }
1315
1316 ret = vm_object_iopl_request(object,
1317 offset,
	    *upl_size,
1319 upl_ptr,
1320 user_page_list,
1321 page_list_count,
	    caller_flags,
1323 tag);
1324 vm_object_deallocate(object);
1325 return ret;
1326}
1327
1328/*
1329 * Routine: memory_object_upl_request [interface]
1330 * Purpose:
1331 * Cause the population of a portion of a vm_object.
1332 * Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
1334 *
1335 */
1336
1337kern_return_t
1338memory_object_upl_request(
1339 memory_object_control_t control,
1340 memory_object_offset_t offset,
1341 upl_size_t size,
1342 upl_t *upl_ptr,
1343 upl_page_info_array_t user_page_list,
1344 unsigned int *page_list_count,
1345 int cntrl_flags,
1346 int tag)
1347{
1348 vm_object_t object;
1349 vm_tag_t vmtag = (vm_tag_t)tag;
1350 assert(vmtag == tag);
1351
1352 object = memory_object_control_to_vm_object(control);
1353 if (object == VM_OBJECT_NULL) {
1354 return KERN_TERMINATED;
1355 }
1356
	return vm_object_upl_request(object,
	    offset,
	    size,
	    upl_ptr,
	    user_page_list,
	    page_list_count,
	    (upl_control_flags_t)(unsigned int) cntrl_flags,
	    vmtag);
1365}
1366
1367
1368kern_return_t
1369memory_object_cluster_size(
1370 memory_object_control_t control,
1371 memory_object_offset_t *start,
1372 vm_size_t *length,
1373 uint32_t *io_streaming,
1374 memory_object_fault_info_t mo_fault_info)
1375{
1376 vm_object_t object;
1377 vm_object_fault_info_t fault_info;
1378
1379 object = memory_object_control_to_vm_object(control);
1380
1381 if (object == VM_OBJECT_NULL || object->paging_offset > *start) {
1382 return KERN_INVALID_ARGUMENT;
1383 }
1384
1385 *start -= object->paging_offset;
1386
1387 fault_info = (vm_object_fault_info_t)(uintptr_t) mo_fault_info;
	vm_object_cluster_size(object,
	    (vm_object_offset_t *)start,
	    length,
	    fault_info,
	    io_streaming);
1393
1394 *start += object->paging_offset;
1395
1396 return KERN_SUCCESS;
1397}
1398
1399
1400/*
1401 * Routine: host_default_memory_manager [interface]
1402 * Purpose:
1403 * set/get the default memory manager port and default cluster
1404 * size.
1405 *
1406 * If successful, consumes the supplied naked send right.
1407 */
1408kern_return_t
1409host_default_memory_manager(
1410 host_priv_t host_priv,
1411 memory_object_default_t *default_manager,
1412 __unused memory_object_cluster_size_t cluster_size)
1413{
1414 memory_object_default_t current_manager;
1415 memory_object_default_t new_manager;
1416 memory_object_default_t returned_manager;
1417 kern_return_t result = KERN_SUCCESS;
1418
1419 if (host_priv == HOST_PRIV_NULL) {
1420 return KERN_INVALID_HOST;
1421 }
1422
1423 new_manager = *default_manager;
	lck_mtx_lock(&memory_manager_default_lock);
1425 current_manager = memory_manager_default;
1426 returned_manager = MEMORY_OBJECT_DEFAULT_NULL;
1427
1428 if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1429 /*
1430 * Retrieve the current value.
1431 */
		returned_manager = ipc_port_make_send_mqueue(current_manager);
1433 } else {
1434 /*
1435 * Only allow the kernel to change the value.
1436 */
1437 extern task_t kernel_task;
1438 if (current_task() != kernel_task) {
1439 result = KERN_NO_ACCESS;
1440 goto out;
1441 }
1442
1443 /*
1444 * If this is the first non-null manager, start
1445 * up the internal pager support.
1446 */
1447 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1448 result = vm_pageout_internal_start();
1449 if (result != KERN_SUCCESS) {
1450 goto out;
1451 }
1452 }
1453
1454 /*
1455 * Retrieve the current value,
1456 * and replace it with the supplied value.
1457 * We return the old reference to the caller
1458 * but we have to take a reference on the new
1459 * one.
1460 */
1461 returned_manager = current_manager;
		memory_manager_default = ipc_port_make_send_mqueue(new_manager);
1463
1464 /*
1465 * In case anyone's been waiting for a memory
1466 * manager to be established, wake them up.
1467 */
1468
1469 thread_wakeup((event_t) &memory_manager_default);
1470
1471 /*
1472 * Now that we have a default pager for anonymous memory,
1473 * reactivate all the throttled pages (i.e. dirty pages with
1474 * no pager).
1475 */
1476 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1477 vm_page_reactivate_all_throttled();
1478 }
1479 }
1480out:
	lck_mtx_unlock(&memory_manager_default_lock);
1482
1483 *default_manager = returned_manager;
1484 return result;
1485}
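
/*
 * Usage sketch (hypothetical, illustration only): passing
 * MEMORY_OBJECT_DEFAULT_NULL in *default_manager queries the current
 * setting without changing it; a non-null value (kernel callers only)
 * installs a new default pager and hands back the old one.
 *
 *	memory_object_default_t dmm = MEMORY_OBJECT_DEFAULT_NULL;
 *
 *	if (host_default_memory_manager(host_priv_self(), &dmm, 0) == KERN_SUCCESS) {
 *		// dmm now holds a send right for the current default manager
 *	}
 */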
1486
1487/*
1488 * Routine: memory_manager_default_reference
1489 * Purpose:
1490 * Returns a naked send right for the default
1491 * memory manager. The returned right is always
1492 * valid (not IP_NULL or IP_DEAD).
1493 */
1494
1495__private_extern__ memory_object_default_t
1496memory_manager_default_reference(void)
1497{
1498 memory_object_default_t current_manager;
1499
	lck_mtx_lock(&memory_manager_default_lock);
1501 current_manager = memory_manager_default;
1502 while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1503 wait_result_t res;
1504
		res = lck_mtx_sleep(&memory_manager_default_lock,
		    LCK_SLEEP_DEFAULT,
		    (event_t) &memory_manager_default,
		    THREAD_UNINT);
1509 assert(res == THREAD_AWAKENED);
1510 current_manager = memory_manager_default;
1511 }
	current_manager = ipc_port_make_send_mqueue(current_manager);
	lck_mtx_unlock(&memory_manager_default_lock);
1514
1515 return current_manager;
1516}
1517
1518/*
1519 * Routine: memory_manager_default_check
1520 *
1521 * Purpose:
1522 * Check whether a default memory manager has been set
1523 * up yet, or not. Returns KERN_SUCCESS if dmm exists,
1524 * and KERN_FAILURE if dmm does not exist.
1525 *
1526 * If there is no default memory manager, log an error,
1527 * but only the first time.
1528 *
1529 */
1530__private_extern__ kern_return_t
1531memory_manager_default_check(void)
1532{
1533 memory_object_default_t current;
1534
	lck_mtx_lock(&memory_manager_default_lock);
1536 current = memory_manager_default;
1537 if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1538 static boolean_t logged; /* initialized to 0 */
1539 boolean_t complain = !logged;
1540 logged = TRUE;
		lck_mtx_unlock(&memory_manager_default_lock);
		if (complain) {
			printf("Warning: No default memory manager\n");
1544 }
1545 return KERN_FAILURE;
1546 } else {
		lck_mtx_unlock(&memory_manager_default_lock);
1548 return KERN_SUCCESS;
1549 }
1550}
1551
1552/* Allow manipulation of individual page state. This is actually part of */
1553/* the UPL regimen but takes place on the object rather than on a UPL */
1554
1555kern_return_t
1556memory_object_page_op(
1557 memory_object_control_t control,
1558 memory_object_offset_t offset,
1559 int ops,
1560 ppnum_t *phys_entry,
1561 int *flags)
1562{
1563 vm_object_t object;
1564
1565 object = memory_object_control_to_vm_object(control);
1566 if (object == VM_OBJECT_NULL) {
1567 return KERN_INVALID_ARGUMENT;
1568 }
1569
1570 return vm_object_page_op(object, offset, ops, phys_entry, flags);
1571}
1572
1573/*
1574 * memory_object_range_op offers performance enhancement over
1575 * memory_object_page_op for page_op functions which do not require page
1576 * level state to be returned from the call. Page_op was created to provide
1577 * a low-cost alternative to page manipulation via UPLs when only a single
1578 * page was involved. The range_op call establishes the ability in the _op
1579 * family of functions to work on multiple pages where the lack of page level
1580 * state handling allows the caller to avoid the overhead of the upl structures.
1581 */
1582
1583kern_return_t
1584memory_object_range_op(
1585 memory_object_control_t control,
1586 memory_object_offset_t offset_beg,
1587 memory_object_offset_t offset_end,
1588 int ops,
1589 int *range)
1590{
1591 vm_object_t object;
1592
1593 object = memory_object_control_to_vm_object(control);
1594 if (object == VM_OBJECT_NULL) {
1595 return KERN_INVALID_ARGUMENT;
1596 }
1597
1598 return vm_object_range_op(object,
1599 offset_beg,
1600 offset_end,
1601 ops,
	    (uint32_t *) range);
1603}
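
/*
 * Usage sketch (hypothetical, illustration only; UPL_ROP_DUMP is assumed
 * from the UPL definitions in <mach/memory_object_types.h>): discard all
 * resident pages in a range with a single call instead of one
 * memory_object_page_op() per page.
 *
 *	int range = 0;
 *
 *	(void) memory_object_range_op(control,
 *	    offset_beg,
 *	    offset_end,
 *	    UPL_ROP_DUMP,
 *	    &range);
 */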
1604
1605
1606void
1607memory_object_mark_used(
1608 memory_object_control_t control)
1609{
1610 vm_object_t object;
1611
1612 if (control == NULL) {
1613 return;
1614 }
1615
1616 object = memory_object_control_to_vm_object(control);
1617
1618 if (object != VM_OBJECT_NULL) {
1619 vm_object_cache_remove(object);
1620 }
1621}
1622
1623
1624void
1625memory_object_mark_unused(
1626 memory_object_control_t control,
1627 __unused boolean_t rage)
1628{
1629 vm_object_t object;
1630
1631 if (control == NULL) {
1632 return;
1633 }
1634
1635 object = memory_object_control_to_vm_object(control);
1636
1637 if (object != VM_OBJECT_NULL) {
1638 vm_object_cache_add(object);
1639 }
1640}
1641
1642void
1643memory_object_mark_io_tracking(
1644 memory_object_control_t control)
1645{
1646 vm_object_t object;
1647
1648 if (control == NULL) {
1649 return;
1650 }
1651 object = memory_object_control_to_vm_object(control);
1652
1653 if (object != VM_OBJECT_NULL) {
1654 vm_object_lock(object);
1655 object->io_tracking = TRUE;
1656 vm_object_unlock(object);
1657 }
1658}
1659
1660void
1661memory_object_mark_trusted(
1662 memory_object_control_t control)
1663{
1664 vm_object_t object;
1665
1666 if (control == NULL) {
1667 return;
1668 }
1669 object = memory_object_control_to_vm_object(control);
1670
1671 if (object != VM_OBJECT_NULL) {
1672 vm_object_lock(object);
1673 VM_OBJECT_SET_PAGER_TRUSTED(object, TRUE);
1674 vm_object_unlock(object);
1675 }
1676}
1677
1678#if FBDP_DEBUG_OBJECT_NO_PAGER
1679kern_return_t
1680memory_object_mark_as_tracked(
1681 memory_object_control_t control,
1682 bool new_value,
1683 bool *old_value)
1684{
1685 vm_object_t object;
1686
1687 if (control == NULL) {
1688 return KERN_INVALID_ARGUMENT;
1689 }
1690 object = memory_object_control_to_vm_object(control);
1691
1692 if (object == VM_OBJECT_NULL) {
1693 return KERN_FAILURE;
1694 }
1695
1696 vm_object_lock(object);
1697 *old_value = object->fbdp_tracked;
1698 VM_OBJECT_SET_FBDP_TRACKED(object, new_value);
1699 vm_object_unlock(object);
1700
1701 return KERN_SUCCESS;
1702}
1703#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */
1704
1705#if CONFIG_SECLUDED_MEMORY
1706void
1707memory_object_mark_eligible_for_secluded(
1708 memory_object_control_t control,
1709 boolean_t eligible_for_secluded)
1710{
1711 vm_object_t object;
1712
1713 if (control == NULL) {
1714 return;
1715 }
1716 object = memory_object_control_to_vm_object(control);
1717
1718 if (object == VM_OBJECT_NULL) {
1719 return;
1720 }
1721
1722 vm_object_lock(object);
1723 if (eligible_for_secluded &&
1724 secluded_for_filecache && /* global boot-arg */
1725 !object->eligible_for_secluded) {
1726 object->eligible_for_secluded = TRUE;
1727 vm_page_secluded.eligible_for_secluded += object->resident_page_count;
1728 } else if (!eligible_for_secluded &&
1729 object->eligible_for_secluded) {
1730 object->eligible_for_secluded = FALSE;
1731 vm_page_secluded.eligible_for_secluded -= object->resident_page_count;
1732 if (object->resident_page_count) {
1733 /* XXX FBDP TODO: flush pages from secluded queue? */
1734 // printf("FBDP TODO: flush %d pages from %p from secluded queue\n", object->resident_page_count, object);
1735 }
1736 }
1737 vm_object_unlock(object);
1738}
1739#endif /* CONFIG_SECLUDED_MEMORY */
1740
1741void
1742memory_object_mark_for_realtime(
1743 memory_object_control_t control,
1744 bool for_realtime)
1745{
1746 vm_object_t object;
1747
1748 if (control == NULL) {
1749 return;
1750 }
1751 object = memory_object_control_to_vm_object(control);
1752
1753 if (object == VM_OBJECT_NULL) {
1754 return;
1755 }
1756
1757 vm_object_lock(object);
	VM_OBJECT_SET_FOR_REALTIME(object, for_realtime);
1759 vm_object_unlock(object);
1760}
1761
1762kern_return_t
1763memory_object_pages_resident(
1764 memory_object_control_t control,
1765 boolean_t * has_pages_resident)
1766{
1767 vm_object_t object;
1768
1769 *has_pages_resident = FALSE;
1770
1771 object = memory_object_control_to_vm_object(control);
1772 if (object == VM_OBJECT_NULL) {
1773 return KERN_INVALID_ARGUMENT;
1774 }
1775
1776 if (object->resident_page_count) {
1777 *has_pages_resident = TRUE;
1778 }
1779
1780 return KERN_SUCCESS;
1781}
1782
1783kern_return_t
1784memory_object_signed(
1785 memory_object_control_t control,
1786 boolean_t is_signed)
1787{
1788 vm_object_t object;
1789
1790 object = memory_object_control_to_vm_object(control);
1791 if (object == VM_OBJECT_NULL) {
1792 return KERN_INVALID_ARGUMENT;
1793 }
1794
1795 vm_object_lock(object);
1796 object->code_signed = is_signed;
1797 vm_object_unlock(object);
1798
1799 return KERN_SUCCESS;
1800}
1801
1802boolean_t
1803memory_object_is_signed(
1804 memory_object_control_t control)
1805{
1806 boolean_t is_signed;
1807 vm_object_t object;
1808
1809 object = memory_object_control_to_vm_object(control);
1810 if (object == VM_OBJECT_NULL) {
1811 return FALSE;
1812 }
1813
1814 vm_object_lock_shared(object);
1815 is_signed = object->code_signed;
1816 vm_object_unlock(object);
1817
1818 return is_signed;
1819}
1820
1821boolean_t
1822memory_object_is_shared_cache(
1823 memory_object_control_t control)
1824{
1825 vm_object_t object = VM_OBJECT_NULL;
1826
1827 object = memory_object_control_to_vm_object(control);
1828 if (object == VM_OBJECT_NULL) {
1829 return FALSE;
1830 }
1831
1832 return object->object_is_shared_cache;
1833}
1834
1835__private_extern__ memory_object_control_t
1836memory_object_control_allocate(
1837 vm_object_t object)
1838{
1839 return object;
1840}
1841
1842__private_extern__ void
1843memory_object_control_collapse(
1844 memory_object_control_t *control,
1845 vm_object_t object)
1846{
1847 *control = object;
1848}
1849
1850__private_extern__ vm_object_t
1851memory_object_control_to_vm_object(
1852 memory_object_control_t control)
1853{
1854 return control;
1855}
1856
1857__private_extern__ vm_object_t
1858memory_object_to_vm_object(
1859 memory_object_t mem_obj)
1860{
1861 memory_object_control_t mo_control;
1862
1863 if (mem_obj == MEMORY_OBJECT_NULL) {
1864 return VM_OBJECT_NULL;
1865 }
1866 mo_control = mem_obj->mo_control;
1867 if (mo_control == NULL) {
1868 return VM_OBJECT_NULL;
1869 }
	return memory_object_control_to_vm_object(mo_control);
1871}
1872
1873void
1874memory_object_control_reference(
1875 __unused memory_object_control_t control)
1876{
1877 return;
1878}
1879
1880/*
 * We only ever issue one of these references, so kill it
1882 * when that gets released (should switch the real reference
1883 * counting in true port-less EMMI).
1884 */
1885void
1886memory_object_control_deallocate(
1887 __unused memory_object_control_t control)
1888{
1889}
1890
1891void
1892memory_object_control_disable(
1893 memory_object_control_t *control)
1894{
1895 assert(*control != VM_OBJECT_NULL);
1896 *control = VM_OBJECT_NULL;
1897}
1898
1899memory_object_t
1900convert_port_to_memory_object(
1901 __unused mach_port_t port)
1902{
1903 return MEMORY_OBJECT_NULL;
1904}
1905
1906
1907mach_port_t
1908convert_memory_object_to_port(
1909 __unused memory_object_t object)
1910{
1911 return MACH_PORT_NULL;
1912}
1913
1914
1915/* Routine memory_object_reference */
1916void
1917memory_object_reference(
1918 memory_object_t memory_object)
1919{
1920 (memory_object->mo_pager_ops->memory_object_reference)(
1921 memory_object);
1922}
1923
1924/* Routine memory_object_deallocate */
1925void
1926memory_object_deallocate(
1927 memory_object_t memory_object)
1928{
1929 (memory_object->mo_pager_ops->memory_object_deallocate)(
1930 memory_object);
1931}
1932
1933
1934/* Routine memory_object_init */
1935kern_return_t
1936memory_object_init
1937(
1938 memory_object_t memory_object,
1939 memory_object_control_t memory_control,
1940 memory_object_cluster_size_t memory_object_page_size
1941)
1942{
1943 return (memory_object->mo_pager_ops->memory_object_init)(
1944 memory_object,
1945 memory_control,
1946 memory_object_page_size);
1947}
1948
1949/* Routine memory_object_terminate */
1950kern_return_t
1951memory_object_terminate
1952(
1953 memory_object_t memory_object
1954)
1955{
1956 return (memory_object->mo_pager_ops->memory_object_terminate)(
1957 memory_object);
1958}
1959
1960/* Routine memory_object_data_request */
1961kern_return_t
1962memory_object_data_request
1963(
1964 memory_object_t memory_object,
1965 memory_object_offset_t offset,
1966 memory_object_cluster_size_t length,
1967 vm_prot_t desired_access,
1968 memory_object_fault_info_t fault_info
1969)
1970{
1971 return (memory_object->mo_pager_ops->memory_object_data_request)(
1972 memory_object,
1973 offset,
1974 length,
1975 desired_access,
1976 fault_info);
1977}
1978
1979/* Routine memory_object_data_return */
1980kern_return_t
1981memory_object_data_return
1982(
1983 memory_object_t memory_object,
1984 memory_object_offset_t offset,
1985 memory_object_cluster_size_t size,
1986 memory_object_offset_t *resid_offset,
1987 int *io_error,
1988 boolean_t dirty,
1989 boolean_t kernel_copy,
1990 int upl_flags
1991)
1992{
1993 return (memory_object->mo_pager_ops->memory_object_data_return)(
1994 memory_object,
1995 offset,
1996 size,
1997 resid_offset,
1998 io_error,
1999 dirty,
2000 kernel_copy,
2001 upl_flags);
2002}
2003
2004/* Routine memory_object_data_initialize */
2005kern_return_t
2006memory_object_data_initialize
2007(
2008 memory_object_t memory_object,
2009 memory_object_offset_t offset,
2010 memory_object_cluster_size_t size
2011)
2012{
2013 return (memory_object->mo_pager_ops->memory_object_data_initialize)(
2014 memory_object,
2015 offset,
2016 size);
2017}
2018
2019/*
2020 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
2021 * each time a "named" VM object gets mapped directly or indirectly
2022 * (copy-on-write mapping). A "named" VM object has an extra reference held
2023 * by the pager to keep it alive until the pager decides that the
2024 * memory object (and its VM object) can be reclaimed.
2025 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
2026 * the mappings of that memory object have been removed.
2027 *
2028 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
2029 * are serialized (through object->mapping_in_progress), to ensure that the
2030 * pager gets a consistent view of the mapping status of the memory object.
2031 *
2032 * This allows the pager to keep track of how many times a memory object
2033 * has been mapped and with which protections, to decide when it can be
2034 * reclaimed.
2035 */
2036
2037/* Routine memory_object_map */
2038kern_return_t
2039memory_object_map
2040(
2041 memory_object_t memory_object,
2042 vm_prot_t prot
2043)
2044{
2045 return (memory_object->mo_pager_ops->memory_object_map)(
2046 memory_object,
2047 prot);
2048}
2049
2050/* Routine memory_object_last_unmap */
2051kern_return_t
2052memory_object_last_unmap
2053(
2054 memory_object_t memory_object
2055)
2056{
2057 return (memory_object->mo_pager_ops->memory_object_last_unmap)(
2058 memory_object);
2059}
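
/*
 * Pager-side sketch (hypothetical, illustration only): a pager's
 * memory_object_map / memory_object_last_unmap handlers typically just
 * track whether the memory object is currently mapped, e.g.
 *
 *	kern_return_t
 *	example_pager_map(memory_object_t mem_obj, __unused vm_prot_t prot)
 *	{
 *		// example_pager_lookup() is a hypothetical helper
 *		example_pager_t pager = example_pager_lookup(mem_obj);
 *		pager->is_mapped = TRUE;
 *		return KERN_SUCCESS;
 *	}
 *
 * and drop the pager's extra "named" reference once the last mapping
 * disappears, in the matching memory_object_last_unmap handler.
 */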
2060
2061boolean_t
2062memory_object_backing_object
2063(
2064 memory_object_t memory_object,
2065 memory_object_offset_t offset,
2066 vm_object_t *backing_object,
2067 vm_object_offset_t *backing_offset)
2068{
2069 if (memory_object->mo_pager_ops->memory_object_backing_object == NULL) {
2070 return FALSE;
2071 }
2072 return (memory_object->mo_pager_ops->memory_object_backing_object)(
2073 memory_object,
2074 offset,
2075 backing_object,
2076 backing_offset);
2077}
2078
2079upl_t
2080convert_port_to_upl(
2081 __unused ipc_port_t port)
2082{
2083 return NULL;
2084}
2085
2086mach_port_t
2087convert_upl_to_port(
2088 __unused upl_t upl)
2089{
2090 return MACH_PORT_NULL;
2091}
2092