| 1 | /* |
| 2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * @OSF_COPYRIGHT@ |
| 30 | */ |
| 31 | /* |
| 32 | * Mach Operating System |
| 33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University |
| 34 | * All Rights Reserved. |
| 35 | * |
| 36 | * Permission to use, copy, modify and distribute this software and its |
| 37 | * documentation is hereby granted, provided that both the copyright |
| 38 | * notice and this permission notice appear in all copies of the |
| 39 | * software, derivative works or modified versions, and any portions |
| 40 | * thereof, and that both notices appear in supporting documentation. |
| 41 | * |
| 42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
| 44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 45 | * |
| 46 | * Carnegie Mellon requests users of this software to return to |
| 47 | * |
| 48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 49 | * School of Computer Science |
| 50 | * Carnegie Mellon University |
| 51 | * Pittsburgh PA 15213-3890 |
| 52 | * |
| 53 | * any improvements or extensions that they make and grant Carnegie Mellon |
| 54 | * the rights to redistribute these changes. |
| 55 | */ |
| 56 | /* |
| 57 | */ |
| 58 | /* |
| 59 | * File: vm/memory_object.c |
| 60 | * Author: Michael Wayne Young |
| 61 | * |
| 62 | * External memory management interface control functions. |
| 63 | */ |
| 64 | |
| 65 | /* |
| 66 | * Interface dependencies: |
| 67 | */ |
| 68 | |
| 69 | #include <mach/std_types.h> /* For pointer_t */ |
| 70 | #include <mach/mach_types.h> |
| 71 | |
| 72 | #include <mach/mig.h> |
| 73 | #include <mach/kern_return.h> |
| 74 | #include <mach/memory_object.h> |
| 75 | #include <mach/memory_object_control.h> |
| 76 | #include <mach/host_priv_server.h> |
| 77 | #include <mach/boolean.h> |
| 78 | #include <mach/vm_prot.h> |
| 79 | #include <mach/message.h> |
| 80 | |
| 81 | /* |
| 82 | * Implementation dependencies: |
| 83 | */ |
| 84 | #include <string.h> /* For memcpy() */ |
| 85 | |
| 86 | #include <kern/host.h> |
| 87 | #include <kern/thread.h> /* For current_thread() */ |
| 88 | #include <kern/ipc_mig.h> |
| 89 | #include <kern/misc_protos.h> |
| 90 | |
| 91 | #include <vm/vm_object.h> |
| 92 | #include <vm/vm_fault.h> |
| 93 | #include <vm/memory_object.h> |
| 94 | #include <vm/vm_page.h> |
| 95 | #include <vm/vm_pageout.h> |
| 96 | #include <vm/pmap.h> /* For pmap_clear_modify */ |
| 97 | #include <vm/vm_kern.h> /* For kernel_map, vm_move */ |
| 98 | #include <vm/vm_map.h> /* For vm_map_pageable */ |
| 99 | #include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */ |
| 100 | #include <vm/vm_shared_region.h> |
| 101 | |
| 102 | #include <vm/vm_external.h> |
| 103 | |
| 104 | #include <vm/vm_protos.h> |
| 105 | |
| 106 | memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL; |
| 107 | LCK_MTX_DECLARE(memory_manager_default_lock, &vm_object_lck_grp); |
| 108 | |
| 109 | |
| 110 | /* |
| 111 | * Routine: memory_object_should_return_page |
| 112 | * |
| 113 | * Description: |
| 114 | * Determine whether the given page should be returned, |
| 115 | * based on the page's state and on the given return policy. |
| 116 | * |
| 117 | * We should return the page if one of the following is true: |
| 118 | * |
| 119 | * 1. Page is dirty and should_return is not RETURN_NONE. |
| 120 | * 2. Page is precious and should_return is RETURN_ALL. |
| 121 | * 3. Should_return is RETURN_ANYTHING. |
| 122 | * |
| 123 | * As a side effect, m->vmp_dirty will be made consistent |
| 124 | * with pmap_is_modified(m), if should_return is not |
| 125 | * MEMORY_OBJECT_RETURN_NONE. |
| 126 | */ |
| 127 | |
| 128 | #define memory_object_should_return_page(m, should_return) \ |
| 129 | (should_return != MEMORY_OBJECT_RETURN_NONE && \ |
| 130 | (((m)->vmp_dirty || ((m)->vmp_dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))) || \ |
| 131 | ((m)->vmp_precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \ |
| 132 | (should_return) == MEMORY_OBJECT_RETURN_ANYTHING)) |
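
/*
 * For readability, the macro above behaves like the following illustrative
 * sketch (a hedged equivalent, not compiled in; the name
 * memory_object_should_return_page_fn is hypothetical).  The macro form is
 * kept so the short-circuit evaluation and the vmp_dirty side effect stay
 * visible at the use sites:
 *
 *	static inline boolean_t
 *	memory_object_should_return_page_fn(vm_page_t m, memory_object_return_t should_return)
 *	{
 *		if (should_return == MEMORY_OBJECT_RETURN_NONE)
 *			return FALSE;
 *		if (m->vmp_dirty || (m->vmp_dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))))
 *			return TRUE;
 *		if (m->vmp_precious && should_return == MEMORY_OBJECT_RETURN_ALL)
 *			return TRUE;
 *		return (should_return == MEMORY_OBJECT_RETURN_ANYTHING);
 *	}
 */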
| 133 | |
| 134 | typedef int memory_object_lock_result_t; |
| 135 | |
| 136 | #define MEMORY_OBJECT_LOCK_RESULT_DONE 0 |
| 137 | #define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1 |
| 138 | #define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 2 |
| 139 | #define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE 3 |
| 140 | |
| 141 | memory_object_lock_result_t memory_object_lock_page( |
| 142 | vm_page_t m, |
| 143 | memory_object_return_t should_return, |
| 144 | boolean_t should_flush, |
| 145 | vm_prot_t prot); |
| 146 | |
| 147 | /* |
| 148 | * Routine: memory_object_lock_page |
| 149 | * |
| 150 | * Description: |
| 151 | * Perform the appropriate lock operations on the |
| 152 | * given page. See the description of |
| 153 | * "memory_object_lock_request" for the meanings |
| 154 | * of the arguments. |
| 155 | * |
| 156 | * Returns an indication that the operation |
| 157 | * completed, blocked, or that the page must |
| 158 | * be cleaned. |
| 159 | */ |
| 160 | memory_object_lock_result_t |
| 161 | memory_object_lock_page( |
| 162 | vm_page_t m, |
| 163 | memory_object_return_t should_return, |
| 164 | boolean_t should_flush, |
| 165 | vm_prot_t prot) |
| 166 | { |
| 167 | if (prot == VM_PROT_NO_CHANGE_LEGACY) { |
| 168 | prot = VM_PROT_NO_CHANGE; |
| 169 | } |
| 170 | |
| 171 | if (m->vmp_busy || m->vmp_cleaning) { |
| 172 | return MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK; |
| 173 | } |
| 174 | |
| 175 | if (m->vmp_laundry) { |
		vm_pageout_steal_laundry(m, FALSE);
| 177 | } |
| 178 | |
| 179 | /* |
| 180 | * Don't worry about pages for which the kernel |
| 181 | * does not have any data. |
| 182 | */ |
| 183 | if (m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_restart) { |
| 184 | if (VMP_ERROR_GET(m) && should_flush && !VM_PAGE_WIRED(m)) { |
| 185 | /* |
| 186 | * dump the page, pager wants us to |
| 187 | * clean it up and there is no |
| 188 | * relevant data to return |
| 189 | */ |
| 190 | return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE; |
| 191 | } |
| 192 | return MEMORY_OBJECT_LOCK_RESULT_DONE; |
| 193 | } |
| 194 | assert(!m->vmp_fictitious); |
| 195 | |
| 196 | if (VM_PAGE_WIRED(m)) { |
| 197 | /* |
| 198 | * The page is wired... just clean or return the page if needed. |
| 199 | * Wired pages don't get flushed or disconnected from the pmap. |
| 200 | */ |
| 201 | if (memory_object_should_return_page(m, should_return)) { |
| 202 | return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN; |
| 203 | } |
| 204 | |
| 205 | return MEMORY_OBJECT_LOCK_RESULT_DONE; |
| 206 | } |
| 207 | |
| 208 | if (should_flush) { |
| 209 | /* |
| 210 | * must do the pmap_disconnect before determining the |
| 211 | * need to return the page... otherwise it's possible |
| 212 | * for the page to go from the clean to the dirty state |
| 213 | * after we've made our decision |
| 214 | */ |
		if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
| 216 | SET_PAGE_DIRTY(m, FALSE); |
| 217 | } |
| 218 | } else { |
| 219 | /* |
| 220 | * If we are decreasing permission, do it now; |
| 221 | * let the fault handler take care of increases |
| 222 | * (pmap_page_protect may not increase protection). |
| 223 | */ |
| 224 | if (prot != VM_PROT_NO_CHANGE) { |
			pmap_page_protect(VM_PAGE_GET_PHYS_PAGE(m), VM_PROT_ALL & ~prot);
| 226 | } |
| 227 | } |
| 228 | /* |
| 229 | * Handle returning dirty or precious pages |
| 230 | */ |
| 231 | if (memory_object_should_return_page(m, should_return)) { |
| 232 | /* |
	 * we used to do a pmap_disconnect here in support
| 234 | * of memory_object_lock_request, but that routine |
| 235 | * no longer requires this... in any event, in |
| 236 | * our world, it would turn into a big noop since |
| 237 | * we don't lock the page in any way and as soon |
| 238 | * as we drop the object lock, the page can be |
| 239 | * faulted back into an address space |
| 240 | * |
| 241 | * if (!should_flush) |
| 242 | * pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)); |
| 243 | */ |
| 244 | return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN; |
| 245 | } |
| 246 | |
| 247 | /* |
| 248 | * Handle flushing clean pages |
| 249 | */ |
| 250 | if (should_flush) { |
| 251 | return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE; |
| 252 | } |
| 253 | |
| 254 | /* |
	 * we used to deactivate clean pages at this point,
| 256 | * but we do not believe that an msync should change |
| 257 | * the 'age' of a page in the cache... here is the |
| 258 | * original comment and code concerning this... |
| 259 | * |
| 260 | * XXX Make clean but not flush a paging hint, |
| 261 | * and deactivate the pages. This is a hack |
| 262 | * because it overloads flush/clean with |
| 263 | * implementation-dependent meaning. This only |
| 264 | * happens to pages that are already clean. |
| 265 | * |
| 266 | * if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE)) |
| 267 | * return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE); |
| 268 | */ |
| 269 | |
| 270 | return MEMORY_OBJECT_LOCK_RESULT_DONE; |
| 271 | } |
| 272 | |
| 273 | |
| 274 | |
| 275 | /* |
| 276 | * Routine: memory_object_lock_request [user interface] |
| 277 | * |
| 278 | * Description: |
| 279 | * Control use of the data associated with the given |
| 280 | * memory object. For each page in the given range, |
| 281 | * perform the following operations, in order: |
| 282 | * 1) restrict access to the page (disallow |
| 283 | * forms specified by "prot"); |
| 284 | * 2) return data to the manager (if "should_return" |
| 285 | * is RETURN_DIRTY and the page is dirty, or |
| 286 | * "should_return" is RETURN_ALL and the page |
| 287 | * is either dirty or precious); and, |
| 288 | * 3) flush the cached copy (if "should_flush" |
| 289 | * is asserted). |
| 290 | * The set of pages is defined by a starting offset |
| 291 | * ("offset") and size ("size"). Only pages with the |
| 292 | * same page alignment as the starting offset are |
| 293 | * considered. |
| 294 | * |
| 295 | * A single acknowledgement is sent (to the "reply_to" |
| 296 | * port) when these actions are complete. If successful, |
| 297 | * the naked send right for reply_to is consumed. |
| 298 | */ |
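
/*
 * Illustrative call (a hedged sketch, not taken verbatim from any caller):
 * an msync-style path holding the pager control port might push the dirty
 * pages in a range back to the manager without flushing the cached copies
 * or changing protection.  The names "control", "start" and "length" are
 * assumed to come from the caller's context.
 *
 *	memory_object_offset_t	resid_off = 0;
 *	int			io_errno = 0;
 *	kern_return_t		kr;
 *
 *	kr = memory_object_lock_request(control,
 *	    start,
 *	    length,
 *	    &resid_off,
 *	    &io_errno,
 *	    MEMORY_OBJECT_RETURN_DIRTY,
 *	    MEMORY_OBJECT_DATA_SYNC,
 *	    VM_PROT_NO_CHANGE);
 */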
| 299 | |
| 300 | kern_return_t |
| 301 | memory_object_lock_request( |
| 302 | memory_object_control_t control, |
| 303 | memory_object_offset_t offset, |
| 304 | memory_object_size_t size, |
| 305 | memory_object_offset_t * resid_offset, |
| 306 | int * io_errno, |
| 307 | memory_object_return_t should_return, |
| 308 | int flags, |
| 309 | vm_prot_t prot) |
| 310 | { |
| 311 | vm_object_t object; |
| 312 | |
| 313 | if (prot == VM_PROT_NO_CHANGE_LEGACY) { |
| 314 | prot = VM_PROT_NO_CHANGE; |
| 315 | } |
| 316 | |
| 317 | /* |
| 318 | * Check for bogus arguments. |
| 319 | */ |
| 320 | object = memory_object_control_to_vm_object(control); |
| 321 | if (object == VM_OBJECT_NULL) { |
| 322 | return KERN_INVALID_ARGUMENT; |
| 323 | } |
| 324 | |
| 325 | if ((prot & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) != 0 && prot != VM_PROT_NO_CHANGE) { |
| 326 | return KERN_INVALID_ARGUMENT; |
| 327 | } |
| 328 | |
	size = round_page_64(size);
| 330 | |
| 331 | /* |
| 332 | * Lock the object, and acquire a paging reference to |
| 333 | * prevent the memory_object reference from being released. |
| 334 | */ |
| 335 | vm_object_lock(object); |
| 336 | vm_object_paging_begin(object); |
| 337 | |
| 338 | if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) { |
| 339 | if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->vo_copy) { |
| 340 | flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL; |
| 341 | flags |= MEMORY_OBJECT_DATA_FLUSH; |
| 342 | } |
| 343 | } |
| 344 | offset -= object->paging_offset; |
| 345 | |
| 346 | if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) { |
| 347 | vm_object_reap_pages(object, REAP_DATA_FLUSH); |
| 348 | } else { |
		(void)vm_object_update(object, offset, size, resid_offset,
		    io_errno, should_return, flags, prot);
| 351 | } |
| 352 | |
| 353 | vm_object_paging_end(object); |
| 354 | vm_object_unlock(object); |
| 355 | |
| 356 | return KERN_SUCCESS; |
| 357 | } |
| 358 | |
| 359 | /* |
| 360 | * Routine: memory_object_destroy [user interface] |
| 361 | * Purpose: |
| 362 | * Shut down a memory object, despite the |
| 363 | * presence of address map (or other) references |
| 364 | * to the vm_object. |
| 365 | */ |
| 366 | kern_return_t |
| 367 | memory_object_destroy( |
| 368 | memory_object_control_t control, |
| 369 | vm_object_destroy_reason_t reason) |
| 370 | { |
| 371 | vm_object_t object; |
| 372 | |
| 373 | object = memory_object_control_to_vm_object(control); |
| 374 | if (object == VM_OBJECT_NULL) { |
| 375 | return KERN_INVALID_ARGUMENT; |
| 376 | } |
| 377 | |
| 378 | return vm_object_destroy(object, reason); |
| 379 | } |
| 380 | |
| 381 | /* |
| 382 | * Routine: vm_object_sync |
| 383 | * |
| 384 | * Kernel internal function to synch out pages in a given |
| 385 | * range within an object to its memory manager. Much the |
| 386 | * same as memory_object_lock_request but page protection |
| 387 | * is not changed. |
| 388 | * |
 * If the should_flush and should_return flags are true, pages
 * are flushed; that is, dirty and precious pages are written to
 * the memory manager and then discarded. If should_return
| 392 | * is false, only precious pages are returned to the memory |
| 393 | * manager. |
| 394 | * |
 * If should_flush is false and should_return is true, the memory
 * manager's copy of the pages is updated. If should_return
| 397 | * is also false, only the precious pages are updated. This |
| 398 | * last option is of limited utility. |
| 399 | * |
| 400 | * Returns: |
| 401 | * FALSE if no pages were returned to the pager |
| 402 | * TRUE otherwise. |
| 403 | */ |
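
/*
 * Illustrative call (a hedged sketch; "object", "offset" and "length" are
 * assumed to come from the caller): push dirty and precious pages in the
 * range to the pager and wait for the I/O, while leaving the pages resident
 * and their protection unchanged.  The three booleans are, in order,
 * should_flush = FALSE, should_return = TRUE, should_iosync = TRUE:
 *
 *	(void) vm_object_sync(object, offset, length, FALSE, TRUE, TRUE);
 */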
| 404 | |
| 405 | boolean_t |
| 406 | vm_object_sync( |
| 407 | vm_object_t object, |
| 408 | vm_object_offset_t offset, |
| 409 | vm_object_size_t size, |
| 410 | boolean_t should_flush, |
| 411 | boolean_t should_return, |
| 412 | boolean_t should_iosync) |
| 413 | { |
| 414 | boolean_t rv; |
| 415 | int flags; |
| 416 | |
| 417 | /* |
| 418 | * Lock the object, and acquire a paging reference to |
| 419 | * prevent the memory_object and control ports from |
| 420 | * being destroyed. |
| 421 | */ |
| 422 | vm_object_lock(object); |
| 423 | vm_object_paging_begin(object); |
| 424 | |
| 425 | if (should_flush) { |
| 426 | flags = MEMORY_OBJECT_DATA_FLUSH; |
| 427 | /* |
| 428 | * This flush is from an msync(), not a truncate(), so the |
| 429 | * contents of the file are not affected. |
		 * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know
| 431 | * that the data is not changed and that there's no need to |
| 432 | * push the old contents to a copy object. |
| 433 | */ |
| 434 | flags |= MEMORY_OBJECT_DATA_NO_CHANGE; |
| 435 | } else { |
| 436 | flags = 0; |
| 437 | } |
| 438 | |
| 439 | if (should_iosync) { |
| 440 | flags |= MEMORY_OBJECT_IO_SYNC; |
| 441 | } |
| 442 | |
	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
	    (should_return) ?
	    MEMORY_OBJECT_RETURN_ALL :
	    MEMORY_OBJECT_RETURN_NONE,
	    flags,
	    VM_PROT_NO_CHANGE);
| 449 | |
| 450 | |
| 451 | vm_object_paging_end(object); |
| 452 | vm_object_unlock(object); |
| 453 | return rv; |
| 454 | } |
| 455 | |
| 456 | |
| 457 | |
| 458 | #define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync) \ |
| 459 | MACRO_BEGIN \ |
| 460 | \ |
| 461 | int upl_flags; \ |
	memory_object_t pager;                                          \
| 463 | \ |
| 464 | if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) { \ |
| 465 | vm_object_paging_begin(object); \ |
| 466 | vm_object_unlock(object); \ |
| 467 | \ |
| 468 | if (iosync) \ |
| 469 | upl_flags = UPL_MSYNC | UPL_IOSYNC; \ |
| 470 | else \ |
| 471 | upl_flags = UPL_MSYNC; \ |
| 472 | \ |
| 473 | (void) memory_object_data_return(pager, \ |
| 474 | po, \ |
| 475 | (memory_object_cluster_size_t)data_cnt, \ |
| 476 | ro, \ |
| 477 | ioerr, \ |
| 478 | FALSE, \ |
| 479 | FALSE, \ |
| 480 | upl_flags); \ |
| 481 | \ |
| 482 | vm_object_lock(object); \ |
| 483 | vm_object_paging_end(object); \ |
| 484 | } \ |
| 485 | MACRO_END |
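
/*
 * Illustrative expansion (a sketch, not compiled): the invocation used in
 * vm_object_update_extent() below,
 *
 *	LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
 *	    paging_offset, offset_resid, io_errno, should_iosync);
 *
 * asks the object's pager, via memory_object_data_return(), to write back
 * the run of data_cnt bytes starting at offset paging_offset, dropping the
 * object lock around the (possibly blocking) call and re-taking it
 * afterwards.
 */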
| 486 | |
extern struct vnode *
vnode_pager_lookup_vnode(memory_object_t);
| 489 | |
| 490 | static int |
| 491 | vm_object_update_extent( |
| 492 | vm_object_t object, |
| 493 | vm_object_offset_t offset, |
| 494 | vm_object_offset_t offset_end, |
| 495 | vm_object_offset_t *offset_resid, |
| 496 | int *io_errno, |
| 497 | boolean_t should_flush, |
| 498 | memory_object_return_t should_return, |
| 499 | boolean_t should_iosync, |
| 500 | vm_prot_t prot) |
| 501 | { |
| 502 | vm_page_t m; |
| 503 | int retval = 0; |
| 504 | vm_object_offset_t paging_offset = 0; |
| 505 | vm_object_offset_t next_offset = offset; |
| 506 | memory_object_lock_result_t page_lock_result; |
| 507 | memory_object_cluster_size_t data_cnt = 0; |
| 508 | struct vm_page_delayed_work dw_array; |
| 509 | struct vm_page_delayed_work *dwp, *dwp_start; |
| 510 | bool dwp_finish_ctx = TRUE; |
| 511 | int dw_count; |
| 512 | int dw_limit; |
| 513 | int dirty_count; |
| 514 | |
| 515 | dwp_start = dwp = NULL; |
| 516 | dw_count = 0; |
| 517 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); |
| 518 | dwp_start = vm_page_delayed_work_get_ctx(); |
| 519 | if (dwp_start == NULL) { |
| 520 | dwp_start = &dw_array; |
| 521 | dw_limit = 1; |
| 522 | dwp_finish_ctx = FALSE; |
| 523 | } |
| 524 | dwp = dwp_start; |
| 525 | |
| 526 | dirty_count = 0; |
| 527 | |
| 528 | for (; |
| 529 | offset < offset_end && object->resident_page_count; |
| 530 | offset += PAGE_SIZE_64) { |
| 531 | /* |
| 532 | * Limit the number of pages to be cleaned at once to a contiguous |
| 533 | * run, or at most MAX_UPL_TRANSFER_BYTES |
| 534 | */ |
| 535 | if (data_cnt) { |
| 536 | if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) { |
| 537 | if (dw_count) { |
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
| 539 | dwp = dwp_start; |
| 540 | dw_count = 0; |
| 541 | } |
| 542 | LIST_REQ_PAGEOUT_PAGES(object, data_cnt, |
| 543 | paging_offset, offset_resid, io_errno, should_iosync); |
| 544 | data_cnt = 0; |
| 545 | } |
| 546 | } |
| 547 | while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { |
| 548 | dwp->dw_mask = 0; |
| 549 | |
| 550 | page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot); |
| 551 | |
| 552 | if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) { |
| 553 | /* |
| 554 | * End of a run of dirty/precious pages. |
| 555 | */ |
| 556 | if (dw_count) { |
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
| 558 | dwp = dwp_start; |
| 559 | dw_count = 0; |
| 560 | } |
| 561 | LIST_REQ_PAGEOUT_PAGES(object, data_cnt, |
| 562 | paging_offset, offset_resid, io_errno, should_iosync); |
| 563 | /* |
| 564 | * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will |
| 565 | * allow the state of page 'm' to change... we need to re-lookup |
| 566 | * the current offset |
| 567 | */ |
| 568 | data_cnt = 0; |
| 569 | continue; |
| 570 | } |
| 571 | |
| 572 | switch (page_lock_result) { |
| 573 | case MEMORY_OBJECT_LOCK_RESULT_DONE: |
| 574 | break; |
| 575 | |
| 576 | case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE: |
| 577 | if (m->vmp_dirty == TRUE) { |
| 578 | dirty_count++; |
| 579 | } |
| 580 | dwp->dw_mask |= DW_vm_page_free; |
| 581 | break; |
| 582 | |
| 583 | case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK: |
| 584 | PAGE_SLEEP(object, m, THREAD_UNINT); |
| 585 | continue; |
| 586 | |
| 587 | case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN: |
| 588 | if (data_cnt == 0) { |
| 589 | paging_offset = offset; |
| 590 | } |
| 591 | |
| 592 | data_cnt += PAGE_SIZE; |
| 593 | next_offset = offset + PAGE_SIZE_64; |
| 594 | |
| 595 | /* |
| 596 | * wired pages shouldn't be flushed and |
| 597 | * since they aren't on any queue, |
| 598 | * no need to remove them |
| 599 | */ |
| 600 | if (!VM_PAGE_WIRED(m)) { |
| 601 | if (should_flush) { |
| 602 | /* |
| 603 | * add additional state for the flush |
| 604 | */ |
| 605 | m->vmp_free_when_done = TRUE; |
| 606 | } |
| 607 | /* |
				 * we used to remove the page from the queues at this
| 609 | * point, but we do not believe that an msync |
| 610 | * should cause the 'age' of a page to be changed |
| 611 | * |
| 612 | * else |
| 613 | * dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE; |
| 614 | */ |
| 615 | } |
| 616 | retval = 1; |
| 617 | break; |
| 618 | } |
| 619 | if (dwp->dw_mask) { |
| 620 | VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); |
| 621 | |
| 622 | if (dw_count >= dw_limit) { |
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
| 624 | dwp = dwp_start; |
| 625 | dw_count = 0; |
| 626 | } |
| 627 | } |
| 628 | break; |
| 629 | } |
| 630 | } |
| 631 | |
| 632 | if (object->pager) { |
		task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED, vnode_pager_lookup_vnode(object->pager));
| 634 | } |
| 635 | /* |
| 636 | * We have completed the scan for applicable pages. |
| 637 | * Clean any pages that have been saved. |
| 638 | */ |
| 639 | if (dw_count) { |
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
| 641 | } |
| 642 | |
| 643 | if (data_cnt) { |
| 644 | LIST_REQ_PAGEOUT_PAGES(object, data_cnt, |
| 645 | paging_offset, offset_resid, io_errno, should_iosync); |
| 646 | } |
| 647 | |
| 648 | if (dwp_start && dwp_finish_ctx) { |
		vm_page_delayed_work_finish_ctx(dwp_start);
| 650 | dwp_start = dwp = NULL; |
| 651 | } |
| 652 | |
| 653 | return retval; |
| 654 | } |
| 655 | |
| 656 | |
| 657 | |
| 658 | /* |
| 659 | * Routine: vm_object_update |
| 660 | * Description: |
 * Work function for memory_object_lock_request() and vm_object_sync().
| 662 | * |
| 663 | * Called with object locked and paging ref taken. |
| 664 | */ |
| 665 | kern_return_t |
| 666 | vm_object_update( |
| 667 | vm_object_t object, |
| 668 | vm_object_offset_t offset, |
| 669 | vm_object_size_t size, |
| 670 | vm_object_offset_t *resid_offset, |
| 671 | int *io_errno, |
| 672 | memory_object_return_t should_return, |
| 673 | int flags, |
| 674 | vm_prot_t protection) |
| 675 | { |
| 676 | vm_object_t copy_object = VM_OBJECT_NULL; |
| 677 | boolean_t data_returned = FALSE; |
| 678 | boolean_t update_cow; |
| 679 | boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE; |
| 680 | boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE; |
| 681 | vm_fault_return_t result; |
| 682 | int num_of_extents; |
| 683 | int n; |
| 684 | #define MAX_EXTENTS 8 |
| 685 | #define EXTENT_SIZE (1024 * 1024 * 256) |
| 686 | #define RESIDENT_LIMIT (1024 * 32) |
| 687 | struct extent { |
| 688 | vm_object_offset_t e_base; |
| 689 | vm_object_offset_t e_min; |
| 690 | vm_object_offset_t e_max; |
| 691 | } extents[MAX_EXTENTS]; |
| 692 | |
| 693 | /* |
| 694 | * To avoid blocking while scanning for pages, save |
| 695 | * dirty pages to be cleaned all at once. |
| 696 | * |
| 697 | * XXXO A similar strategy could be used to limit the |
| 698 | * number of times that a scan must be restarted for |
| 699 | * other reasons. Those pages that would require blocking |
| 700 | * could be temporarily collected in another list, or |
| 701 | * their offsets could be recorded in a small array. |
| 702 | */ |
| 703 | |
| 704 | /* |
| 705 | * XXX NOTE: May want to consider converting this to a page list |
| 706 | * XXX vm_map_copy interface. Need to understand object |
| 707 | * XXX coalescing implications before doing so. |
| 708 | */ |
| 709 | |
| 710 | update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH) |
| 711 | && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) && |
| 712 | !(flags & MEMORY_OBJECT_DATA_PURGE))) |
| 713 | || (flags & MEMORY_OBJECT_COPY_SYNC); |
| 714 | |
| 715 | if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) { |
| 716 | int collisions = 0; |
| 717 | |
| 718 | while ((copy_object = object->vo_copy) != VM_OBJECT_NULL) { |
| 719 | /* |
| 720 | * need to do a try here since we're swimming upstream |
| 721 | * against the normal lock ordering... however, we need |
| 722 | * to hold the object stable until we gain control of the |
| 723 | * copy object so we have to be careful how we approach this |
| 724 | */ |
| 725 | if (vm_object_lock_try(copy_object)) { |
| 726 | /* |
| 727 | * we 'won' the lock on the copy object... |
| 728 | * no need to hold the object lock any longer... |
| 729 | * take a real reference on the copy object because |
| 730 | * we're going to call vm_fault_page on it which may |
| 731 | * under certain conditions drop the lock and the paging |
| 732 | * reference we're about to take... the reference |
| 733 | * will keep the copy object from going away if that happens |
| 734 | */ |
| 735 | vm_object_unlock(object); |
| 736 | vm_object_reference_locked(copy_object); |
| 737 | break; |
| 738 | } |
| 739 | vm_object_unlock(object); |
| 740 | |
| 741 | collisions++; |
| 742 | mutex_pause(collisions); |
| 743 | |
| 744 | vm_object_lock(object); |
| 745 | } |
| 746 | } |
| 747 | if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) { |
| 748 | vm_object_offset_t i; |
| 749 | vm_object_size_t copy_size; |
| 750 | vm_object_offset_t copy_offset; |
| 751 | vm_prot_t prot; |
| 752 | vm_page_t page; |
| 753 | vm_page_t top_page; |
| 754 | kern_return_t error = 0; |
| 755 | struct vm_object_fault_info fault_info = {}; |
| 756 | |
| 757 | if (copy_object != VM_OBJECT_NULL) { |
| 758 | /* |
| 759 | * translate offset with respect to shadow's offset |
| 760 | */ |
| 761 | copy_offset = (offset >= copy_object->vo_shadow_offset) ? |
| 762 | (offset - copy_object->vo_shadow_offset) : 0; |
| 763 | |
| 764 | if (copy_offset > copy_object->vo_size) { |
| 765 | copy_offset = copy_object->vo_size; |
| 766 | } |
| 767 | |
| 768 | /* |
| 769 | * clip size with respect to shadow offset |
| 770 | */ |
| 771 | if (offset >= copy_object->vo_shadow_offset) { |
| 772 | copy_size = size; |
| 773 | } else if (size >= copy_object->vo_shadow_offset - offset) { |
| 774 | copy_size = (size - (copy_object->vo_shadow_offset - offset)); |
| 775 | } else { |
| 776 | copy_size = 0; |
| 777 | } |
| 778 | |
| 779 | if (copy_offset + copy_size > copy_object->vo_size) { |
| 780 | if (copy_object->vo_size >= copy_offset) { |
| 781 | copy_size = copy_object->vo_size - copy_offset; |
| 782 | } else { |
| 783 | copy_size = 0; |
| 784 | } |
| 785 | } |
| 786 | copy_size += copy_offset; |
| 787 | } else { |
| 788 | copy_object = object; |
| 789 | |
| 790 | copy_size = offset + size; |
| 791 | copy_offset = offset; |
| 792 | } |
| 793 | fault_info.interruptible = THREAD_UNINT; |
| 794 | fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; |
| 795 | fault_info.lo_offset = copy_offset; |
| 796 | fault_info.hi_offset = copy_size; |
| 797 | fault_info.stealth = TRUE; |
| 798 | assert(fault_info.cs_bypass == FALSE); |
| 799 | assert(fault_info.csm_associated == FALSE); |
| 800 | |
| 801 | vm_object_paging_begin(copy_object); |
| 802 | |
| 803 | for (i = copy_offset; i < copy_size; i += PAGE_SIZE) { |
| 804 | RETRY_COW_OF_LOCK_REQUEST: |
| 805 | fault_info.cluster_size = (vm_size_t) (copy_size - i); |
| 806 | assert(fault_info.cluster_size == copy_size - i); |
| 807 | |
| 808 | prot = VM_PROT_WRITE | VM_PROT_READ; |
| 809 | page = VM_PAGE_NULL; |
			result = vm_fault_page(copy_object, i,
			    VM_PROT_WRITE | VM_PROT_READ,
			    FALSE,
			    FALSE, /* page not looked up */
			    &prot,
			    &page,
			    &top_page,
			    (int *)0,
			    &error,
			    FALSE,
			    &fault_info);
| 821 | |
| 822 | switch (result) { |
| 823 | case VM_FAULT_SUCCESS: |
| 824 | if (top_page) { |
| 825 | vm_fault_cleanup( |
| 826 | VM_PAGE_OBJECT(page), top_page); |
| 827 | vm_object_lock(copy_object); |
| 828 | vm_object_paging_begin(copy_object); |
| 829 | } |
| 830 | if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) { |
| 831 | vm_page_lockspin_queues(); |
| 832 | |
| 833 | if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) { |
| 834 | vm_page_deactivate(page); |
| 835 | } |
| 836 | vm_page_unlock_queues(); |
| 837 | } |
| 838 | PAGE_WAKEUP_DONE(page); |
| 839 | break; |
| 840 | case VM_FAULT_RETRY: |
| 841 | prot = VM_PROT_WRITE | VM_PROT_READ; |
| 842 | vm_object_lock(copy_object); |
| 843 | vm_object_paging_begin(copy_object); |
| 844 | goto RETRY_COW_OF_LOCK_REQUEST; |
| 845 | case VM_FAULT_INTERRUPTED: |
| 846 | prot = VM_PROT_WRITE | VM_PROT_READ; |
| 847 | vm_object_lock(copy_object); |
| 848 | vm_object_paging_begin(copy_object); |
| 849 | goto RETRY_COW_OF_LOCK_REQUEST; |
| 850 | case VM_FAULT_MEMORY_SHORTAGE: |
| 851 | VM_PAGE_WAIT(); |
| 852 | prot = VM_PROT_WRITE | VM_PROT_READ; |
| 853 | vm_object_lock(copy_object); |
| 854 | vm_object_paging_begin(copy_object); |
| 855 | goto RETRY_COW_OF_LOCK_REQUEST; |
| 856 | case VM_FAULT_SUCCESS_NO_VM_PAGE: |
| 857 | /* success but no VM page: fail */ |
| 858 | vm_object_paging_end(copy_object); |
| 859 | vm_object_unlock(copy_object); |
| 860 | OS_FALLTHROUGH; |
| 861 | case VM_FAULT_MEMORY_ERROR: |
| 862 | if (object != copy_object) { |
					vm_object_deallocate(copy_object);
| 864 | } |
| 865 | vm_object_lock(object); |
| 866 | goto BYPASS_COW_COPYIN; |
| 867 | default: |
| 868 | panic("vm_object_update: unexpected error 0x%x" |
| 869 | " from vm_fault_page()\n" , result); |
| 870 | } |
| 871 | } |
| 872 | vm_object_paging_end(copy_object); |
| 873 | } |
| 874 | if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) { |
| 875 | if (copy_object != VM_OBJECT_NULL && copy_object != object) { |
| 876 | vm_object_unlock(copy_object); |
			vm_object_deallocate(copy_object);
| 878 | vm_object_lock(object); |
| 879 | } |
| 880 | return KERN_SUCCESS; |
| 881 | } |
| 882 | if (copy_object != VM_OBJECT_NULL && copy_object != object) { |
| 883 | if ((flags & MEMORY_OBJECT_DATA_PURGE)) { |
| 884 | vm_object_lock_assert_exclusive(copy_object); |
			VM_OBJECT_SET_SHADOW_SEVERED(copy_object, TRUE);
			VM_OBJECT_SET_SHADOWED(copy_object, FALSE);
| 887 | copy_object->shadow = NULL; |
| 888 | /* |
| 889 | * delete the ref the COW was holding on the target object |
| 890 | */ |
| 891 | vm_object_deallocate(object); |
| 892 | } |
| 893 | vm_object_unlock(copy_object); |
		vm_object_deallocate(copy_object);
| 895 | vm_object_lock(object); |
| 896 | } |
| 897 | BYPASS_COW_COPYIN: |
| 898 | |
| 899 | /* |
| 900 | * when we have a really large range to check relative |
| 901 | * to the number of actual resident pages, we'd like |
| 902 | * to use the resident page list to drive our checks |
| 903 | * however, the object lock will get dropped while processing |
| 904 | * the page which means the resident queue can change which |
| 905 | * means we can't walk the queue as we process the pages |
| 906 | * we also want to do the processing in offset order to allow |
| 907 | * 'runs' of pages to be collected if we're being told to |
| 908 | * flush to disk... the resident page queue is NOT ordered. |
| 909 | * |
| 910 | * a temporary solution (until we figure out how to deal with |
| 911 | * large address spaces more generically) is to pre-flight |
| 912 | * the resident page queue (if it's small enough) and develop |
| 913 | * a collection of extents (that encompass actual resident pages) |
| 914 | * to visit. This will at least allow us to deal with some of the |
| 915 | * more pathological cases in a more efficient manner. The current |
| 916 | * worst case (a single resident page at the end of an extremely large |
	 * range) can take minutes to complete for ranges in the terabyte
| 918 | * category... since this routine is called when truncating a file, |
| 919 | * and we currently support files up to 16 Tbytes in size, this |
| 920 | * is not a theoretical problem |
| 921 | */ |
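	/*
	 * Illustrative example of the extent pre-flight (numbers only): with
	 * EXTENT_SIZE = 256MB, e_mask groups pages by 256MB-aligned bucket.
	 * A resident page at offset 0x1000 and another at offset 0x10001000
	 * hash to different e_base values (0x0 and 0x10000000), so they end
	 * up in two separate small extents and the loop below only visits
	 * those two ranges instead of walking every page offset in between.
	 */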
| 922 | |
| 923 | if ((object->resident_page_count < RESIDENT_LIMIT) && |
| 924 | (atop_64(size) > (unsigned)(object->resident_page_count / (8 * MAX_EXTENTS)))) { |
| 925 | vm_page_t next; |
| 926 | vm_object_offset_t start; |
| 927 | vm_object_offset_t end; |
| 928 | vm_object_size_t e_mask; |
| 929 | vm_page_t m; |
| 930 | |
| 931 | start = offset; |
| 932 | end = offset + size; |
| 933 | num_of_extents = 0; |
| 934 | e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1)); |
| 935 | |
| 936 | m = (vm_page_t) vm_page_queue_first(&object->memq); |
| 937 | |
| 938 | while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t) m)) { |
| 939 | next = (vm_page_t) vm_page_queue_next(&m->vmp_listq); |
| 940 | |
| 941 | if ((m->vmp_offset >= start) && (m->vmp_offset < end)) { |
| 942 | /* |
| 943 | * this is a page we're interested in |
| 944 | * try to fit it into a current extent |
| 945 | */ |
| 946 | for (n = 0; n < num_of_extents; n++) { |
| 947 | if ((m->vmp_offset & e_mask) == extents[n].e_base) { |
| 948 | /* |
| 949 | * use (PAGE_SIZE - 1) to determine the |
| 950 | * max offset so that we don't wrap if |
| 951 | * we're at the last page of the space |
| 952 | */ |
| 953 | if (m->vmp_offset < extents[n].e_min) { |
| 954 | extents[n].e_min = m->vmp_offset; |
| 955 | } else if ((m->vmp_offset + (PAGE_SIZE - 1)) > extents[n].e_max) { |
| 956 | extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1); |
| 957 | } |
| 958 | break; |
| 959 | } |
| 960 | } |
| 961 | if (n == num_of_extents) { |
| 962 | /* |
| 963 | * didn't find a current extent that can encompass |
| 964 | * this page |
| 965 | */ |
| 966 | if (n < MAX_EXTENTS) { |
| 967 | /* |
| 968 | * if we still have room, |
| 969 | * create a new extent |
| 970 | */ |
| 971 | extents[n].e_base = m->vmp_offset & e_mask; |
| 972 | extents[n].e_min = m->vmp_offset; |
| 973 | extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1); |
| 974 | |
| 975 | num_of_extents++; |
| 976 | } else { |
| 977 | /* |
| 978 | * no room to create a new extent... |
| 979 | * fall back to a single extent based |
| 980 | * on the min and max page offsets |
| 981 | * we find in the range we're interested in... |
| 982 | * first, look through the extent list and |
| 983 | * develop the overall min and max for the |
| 984 | * pages we've looked at up to this point |
| 985 | */ |
| 986 | for (n = 1; n < num_of_extents; n++) { |
| 987 | if (extents[n].e_min < extents[0].e_min) { |
| 988 | extents[0].e_min = extents[n].e_min; |
| 989 | } |
| 990 | if (extents[n].e_max > extents[0].e_max) { |
| 991 | extents[0].e_max = extents[n].e_max; |
| 992 | } |
| 993 | } |
| 994 | /* |
| 995 | * now setup to run through the remaining pages |
| 996 | * to determine the overall min and max |
| 997 | * offset for the specified range |
| 998 | */ |
| 999 | extents[0].e_base = 0; |
| 1000 | e_mask = 0; |
| 1001 | num_of_extents = 1; |
| 1002 | |
| 1003 | /* |
| 1004 | * by continuing, we'll reprocess the |
| 1005 | * page that forced us to abandon trying |
| 1006 | * to develop multiple extents |
| 1007 | */ |
| 1008 | continue; |
| 1009 | } |
| 1010 | } |
| 1011 | } |
| 1012 | m = next; |
| 1013 | } |
| 1014 | } else { |
| 1015 | extents[0].e_min = offset; |
| 1016 | extents[0].e_max = offset + (size - 1); |
| 1017 | |
| 1018 | num_of_extents = 1; |
| 1019 | } |
| 1020 | for (n = 0; n < num_of_extents; n++) { |
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
		    should_flush, should_return, should_iosync, protection)) {
| 1023 | data_returned = TRUE; |
| 1024 | } |
| 1025 | } |
| 1026 | return data_returned; |
| 1027 | } |
| 1028 | |
| 1029 | |
| 1030 | static kern_return_t |
| 1031 | vm_object_set_attributes_common( |
| 1032 | vm_object_t object, |
| 1033 | boolean_t may_cache, |
| 1034 | memory_object_copy_strategy_t copy_strategy) |
| 1035 | { |
| 1036 | boolean_t object_became_ready; |
| 1037 | |
| 1038 | if (object == VM_OBJECT_NULL) { |
| 1039 | return KERN_INVALID_ARGUMENT; |
| 1040 | } |
| 1041 | |
| 1042 | /* |
| 1043 | * Verify the attributes of importance |
| 1044 | */ |
| 1045 | |
| 1046 | switch (copy_strategy) { |
| 1047 | case MEMORY_OBJECT_COPY_NONE: |
| 1048 | case MEMORY_OBJECT_COPY_DELAY: |
| 1049 | case MEMORY_OBJECT_COPY_DELAY_FORK: |
| 1050 | break; |
| 1051 | default: |
| 1052 | return KERN_INVALID_ARGUMENT; |
| 1053 | } |
| 1054 | |
| 1055 | if (may_cache) { |
| 1056 | may_cache = TRUE; |
| 1057 | } |
| 1058 | |
| 1059 | vm_object_lock(object); |
| 1060 | |
| 1061 | /* |
| 1062 | * Copy the attributes |
| 1063 | */ |
| 1064 | assert(!object->internal); |
| 1065 | object_became_ready = !object->pager_ready; |
| 1066 | object->copy_strategy = copy_strategy; |
	VM_OBJECT_SET_CAN_PERSIST(object, may_cache);
| 1068 | |
| 1069 | /* |
| 1070 | * Wake up anyone waiting for the ready attribute |
| 1071 | * to become asserted. |
| 1072 | */ |
| 1073 | |
| 1074 | if (object_became_ready) { |
| 1075 | VM_OBJECT_SET_PAGER_READY(object, TRUE); |
| 1076 | vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); |
| 1077 | } |
| 1078 | |
| 1079 | vm_object_unlock(object); |
| 1080 | |
| 1081 | return KERN_SUCCESS; |
| 1082 | } |
| 1083 | |
| 1084 | |
| 1085 | /* |
| 1086 | * Set the memory object attribute as provided. |
| 1087 | * |
| 1088 | * XXX This routine cannot be completed until the vm_msync, clean |
| 1089 | * in place, and cluster work is completed. See ifdef notyet |
| 1090 | * below and note that vm_object_set_attributes_common() |
| 1091 | * may have to be expanded. |
| 1092 | */ |
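/*
 * Illustrative call (a hedged sketch; "control" is assumed to come from the
 * caller): setting the caching behavior through the performance-info flavor.
 * Of the two fields, only may_cache is consulted by this routine; the
 * cluster_size value is ignored here.
 *
 *	memory_object_perf_info_data_t	perf;
 *	kern_return_t			kr;
 *
 *	perf.cluster_size = PAGE_SIZE;
 *	perf.may_cache = TRUE;
 *	kr = memory_object_change_attributes(control,
 *	    MEMORY_OBJECT_PERFORMANCE_INFO,
 *	    (memory_object_info_t) &perf,
 *	    MEMORY_OBJECT_PERF_INFO_COUNT);
 */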
| 1093 | kern_return_t |
| 1094 | memory_object_change_attributes( |
| 1095 | memory_object_control_t control, |
| 1096 | memory_object_flavor_t flavor, |
| 1097 | memory_object_info_t attributes, |
| 1098 | mach_msg_type_number_t count) |
| 1099 | { |
| 1100 | vm_object_t object; |
| 1101 | kern_return_t result = KERN_SUCCESS; |
| 1102 | boolean_t may_cache; |
| 1103 | boolean_t invalidate; |
| 1104 | memory_object_copy_strategy_t copy_strategy; |
| 1105 | |
| 1106 | object = memory_object_control_to_vm_object(control); |
| 1107 | if (object == VM_OBJECT_NULL) { |
| 1108 | return KERN_INVALID_ARGUMENT; |
| 1109 | } |
| 1110 | |
| 1111 | vm_object_lock(object); |
| 1112 | |
| 1113 | may_cache = object->can_persist; |
| 1114 | copy_strategy = object->copy_strategy; |
| 1115 | #if notyet |
| 1116 | invalidate = object->invalidate; |
| 1117 | #endif |
| 1118 | vm_object_unlock(object); |
| 1119 | |
| 1120 | switch (flavor) { |
| 1121 | case OLD_MEMORY_OBJECT_BEHAVIOR_INFO: |
| 1122 | { |
| 1123 | old_memory_object_behave_info_t behave; |
| 1124 | |
| 1125 | if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) { |
| 1126 | result = KERN_INVALID_ARGUMENT; |
| 1127 | break; |
| 1128 | } |
| 1129 | |
| 1130 | behave = (old_memory_object_behave_info_t) attributes; |
| 1131 | |
| 1132 | invalidate = behave->invalidate; |
| 1133 | copy_strategy = behave->copy_strategy; |
| 1134 | |
| 1135 | break; |
| 1136 | } |
| 1137 | |
| 1138 | case MEMORY_OBJECT_BEHAVIOR_INFO: |
| 1139 | { |
| 1140 | memory_object_behave_info_t behave; |
| 1141 | |
| 1142 | if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) { |
| 1143 | result = KERN_INVALID_ARGUMENT; |
| 1144 | break; |
| 1145 | } |
| 1146 | |
| 1147 | behave = (memory_object_behave_info_t) attributes; |
| 1148 | |
| 1149 | invalidate = behave->invalidate; |
| 1150 | copy_strategy = behave->copy_strategy; |
| 1151 | break; |
| 1152 | } |
| 1153 | |
| 1154 | case MEMORY_OBJECT_PERFORMANCE_INFO: |
| 1155 | { |
| 1156 | memory_object_perf_info_t perf; |
| 1157 | |
| 1158 | if (count != MEMORY_OBJECT_PERF_INFO_COUNT) { |
| 1159 | result = KERN_INVALID_ARGUMENT; |
| 1160 | break; |
| 1161 | } |
| 1162 | |
| 1163 | perf = (memory_object_perf_info_t) attributes; |
| 1164 | |
| 1165 | may_cache = perf->may_cache; |
| 1166 | |
| 1167 | break; |
| 1168 | } |
| 1169 | |
| 1170 | case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO: |
| 1171 | { |
| 1172 | old_memory_object_attr_info_t attr; |
| 1173 | |
| 1174 | if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) { |
| 1175 | result = KERN_INVALID_ARGUMENT; |
| 1176 | break; |
| 1177 | } |
| 1178 | |
| 1179 | attr = (old_memory_object_attr_info_t) attributes; |
| 1180 | |
| 1181 | may_cache = attr->may_cache; |
| 1182 | copy_strategy = attr->copy_strategy; |
| 1183 | |
| 1184 | break; |
| 1185 | } |
| 1186 | |
| 1187 | case MEMORY_OBJECT_ATTRIBUTE_INFO: |
| 1188 | { |
| 1189 | memory_object_attr_info_t attr; |
| 1190 | |
| 1191 | if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) { |
| 1192 | result = KERN_INVALID_ARGUMENT; |
| 1193 | break; |
| 1194 | } |
| 1195 | |
| 1196 | attr = (memory_object_attr_info_t) attributes; |
| 1197 | |
| 1198 | copy_strategy = attr->copy_strategy; |
| 1199 | may_cache = attr->may_cache_object; |
| 1200 | |
| 1201 | break; |
| 1202 | } |
| 1203 | |
| 1204 | default: |
| 1205 | result = KERN_INVALID_ARGUMENT; |
| 1206 | break; |
| 1207 | } |
| 1208 | |
| 1209 | if (result != KERN_SUCCESS) { |
| 1210 | return result; |
| 1211 | } |
| 1212 | |
| 1213 | if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) { |
| 1214 | copy_strategy = MEMORY_OBJECT_COPY_DELAY; |
| 1215 | } |
| 1216 | |
| 1217 | /* |
| 1218 | * XXX may_cache may become a tri-valued variable to handle |
| 1219 | * XXX uncache if not in use. |
| 1220 | */ |
| 1221 | return vm_object_set_attributes_common(object, |
| 1222 | may_cache, |
| 1223 | copy_strategy); |
| 1224 | } |
| 1225 | |
| 1226 | kern_return_t |
| 1227 | memory_object_iopl_request( |
| 1228 | ipc_port_t port, |
| 1229 | memory_object_offset_t offset, |
| 1230 | upl_size_t *upl_size, |
| 1231 | upl_t *upl_ptr, |
| 1232 | upl_page_info_array_t user_page_list, |
| 1233 | unsigned int *page_list_count, |
| 1234 | upl_control_flags_t *flags, |
| 1235 | vm_tag_t tag) |
| 1236 | { |
| 1237 | vm_object_t object; |
| 1238 | kern_return_t ret; |
| 1239 | upl_control_flags_t caller_flags; |
| 1240 | vm_named_entry_t named_entry; |
| 1241 | |
| 1242 | caller_flags = *flags; |
| 1243 | |
| 1244 | if (caller_flags & ~UPL_VALID_FLAGS) { |
| 1245 | /* |
| 1246 | * For forward compatibility's sake, |
| 1247 | * reject any unknown flag. |
| 1248 | */ |
| 1249 | return KERN_INVALID_VALUE; |
| 1250 | } |
| 1251 | |
| 1252 | named_entry = mach_memory_entry_from_port(port); |
| 1253 | if (named_entry != NULL) { |
| 1254 | /* a few checks to make sure user is obeying rules */ |
| 1255 | if (*upl_size == 0) { |
| 1256 | if (offset >= named_entry->size) { |
| 1257 | return KERN_INVALID_RIGHT; |
| 1258 | } |
| 1259 | *upl_size = (upl_size_t)(named_entry->size - offset); |
| 1260 | if (*upl_size != named_entry->size - offset) { |
| 1261 | return KERN_INVALID_ARGUMENT; |
| 1262 | } |
| 1263 | } |
| 1264 | if (caller_flags & UPL_COPYOUT_FROM) { |
| 1265 | if ((named_entry->protection & VM_PROT_READ) |
| 1266 | != VM_PROT_READ) { |
| 1267 | return KERN_INVALID_RIGHT; |
| 1268 | } |
| 1269 | } else { |
| 1270 | if ((named_entry->protection & |
| 1271 | (VM_PROT_READ | VM_PROT_WRITE)) |
| 1272 | != (VM_PROT_READ | VM_PROT_WRITE)) { |
| 1273 | return KERN_INVALID_RIGHT; |
| 1274 | } |
| 1275 | } |
| 1276 | if (named_entry->size < (offset + *upl_size)) { |
| 1277 | return KERN_INVALID_ARGUMENT; |
| 1278 | } |
| 1279 | |
		/* the caller's parameter offset is defined to be the */
| 1281 | /* offset from beginning of named entry offset in object */ |
| 1282 | offset = offset + named_entry->offset; |
| 1283 | offset += named_entry->data_offset; |
| 1284 | |
| 1285 | if (named_entry->is_sub_map || |
| 1286 | named_entry->is_copy) { |
| 1287 | return KERN_INVALID_ARGUMENT; |
| 1288 | } |
| 1289 | if (!named_entry->is_object) { |
| 1290 | return KERN_INVALID_ARGUMENT; |
| 1291 | } |
| 1292 | |
| 1293 | named_entry_lock(named_entry); |
| 1294 | |
| 1295 | object = vm_named_entry_to_vm_object(named_entry); |
| 1296 | assert(object != VM_OBJECT_NULL); |
| 1297 | vm_object_reference(object); |
| 1298 | named_entry_unlock(named_entry); |
| 1299 | } else { |
| 1300 | return KERN_INVALID_ARGUMENT; |
| 1301 | } |
| 1302 | if (object == VM_OBJECT_NULL) { |
| 1303 | return KERN_INVALID_ARGUMENT; |
| 1304 | } |
| 1305 | |
| 1306 | if (!object->private) { |
| 1307 | if (object->phys_contiguous) { |
| 1308 | *flags = UPL_PHYS_CONTIG; |
| 1309 | } else { |
| 1310 | *flags = 0; |
| 1311 | } |
| 1312 | } else { |
| 1313 | *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG; |
| 1314 | } |
| 1315 | |
| 1316 | ret = vm_object_iopl_request(object, |
| 1317 | offset, |
	    *upl_size,
	    upl_ptr,
	    user_page_list,
	    page_list_count,
	    caller_flags,
| 1323 | tag); |
| 1324 | vm_object_deallocate(object); |
| 1325 | return ret; |
| 1326 | } |
| 1327 | |
| 1328 | /* |
| 1329 | * Routine: memory_object_upl_request [interface] |
| 1330 | * Purpose: |
| 1331 | * Cause the population of a portion of a vm_object. |
| 1332 | * Depending on the nature of the request, the pages |
 * returned may contain valid data or be uninitialized.
| 1334 | * |
| 1335 | */ |
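
/*
 * Illustrative call (a hedged sketch, loosely modeled on how the UBC layer
 * builds such requests; "control" and "offset" are assumed to come from the
 * caller, and the flag combination is only one plausible choice): create an
 * internal UPL covering one page worth of dirty data.
 *
 *	upl_t		upl = NULL;
 *	kern_return_t	kr;
 *
 *	kr = memory_object_upl_request(control,
 *	    offset,
 *	    PAGE_SIZE,
 *	    &upl,
 *	    NULL,
 *	    NULL,
 *	    UPL_SET_INTERNAL | UPL_SET_LITE | UPL_RET_ONLY_DIRTY,
 *	    VM_KERN_MEMORY_FILE);
 */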
| 1336 | |
| 1337 | kern_return_t |
| 1338 | memory_object_upl_request( |
| 1339 | memory_object_control_t control, |
| 1340 | memory_object_offset_t offset, |
| 1341 | upl_size_t size, |
| 1342 | upl_t *upl_ptr, |
| 1343 | upl_page_info_array_t user_page_list, |
| 1344 | unsigned int *page_list_count, |
| 1345 | int cntrl_flags, |
| 1346 | int tag) |
| 1347 | { |
| 1348 | vm_object_t object; |
| 1349 | vm_tag_t vmtag = (vm_tag_t)tag; |
| 1350 | assert(vmtag == tag); |
| 1351 | |
| 1352 | object = memory_object_control_to_vm_object(control); |
| 1353 | if (object == VM_OBJECT_NULL) { |
| 1354 | return KERN_TERMINATED; |
| 1355 | } |
| 1356 | |
	return vm_object_upl_request(object,
	    offset,
	    size,
	    upl_ptr,
	    user_page_list,
	    page_list_count,
	    (upl_control_flags_t)(unsigned int) cntrl_flags,
	    vmtag);
| 1365 | } |
| 1366 | |
| 1367 | |
| 1368 | kern_return_t |
| 1369 | memory_object_cluster_size( |
| 1370 | memory_object_control_t control, |
| 1371 | memory_object_offset_t *start, |
| 1372 | vm_size_t *length, |
| 1373 | uint32_t *io_streaming, |
| 1374 | memory_object_fault_info_t mo_fault_info) |
| 1375 | { |
| 1376 | vm_object_t object; |
| 1377 | vm_object_fault_info_t fault_info; |
| 1378 | |
| 1379 | object = memory_object_control_to_vm_object(control); |
| 1380 | |
| 1381 | if (object == VM_OBJECT_NULL || object->paging_offset > *start) { |
| 1382 | return KERN_INVALID_ARGUMENT; |
| 1383 | } |
| 1384 | |
| 1385 | *start -= object->paging_offset; |
| 1386 | |
| 1387 | fault_info = (vm_object_fault_info_t)(uintptr_t) mo_fault_info; |
| 1388 | vm_object_cluster_size(object, |
	    (vm_object_offset_t *)start,
| 1390 | length, |
| 1391 | fault_info, |
| 1392 | io_streaming); |
| 1393 | |
| 1394 | *start += object->paging_offset; |
| 1395 | |
| 1396 | return KERN_SUCCESS; |
| 1397 | } |
| 1398 | |
| 1399 | |
| 1400 | /* |
| 1401 | * Routine: host_default_memory_manager [interface] |
| 1402 | * Purpose: |
| 1403 | * set/get the default memory manager port and default cluster |
| 1404 | * size. |
| 1405 | * |
| 1406 | * If successful, consumes the supplied naked send right. |
| 1407 | */ |
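/*
 * Illustrative call (a hedged sketch): passing MEMORY_OBJECT_DEFAULT_NULL
 * retrieves a send right for the current default manager without changing
 * it; only the kernel task may install a new one.  host_priv_self() stands
 * in for whatever privileged host port the caller actually holds.
 *
 *	memory_object_default_t	dmm = MEMORY_OBJECT_DEFAULT_NULL;
 *	kern_return_t		kr;
 *
 *	kr = host_default_memory_manager(host_priv_self(), &dmm, 0);
 */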
| 1408 | kern_return_t |
| 1409 | host_default_memory_manager( |
| 1410 | host_priv_t host_priv, |
| 1411 | memory_object_default_t *default_manager, |
| 1412 | __unused memory_object_cluster_size_t cluster_size) |
| 1413 | { |
| 1414 | memory_object_default_t current_manager; |
| 1415 | memory_object_default_t new_manager; |
| 1416 | memory_object_default_t returned_manager; |
| 1417 | kern_return_t result = KERN_SUCCESS; |
| 1418 | |
| 1419 | if (host_priv == HOST_PRIV_NULL) { |
| 1420 | return KERN_INVALID_HOST; |
| 1421 | } |
| 1422 | |
| 1423 | new_manager = *default_manager; |
	lck_mtx_lock(&memory_manager_default_lock);
| 1425 | current_manager = memory_manager_default; |
| 1426 | returned_manager = MEMORY_OBJECT_DEFAULT_NULL; |
| 1427 | |
| 1428 | if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) { |
| 1429 | /* |
| 1430 | * Retrieve the current value. |
| 1431 | */ |
		returned_manager = ipc_port_make_send_mqueue(current_manager);
| 1433 | } else { |
| 1434 | /* |
| 1435 | * Only allow the kernel to change the value. |
| 1436 | */ |
| 1437 | extern task_t kernel_task; |
| 1438 | if (current_task() != kernel_task) { |
| 1439 | result = KERN_NO_ACCESS; |
| 1440 | goto out; |
| 1441 | } |
| 1442 | |
| 1443 | /* |
| 1444 | * If this is the first non-null manager, start |
| 1445 | * up the internal pager support. |
| 1446 | */ |
| 1447 | if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { |
| 1448 | result = vm_pageout_internal_start(); |
| 1449 | if (result != KERN_SUCCESS) { |
| 1450 | goto out; |
| 1451 | } |
| 1452 | } |
| 1453 | |
| 1454 | /* |
| 1455 | * Retrieve the current value, |
| 1456 | * and replace it with the supplied value. |
| 1457 | * We return the old reference to the caller |
| 1458 | * but we have to take a reference on the new |
| 1459 | * one. |
| 1460 | */ |
| 1461 | returned_manager = current_manager; |
		memory_manager_default = ipc_port_make_send_mqueue(new_manager);
| 1463 | |
| 1464 | /* |
| 1465 | * In case anyone's been waiting for a memory |
| 1466 | * manager to be established, wake them up. |
| 1467 | */ |
| 1468 | |
| 1469 | thread_wakeup((event_t) &memory_manager_default); |
| 1470 | |
| 1471 | /* |
| 1472 | * Now that we have a default pager for anonymous memory, |
| 1473 | * reactivate all the throttled pages (i.e. dirty pages with |
| 1474 | * no pager). |
| 1475 | */ |
| 1476 | if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { |
| 1477 | vm_page_reactivate_all_throttled(); |
| 1478 | } |
| 1479 | } |
| 1480 | out: |
	lck_mtx_unlock(&memory_manager_default_lock);
| 1482 | |
| 1483 | *default_manager = returned_manager; |
| 1484 | return result; |
| 1485 | } |
| 1486 | |
| 1487 | /* |
| 1488 | * Routine: memory_manager_default_reference |
| 1489 | * Purpose: |
| 1490 | * Returns a naked send right for the default |
| 1491 | * memory manager. The returned right is always |
| 1492 | * valid (not IP_NULL or IP_DEAD). |
| 1493 | */ |
| 1494 | |
| 1495 | __private_extern__ memory_object_default_t |
| 1496 | memory_manager_default_reference(void) |
| 1497 | { |
| 1498 | memory_object_default_t current_manager; |
| 1499 | |
	lck_mtx_lock(&memory_manager_default_lock);
| 1501 | current_manager = memory_manager_default; |
| 1502 | while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { |
| 1503 | wait_result_t res; |
| 1504 | |
		res = lck_mtx_sleep(&memory_manager_default_lock,
		    LCK_SLEEP_DEFAULT,
		    (event_t) &memory_manager_default,
		    THREAD_UNINT);
| 1509 | assert(res == THREAD_AWAKENED); |
| 1510 | current_manager = memory_manager_default; |
| 1511 | } |
	current_manager = ipc_port_make_send_mqueue(current_manager);
	lck_mtx_unlock(&memory_manager_default_lock);
| 1514 | |
| 1515 | return current_manager; |
| 1516 | } |
| 1517 | |
| 1518 | /* |
| 1519 | * Routine: memory_manager_default_check |
| 1520 | * |
| 1521 | * Purpose: |
| 1522 | * Check whether a default memory manager has been set |
| 1523 | * up yet, or not. Returns KERN_SUCCESS if dmm exists, |
| 1524 | * and KERN_FAILURE if dmm does not exist. |
| 1525 | * |
| 1526 | * If there is no default memory manager, log an error, |
| 1527 | * but only the first time. |
| 1528 | * |
| 1529 | */ |
| 1530 | __private_extern__ kern_return_t |
| 1531 | memory_manager_default_check(void) |
| 1532 | { |
| 1533 | memory_object_default_t current; |
| 1534 | |
	lck_mtx_lock(&memory_manager_default_lock);
| 1536 | current = memory_manager_default; |
| 1537 | if (current == MEMORY_OBJECT_DEFAULT_NULL) { |
| 1538 | static boolean_t logged; /* initialized to 0 */ |
| 1539 | boolean_t complain = !logged; |
| 1540 | logged = TRUE; |
		lck_mtx_unlock(&memory_manager_default_lock);
		if (complain) {
			printf("Warning: No default memory manager\n");
| 1544 | } |
| 1545 | return KERN_FAILURE; |
| 1546 | } else { |
		lck_mtx_unlock(&memory_manager_default_lock);
| 1548 | return KERN_SUCCESS; |
| 1549 | } |
| 1550 | } |
| 1551 | |
| 1552 | /* Allow manipulation of individual page state. This is actually part of */ |
| 1553 | /* the UPL regimen but takes place on the object rather than on a UPL */ |
| 1554 | |
| 1555 | kern_return_t |
| 1556 | memory_object_page_op( |
| 1557 | memory_object_control_t control, |
| 1558 | memory_object_offset_t offset, |
| 1559 | int ops, |
| 1560 | ppnum_t *phys_entry, |
| 1561 | int *flags) |
| 1562 | { |
| 1563 | vm_object_t object; |
| 1564 | |
| 1565 | object = memory_object_control_to_vm_object(control); |
| 1566 | if (object == VM_OBJECT_NULL) { |
| 1567 | return KERN_INVALID_ARGUMENT; |
| 1568 | } |
| 1569 | |
| 1570 | return vm_object_page_op(object, offset, ops, phys_entry, flags); |
| 1571 | } |
| 1572 | |
| 1573 | /* |
| 1574 | * memory_object_range_op offers performance enhancement over |
| 1575 | * memory_object_page_op for page_op functions which do not require page |
| 1576 | * level state to be returned from the call. Page_op was created to provide |
| 1577 | * a low-cost alternative to page manipulation via UPLs when only a single |
 * page was involved. The range_op call extends the _op family of functions
 * to work on multiple pages, where the lack of page-level state handling
 * allows the caller to avoid the overhead of the upl structures.
| 1581 | */ |
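
/*
 * Illustrative call (a hedged sketch; "control", "start" and "len" are
 * assumed to come from the caller): ask how much of [start, start + len)
 * is backed by resident pages.  On success, "range" holds the length, in
 * bytes, of the leading run of present pages.
 *
 *	int		range = 0;
 *	kern_return_t	kr;
 *
 *	kr = memory_object_range_op(control,
 *	    start,
 *	    start + len,
 *	    UPL_ROP_PRESENT,
 *	    &range);
 */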
| 1582 | |
| 1583 | kern_return_t |
| 1584 | memory_object_range_op( |
| 1585 | memory_object_control_t control, |
| 1586 | memory_object_offset_t offset_beg, |
| 1587 | memory_object_offset_t offset_end, |
| 1588 | int ops, |
| 1589 | int *range) |
| 1590 | { |
| 1591 | vm_object_t object; |
| 1592 | |
| 1593 | object = memory_object_control_to_vm_object(control); |
| 1594 | if (object == VM_OBJECT_NULL) { |
| 1595 | return KERN_INVALID_ARGUMENT; |
| 1596 | } |
| 1597 | |
| 1598 | return vm_object_range_op(object, |
| 1599 | offset_beg, |
| 1600 | offset_end, |
| 1601 | ops, |
	    (uint32_t *) range);
| 1603 | } |
| 1604 | |
| 1605 | |
| 1606 | void |
| 1607 | memory_object_mark_used( |
| 1608 | memory_object_control_t control) |
| 1609 | { |
| 1610 | vm_object_t object; |
| 1611 | |
| 1612 | if (control == NULL) { |
| 1613 | return; |
| 1614 | } |
| 1615 | |
| 1616 | object = memory_object_control_to_vm_object(control); |
| 1617 | |
| 1618 | if (object != VM_OBJECT_NULL) { |
| 1619 | vm_object_cache_remove(object); |
| 1620 | } |
| 1621 | } |
| 1622 | |
| 1623 | |
| 1624 | void |
| 1625 | memory_object_mark_unused( |
| 1626 | memory_object_control_t control, |
| 1627 | __unused boolean_t rage) |
| 1628 | { |
| 1629 | vm_object_t object; |
| 1630 | |
| 1631 | if (control == NULL) { |
| 1632 | return; |
| 1633 | } |
| 1634 | |
| 1635 | object = memory_object_control_to_vm_object(control); |
| 1636 | |
| 1637 | if (object != VM_OBJECT_NULL) { |
| 1638 | vm_object_cache_add(object); |
| 1639 | } |
| 1640 | } |
| 1641 | |
| 1642 | void |
| 1643 | memory_object_mark_io_tracking( |
| 1644 | memory_object_control_t control) |
| 1645 | { |
| 1646 | vm_object_t object; |
| 1647 | |
| 1648 | if (control == NULL) { |
| 1649 | return; |
| 1650 | } |
| 1651 | object = memory_object_control_to_vm_object(control); |
| 1652 | |
| 1653 | if (object != VM_OBJECT_NULL) { |
| 1654 | vm_object_lock(object); |
| 1655 | object->io_tracking = TRUE; |
| 1656 | vm_object_unlock(object); |
| 1657 | } |
| 1658 | } |
| 1659 | |
| 1660 | void |
| 1661 | memory_object_mark_trusted( |
| 1662 | memory_object_control_t control) |
| 1663 | { |
| 1664 | vm_object_t object; |
| 1665 | |
| 1666 | if (control == NULL) { |
| 1667 | return; |
| 1668 | } |
| 1669 | object = memory_object_control_to_vm_object(control); |
| 1670 | |
| 1671 | if (object != VM_OBJECT_NULL) { |
| 1672 | vm_object_lock(object); |
| 1673 | VM_OBJECT_SET_PAGER_TRUSTED(object, TRUE); |
| 1674 | vm_object_unlock(object); |
| 1675 | } |
| 1676 | } |
| 1677 | |
| 1678 | #if FBDP_DEBUG_OBJECT_NO_PAGER |
| 1679 | kern_return_t |
| 1680 | memory_object_mark_as_tracked( |
| 1681 | memory_object_control_t control, |
| 1682 | bool new_value, |
| 1683 | bool *old_value) |
| 1684 | { |
| 1685 | vm_object_t object; |
| 1686 | |
| 1687 | if (control == NULL) { |
| 1688 | return KERN_INVALID_ARGUMENT; |
| 1689 | } |
| 1690 | object = memory_object_control_to_vm_object(control); |
| 1691 | |
| 1692 | if (object == VM_OBJECT_NULL) { |
| 1693 | return KERN_FAILURE; |
| 1694 | } |
| 1695 | |
| 1696 | vm_object_lock(object); |
| 1697 | *old_value = object->fbdp_tracked; |
| 1698 | VM_OBJECT_SET_FBDP_TRACKED(object, new_value); |
| 1699 | vm_object_unlock(object); |
| 1700 | |
| 1701 | return KERN_SUCCESS; |
| 1702 | } |
| 1703 | #endif /* FBDP_DEBUG_OBJECT_NO_PAGER */ |
| 1704 | |
| 1705 | #if CONFIG_SECLUDED_MEMORY |
| 1706 | void |
| 1707 | memory_object_mark_eligible_for_secluded( |
| 1708 | memory_object_control_t control, |
| 1709 | boolean_t eligible_for_secluded) |
| 1710 | { |
| 1711 | vm_object_t object; |
| 1712 | |
| 1713 | if (control == NULL) { |
| 1714 | return; |
| 1715 | } |
| 1716 | object = memory_object_control_to_vm_object(control); |
| 1717 | |
| 1718 | if (object == VM_OBJECT_NULL) { |
| 1719 | return; |
| 1720 | } |
| 1721 | |
| 1722 | vm_object_lock(object); |
| 1723 | if (eligible_for_secluded && |
| 1724 | secluded_for_filecache && /* global boot-arg */ |
| 1725 | !object->eligible_for_secluded) { |
| 1726 | object->eligible_for_secluded = TRUE; |
| 1727 | vm_page_secluded.eligible_for_secluded += object->resident_page_count; |
| 1728 | } else if (!eligible_for_secluded && |
| 1729 | object->eligible_for_secluded) { |
| 1730 | object->eligible_for_secluded = FALSE; |
| 1731 | vm_page_secluded.eligible_for_secluded -= object->resident_page_count; |
| 1732 | if (object->resident_page_count) { |
| 1733 | /* XXX FBDP TODO: flush pages from secluded queue? */ |
| 1734 | // printf("FBDP TODO: flush %d pages from %p from secluded queue\n", object->resident_page_count, object); |
| 1735 | } |
| 1736 | } |
| 1737 | vm_object_unlock(object); |
| 1738 | } |
| 1739 | #endif /* CONFIG_SECLUDED_MEMORY */ |
| 1740 | |
| 1741 | void |
| 1742 | memory_object_mark_for_realtime( |
| 1743 | memory_object_control_t control, |
| 1744 | bool for_realtime) |
| 1745 | { |
| 1746 | vm_object_t object; |
| 1747 | |
| 1748 | if (control == NULL) { |
| 1749 | return; |
| 1750 | } |
| 1751 | object = memory_object_control_to_vm_object(control); |
| 1752 | |
| 1753 | if (object == VM_OBJECT_NULL) { |
| 1754 | return; |
| 1755 | } |
| 1756 | |
| 1757 | vm_object_lock(object); |
| 1758 | VM_OBJECT_SET_FOR_REALTIME(object, for_realtime);
| 1759 | vm_object_unlock(object); |
| 1760 | } |
| 1761 | |
| 1762 | kern_return_t |
| 1763 | memory_object_pages_resident( |
| 1764 | memory_object_control_t control, |
| 1765 | boolean_t * has_pages_resident) |
| 1766 | { |
| 1767 | vm_object_t object; |
| 1768 | |
| 1769 | *has_pages_resident = FALSE; |
| 1770 | |
| 1771 | object = memory_object_control_to_vm_object(control); |
| 1772 | if (object == VM_OBJECT_NULL) { |
| 1773 | return KERN_INVALID_ARGUMENT; |
| 1774 | } |
| 1775 | |
| 1776 | if (object->resident_page_count) { |
| 1777 | *has_pages_resident = TRUE; |
| 1778 | } |
| 1779 | |
| 1780 | return KERN_SUCCESS; |
| 1781 | } |
| 1782 | |
| 1783 | kern_return_t |
| 1784 | memory_object_signed( |
| 1785 | memory_object_control_t control, |
| 1786 | boolean_t is_signed) |
| 1787 | { |
| 1788 | vm_object_t object; |
| 1789 | |
| 1790 | object = memory_object_control_to_vm_object(control); |
| 1791 | if (object == VM_OBJECT_NULL) { |
| 1792 | return KERN_INVALID_ARGUMENT; |
| 1793 | } |
| 1794 | |
| 1795 | vm_object_lock(object); |
| 1796 | object->code_signed = is_signed; |
| 1797 | vm_object_unlock(object); |
| 1798 | |
| 1799 | return KERN_SUCCESS; |
| 1800 | } |
| 1801 | |
| 1802 | boolean_t |
| 1803 | memory_object_is_signed( |
| 1804 | memory_object_control_t control) |
| 1805 | { |
| 1806 | boolean_t is_signed; |
| 1807 | vm_object_t object; |
| 1808 | |
| 1809 | object = memory_object_control_to_vm_object(control); |
| 1810 | if (object == VM_OBJECT_NULL) { |
| 1811 | return FALSE; |
| 1812 | } |
| 1813 | |
| 1814 | vm_object_lock_shared(object); |
| 1815 | is_signed = object->code_signed; |
| 1816 | vm_object_unlock(object); |
| 1817 | |
| 1818 | return is_signed; |
| 1819 | } |
| 1820 | |
| 1821 | boolean_t |
| 1822 | memory_object_is_shared_cache( |
| 1823 | memory_object_control_t control) |
| 1824 | { |
| 1825 | vm_object_t object = VM_OBJECT_NULL; |
| 1826 | |
| 1827 | object = memory_object_control_to_vm_object(control); |
| 1828 | if (object == VM_OBJECT_NULL) { |
| 1829 | return FALSE; |
| 1830 | } |
| 1831 | |
| 1832 | return object->object_is_shared_cache; |
| 1833 | } |
| 1834 | |
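|  | /*
|  | * In this implementation a memory_object_control_t is simply the
|  | * vm_object_t it controls, so the allocate/collapse/conversion
|  | * routines below reduce to pointer assignments.
|  | */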
| 1835 | __private_extern__ memory_object_control_t |
| 1836 | memory_object_control_allocate( |
| 1837 | vm_object_t object) |
| 1838 | { |
| 1839 | return object; |
| 1840 | } |
| 1841 | |
| 1842 | __private_extern__ void |
| 1843 | memory_object_control_collapse( |
| 1844 | memory_object_control_t *control, |
| 1845 | vm_object_t object) |
| 1846 | { |
| 1847 | *control = object; |
| 1848 | } |
| 1849 | |
| 1850 | __private_extern__ vm_object_t |
| 1851 | memory_object_control_to_vm_object( |
| 1852 | memory_object_control_t control) |
| 1853 | { |
| 1854 | return control; |
| 1855 | } |
| 1856 | |
| 1857 | __private_extern__ vm_object_t |
| 1858 | memory_object_to_vm_object( |
| 1859 | memory_object_t mem_obj) |
| 1860 | { |
| 1861 | memory_object_control_t mo_control; |
| 1862 | |
| 1863 | if (mem_obj == MEMORY_OBJECT_NULL) { |
| 1864 | return VM_OBJECT_NULL; |
| 1865 | } |
| 1866 | mo_control = mem_obj->mo_control; |
| 1867 | if (mo_control == NULL) { |
| 1868 | return VM_OBJECT_NULL; |
| 1869 | } |
| 1870 | return memory_object_control_to_vm_object(mo_control);
| 1871 | } |
| 1872 | |
| 1873 | void |
| 1874 | memory_object_control_reference( |
| 1875 | __unused memory_object_control_t control) |
| 1876 | { |
| 1877 | return; |
| 1878 | } |
| 1879 | |
| 1880 | /* |
| 1881 | * We only ever issue one of these references, so kill it
| 1882 | * when that reference gets released (this should switch to real
| 1883 | * reference counting in true port-less EMMI).
| 1884 | */ |
| 1885 | void |
| 1886 | memory_object_control_deallocate( |
| 1887 | __unused memory_object_control_t control) |
| 1888 | { |
| 1889 | } |
| 1890 | |
| 1891 | void |
| 1892 | memory_object_control_disable( |
| 1893 | memory_object_control_t *control) |
| 1894 | { |
| 1895 | assert(*control != VM_OBJECT_NULL); |
| 1896 | *control = VM_OBJECT_NULL; |
| 1897 | } |
| 1898 | |
| 1899 | memory_object_t |
| 1900 | convert_port_to_memory_object( |
| 1901 | __unused mach_port_t port) |
| 1902 | { |
| 1903 | return MEMORY_OBJECT_NULL; |
| 1904 | } |
| 1905 | |
| 1906 | |
| 1907 | mach_port_t |
| 1908 | convert_memory_object_to_port( |
| 1909 | __unused memory_object_t object) |
| 1910 | { |
| 1911 | return MACH_PORT_NULL; |
| 1912 | } |
| 1913 | |
| 1914 | |
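|  | /*
|  | * The routines below are thin dispatch stubs: each forwards to the
|  | * per-pager function table reached through the memory object's
|  | * mo_pager_ops.  A pager supplies such a table; a hypothetical pager
|  | * (names illustrative, field names taken from the dispatch calls
|  | * below) would populate the entries it implements along these lines:
|  | *
|  | *	const struct memory_object_pager_ops my_pager_ops = {
|  | *		.memory_object_reference	= my_pager_reference,
|  | *		.memory_object_deallocate	= my_pager_deallocate,
|  | *		.memory_object_init		= my_pager_init,
|  | *		.memory_object_terminate	= my_pager_terminate,
|  | *		.memory_object_data_request	= my_pager_data_request,
|  | *		.memory_object_data_return	= my_pager_data_return,
|  | *		.memory_object_map		= my_pager_map,
|  | *		.memory_object_last_unmap	= my_pager_last_unmap,
|  | *	};
|  | */
|  |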
| 1915 | /* Routine memory_object_reference */ |
| 1916 | void |
| 1917 | memory_object_reference( |
| 1918 | memory_object_t memory_object) |
| 1919 | { |
| 1920 | (memory_object->mo_pager_ops->memory_object_reference)( |
| 1921 | memory_object); |
| 1922 | } |
| 1923 | |
| 1924 | /* Routine memory_object_deallocate */ |
| 1925 | void |
| 1926 | memory_object_deallocate( |
| 1927 | memory_object_t memory_object) |
| 1928 | { |
| 1929 | (memory_object->mo_pager_ops->memory_object_deallocate)( |
| 1930 | memory_object); |
| 1931 | } |
| 1932 | |
| 1933 | |
| 1934 | /* Routine memory_object_init */ |
| 1935 | kern_return_t |
| 1936 | memory_object_init |
| 1937 | ( |
| 1938 | memory_object_t memory_object, |
| 1939 | memory_object_control_t memory_control, |
| 1940 | memory_object_cluster_size_t memory_object_page_size |
| 1941 | ) |
| 1942 | { |
| 1943 | return (memory_object->mo_pager_ops->memory_object_init)( |
| 1944 | memory_object, |
| 1945 | memory_control, |
| 1946 | memory_object_page_size); |
| 1947 | } |
| 1948 | |
| 1949 | /* Routine memory_object_terminate */ |
| 1950 | kern_return_t |
| 1951 | memory_object_terminate |
| 1952 | ( |
| 1953 | memory_object_t memory_object |
| 1954 | ) |
| 1955 | { |
| 1956 | return (memory_object->mo_pager_ops->memory_object_terminate)( |
| 1957 | memory_object); |
| 1958 | } |
| 1959 | |
| 1960 | /* Routine memory_object_data_request */ |
| 1961 | kern_return_t |
| 1962 | memory_object_data_request |
| 1963 | ( |
| 1964 | memory_object_t memory_object, |
| 1965 | memory_object_offset_t offset, |
| 1966 | memory_object_cluster_size_t length, |
| 1967 | vm_prot_t desired_access, |
| 1968 | memory_object_fault_info_t fault_info |
| 1969 | ) |
| 1970 | { |
| 1971 | return (memory_object->mo_pager_ops->memory_object_data_request)( |
| 1972 | memory_object, |
| 1973 | offset, |
| 1974 | length, |
| 1975 | desired_access, |
| 1976 | fault_info); |
| 1977 | } |
| 1978 | |
| 1979 | /* Routine memory_object_data_return */ |
| 1980 | kern_return_t |
| 1981 | memory_object_data_return |
| 1982 | ( |
| 1983 | memory_object_t memory_object, |
| 1984 | memory_object_offset_t offset, |
| 1985 | memory_object_cluster_size_t size, |
| 1986 | memory_object_offset_t *resid_offset, |
| 1987 | int *io_error, |
| 1988 | boolean_t dirty, |
| 1989 | boolean_t kernel_copy, |
| 1990 | int upl_flags |
| 1991 | ) |
| 1992 | { |
| 1993 | return (memory_object->mo_pager_ops->memory_object_data_return)( |
| 1994 | memory_object, |
| 1995 | offset, |
| 1996 | size, |
| 1997 | resid_offset, |
| 1998 | io_error, |
| 1999 | dirty, |
| 2000 | kernel_copy, |
| 2001 | upl_flags); |
| 2002 | } |
| 2003 | |
| 2004 | /* Routine memory_object_data_initialize */ |
| 2005 | kern_return_t |
| 2006 | memory_object_data_initialize |
| 2007 | ( |
| 2008 | memory_object_t memory_object, |
| 2009 | memory_object_offset_t offset, |
| 2010 | memory_object_cluster_size_t size |
| 2011 | ) |
| 2012 | { |
| 2013 | return (memory_object->mo_pager_ops->memory_object_data_initialize)( |
| 2014 | memory_object, |
| 2015 | offset, |
| 2016 | size); |
| 2017 | } |
| 2018 | |
| 2019 | /* |
| 2020 | * memory_object_map() is called by VM (in vm_map_enter() and its variants) |
| 2021 | * each time a "named" VM object gets mapped directly or indirectly |
| 2022 | * (copy-on-write mapping). A "named" VM object has an extra reference held |
| 2023 | * by the pager to keep it alive until the pager decides that the |
| 2024 | * memory object (and its VM object) can be reclaimed. |
| 2025 | * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all |
| 2026 | * the mappings of that memory object have been removed. |
| 2027 | * |
| 2028 | * For a given VM object, calls to memory_object_map() and memory_object_last_unmap()
| 2029 | * are serialized (through object->mapping_in_progress), to ensure that the |
| 2030 | * pager gets a consistent view of the mapping status of the memory object. |
| 2031 | * |
| 2032 | * This allows the pager to keep track of how many times a memory object |
| 2033 | * has been mapped and with which protections, to decide when it can be |
| 2034 | * reclaimed. |
| 2035 | */ |
| 2036 | |
| 2037 | /* Routine memory_object_map */ |
| 2038 | kern_return_t |
| 2039 | memory_object_map |
| 2040 | ( |
| 2041 | memory_object_t memory_object, |
| 2042 | vm_prot_t prot |
| 2043 | ) |
| 2044 | { |
| 2045 | return (memory_object->mo_pager_ops->memory_object_map)( |
| 2046 | memory_object, |
| 2047 | prot); |
| 2048 | } |
| 2049 | |
| 2050 | /* Routine memory_object_last_unmap */ |
| 2051 | kern_return_t |
| 2052 | memory_object_last_unmap |
| 2053 | ( |
| 2054 | memory_object_t memory_object |
| 2055 | ) |
| 2056 | { |
| 2057 | return (memory_object->mo_pager_ops->memory_object_last_unmap)( |
| 2058 | memory_object); |
| 2059 | } |
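|  |
|  | /*
|  | * Illustrative sketch of the protocol described above, from the pager's
|  | * side (all names hypothetical): because memory_object_map() and
|  | * memory_object_last_unmap() are serialized per object, a pager can keep
|  | * a simple mapping summary without extra locking against these callbacks.
|  | *
|  | *	kern_return_t
|  | *	my_pager_map(memory_object_t mem_obj, vm_prot_t prot)
|  | *	{
|  | *		my_pager_t pager = my_pager_lookup(mem_obj);
|  | *
|  | *		pager->mapped = TRUE;
|  | *		pager->max_prot |= prot;   // strongest access requested so far
|  | *		return KERN_SUCCESS;
|  | *	}
|  | *
|  | *	kern_return_t
|  | *	my_pager_last_unmap(memory_object_t mem_obj)
|  | *	{
|  | *		my_pager_t pager = my_pager_lookup(mem_obj);
|  | *
|  | *		// last mapping gone; the pager may now decide to drop its
|  | *		// "named" reference and let the object be reclaimed
|  | *		pager->mapped = FALSE;
|  | *		return KERN_SUCCESS;
|  | *	}
|  | */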
| 2060 | |
| 2061 | boolean_t |
| 2062 | memory_object_backing_object |
| 2063 | ( |
| 2064 | memory_object_t memory_object, |
| 2065 | memory_object_offset_t offset, |
| 2066 | vm_object_t *backing_object, |
| 2067 | vm_object_offset_t *backing_offset) |
| 2068 | { |
| 2069 | if (memory_object->mo_pager_ops->memory_object_backing_object == NULL) { |
| 2070 | return FALSE; |
| 2071 | } |
| 2072 | return (memory_object->mo_pager_ops->memory_object_backing_object)( |
| 2073 | memory_object, |
| 2074 | offset, |
| 2075 | backing_object, |
| 2076 | backing_offset); |
| 2077 | } |
| 2078 | |
| 2079 | upl_t |
| 2080 | convert_port_to_upl( |
| 2081 | __unused ipc_port_t port) |
| 2082 | { |
| 2083 | return NULL; |
| 2084 | } |
| 2085 | |
| 2086 | mach_port_t |
| 2087 | convert_upl_to_port( |
| 2088 | __unused upl_t upl) |
| 2089 | { |
| 2090 | return MACH_PORT_NULL; |
| 2091 | } |
| 2092 | |