vm_pageout.c source code [xnu/osfmk/vm/vm_pageout.c]

1	/*
2	* Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* @OSF_COPYRIGHT@
30	*/
31	/*
32	* Mach Operating System
33	* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34	* All Rights Reserved.
35	*
36	* Permission to use, copy, modify and distribute this software and its
37	* documentation is hereby granted, provided that both the copyright
38	* notice and this permission notice appear in all copies of the
39	* software, derivative works or modified versions, and any portions
40	* thereof, and that both notices appear in supporting documentation.
41	*
42	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45	*
46	* Carnegie Mellon requests users of this software to return to
47	*
48	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49	* School of Computer Science
50	* Carnegie Mellon University
51	* Pittsburgh PA 15213-3890
52	*
53	* any improvements or extensions that they make and grant Carnegie Mellon
54	* the rights to redistribute these changes.
55	*/
56	/*
57	*/
58	/*
59	* File: vm/vm_pageout.c
60	* Author: Avadis Tevanian, Jr., Michael Wayne Young
61	* Date: 1985
62	*
63	* The proverbial page-out daemon.
64	*/
65
66	#include <stdint.h>
67
68	#include <debug.h>
69	#include <mach_pagemap.h>
70	#include <mach_cluster_stats.h>
71
72	#include <mach/mach_types.h>
73	#include <mach/memory_object.h>
74	#include <mach/memory_object_default.h>
75	#include <mach/memory_object_control_server.h>
76	#include <mach/mach_host_server.h>
77	#include <mach/upl.h>
78	#include <mach/vm_map.h>
79	#include <mach/vm_param.h>
80	#include <mach/vm_statistics.h>
81	#include <mach/sdt.h>
82
83	#include <kern/kern_types.h>
84	#include <kern/counters.h>
85	#include <kern/host_statistics.h>
86	#include <kern/machine.h>
87	#include <kern/misc_protos.h>
88	#include <kern/sched.h>
89	#include <kern/thread.h>
90	#include <kern/xpr.h>
91	#include <kern/kalloc.h>
92	#include <kern/policy_internal.h>
93	#include <kern/thread_group.h>
94
95	#include <machine/vm_tuning.h>
96	#include <machine/commpage.h>
97
98	#include <vm/pmap.h>
99	#include <vm/vm_compressor_pager.h>
100	#include <vm/vm_fault.h>
101	#include <vm/vm_map.h>
102	#include <vm/vm_object.h>
103	#include <vm/vm_page.h>
104	#include <vm/vm_pageout.h>
105	#include <vm/vm_protos.h> /* must be last */
106	#include <vm/memory_object.h>
107	#include <vm/vm_purgeable_internal.h>
108	#include <vm/vm_shared_region.h>
109	#include <vm/vm_compressor.h>
110
111	#include <san/kasan.h>
112
113	#if CONFIG_PHANTOM_CACHE
114	#include <vm/vm_phantom_cache.h>
115	#endif
116
117	#if UPL_DEBUG
118	#include <libkern/OSDebug.h>
119	#endif
120
/*
 * Symbols defined outside this file.
 */
extern int cs_debug;

extern void mbuf_drain(boolean_t);

#if VM_PRESSURE_EVENTS
/*
 * Available-page counters and thresholds are declared with a different
 * width depending on whether the kernel is built with CONFIG_JETSAM.
 */
#if CONFIG_JETSAM
extern unsigned int memorystatus_available_pages;
extern unsigned int memorystatus_available_pages_pressure;
extern unsigned int memorystatus_available_pages_critical;
#else /* CONFIG_JETSAM */
extern uint64_t memorystatus_available_pages;
extern uint64_t memorystatus_available_pages_pressure;
extern uint64_t memorystatus_available_pages_critical;
#endif /* CONFIG_JETSAM */

extern unsigned int memorystatus_frozen_count;
extern unsigned int memorystatus_suspended_count;
extern vm_pressure_level_t memorystatus_vm_pressure_level;

void vm_pressure_response(void);
extern void consider_vm_pressure_events(void);

#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
#endif /* VM_PRESSURE_EVENTS */
145
146
/*
 * Throttle and wait tunables for the pageout daemon.  Each one is
 * guarded by #ifndef so a definition supplied earlier takes precedence.
 */

/* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
#ifdef CONFIG_EMBEDDED
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
#else
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
#endif
#endif /* VM_PAGEOUT_BURST_INACTIVE_THROTTLE */

/* number of pages to move to break deadlock */
#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100
#endif /* VM_PAGEOUT_DEADLOCK_RELIEF */

/* maximum pageouts on a given pageout queue */
#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX 128UL
#endif /* VM_PAGE_LAUNDRY_MAX */

/* All of the waits below are expressed in milliseconds. */
#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT 1
#endif /* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT 50
#endif /* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT 100
#endif /* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT 10
#endif /* VM_PAGEOUT_IDLE_WAIT */

#ifndef VM_PAGEOUT_SWAP_WAIT
#define VM_PAGEOUT_SWAP_WAIT 10
#endif /* VM_PAGEOUT_SWAP_WAIT */
182
183
/*
 * Target size of the speculative queue, derived from the tunable
 * vm_pageout_state.vm_page_speculative_percentage.
 *
 * NOTE(review): the expression divides by (100 / percentage), so a
 * percentage of 0 or greater than 100 would divide by zero; the code that
 * keeps the percentage in range is not visible here -- confirm.
 */
#ifndef VM_PAGE_SPECULATIVE_TARGET
#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage))
#endif /* VM_PAGE_SPECULATIVE_TARGET */


/*
 * To obtain a reasonable LRU approximation, the inactive queue
 * needs to be large enough to give pages on it a chance to be
 * referenced a second time.  This macro defines the fraction
 * of active+inactive pages that should be inactive (one half).
 * The pageout daemon uses it to update vm_page_inactive_target.
 *
 * If vm_page_free_count falls below vm_page_free_target and
 * vm_page_inactive_count is below vm_page_inactive_target,
 * then the pageout daemon starts running.
 */
#ifndef VM_PAGE_INACTIVE_TARGET
#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
#endif /* VM_PAGE_INACTIVE_TARGET */

/*
 * Once the pageout daemon starts running, it keeps going
 * until vm_page_free_count meets or exceeds vm_page_free_target.
 */
#ifndef VM_PAGE_FREE_TARGET
#ifdef CONFIG_EMBEDDED
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
#else
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
#endif
#endif /* VM_PAGE_FREE_TARGET */


/*
 * The pageout daemon always starts running once vm_page_free_count
 * falls below vm_page_free_min.
 */
#ifndef VM_PAGE_FREE_MIN
#ifdef CONFIG_EMBEDDED
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
#else
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
#endif
#endif /* VM_PAGE_FREE_MIN */

/* Fixed limits, in pages, for the reserved / min / target values. */
#ifdef CONFIG_EMBEDDED
#define VM_PAGE_FREE_RESERVED_LIMIT 100
#define VM_PAGE_FREE_MIN_LIMIT 1500
#define VM_PAGE_FREE_TARGET_LIMIT 2000
#else
#define VM_PAGE_FREE_RESERVED_LIMIT 1700
#define VM_PAGE_FREE_MIN_LIMIT 3500
#define VM_PAGE_FREE_TARGET_LIMIT 4000
#endif

/*
 * When vm_page_free_count falls below vm_page_free_reserved,
 * only vm-privileged threads can allocate pages.  vm-privilege
 * allows the pageout daemon and default pager (and any other
 * associated threads needed for default pageout) to continue
 * operation by dipping into the reserved pool of pages.
 */
#ifndef VM_PAGE_FREE_RESERVED
#define VM_PAGE_FREE_RESERVED(n) \
	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif /* VM_PAGE_FREE_RESERVED */

/*
 * When we dequeue pages from the inactive list, they are
 * reactivated (ie, put back on the active queue) if referenced.
 * However, it is possible to starve the free list if other
 * processors are referencing pages faster than we can turn off
 * the referenced bit.  So we limit the number of reactivations
 * we will make per call of vm_pageout_scan().
 *
 * NOTE(review): the non-embedded variant uses MAX(), which makes
 * VM_PAGE_REACTIVATE_LIMIT_MAX a lower bound on the limit rather than
 * an upper bound -- confirm that this is intended.
 */
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000

#ifndef VM_PAGE_REACTIVATE_LIMIT
#ifdef CONFIG_EMBEDDED
#define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
#else
#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif
#endif /* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
273
274	extern boolean_t hibernate_cleaning_in_progress;
275
276	/*
277	* Forward declarations for internal routines.
278	*/
279	struct cq {
280	struct vm_pageout_queue *q;
281	void *current_chead;
282	char *scratch_buf;
283	int id;
284	};
285
286	struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
287
288
289	#if VM_PRESSURE_EVENTS
290	void vm_pressure_thread(void);
291
292	boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
293	boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
294
295	boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
296	boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
297	#endif
298
299	void vm_pageout_garbage_collect(int);
300	static void vm_pageout_iothread_external(void);
301	static void vm_pageout_iothread_internal(struct cq *cq);
302	static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
303
304	extern void vm_pageout_continue(void);
305	extern void vm_pageout_scan(void);
306
307	void vm_tests(void); / forward /
308
309	#if !CONFIG_EMBEDDED
310	static boolean_t vm_pageout_waiter = FALSE;
311	static boolean_t vm_pageout_running = FALSE;
312	#endif /* !CONFIG_EMBEDDED */
313
314
315	#if DEVELOPMENT \|\| DEBUG
316	struct vm_pageout_debug vm_pageout_debug;
317	#endif
318	struct vm_pageout_vminfo vm_pageout_vminfo;
319	struct vm_pageout_state vm_pageout_state;
320	struct vm_config vm_config;
321
322	struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
323	struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
324
325	int vm_upl_wait_for_pages = `0`;
326	vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
327
328	boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
329
330	int vm_debug_events = `0`;
331
332	#if CONFIG_MEMORYSTATUS
333	extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
334
335	uint32_t vm_pageout_memorystatus_fb_factor_nr = `5`;
336	uint32_t vm_pageout_memorystatus_fb_factor_dr = `2`;
337
338	#endif
339
340
341
342	/*
343	* Routine: vm_pageout_object_terminate
344	* Purpose:
345	* Destroy the pageout_object, and perform all of the
346	* required cleanup actions.
347	*
348	* In/Out conditions:
349	* The object must be locked, and will be returned locked.
350	*/
351	void
352	vm_pageout_object_terminate(
353	vm_object_t object)
354	{
355	vm_object_t shadow_object;
356
357	/*
358	* Deal with the deallocation (last reference) of a pageout object
359	* (used for cleaning-in-place) by dropping the paging references/
360	* freeing pages in the original object.
361	*/
362
363	assert(object->pageout);
364	shadow_object = object->shadow;
365	vm_object_lock(shadow_object);
366
367	while (!vm_page_queue_empty(&object->memq)) {
368	vm_page_t p, m;
369	vm_object_offset_t offset;
370
371	p = (vm_page_t) vm_page_queue_first(&object->memq);
372
373	assert(p->vmp_private);
374	assert(p->vmp_free_when_done);
375	p->vmp_free_when_done = FALSE;
376	assert(!p->vmp_cleaning);
377	assert(!p->vmp_laundry);
378
379	offset = p->vmp_offset;
380	VM_PAGE_FREE(p);
381	p = VM_PAGE_NULL;
382
383	m = vm_page_lookup(shadow_object,
384	offset + object->vo_shadow_offset);
385
386	if(m == VM_PAGE_NULL)
387	continue;
388
389	assert((m->vmp_dirty) \|\| (m->vmp_precious) \|\|
390	(m->vmp_busy && m->vmp_cleaning));
391
392	/*
393	* Handle the trusted pager throttle.
394	* Also decrement the burst throttle (if external).
395	*/
396	vm_page_lock_queues();
397	if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
398	vm_pageout_throttle_up(m);
399
400	/*
401	* Handle the "target" page(s). These pages are to be freed if
402	* successfully cleaned. Target pages are always busy, and are
403	* wired exactly once. The initial target pages are not mapped,
404	* (so cannot be referenced or modified) but converted target
405	* pages may have been modified between the selection as an
406	* adjacent page and conversion to a target.
407	*/
408	if (m->vmp_free_when_done) {
409	assert(m->vmp_busy);
410	assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
411	assert(m->vmp_wire_count == `1`);
412	m->vmp_cleaning = FALSE;
413	m->vmp_free_when_done = FALSE;
414	/*
415	* Revoke all access to the page. Since the object is
416	* locked, and the page is busy, this prevents the page
417	* from being dirtied after the pmap_disconnect() call
418	* returns.
419	*
420	* Since the page is left "dirty" but "not modifed", we
421	* can detect whether the page was redirtied during
422	* pageout by checking the modify state.
423	*/
424	if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
425	SET_PAGE_DIRTY(m, FALSE);
426	} else {
427	m->vmp_dirty = FALSE;
428	}
429
430	if (m->vmp_dirty) {
431	vm_page_unwire(m, TRUE); / reactivates /
432	VM_STAT_INCR(reactivations);
433	PAGE_WAKEUP_DONE(m);
434	} else {
435	vm_page_free(m); / clears busy, etc. /
436	}
437	vm_page_unlock_queues();
438	continue;
439	}
440	/*
441	* Handle the "adjacent" pages. These pages were cleaned in
442	* place, and should be left alone.
443	* If prep_pin_count is nonzero, then someone is using the
444	* page, so make it active.
445	*/
446	if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) {
447	if (m->vmp_reference)
448	vm_page_activate(m);
449	else
450	vm_page_deactivate(m);
451	}
452	if (m->vmp_overwriting) {
453	/*
454	* the (COPY_OUT_FROM == FALSE) request_page_list case
455	*/
456	if (m->vmp_busy) {
457	/*
458	* We do not re-set m->vmp_dirty !
459	* The page was busy so no extraneous activity
460	* could have occurred. COPY_INTO is a read into the
461	* new pages. CLEAN_IN_PLACE does actually write
462	* out the pages but handling outside of this code
463	* will take care of resetting dirty. We clear the
464	* modify however for the Programmed I/O case.
465	*/
466	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
467
468	m->vmp_busy = FALSE;
469	m->vmp_absent = FALSE;
470	} else {
471	/*
472	* alternate (COPY_OUT_FROM == FALSE) request_page_list case
473	* Occurs when the original page was wired
474	* at the time of the list request
475	*/
476	assert(VM_PAGE_WIRED(m));
477	vm_page_unwire(m, TRUE); / reactivates /
478	}
479	m->vmp_overwriting = FALSE;
480	} else {
481	m->vmp_dirty = FALSE;
482	}
483	m->vmp_cleaning = FALSE;
484
485	/*
486	* Wakeup any thread waiting for the page to be un-cleaning.
487	*/
488	PAGE_WAKEUP(m);
489	vm_page_unlock_queues();
490	}
491	/*
492	* Account for the paging reference taken in vm_paging_object_allocate.
493	*/
494	vm_object_activity_end(shadow_object);
495	vm_object_unlock(shadow_object);
496
497	assert(object->ref_count == `0`);
498	assert(object->paging_in_progress == `0`);
499	assert(object->activity_in_progress == `0`);
500	assert(object->resident_page_count == `0`);
501	return;
502	}
503
504	/*
505	* Routine: vm_pageclean_setup
506	*
507	* Purpose: setup a page to be cleaned (made non-dirty), but not
508	* necessarily flushed from the VM page cache.
509	* This is accomplished by cleaning in place.
510	*
511	* The page must not be busy, and new_object
512	* must be locked.
513	*
514	*/
515	static void
516	vm_pageclean_setup(
517	vm_page_t m,
518	vm_page_t new_m,
519	vm_object_t new_object,
520	vm_object_offset_t new_offset)
521	{
522	assert(!m->vmp_busy);
523	#if 0
524	assert(!m->vmp_cleaning);
525	#endif
526
527	XPR(XPR_VM_PAGEOUT,
528	"vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
529	VM_PAGE_OBJECT(m), m->vmp_offset, m,
530	new_m, new_offset);
531
532	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
533
534	/*
535	* Mark original page as cleaning in place.
536	*/
537	m->vmp_cleaning = TRUE;
538	SET_PAGE_DIRTY(m, FALSE);
539	m->vmp_precious = FALSE;
540
541	/*
542	* Convert the fictitious page to a private shadow of
543	* the real page.
544	*/
545	assert(new_m->vmp_fictitious);
546	assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
547	new_m->vmp_fictitious = FALSE;
548	new_m->vmp_private = TRUE;
549	new_m->vmp_free_when_done = TRUE;
550	VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
551
552	vm_page_lockspin_queues();
553	vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
554	vm_page_unlock_queues();
555
556	vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
557	assert(!new_m->vmp_wanted);
558	new_m->vmp_busy = FALSE;
559	}
560
561	/*
562	* Routine: vm_pageout_initialize_page
563	* Purpose:
564	* Causes the specified page to be initialized in
565	* the appropriate memory object. This routine is used to push
566	* pages into a copy-object when they are modified in the
567	* permanent object.
568	*
569	* The page is moved to a temporary object and paged out.
570	*
571	* In/out conditions:
572	* The page in question must not be on any pageout queues.
573	* The object to which it belongs must be locked.
574	* The page must be busy, but not hold a paging reference.
575	*
576	* Implementation:
577	* Move this page to a completely new object.
578	*/
579	void
580	vm_pageout_initialize_page(
581	vm_page_t m)
582	{
583	vm_object_t object;
584	vm_object_offset_t paging_offset;
585	memory_object_t pager;
586
587	XPR(XPR_VM_PAGEOUT,
588	"vm_pageout_initialize_page, page 0x%X\n",
589	m, `0`, `0`, `0`, `0`);
590
591	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
592
593	object = VM_PAGE_OBJECT(m);
594
595	assert(m->vmp_busy);
596	assert(object->internal);
597
598	/*
599	* Verify that we really want to clean this page
600	*/
601	assert(!m->vmp_absent);
602	assert(!m->vmp_error);
603	assert(m->vmp_dirty);
604
605	/*
606	* Create a paging reference to let us play with the object.
607	*/
608	paging_offset = m->vmp_offset + object->paging_offset;
609
610	if (m->vmp_absent \|\| m->vmp_error \|\| m->vmp_restart \|\| (!m->vmp_dirty && !m->vmp_precious)) {
611	panic("reservation without pageout?"); / alan /
612
613	VM_PAGE_FREE(m);
614	vm_object_unlock(object);
615
616	return;
617	}
618
619	/*
620	* If there's no pager, then we can't clean the page. This should
621	* never happen since this should be a copy object and therefore not
622	* an external object, so the pager should always be there.
623	*/
624
625	pager = object->pager;
626
627	if (pager == MEMORY_OBJECT_NULL) {
628	panic("missing pager for copy object");
629
630	VM_PAGE_FREE(m);
631	return;
632	}
633
634	/*
635	* set the page for future call to vm_fault_list_request
636	*/
637	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
638	SET_PAGE_DIRTY(m, FALSE);
639
640	/*
641	* keep the object from collapsing or terminating
642	*/
643	vm_object_paging_begin(object);
644	vm_object_unlock(object);
645
646	/*
647	* Write the data to its pager.
648	* Note that the data is passed by naming the new object,
649	* not a virtual address; the pager interface has been
650	* manipulated to use the "internal memory" data type.
651	* [The object reference from its allocation is donated
652	* to the eventual recipient.]
653	*/
654	memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
655
656	vm_object_lock(object);
657	vm_object_paging_end(object);
658	}
659
660
661	/*
662	* vm_pageout_cluster:
663	*
664	* Given a page, queue it to the appropriate I/O thread,
665	* which will page it out and attempt to clean adjacent pages
666	* in the same operation.
667	*
668	* The object and queues must be locked. We will take a
669	* paging reference to prevent deallocation or collapse when we
670	* release the object lock back at the call site. The I/O thread
671	* is responsible for consuming this reference
672	*
673	* The page must not be on any pageout queue.
674	*/
675	#if DEVELOPMENT \|\| DEBUG
676	vmct_stats_t vmct_stats;
677
678	int32_t vmct_active = `0`;
679	uint64_t vm_compressor_epoch_start = `0`;
680	uint64_t vm_compressor_epoch_stop = `0`;
681
682	typedef enum vmct_state_t {
683	VMCT_IDLE,
684	VMCT_AWAKENED,
685	VMCT_ACTIVE,
686	} vmct_state_t;
687	vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
688	#endif
689
690
691	void
692	vm_pageout_cluster(vm_page_t m)
693	{
694	vm_object_t object = VM_PAGE_OBJECT(m);
695	struct vm_pageout_queue *q;
696
697
698	XPR(XPR_VM_PAGEOUT,
699	"vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
700	object, m->vmp_offset, m, `0`, `0`);
701
702	VM_PAGE_CHECK(m);
703	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
704	vm_object_lock_assert_exclusive(object);
705
706	/*
707	* Only a certain kind of page is appreciated here.
708	*/
709	assert((m->vmp_dirty \|\| m->vmp_precious) && (!VM_PAGE_WIRED(m)));
710	assert(!m->vmp_cleaning && !m->vmp_laundry);
711	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
712
713	/*
714	* protect the object from collapse or termination
715	*/
716	vm_object_activity_begin(object);
717
718	if (object->internal == TRUE) {
719	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
720
721	m->vmp_busy = TRUE;
722
723	q = &vm_pageout_queue_internal;
724	} else
725	q = &vm_pageout_queue_external;
726
727	/*
728	* pgo_laundry count is tied to the laundry bit
729	*/
730	m->vmp_laundry = TRUE;
731	q->pgo_laundry++;
732
733	m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q;
734	vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, vmp_pageq);
735
736	if (q->pgo_idle == TRUE) {
737	q->pgo_idle = FALSE;
738	thread_wakeup((event_t) &q->pgo_pending);
739	}
740	VM_PAGE_CHECK(m);
741	}
742
743
744	/*
745	* A page is back from laundry or we are stealing it back from
746	* the laundering state. See if there are some pages waiting to
747	* go to laundry and if we can let some of them go now.
748	*
749	* Object and page queues must be locked.
750	*/
751	void
752	vm_pageout_throttle_up(
753	vm_page_t m)
754	{
755	struct vm_pageout_queue *q;
756	vm_object_t m_object;
757
758	m_object = VM_PAGE_OBJECT(m);
759
760	assert(m_object != VM_OBJECT_NULL);
761	assert(m_object != kernel_object);
762
763	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
764	vm_object_lock_assert_exclusive(m_object);
765
766	if (m_object->internal == TRUE)
767	q = &vm_pageout_queue_internal;
768	else
769	q = &vm_pageout_queue_external;
770
771	if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
772
773	vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, vmp_pageq);
774	m->vmp_q_state = VM_PAGE_NOT_ON_Q;
775
776	VM_PAGE_ZERO_PAGEQ_ENTRY(m);
777
778	vm_object_activity_end(m_object);
779
780	VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, `1`);
781	}
782	if (m->vmp_laundry == TRUE) {
783
784	m->vmp_laundry = FALSE;
785	q->pgo_laundry--;
786
787	if (q->pgo_throttled == TRUE) {
788	q->pgo_throttled = FALSE;
789	thread_wakeup((event_t) &q->pgo_laundry);
790	}
791	if (q->pgo_draining == TRUE && q->pgo_laundry == `0`) {
792	q->pgo_draining = FALSE;
793	thread_wakeup((event_t) (&q->pgo_laundry+`1`));
794	}
795	VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, `1`);
796	}
797	}
798
799
800	static void
801	vm_pageout_throttle_up_batch(
802	struct vm_pageout_queue *q,
803	int batch_cnt)
804	{
805	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
806
807	VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt);
808
809	q->pgo_laundry -= batch_cnt;
810
811	if (q->pgo_throttled == TRUE) {
812	q->pgo_throttled = FALSE;
813	thread_wakeup((event_t) &q->pgo_laundry);
814	}
815	if (q->pgo_draining == TRUE && q->pgo_laundry == `0`) {
816	q->pgo_draining = FALSE;
817	thread_wakeup((event_t) (&q->pgo_laundry+`1`));
818	}
819	}
820
821
822
/*
 * VM memory pressure monitoring.
 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stats[vm_pageout_stat_now].
 *
 * record_memory_pressure() moves "vm_pageout_stat_now" forward, to start
 * accumulating the number of reclaimed pages in a new vm_pageout_stats[]
 * bucket.  Buckets are treated as 1/8th of a second each (see the
 * conversion in mach_vm_pressure_monitor()).
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants; the ring holds 30 seconds of history on DEVELOPMENT/DEBUG
 * kernels and 1 second otherwise.
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
 */

/*
 * Number of buckets in the ring: 8 per second of history, plus the one
 * currently being filled.  The expansion is fully parenthesized so the
 * macro can be used safely inside larger expressions.
 */
#if DEVELOPMENT || DEBUG
#define VM_PAGEOUT_STAT_SIZE	((30 * 8) + 1)
#else
#define VM_PAGEOUT_STAT_SIZE	((1 * 8) + 1)
#endif

/* One bucket of pageout statistics. */
struct vm_pageout_stat {
	/* page counts */
	unsigned long vm_page_active_count;
	unsigned long vm_page_speculative_count;
	unsigned long vm_page_inactive_count;
	unsigned long vm_page_anonymous_count;

	unsigned long vm_page_free_count;
	unsigned long vm_page_wire_count;
	unsigned long vm_page_compressor_count;

	unsigned long vm_page_pages_compressed;
	unsigned long vm_page_pageable_internal_count;
	unsigned long vm_page_pageable_external_count;
	unsigned long vm_page_xpmapped_external_count;

	/* event counters */
	unsigned int pages_grabbed;
	unsigned int pages_freed;

	unsigned int pages_compressed;
	unsigned int pages_grabbed_by_compressor;
	unsigned int failed_compressions;

	unsigned int pages_evicted;
	unsigned int pages_purged;

	unsigned int considered;
	unsigned int considered_bq_internal;
	unsigned int considered_bq_external;

	unsigned int skipped_external;
	unsigned int filecache_min_reactivations;

	/* summed to compute the pages reclaimed per bucket */
	unsigned int freed_speculative;
	unsigned int freed_cleaned;
	unsigned int freed_internal;
	unsigned int freed_external;

	unsigned int cleaned_dirty_external;
	unsigned int cleaned_dirty_internal;

	unsigned int inactive_referenced;
	unsigned int inactive_nolock;
	unsigned int reactivation_limit_exceeded;
	unsigned int forced_inactive_reclaim;

	unsigned int throttled_internal_q;
	unsigned int throttled_external_q;

	unsigned int phantom_ghosts_found;
	unsigned int phantom_ghosts_added;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = { { 0 } };	/* all buckets start zeroed */

/* Index of the bucket currently being filled. */
unsigned int vm_pageout_stat_now = 0;

/* Ring-buffer neighbours of bucket index "i", with wrap-around. */
#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)

#if VM_PAGE_BUCKETS_CHECK
int vm_page_buckets_check_interval = 80; /* in eighths of a second */
#endif /* VM_PAGE_BUCKETS_CHECK */
907
908
909	void
910	record_memory_pressure(void);
911	void
912	record_memory_pressure(void)
913	{
914	unsigned int vm_pageout_next;
915
916	#if VM_PAGE_BUCKETS_CHECK
917	/ check the consistency of VM page buckets at regular interval /
918	static int counter = `0`;
919	if ((++counter % vm_page_buckets_check_interval) == `0`) {
920	vm_page_buckets_check();
921	}
922	#endif /* VM_PAGE_BUCKETS_CHECK */
923
924	vm_pageout_state.vm_memory_pressure =
925	vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative +
926	vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned +
927	vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal +
928	vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external;
929
930	commpage_set_memory_pressure( (unsigned int)vm_pageout_state.vm_memory_pressure );
931
932	/ move "now" forward /
933	vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
934
935	bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
936
937	vm_pageout_stat_now = vm_pageout_next;
938	}
939
940
941	/*
942	* IMPORTANT
943	* mach_vm_ctl_page_free_wanted() is called indirectly, via
944	* mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
945	* it must be safe in the restricted stackshot context. Locks and/or
946	* blocking are not allowable.
947	*/
948	unsigned int
949	mach_vm_ctl_page_free_wanted(void)
950	{
951	unsigned int page_free_target, page_free_count, page_free_wanted;
952
953	page_free_target = vm_page_free_target;
954	page_free_count = vm_page_free_count;
955	if (page_free_target > page_free_count) {
956	page_free_wanted = page_free_target - page_free_count;
957	} else {
958	page_free_wanted = `0`;
959	}
960
961	return page_free_wanted;
962	}
963
964
965	/*
966	* IMPORTANT:
967	* mach_vm_pressure_monitor() is called when taking a stackshot, with
968	* wait_for_pressure FALSE, so that code path must remain safe in the
969	* restricted stackshot context. No blocking or locks are allowable.
970	* on that code path.
971	*/
972
973	kern_return_t
974	mach_vm_pressure_monitor(
975	boolean_t wait_for_pressure,
976	unsigned int nsecs_monitored,
977	unsigned int *pages_reclaimed_p,
978	unsigned int *pages_wanted_p)
979	{
980	wait_result_t wr;
981	unsigned int vm_pageout_then, vm_pageout_now;
982	unsigned int pages_reclaimed;
983	unsigned int units_of_monitor;
984
985	units_of_monitor = `8` * nsecs_monitored;
986	/*
987	* We don't take the vm_page_queue_lock here because we don't want
988	* vm_pressure_monitor() to get in the way of the vm_pageout_scan()
989	* thread when it's trying to reclaim memory. We don't need fully
990	* accurate monitoring anyway...
991	*/
992
993	if (wait_for_pressure) {
994	/ wait until there's memory pressure /
995	while (vm_page_free_count >= vm_page_free_target) {
996	wr = assert_wait((event_t) &vm_page_free_wanted,
997	THREAD_INTERRUPTIBLE);
998	if (wr == THREAD_WAITING) {
999	wr = thread_block(THREAD_CONTINUE_NULL);
1000	}
1001	if (wr == THREAD_INTERRUPTED) {
1002	return KERN_ABORTED;
1003	}
1004	if (wr == THREAD_AWAKENED) {
1005	/*
1006	* The memory pressure might have already
1007	* been relieved but let's not block again
1008	* and let's report that there was memory
1009	* pressure at some point.
1010	*/
1011	break;
1012	}
1013	}
1014	}
1015
1016	/ provide the number of pages the system wants to reclaim /
1017	if (pages_wanted_p != NULL) {
1018	*pages_wanted_p = mach_vm_ctl_page_free_wanted();
1019	}
1020
1021	if (pages_reclaimed_p == NULL) {
1022	return KERN_SUCCESS;
1023	}
1024
1025	/ provide number of pages reclaimed in the last "nsecs_monitored" /
1026	vm_pageout_now = vm_pageout_stat_now;
1027	pages_reclaimed = `0`;
1028	for (vm_pageout_then =
1029	VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1030	vm_pageout_then != vm_pageout_now &&
1031	units_of_monitor-- != `0`;
1032	vm_pageout_then =
1033	VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1034	pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative;
1035	pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned;
1036	pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal;
1037	pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external;
1038	}
1039	*pages_reclaimed_p = pages_reclaimed;
1040
1041	return KERN_SUCCESS;
1042	}
1043
1044
1045
1046	#if DEVELOPMENT \|\| DEBUG
1047
1048	static void
1049	vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t , int*);
1050
1051	/*
1052	* condition variable used to make sure there is
1053	* only a single sweep going on at a time
1054	*/
1055	boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1056
1057
1058	void
1059	vm_pageout_disconnect_all_pages()
1060	{
1061	vm_page_lock_queues();
1062
1063	if (vm_pageout_disconnect_all_pages_active == TRUE) {
1064	vm_page_unlock_queues();
1065	return;
1066	}
1067	vm_pageout_disconnect_all_pages_active = TRUE;
1068	vm_page_unlock_queues();
1069
1070	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1071	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1072	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1073
1074	vm_pageout_disconnect_all_pages_active = FALSE;
1075	}
1076
1077
1078	void
1079	vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t q, int* qcount)
1080	{
1081	vm_page_t m;
1082	vm_object_t t_object = NULL;
1083	vm_object_t l_object = NULL;
1084	vm_object_t m_object = NULL;
1085	int delayed_unlock = `0`;
1086	int try_failed_count = `0`;
1087	int disconnected_count = `0`;
1088	int paused_count = `0`;
1089	int object_locked_count = `0`;
1090
1091	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) \| DBG_FUNC_START,
1092	q, qcount, `0`, `0`, `0`);
1093
1094	vm_page_lock_queues();
1095
1096	while (qcount && !vm_page_queue_empty(q)) {
1097
1098	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1099
1100	m = (vm_page_t) vm_page_queue_first(q);
1101	m_object = VM_PAGE_OBJECT(m);
1102
1103	/*
1104	* check to see if we currently are working
1105	* with the same object... if so, we've
1106	* already got the lock
1107	*/
1108	if (m_object != l_object) {
1109	/*
1110	* the object associated with candidate page is
1111	* different from the one we were just working
1112	* with... dump the lock if we still own it
1113	*/
1114	if (l_object != NULL) {
1115	vm_object_unlock(l_object);
1116	l_object = NULL;
1117	}
1118	if (m_object != t_object)
1119	try_failed_count = `0`;
1120
1121	/*
1122	* Try to lock object; since we've alread got the
1123	* page queues lock, we can only 'try' for this one.
1124	* if the 'try' fails, we need to do a mutex_pause
1125	* to allow the owner of the object lock a chance to
1126	* run...
1127	*/
1128	if ( !vm_object_lock_try_scan(m_object)) {
1129
1130	if (try_failed_count > `20`) {
1131	goto reenter_pg_on_q;
1132	}
1133	vm_page_unlock_queues();
1134	mutex_pause(try_failed_count++);
1135	vm_page_lock_queues();
1136	delayed_unlock = `0`;
1137
1138	paused_count++;
1139
1140	t_object = m_object;
1141	continue;
1142	}
1143	object_locked_count++;
1144
1145	l_object = m_object;
1146	}
1147	if ( !m_object->alive \|\| m->vmp_cleaning \|\| m->vmp_laundry \|\| m->vmp_busy \|\| m->vmp_absent \|\| m->vmp_error \|\| m->vmp_free_when_done) {
1148	/*
1149	* put it back on the head of its queue
1150	*/
1151	goto reenter_pg_on_q;
1152	}
1153	if (m->vmp_pmapped == TRUE) {
1154
1155	pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1156
1157	disconnected_count++;
1158	}
1159	reenter_pg_on_q:
1160	vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1161	vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1162
1163	qcount--;
1164	try_failed_count = `0`;
1165
1166	if (delayed_unlock++ > `128`) {
1167
1168	if (l_object != NULL) {
1169	vm_object_unlock(l_object);
1170	l_object = NULL;
1171	}
1172	lck_mtx_yield(&vm_page_queue_lock);
1173	delayed_unlock = `0`;
1174	}
1175	}
1176	if (l_object != NULL) {
1177	vm_object_unlock(l_object);
1178	l_object = NULL;
1179	}
1180	vm_page_unlock_queues();
1181
1182	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) \| DBG_FUNC_END,
1183	q, disconnected_count, object_locked_count, paused_count, `0`);
1184	}
1185
1186	#endif
1187
1188
1189	static void
1190	vm_pageout_page_queue(vm_page_queue_head_t , int*);
1191
1192	/*
1193	* condition variable used to make sure there is
1194	* only a single sweep going on at a time
1195	*/
1196	boolean_t vm_pageout_anonymous_pages_active = FALSE;
1197
1198
1199	void
1200	vm_pageout_anonymous_pages()
1201	{
1202	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1203
1204	vm_page_lock_queues();
1205
1206	if (vm_pageout_anonymous_pages_active == TRUE) {
1207	vm_page_unlock_queues();
1208	return;
1209	}
1210	vm_pageout_anonymous_pages_active = TRUE;
1211	vm_page_unlock_queues();
1212
1213	vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1214	vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1215	vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1216
1217	if (VM_CONFIG_SWAP_IS_PRESENT)
1218	vm_consider_swapping();
1219
1220	vm_page_lock_queues();
1221	vm_pageout_anonymous_pages_active = FALSE;
1222	vm_page_unlock_queues();
1223	}
1224	}
1225
1226
1227	void
1228	vm_pageout_page_queue(vm_page_queue_head_t q, int* qcount)
1229	{
1230	vm_page_t m;
1231	vm_object_t t_object = NULL;
1232	vm_object_t l_object = NULL;
1233	vm_object_t m_object = NULL;
1234	int delayed_unlock = `0`;
1235	int try_failed_count = `0`;
1236	int refmod_state;
1237	int pmap_options;
1238	struct vm_pageout_queue *iq;
1239	ppnum_t phys_page;
1240
1241
1242	iq = &vm_pageout_queue_internal;
1243
1244	vm_page_lock_queues();
1245
1246	while (qcount && !vm_page_queue_empty(q)) {
1247
1248	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1249
1250	if (VM_PAGE_Q_THROTTLED(iq)) {
1251
1252	if (l_object != NULL) {
1253	vm_object_unlock(l_object);
1254	l_object = NULL;
1255	}
1256	iq->pgo_draining = TRUE;
1257
1258	assert_wait((event_t) (&iq->pgo_laundry + `1`), THREAD_INTERRUPTIBLE);
1259	vm_page_unlock_queues();
1260
1261	thread_block(THREAD_CONTINUE_NULL);
1262
1263	vm_page_lock_queues();
1264	delayed_unlock = `0`;
1265	continue;
1266	}
1267	m = (vm_page_t) vm_page_queue_first(q);
1268	m_object = VM_PAGE_OBJECT(m);
1269
1270	/*
1271	* check to see if we currently are working
1272	* with the same object... if so, we've
1273	* already got the lock
1274	*/
1275	if (m_object != l_object) {
1276	if ( !m_object->internal)
1277	goto reenter_pg_on_q;
1278
1279	/*
1280	* the object associated with candidate page is
1281	* different from the one we were just working
1282	* with... dump the lock if we still own it
1283	*/
1284	if (l_object != NULL) {
1285	vm_object_unlock(l_object);
1286	l_object = NULL;
1287	}
1288	if (m_object != t_object)
1289	try_failed_count = `0`;
1290
1291	/*
1292	* Try to lock object; since we've alread got the
1293	* page queues lock, we can only 'try' for this one.
1294	* if the 'try' fails, we need to do a mutex_pause
1295	* to allow the owner of the object lock a chance to
1296	* run...
1297	*/
1298	if ( !vm_object_lock_try_scan(m_object)) {
1299
1300	if (try_failed_count > `20`) {
1301	goto reenter_pg_on_q;
1302	}
1303	vm_page_unlock_queues();
1304	mutex_pause(try_failed_count++);
1305	vm_page_lock_queues();
1306	delayed_unlock = `0`;
1307
1308	t_object = m_object;
1309	continue;
1310	}
1311	l_object = m_object;
1312	}
1313	if ( !m_object->alive \|\| m->vmp_cleaning \|\| m->vmp_laundry \|\| m->vmp_busy \|\| m->vmp_absent \|\| m->vmp_error \|\| m->vmp_free_when_done) {
1314	/*
1315	* page is not to be cleaned
1316	* put it back on the head of its queue
1317	*/
1318	goto reenter_pg_on_q;
1319	}
1320	phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1321
1322	if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
1323	refmod_state = pmap_get_refmod(phys_page);
1324
1325	if (refmod_state & VM_MEM_REFERENCED)
1326	m->vmp_reference = TRUE;
1327	if (refmod_state & VM_MEM_MODIFIED) {
1328	SET_PAGE_DIRTY(m, FALSE);
1329	}
1330	}
1331	if (m->vmp_reference == TRUE) {
1332	m->vmp_reference = FALSE;
1333	pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1334	goto reenter_pg_on_q;
1335	}
1336	if (m->vmp_pmapped == TRUE) {
1337	if (m->vmp_dirty \|\| m->vmp_precious) {
1338	pmap_options = PMAP_OPTIONS_COMPRESSOR;
1339	} else {
1340	pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1341	}
1342	refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1343	if (refmod_state & VM_MEM_MODIFIED) {
1344	SET_PAGE_DIRTY(m, FALSE);
1345	}
1346	}
1347
1348	if ( !m->vmp_dirty && !m->vmp_precious) {
1349	vm_page_unlock_queues();
1350	VM_PAGE_FREE(m);
1351	vm_page_lock_queues();
1352	delayed_unlock = `0`;
1353
1354	goto next_pg;
1355	}
1356	if (!m_object->pager_initialized \|\| m_object->pager == MEMORY_OBJECT_NULL) {
1357
1358	if (!m_object->pager_initialized) {
1359
1360	vm_page_unlock_queues();
1361
1362	vm_object_collapse(m_object, (vm_object_offset_t) `0`, TRUE);
1363
1364	if (!m_object->pager_initialized)
1365	vm_object_compressor_pager_create(m_object);
1366
1367	vm_page_lock_queues();
1368	delayed_unlock = `0`;
1369	}
1370	if (!m_object->pager_initialized \|\| m_object->pager == MEMORY_OBJECT_NULL)
1371	goto reenter_pg_on_q;
1372	/*
1373	* vm_object_compressor_pager_create will drop the object lock
1374	* which means 'm' may no longer be valid to use
1375	*/
1376	continue;
1377	}
1378	/*
1379	* we've already factored out pages in the laundry which
1380	* means this page can't be on the pageout queue so it's
1381	* safe to do the vm_page_queues_remove
1382	*/
1383	vm_page_queues_remove(m, TRUE);
1384
1385	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1386
1387	vm_pageout_cluster(m);
1388
1389	goto next_pg;
1390
1391	reenter_pg_on_q:
1392	vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1393	vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1394	next_pg:
1395	qcount--;
1396	try_failed_count = `0`;
1397
1398	if (delayed_unlock++ > `128`) {
1399
1400	if (l_object != NULL) {
1401	vm_object_unlock(l_object);
1402	l_object = NULL;
1403	}
1404	lck_mtx_yield(&vm_page_queue_lock);
1405	delayed_unlock = `0`;
1406	}
1407	}
1408	if (l_object != NULL) {
1409	vm_object_unlock(l_object);
1410	l_object = NULL;
1411	}
1412	vm_page_unlock_queues();
1413	}
1414
1415
1416
/*
 * function in BSD to apply I/O throttle to the pageout thread
 */
extern void vm_pageout_io_throttle(void);
1421
/*
 * VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj):
 *
 * If a "reusable" page somehow made it back into
 * the active queue, it's been re-used and is not
 * quite re-usable.
 * If the VM object was "all_reusable", consider it
 * as "all re-used" instead of converting it to
 * "partially re-used", which could be expensive.
 *
 * 'obj' must be the object that owns page 'm' (asserted).
 * Both arguments are evaluated more than once, so they
 * must not have side effects.
 */
#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj)			\
	MACRO_BEGIN							\
	assert(VM_PAGE_OBJECT((m)) == (obj));				\
	if ((m)->vmp_reusable ||					\
	    (obj)->all_reusable) {					\
		vm_object_reuse_pages((obj),				\
				      (m)->vmp_offset,			\
				      (m)->vmp_offset + PAGE_SIZE_64,	\
				      FALSE);				\
	}								\
	MACRO_END
1441
1442
1443	#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1444	#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1445
1446	#define FCS_IDLE 0
1447	#define FCS_DELAYED 1
1448	#define FCS_DEADLOCK_DETECTED 2
1449
1450	struct flow_control {
1451	int state;
1452	mach_timespec_t ts;
1453	};
1454
1455
1456	#if CONFIG_BACKGROUND_QUEUE
1457	uint64_t vm_pageout_rejected_bq_internal = `0`;
1458	uint64_t vm_pageout_rejected_bq_external = `0`;
1459	uint64_t vm_pageout_skipped_bq_internal = `0`;
1460	#endif
1461
1462	#define ANONS_GRABBED_LIMIT 2
1463
1464
1465	#if 0
1466	static void vm_pageout_delayed_unlock(int , int* , vm_page_t );
1467	#endif
1468	static void vm_pageout_prepare_to_block(vm_object_t , int* , vm_page_t , int , int*);
1469
1470	#define VM_PAGEOUT_PB_NO_ACTION 0
1471	#define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1472	#define VM_PAGEOUT_PB_THREAD_YIELD 2
1473
1474
#if 0
/*
 * vm_pageout_delayed_unlock (compiled out):
 *
 * If the caller has accumulated a local free list, drop the page
 * queues lock, free the list, reset *local_freeq / *local_freed and
 * re-take the lock; otherwise just yield the page queues lock.
 * In both cases *delayed_unlock is reset to 1.
 */
static void
vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
{
	if (*local_freeq) {
		vm_page_unlock_queues();

		VM_DEBUG_CONSTANT_EVENT(
			vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
			vm_page_free_count, 0, 0, 1);

		vm_page_free_list(*local_freeq, TRUE);

		VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist,VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					vm_page_free_count, *local_freed, 0, 1);

		*local_freeq = NULL;
		*local_freed = 0;

		vm_page_lock_queues();
	} else {
		lck_mtx_yield(&vm_page_queue_lock);
	}
	*delayed_unlock = 1;
}
#endif
1501
1502
1503	static void
1504	vm_pageout_prepare_to_block(vm_object_t object, int* *delayed_unlock,
1505	vm_page_t local_freeq, int* local_freed, int* action)
1506	{
1507	vm_page_unlock_queues();
1508
1509	if (*object != NULL) {
1510	vm_object_unlock(*object);
1511	*object = NULL;
1512	}
1513	if (*local_freeq) {
1514
1515	vm_page_free_list(*local_freeq, TRUE);
1516
1517	*local_freeq = NULL;
1518	*local_freed = `0`;
1519	}
1520	*delayed_unlock = `1`;
1521
1522	switch (action) {
1523
1524	case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1525	vm_consider_waking_compactor_swapper();
1526	break;
1527	case VM_PAGEOUT_PB_THREAD_YIELD:
1528	thread_yield_internal(`1`);
1529	break;
1530	case VM_PAGEOUT_PB_NO_ACTION:
1531	default:
1532	break;
1533	}
1534	vm_page_lock_queues();
1535	}
1536
1537
1538	static struct vm_pageout_vminfo last;
1539
1540	uint64_t last_vm_page_pages_grabbed = `0`;
1541
1542	extern uint32_t c_segment_pages_compressed;
1543
1544	extern uint64_t shared_region_pager_reclaimed;
1545	extern struct memory_object_pager_ops shared_region_pager_ops;
1546
1547	void update_vm_info(void)
1548	{
1549	uint64_t tmp;
1550
1551	vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
1552	vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
1553	vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
1554	vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
1555
1556	vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
1557	vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
1558	vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
1559
1560	vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
1561	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
1562	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
1563	vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
1564
1565
1566	tmp = vm_pageout_vminfo.vm_pageout_considered_page;
1567	vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
1568	last.vm_pageout_considered_page = tmp;
1569
1570	tmp = vm_pageout_vminfo.vm_pageout_compressions;
1571	vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp - last.vm_pageout_compressions);
1572	last.vm_pageout_compressions = tmp;
1573
1574	tmp = vm_pageout_vminfo.vm_compressor_failed;
1575	vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
1576	last.vm_compressor_failed = tmp;
1577
1578	tmp = vm_pageout_vminfo.vm_compressor_pages_grabbed;
1579	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp - last.vm_compressor_pages_grabbed);
1580	last.vm_compressor_pages_grabbed = tmp;
1581
1582	tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
1583	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
1584	last.vm_phantom_cache_found_ghost = tmp;
1585
1586	tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
1587	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
1588	last.vm_phantom_cache_added_ghost = tmp;
1589
1590	tmp = get_pages_grabbed_count();
1591	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp - last_vm_page_pages_grabbed);
1592	last_vm_page_pages_grabbed = tmp;
1593
1594	tmp = vm_pageout_vminfo.vm_page_pages_freed;
1595	vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
1596	last.vm_page_pages_freed = tmp;
1597
1598
1599	if (vm_pageout_stats[vm_pageout_stat_now].considered) {
1600
1601	tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
1602	vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
1603	last.vm_pageout_pages_evicted = tmp;
1604
1605	tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
1606	vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
1607	last.vm_pageout_pages_purged = tmp;
1608
1609	tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
1610	vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
1611	last.vm_pageout_freed_speculative = tmp;
1612
1613	tmp = vm_pageout_vminfo.vm_pageout_freed_external;
1614	vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
1615	last.vm_pageout_freed_external = tmp;
1616
1617	tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
1618	vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
1619	last.vm_pageout_inactive_referenced = tmp;
1620
1621	tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
1622	vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
1623	last.vm_pageout_scan_inactive_throttled_external = tmp;
1624
1625	tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
1626	vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
1627	last.vm_pageout_inactive_dirty_external = tmp;
1628
1629	tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
1630	vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
1631	last.vm_pageout_freed_cleaned = tmp;
1632
1633	tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
1634	vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
1635	last.vm_pageout_inactive_nolock = tmp;
1636
1637	tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
1638	vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
1639	last.vm_pageout_scan_inactive_throttled_internal = tmp;
1640
1641	tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
1642	vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
1643	last.vm_pageout_skipped_external = tmp;
1644
1645	tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
1646	vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
1647	last.vm_pageout_reactivation_limit_exceeded = tmp;
1648
1649	tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
1650	vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
1651	last.vm_pageout_inactive_force_reclaim = tmp;
1652
1653	tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
1654	vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
1655	last.vm_pageout_freed_internal = tmp;
1656
1657	tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
1658	vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
1659	last.vm_pageout_considered_bq_internal = tmp;
1660
1661	tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
1662	vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
1663	last.vm_pageout_considered_bq_external = tmp;
1664
1665	tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
1666	vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
1667	last.vm_pageout_filecache_min_reactivated = tmp;
1668
1669	tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
1670	vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
1671	last.vm_pageout_inactive_dirty_internal = tmp;
1672	}
1673
1674	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) \| DBG_FUNC_NONE,
1675	vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
1676	vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
1677	vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
1678	vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count,
1679	`0`);
1680
1681	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) \| DBG_FUNC_NONE,
1682	vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
1683	vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
1684	vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count,
1685	`0`,
1686	`0`);
1687
1688	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) \| DBG_FUNC_NONE,
1689	vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
1690	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
1691	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
1692	vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count,
1693	`0`);
1694
1695	if (vm_pageout_stats[vm_pageout_stat_now].considered \|\|
1696	vm_pageout_stats[vm_pageout_stat_now].pages_compressed \|\|
1697	vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
1698
1699	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) \| DBG_FUNC_NONE,
1700	vm_pageout_stats[vm_pageout_stat_now].considered,
1701	vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
1702	vm_pageout_stats[vm_pageout_stat_now].freed_external,
1703	vm_pageout_stats[vm_pageout_stat_now].inactive_referenced,
1704	`0`);
1705
1706	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) \| DBG_FUNC_NONE,
1707	vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
1708	vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
1709	vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
1710	vm_pageout_stats[vm_pageout_stat_now].inactive_nolock,
1711	`0`);
1712
1713	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) \| DBG_FUNC_NONE,
1714	vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
1715	vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
1716	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
1717	vm_pageout_stats[vm_pageout_stat_now].skipped_external,
1718	`0`);
1719
1720	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) \| DBG_FUNC_NONE,
1721	vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
1722	vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
1723	vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
1724	vm_pageout_stats[vm_pageout_stat_now].freed_internal,
1725	`0`);
1726
1727	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO8)) \| DBG_FUNC_NONE,
1728	vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
1729	vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
1730	vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
1731	vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal,
1732	`0`);
1733
1734	}
1735	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO9)) \| DBG_FUNC_NONE,
1736	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
1737	vm_pageout_stats[vm_pageout_stat_now].pages_freed,
1738	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
1739	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added,
1740	`0`);
1741
1742	record_memory_pressure();
1743	}
1744
1745
1746	void
1747	vm_page_balance_inactive(int max_to_move)
1748	{
1749	vm_page_t m;
1750
1751	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1752
1753	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1754	vm_page_inactive_count +
1755	vm_page_speculative_count);
1756
1757	while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
1758
1759	VM_PAGEOUT_DEBUG(vm_pageout_balanced, `1`);
1760
1761	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
1762
1763	assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
1764	assert(!m->vmp_laundry);
1765	assert(VM_PAGE_OBJECT(m) != kernel_object);
1766	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
1767
1768	DTRACE_VM2(scan, int, `1`, (uint64_t *), NULL);
1769
1770	/*
1771	* by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1772	*
1773	* a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1774	* updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1775	* new reference happens. If no futher references happen on the page after that remote TLB flushes
1776	* we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1777	* by pageout_scan, which is just fine since the last reference would have happened quite far
1778	* in the past (TLB caches don't hang around for very long), and of course could just as easily
1779	* have happened before we moved the page
1780	*/
1781	if (m->vmp_pmapped == TRUE)
1782	pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1783
1784	/*
1785	* The page might be absent or busy,
1786	* but vm_page_deactivate can handle that.
1787	* FALSE indicates that we don't want a H/W clear reference
1788	*/
1789	vm_page_deactivate_internal(m, FALSE);
1790	}
1791	}
1792
1793
1794	/*
1795	* vm_pageout_scan does the dirty work for the pageout daemon.
1796	* It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1797	* held and vm_page_free_wanted == 0.
1798	*/
1799	void
1800	vm_pageout_scan(void)
1801	{
1802	unsigned int loop_count = `0`;
1803	unsigned int inactive_burst_count = `0`;
1804	unsigned int reactivated_this_call;
1805	unsigned int reactivate_limit;
1806	vm_page_t local_freeq = NULL;
1807	int local_freed = `0`;
1808	int delayed_unlock;
1809	int delayed_unlock_limit = `0`;
1810	int refmod_state = `0`;
1811	int vm_pageout_deadlock_target = `0`;
1812	struct vm_pageout_queue *iq;
1813	struct vm_pageout_queue *eq;
1814	struct vm_speculative_age_q *sq;
1815	struct flow_control flow_control = { `0`, { `0`, `0` } };
1816	boolean_t inactive_throttled = FALSE;
1817	mach_timespec_t ts;
1818	unsigned int msecs = `0`;
1819	vm_object_t object = NULL;
1820	uint32_t inactive_reclaim_run;
1821	boolean_t exceeded_burst_throttle;
1822	boolean_t grab_anonymous = FALSE;
1823	boolean_t force_anonymous = FALSE;
1824	boolean_t force_speculative_aging = FALSE;
1825	int anons_grabbed = `0`;
1826	int page_prev_q_state = `0`;
1827	#if CONFIG_BACKGROUND_QUEUE
1828	boolean_t page_from_bg_q = FALSE;
1829	#endif
1830	int cache_evict_throttle = `0`;
1831	uint32_t vm_pageout_inactive_external_forced_reactivate_limit = `0`;
1832	uint32_t inactive_external_count;
1833	int force_purge = `0`;
1834	int divisor;
1835	#define DELAY_SPECULATIVE_AGE 1000
1836	int delay_speculative_age = `0`;
1837	vm_object_t m_object = VM_OBJECT_NULL;
1838
1839	#if VM_PRESSURE_EVENTS
1840	vm_pressure_level_t pressure_level;
1841	#endif /* VM_PRESSURE_EVENTS */
1842
1843	VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1844	vm_pageout_vminfo.vm_pageout_freed_speculative,
1845	vm_pageout_state.vm_pageout_inactive_clean,
1846	vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
1847	vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
1848
1849	flow_control.state = FCS_IDLE;
1850	iq = &vm_pageout_queue_internal;
1851	eq = &vm_pageout_queue_external;
1852	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1853
1854
1855	XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", `0`, `0`, `0`, `0`, `0`);
1856
1857	/ Ask the pmap layer to return any pages it no longer needs. /
1858	uint64_t pmap_wired_pages_freed = pmap_release_pages_fast();
1859
1860	vm_page_lock_queues();
1861
1862	vm_page_wire_count -= pmap_wired_pages_freed;
1863
1864	delayed_unlock = `1`;
1865
1866	/*
1867	* Calculate the max number of referenced pages on the inactive
1868	* queue that we will reactivate.
1869	*/
1870	reactivated_this_call = `0`;
1871	reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1872	vm_page_inactive_count);
1873	inactive_reclaim_run = `0`;
1874
1875	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1876
1877	/*
1878	* We must limit the rate at which we send pages to the pagers
1879	* so that we don't tie up too many pages in the I/O queues.
1880	* We implement a throttling mechanism using the laundry count
1881	* to limit the number of pages outstanding to the default
1882	* and external pagers. We can bypass the throttles and look
1883	* for clean pages if the pageout queues don't drain in a timely
1884	* fashion since this may indicate that the pageout paths are
1885	* stalled waiting for memory, which only we can provide.
1886	*/
1887
1888	Restart:
1889
1890	assert(object == NULL);
1891	assert(delayed_unlock != `0`);
1892
1893	vm_page_anonymous_min = vm_page_inactive_target / `20`;
1894
1895	if (vm_pageout_state.vm_page_speculative_percentage > `50`)
1896	vm_pageout_state.vm_page_speculative_percentage = `50`;
1897	else if (vm_pageout_state.vm_page_speculative_percentage <= `0`)
1898	vm_pageout_state.vm_page_speculative_percentage = `1`;
1899
1900	vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1901	vm_page_inactive_count);
1902
1903	for (;;) {
1904	vm_page_t m;
1905
1906	DTRACE_VM2(rev, int, `1`, (uint64_t *), NULL);
1907
1908	if (vm_upl_wait_for_pages < `0`)
1909	vm_upl_wait_for_pages = `0`;
1910
1911	delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1912
1913	if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1914	delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1915
1916	#if CONFIG_SECLUDED_MEMORY
1917	/*
1918	* Deal with secluded_q overflow.
1919	*/
1920	if (vm_page_secluded_count > vm_page_secluded_target) {
1921	vm_page_t secluded_page;
1922
1923	/*
1924	* SECLUDED_AGING_BEFORE_ACTIVE:
1925	* Excess secluded pages go to the active queue and
1926	* will later go to the inactive queue.
1927	*/
1928	assert((vm_page_secluded_count_free +
1929	vm_page_secluded_count_inuse) ==
1930	vm_page_secluded_count);
1931	secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1932	assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
1933
1934	vm_page_queues_remove(secluded_page, FALSE);
1935	assert(!secluded_page->vmp_fictitious);
1936	assert(!VM_PAGE_WIRED(secluded_page));
1937
1938	if (secluded_page->vmp_object == `0`) {
1939	/ transfer to free queue /
1940	assert(secluded_page->vmp_busy);
1941	secluded_page->vmp_snext = local_freeq;
1942	local_freeq = secluded_page;
1943	local_freed++;
1944	} else {
1945	/ transfer to head of active queue /
1946	vm_page_enqueue_active(secluded_page, FALSE);
1947	secluded_page = VM_PAGE_NULL;
1948	}
1949	}
1950	#endif /* CONFIG_SECLUDED_MEMORY */
1951
1952	assert(delayed_unlock);
1953
1954	/*
1955	* maintain our balance
1956	*/
1957	vm_page_balance_inactive(`1`);
1958
1959
1960	/**********************************************************************
1961	* above this point we're playing with the active and secluded queues
1962	* below this point we're playing with the throttling mechanisms
1963	* and the inactive queue
1964	**********************************************************************/
1965
1966	if (vm_page_free_count + local_freed >= vm_page_free_target)
1967	{
1968	vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1969
1970	vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
1971	VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
1972	/*
1973	* make sure the pageout I/O threads are running
1974	* throttled in case there are still requests
1975	* in the laundry... since we have met our targets
1976	* we don't need the laundry to be cleaned in a timely
1977	* fashion... so let's avoid interfering with foreground
1978	* activity
1979	*/
1980	vm_pageout_adjust_eq_iothrottle(eq, TRUE);
1981
1982	lck_mtx_lock(&vm_page_queue_free_lock);
1983
1984	if ((vm_page_free_count >= vm_page_free_target) &&
1985	(vm_page_free_wanted == `0`) && (vm_page_free_wanted_privileged == `0`)) {
1986	/*
1987	* done - we have met our target and
1988	* there is no one waiting for a page.
1989	*/
1990	return_from_scan:
1991	assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1992
1993	VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
1994	vm_pageout_state.vm_pageout_inactive,
1995	vm_pageout_state.vm_pageout_inactive_used, `0`, `0`);
1996	VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
1997	vm_pageout_vminfo.vm_pageout_freed_speculative,
1998	vm_pageout_state.vm_pageout_inactive_clean,
1999	vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
2000	vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
2001
2002	return;
2003	}
2004	lck_mtx_unlock(&vm_page_queue_free_lock);
2005	}
2006
2007	/*
2008	* Before anything, we check if we have any ripe volatile
2009	* objects around. If so, try to purge the first object.
2010	* If the purge fails, fall through to reclaim a page instead.
2011	* If the purge succeeds, go back to the top and reevaluate
2012	* the new memory situation.
2013	*/
2014
2015	assert (available_for_purge>=`0`);
2016	force_purge = `0`; / no force-purging /
2017
2018	#if VM_PRESSURE_EVENTS
2019	pressure_level = memorystatus_vm_pressure_level;
2020
2021	if (pressure_level > kVMPressureNormal) {
2022
2023	if (pressure_level >= kVMPressureCritical) {
2024	force_purge = vm_pageout_state.memorystatus_purge_on_critical;
2025	} else if (pressure_level >= kVMPressureUrgent) {
2026	force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
2027	} else if (pressure_level >= kVMPressureWarning) {
2028	force_purge = vm_pageout_state.memorystatus_purge_on_warning;
2029	}
2030	}
2031	#endif /* VM_PRESSURE_EVENTS */
2032
2033	if (available_for_purge \|\| force_purge) {
2034
2035	if (object != NULL) {
2036	vm_object_unlock(object);
2037	object = NULL;
2038	}
2039
2040	memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2041
2042	VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, `0`, `0`, `0`);
2043	if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2044	VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, `1`);
2045	VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, `0`, `0`, `0`);
2046	memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2047	continue;
2048	}
2049	VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, `0`, `0`, `0`, -`1`);
2050	memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2051	}
2052
2053	if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2054	/*
2055	* try to pull pages from the aging bins...
2056	* see vm_page.h for an explanation of how
2057	* this mechanism works
2058	*/
2059	struct vm_speculative_age_q *aq;
2060	boolean_t can_steal = FALSE;
2061	int num_scanned_queues;
2062
2063	aq = &vm_page_queue_speculative[speculative_steal_index];
2064
2065	num_scanned_queues = `0`;
2066	while (vm_page_queue_empty(&aq->age_q) &&
2067	num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2068
2069	speculative_steal_index++;
2070
2071	if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2072	speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2073
2074	aq = &vm_page_queue_speculative[speculative_steal_index];
2075	}
2076
2077	if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + `1`) {
2078	/*
2079	* XXX We've scanned all the speculative
2080	* queues but still haven't found one
2081	* that is not empty, even though
2082	* vm_page_speculative_count is not 0.
2083	*/
2084	if (!vm_page_queue_empty(&sq->age_q))
2085	continue;
2086	#if DEVELOPMENT \|\| DEBUG
2087	panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2088	#endif
2089	/* readjust... */
2090	vm_page_speculative_count = `0`;
2091	/* ... and continue */
2092	continue;
2093	}
2094
2095	if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target \|\| force_speculative_aging == TRUE)
2096	can_steal = TRUE;
2097	else {
2098	if (!delay_speculative_age) {
2099	mach_timespec_t ts_fully_aged;
2100
2101	ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / `1000`;
2102	ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % `1000`)
2103	* `1000` * NSEC_PER_USEC;
2104
2105	ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2106
2107	clock_sec_t sec;
2108	clock_nsec_t nsec;
2109	clock_get_system_nanotime(&sec, &nsec);
2110	ts.tv_sec = (unsigned int) sec;
2111	ts.tv_nsec = nsec;
2112
2113	if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= `0`)
2114	can_steal = TRUE;
2115	else
2116	delay_speculative_age++;
2117	} else {
2118	delay_speculative_age++;
2119	if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2120	delay_speculative_age = `0`;
2121	}
2122	}
2123	if (can_steal == TRUE)
2124	vm_page_speculate_ageit(aq);
2125	}
2126	force_speculative_aging = FALSE;
2127
2128	if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == `0`) {
2129
2130	int pages_evicted;
2131
2132	if (object != NULL) {
2133	vm_object_unlock(object);
2134	object = NULL;
2135	}
2136	KERNEL_DEBUG_CONSTANT(`0x13001ec` \| DBG_FUNC_START, `0`, `0`, `0`, `0`, `0`);
2137
2138	pages_evicted = vm_object_cache_evict(`100`, `10`);
2139
2140	KERNEL_DEBUG_CONSTANT(`0x13001ec` \| DBG_FUNC_END, pages_evicted, `0`, `0`, `0`, `0`);
2141
2142	if (pages_evicted) {
2143
2144	vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
2145
2146	VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2147	vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, `0`);
2148	memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2149
2150	/*
2151	* we just freed up to 100 pages,
2152	* so go back to the top of the main loop
2153	* and re-evaluate the memory situation
2154	*/
2155	continue;
2156	} else
2157	cache_evict_throttle = `1000`;
2158	}
2159	if (cache_evict_throttle)
2160	cache_evict_throttle--;
2161
2162	divisor = vm_pageout_state.vm_page_filecache_min_divisor;
2163
2164	#if CONFIG_JETSAM
2165	/*
2166	* don't let the filecache_min fall below 15% of available memory
2167	* on systems with an active compressor that isn't nearing its
2168	* limits w/r to accepting new data
2169	*
2170	* on systems w/o the compressor/swapper, the filecache is always
2171	* a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2172	* since most (if not all) of the anonymous pages are in the
2173	* throttled queue (which isn't counted as available) which
2174	* effectively disables this filter
2175	*/
2176	if (vm_compressor_low_on_space() \|\| divisor == `0`)
2177	vm_pageout_state.vm_page_filecache_min = `0`;
2178	else
2179	vm_pageout_state.vm_page_filecache_min =
2180	((AVAILABLE_NON_COMPRESSED_MEMORY) * `10`) / divisor;
2181	#else
2182	if (vm_compressor_out_of_space() \|\| divisor == `0`)
2183	vm_pageout_state.vm_page_filecache_min = `0`;
2184	else {
2185	/*
2186	* don't let the filecache_min fall below the specified critical level
2187	*/
2188	vm_pageout_state.vm_page_filecache_min =
2189	((AVAILABLE_NON_COMPRESSED_MEMORY) * `10`) / divisor;
2190	}
2191	#endif
2192	if (vm_page_free_count < (vm_page_free_reserved / `4`))
2193	vm_pageout_state.vm_page_filecache_min = `0`;
2194
2195	exceeded_burst_throttle = FALSE;
2196	/*
2197	* Sometimes we have to pause:
2198	* 1) No inactive pages - nothing to do.
2199	* 2) Loop control - no acceptable pages found on the inactive queue
2200	* within the last vm_pageout_burst_inactive_throttle iterations
2201	* 3) Flow control - default pageout queue is full
2202	*/
2203	if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2204	vm_page_queue_empty(&vm_page_queue_anonymous) &&
2205	vm_page_queue_empty(&vm_page_queue_cleaned) &&
2206	vm_page_queue_empty(&sq->age_q)) {
2207	VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, `1`);
2208	msecs = vm_pageout_state.vm_pageout_empty_wait;
2209	goto vm_pageout_scan_delay;
2210
2211	} else if (inactive_burst_count >=
2212	MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
2213	(vm_page_inactive_count +
2214	vm_page_speculative_count))) {
2215	VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, `1`);
2216	msecs = vm_pageout_state.vm_pageout_burst_wait;
2217
2218	exceeded_burst_throttle = TRUE;
2219	goto vm_pageout_scan_delay;
2220
2221	} else if (VM_PAGE_Q_THROTTLED(iq) &&
2222	VM_DYNAMIC_PAGING_ENABLED()) {
2223	clock_sec_t sec;
2224	clock_nsec_t nsec;
2225
2226	switch (flow_control.state) {
2227
2228	case FCS_IDLE:
2229	if ((vm_page_free_count + local_freed) < vm_page_free_target &&
2230	vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2231	/*
2232	* since the compressor is running independently of vm_pageout_scan
2233	* let's not wait for it just yet... as long as we have a healthy supply
2234	* of filecache pages to work with, let's keep stealing those.
2235	*/
2236	inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2237
2238	if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
2239	(inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2240	anons_grabbed = ANONS_GRABBED_LIMIT;
2241	VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, `1`);
2242	goto consider_inactive;
2243	}
2244	}
2245	reset_deadlock_timer:
2246	ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / `1000`;
2247	ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % `1000`) * `1000` * NSEC_PER_USEC;
2248	clock_get_system_nanotime(&sec, &nsec);
2249	flow_control.ts.tv_sec = (unsigned int) sec;
2250	flow_control.ts.tv_nsec = nsec;
2251	ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2252
2253	flow_control.state = FCS_DELAYED;
2254	msecs = vm_pageout_state.vm_pageout_deadlock_wait;
2255
2256	vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
2257	break;
2258
2259	case FCS_DELAYED:
2260	clock_get_system_nanotime(&sec, &nsec);
2261	ts.tv_sec = (unsigned int) sec;
2262	ts.tv_nsec = nsec;
2263
2264	if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= `0`) {
2265	/*
2266	* the pageout thread for the default pager is potentially
2267	* deadlocked since the
2268	* default pager queue has been throttled for more than the
2269	* allowable time... we need to move some clean pages or dirty
2270	* pages belonging to the external pagers if they aren't throttled
2271	* vm_page_free_wanted represents the number of threads currently
2272	* blocked waiting for pages... we'll move one page for each of
2273	* these plus a fixed amount to break the logjam... once we're done
2274	* moving this number of pages, we'll re-enter the FCS_DELAYED state
2275	* with a new timeout target since we have no way of knowing
2276	* whether we've broken the deadlock except through observation
2277	* of the queue associated with the default pager... we need to
2278	* stop moving pages and allow the system to run to see what
2279	* state it settles into.
2280	*/
2281	vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
2282	vm_page_free_wanted + vm_page_free_wanted_privileged;
2283	VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, `1`);
2284	flow_control.state = FCS_DEADLOCK_DETECTED;
2285	thread_wakeup((event_t) &vm_pageout_garbage_collect);
2286	goto consider_inactive;
2287	}
2288	/*
2289	* just resniff instead of trying
2290	* to compute a new delay time... we're going to be
2291	* awakened immediately upon a laundry completion,
2292	* so we won't wait any longer than necessary
2293	*/
2294	msecs = vm_pageout_state.vm_pageout_idle_wait;
2295	break;
2296
2297	case FCS_DEADLOCK_DETECTED:
2298	if (vm_pageout_deadlock_target)
2299	goto consider_inactive;
2300	goto reset_deadlock_timer;
2301
2302	}
2303	vm_pageout_scan_delay:
2304	vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2305
2306	vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2307	VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2308
2309	if (vm_page_free_count >= vm_page_free_target) {
2310	/*
2311	* we're here because
2312	* 1) someone else freed up some pages while we had
2313	* the queues unlocked above
2314	* and we've hit one of the 3 conditions that
2315	* cause us to pause the pageout scan thread
2316	*
2317	* since we already have enough free pages,
2318	* let's avoid stalling and return normally
2319	*
2320	* before we return, make sure the pageout I/O threads
2321	* are running throttled in case there are still requests
2322	* in the laundry... since we have enough free pages
2323	* we don't need the laundry to be cleaned in a timely
2324	* fashion... so let's avoid interfering with foreground
2325	* activity
2326	*
2327	* we don't want to hold vm_page_queue_free_lock when
2328	* calling vm_pageout_adjust_eq_iothrottle (since it
2329	* may cause other locks to be taken), we do the initial
2330	* check outside of the lock. Once we take the lock,
2331	* we recheck the condition since it may have changed.
2332	* if it has, no problem, we will make the threads
2333	* non-throttled before actually blocking
2334	*/
2335	vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2336	}
2337	lck_mtx_lock(&vm_page_queue_free_lock);
2338
2339	if (vm_page_free_count >= vm_page_free_target &&
2340	(vm_page_free_wanted == `0`) && (vm_page_free_wanted_privileged == `0`)) {
2341	goto return_from_scan;
2342	}
2343	lck_mtx_unlock(&vm_page_queue_free_lock);
2344
2345	if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2346	/*
2347	* we're most likely about to block due to one of
2348	* the 3 conditions that cause vm_pageout_scan to
2349	* not be able to make forward progress w/r
2350	* to providing new pages to the free queue,
2351	* so unthrottle the I/O threads in case we
2352	* have laundry to be cleaned... it needs
2353	* to be completed ASAP.
2354	*
2355	* even if we don't block, we want the io threads
2356	* running unthrottled since the sum of free +
2357	* clean pages is still under our free target
2358	*/
2359	vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2360	}
2361	if (vm_page_cleaned_count > `0` && exceeded_burst_throttle == FALSE) {
2362	/*
2363	* if we get here we're below our free target and
2364	* we're stalling due to a full laundry queue or
2365	* we don't have any inactive pages other than
2366	* those in the clean queue...
2367	* however, we have pages on the clean queue that
2368	* can be moved to the free queue, so let's not
2369	* stall the pageout scan
2370	*/
2371	flow_control.state = FCS_IDLE;
2372	goto consider_inactive;
2373	}
2374	if (flow_control.state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
2375	flow_control.state = FCS_IDLE;
2376	goto consider_inactive;
2377	}
2378
2379	VM_CHECK_MEMORYSTATUS;
2380
2381	if (flow_control.state != FCS_IDLE)
2382	VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, `1`);
2383
2384	iq->pgo_throttled = TRUE;
2385	assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, `1000`*NSEC_PER_USEC);
2386
2387	counter(c_vm_pageout_scan_block++);
2388
2389	vm_page_unlock_queues();
2390
2391	assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2392
2393	VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2394	iq->pgo_laundry, iq->pgo_maxlaundry, msecs, `0`);
2395	memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2396
2397	thread_block(THREAD_CONTINUE_NULL);
2398
2399	VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2400	iq->pgo_laundry, iq->pgo_maxlaundry, msecs, `0`);
2401	memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2402
2403	vm_page_lock_queues();
2404
2405	iq->pgo_throttled = FALSE;
2406
2407	if (loop_count >= vm_page_inactive_count)
2408	loop_count = `0`;
2409	inactive_burst_count = `0`;
2410
2411	goto Restart;
2412	/*NOTREACHED*/
2413	}
2414
2415
2416	flow_control.state = FCS_IDLE;
2417	consider_inactive:
2418	vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2419	vm_pageout_inactive_external_forced_reactivate_limit);
2420	loop_count++;
2421	inactive_burst_count++;
2422	vm_pageout_state.vm_pageout_inactive++;
2423
2424	/*
2425	* Choose a victim.
2426	*/
2427	while (`1`) {
2428
2429	#if CONFIG_BACKGROUND_QUEUE
2430	page_from_bg_q = FALSE;
2431	#endif /* CONFIG_BACKGROUND_QUEUE */
2432
2433	m = NULL;
2434	m_object = VM_OBJECT_NULL;
2435
2436	if (VM_DYNAMIC_PAGING_ENABLED()) {
2437	assert(vm_page_throttled_count == `0`);
2438	assert(vm_page_queue_empty(&vm_page_queue_throttled));
2439	}
2440
2441	/*
2442	* Try for a clean-queue inactive page.
2443	* These are pages that vm_pageout_scan tried to steal earlier, but
2444	* were dirty and had to be cleaned. Pick them up now that they are clean.
2445	*/
2446	if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2447	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2448
2449	assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2450
2451	break;
2452	}
2453
2454	/*
2455	* The next most eligible pages are ones we paged in speculatively,
2456	* but which have not yet been touched and have been aged out.
2457	*/
2458	if (!vm_page_queue_empty(&sq->age_q)) {
2459	m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2460
2461	assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2462
2463	if (!m->vmp_dirty \|\| force_anonymous == FALSE)
2464	break;
2465	else
2466	m = NULL;
2467	}
2468
2469	#if CONFIG_BACKGROUND_QUEUE
2470	if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2471	vm_object_t bg_m_object = NULL;
2472
2473	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2474
2475	bg_m_object = VM_PAGE_OBJECT(m);
2476
2477	if (!VM_PAGE_PAGEABLE(m)) {
2478	/*
2479	* This page is on the background queue
2480	* but not on a pageable queue. This is
2481	* likely a transient state and whoever
2482	* took it out of its pageable queue
2483	* will likely put it back on a pageable
2484	* queue soon but we can't deal with it
2485	* at this point, so let's ignore this
2486	* page.
2487	*/
2488	} else if (force_anonymous == FALSE \|\| bg_m_object->internal) {
2489
2490	if (bg_m_object->internal &&
2491	(VM_PAGE_Q_THROTTLED(iq) \|\|
2492	vm_compressor_out_of_space() == TRUE \|\|
2493	vm_page_free_count < (vm_page_free_reserved / `4`))) {
2494
2495	vm_pageout_skipped_bq_internal++;
2496	} else {
2497	page_from_bg_q = TRUE;
2498
2499	if (bg_m_object->internal)
2500	vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
2501	else
2502	vm_pageout_vminfo.vm_pageout_considered_bq_external++;
2503	break;
2504	}
2505	}
2506	}
2507	#endif
2508	inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2509
2510	if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min \|\| force_anonymous == TRUE) \|\|
2511	(inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2512	grab_anonymous = TRUE;
2513	anons_grabbed = `0`;
2514
2515	vm_pageout_vminfo.vm_pageout_skipped_external++;
2516	goto want_anonymous;
2517	}
2518	grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2519
2520	#if CONFIG_JETSAM
2521	/* If the file-backed pool has accumulated
2522	* significantly more pages than the jetsam
2523	* threshold, prefer to reclaim those
2524	* inline to minimise compute overhead of reclaiming
2525	* anonymous pages.
2526	* This calculation does not account for the CPU local
2527	* external page queues, as those are expected to be
2528	* much smaller relative to the global pools.
2529	*/
2530	if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2531	if (vm_page_pageable_external_count >
2532	vm_pageout_state.vm_page_filecache_min) {
2533	if ((vm_page_pageable_external_count *
2534	vm_pageout_memorystatus_fb_factor_dr) >
2535	(memorystatus_available_pages_critical *
2536	vm_pageout_memorystatus_fb_factor_nr)) {
2537	grab_anonymous = FALSE;
2538
2539	VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, `1`);
2540	}
2541	}
2542	if (grab_anonymous) {
2543	VM_PAGEOUT_DEBUG(vm_grab_anon_nops, `1`);
2544	}
2545	}
2546	#endif /* CONFIG_JETSAM */
2547
2548	want_anonymous:
2549	if (grab_anonymous == FALSE \|\| anons_grabbed >= ANONS_GRABBED_LIMIT \|\| vm_page_queue_empty(&vm_page_queue_anonymous)) {
2550
2551	if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2552	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2553
2554	assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2555	anons_grabbed = `0`;
2556
2557	if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
2558
2559	if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2560	if ((++reactivated_this_call % `100`)) {
2561	vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
2562	goto must_activate_page;
2563	}
2564	/*
2565	* steal 1% of the file backed pages even if
2566	* we are under the limit that has been set
2567	* for a healthy filecache
2568	*/
2569	}
2570	}
2571	break;
2572	}
2573	}
2574	if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2575	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2576
2577	assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2578	anons_grabbed++;
2579
2580	break;
2581	}
2582
2583	/*
2584	* if we've gotten here, we have no victim page.
2585	* check to see if we've not finished balancing the queues
2586	* or we have a page on the aged speculative queue that we
2587	* skipped due to force_anonymous == TRUE.. or we have
2588	* speculative pages that we can prematurely age... if
2589	* one of these cases we'll keep going, else panic
2590	*/
2591	force_anonymous = FALSE;
2592	VM_PAGEOUT_DEBUG(vm_pageout_no_victim, `1`);
2593
2594	if (!vm_page_queue_empty(&sq->age_q))
2595	goto done_with_inactivepage;
2596
2597	if (vm_page_speculative_count) {
2598	force_speculative_aging = TRUE;
2599	goto done_with_inactivepage;
2600	}
2601	panic("vm_pageout: no victim");
2602
2603	/* NOTREACHED */
2604	}
2605	assert(VM_PAGE_PAGEABLE(m));
2606	m_object = VM_PAGE_OBJECT(m);
2607	force_anonymous = FALSE;
2608
2609	page_prev_q_state = m->vmp_q_state;
2610	/*
2611	* we just found this page on one of our queues...
2612	* it can't also be on the pageout queue, so safe
2613	* to call vm_page_queues_remove
2614	*/
2615	vm_page_queues_remove(m, TRUE);
2616
2617	assert(!m->vmp_laundry);
2618	assert(!m->vmp_private);
2619	assert(!m->vmp_fictitious);
2620	assert(m_object != kernel_object);
2621	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2622
2623	vm_pageout_vminfo.vm_pageout_considered_page++;
2624
2625	DTRACE_VM2(scan, int, `1`, (uint64_t *), NULL);
2626
2627	/*
2628	* check to see if we currently are working
2629	* with the same object... if so, we've
2630	* already got the lock
2631	*/
2632	if (m_object != object) {
2633	/*
2634	* the object associated with candidate page is
2635	* different from the one we were just working
2636	* with... dump the lock if we still own it
2637	*/
2638	if (object != NULL) {
2639	vm_object_unlock(object);
2640	object = NULL;
2641	}
2642	/*
2643	* Try to lock object; since we've already got the
2644	* page queues lock, we can only 'try' for this one.
2645	* if the 'try' fails, we need to do a mutex_pause
2646	* to allow the owner of the object lock a chance to
2647	* run... otherwise, we're likely to trip over this
2648	* object in the same state as we work our way through
2649	* the queue... clumps of pages associated with the same
2650	* object are fairly typical on the inactive and active queues
2651	*/
2652	if (!vm_object_lock_try_scan(m_object)) {
2653	vm_page_t m_want = NULL;
2654
2655	vm_pageout_vminfo.vm_pageout_inactive_nolock++;
2656
2657	if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2658	VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, `1`);
2659
2660	pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2661
2662	m->vmp_reference = FALSE;
2663
2664	if ( !m_object->object_is_shared_cache) {
2665	/*
2666	* don't apply this optimization if this is the shared cache
2667	* object, it's too easy to get rid of very hot and important
2668	* pages...
2669	* m->vmp_object must be stable since we hold the page queues lock...
2670	* we can update the scan_collisions field sans the object lock
2671	* since it is a separate field and this is the only spot that does
2672	* a read-modify-write operation and it is never executed concurrently...
2673	* we can asynchronously set this field to 0 when creating a UPL, so it
2674	* is possible for the value to be a bit non-deterministic, but that's ok
2675	* since it's only used as a hint
2676	*/
2677	m_object->scan_collisions = `1`;
2678	}
2679	if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2680	m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2681	else if ( !vm_page_queue_empty(&sq->age_q))
2682	m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2683	else if ( (grab_anonymous == FALSE \|\| anons_grabbed >= ANONS_GRABBED_LIMIT \|\|
2684	vm_page_queue_empty(&vm_page_queue_anonymous)) &&
2685	!vm_page_queue_empty(&vm_page_queue_inactive))
2686	m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2687	else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2688	m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2689
2690	/*
2691	* this is the next object we're going to be interested in
2692	* try to make sure it's available after the mutex_pause
2693	* returns control
2694	*/
2695	if (m_want)
2696	vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2697
2698	goto requeue_page;
2699	}
2700	object = m_object;
2701	vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2702	}
2703	assert(m_object == object);
2704	assert(VM_PAGE_OBJECT(m) == m_object);
2705
2706	if (m->vmp_busy) {
2707	/*
2708	* Somebody is already playing with this page.
2709	* Put it back on the appropriate queue
2710	*
2711	*/
2712	VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, `1`);
2713
2714	if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2715	VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, `1`);
2716	requeue_page:
2717	if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2718	vm_page_enqueue_inactive(m, FALSE);
2719	else
2720	vm_page_activate(m);
2721	#if CONFIG_BACKGROUND_QUEUE
2722	#if DEVELOPMENT \|\| DEBUG
2723	if (page_from_bg_q == TRUE) {
2724	if (m_object->internal)
2725	vm_pageout_rejected_bq_internal++;
2726	else
2727	vm_pageout_rejected_bq_external++;
2728	}
2729	#endif
2730	#endif
2731	goto done_with_inactivepage;
2732	}
2733
2734	/*
2735	* if (m->vmp_cleaning && !m->vmp_free_when_done)
2736	* If already cleaning this page in place
2737	* just leave it off the paging queues.
2738	* We can leave the page mapped, and upl_commit_range
2739	* will put it on the clean queue.
2740	*
2741	* if (m->vmp_free_when_done && !m->vmp_cleaning)
2742	* an msync INVALIDATE is in progress...
2743	* this page has been marked for destruction
2744	* after it has been cleaned,
2745	* but not yet gathered into a UPL
2746	* where 'cleaning' will be set...
2747	* just leave it off the paging queues
2748	*
2749	* if (m->vmp_free_when_done && m->vmp_cleaning)
2750	* an msync INVALIDATE is in progress
2751	* and the UPL has already gathered this page...
2752	* just leave it off the paging queues
2753	*/
2754	if (m->vmp_free_when_done \|\| m->vmp_cleaning) {
2755	goto done_with_inactivepage;
2756	}
2757
2758
2759	/*
2760	* If it's absent, in error or the object is no longer alive,
2761	* we can reclaim the page... in the no longer alive case,
2762	* there are 2 states the page can be in that preclude us
2763	* from reclaiming it - busy or cleaning - that we've already
2764	* dealt with
2765	*/
2766	if (m->vmp_absent \|\| m->vmp_error \|\| !object->alive) {
2767
2768	if (m->vmp_absent)
2769	VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, `1`);
2770	else if (!object->alive)
2771	VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, `1`);
2772	else
2773	VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, `1`);
2774	reclaim_page:
2775	if (vm_pageout_deadlock_target) {
2776	VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, `1`);
2777	vm_pageout_deadlock_target--;
2778	}
2779
2780	DTRACE_VM2(dfree, int, `1`, (uint64_t *), NULL);
2781
2782	if (object->internal) {
2783	DTRACE_VM2(anonfree, int, `1`, (uint64_t *), NULL);
2784	} else {
2785	DTRACE_VM2(fsfree, int, `1`, (uint64_t *), NULL);
2786	}
2787	assert(!m->vmp_cleaning);
2788	assert(!m->vmp_laundry);
2789
2790	if (!object->internal &&
2791	object->pager != NULL &&
2792	object->pager->mo_pager_ops == &shared_region_pager_ops) {
2793	shared_region_pager_reclaimed++;
2794	}
2795
2796	m->vmp_busy = TRUE;
2797
2798	/*
2799	* remove page from object here since we're already
2800	* behind the object lock... defer the rest of the work
2801	* we'd normally do in vm_page_free_prepare_object
2802	* until 'vm_page_free_list' is called
2803	*/
2804	if (m->vmp_tabled)
2805	vm_page_remove(m, TRUE);
2806
2807	assert(m->vmp_pageq.next == `0` && m->vmp_pageq.prev == `0`);
2808	m->vmp_snext = local_freeq;
2809	local_freeq = m;
2810	local_freed++;
2811
2812	if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2813	vm_pageout_vminfo.vm_pageout_freed_speculative++;
2814	else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2815	vm_pageout_vminfo.vm_pageout_freed_cleaned++;
2816	else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
2817	vm_pageout_vminfo.vm_pageout_freed_internal++;
2818	else
2819	vm_pageout_vminfo.vm_pageout_freed_external++;
2820
2821	inactive_burst_count = `0`;
2822	goto done_with_inactivepage;
2823	}
2824	if (object->copy == VM_OBJECT_NULL) {
2825	/*
2826	* No one else can have any interest in this page.
2827	* If this is an empty purgable object, the page can be
2828	* reclaimed even if dirty.
2829	* If the page belongs to a volatile purgable object, we
2830	* reactivate it if the compressor isn't active.
2831	*/
2832	if (object->purgable == VM_PURGABLE_EMPTY) {
2833	if (m->vmp_pmapped == TRUE) {
2834	/* unmap the page */
2835	refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2836	if (refmod_state & VM_MEM_MODIFIED) {
2837	SET_PAGE_DIRTY(m, FALSE);
2838	}
2839	}
2840	if (m->vmp_dirty \|\| m->vmp_precious) {
2841	/* we saved the cost of cleaning this page ! */
2842	vm_page_purged_count++;
2843	}
2844	goto reclaim_page;
2845	}
2846
2847	if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2848	/*
2849	* With the VM compressor, the cost of
2850	* reclaiming a page is much lower (no I/O),
2851	* so if we find a "volatile" page, it's better
2852	* to let it get compressed rather than letting
2853	* it occupy a full page until it gets purged.
2854	* So no need to check for "volatile" here.
2855	*/
2856	} else if (object->purgable == VM_PURGABLE_VOLATILE) {
2857	/*
2858	* Avoid cleaning a "volatile" page which might
2859	* be purged soon.
2860	*/
2861
2862	/ if it's wired, we can't put it on our queue /
2863	assert(!VM_PAGE_WIRED(m));
2864
2865	/ just stick it back on! /
2866	reactivated_this_call++;
2867
2868	if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2869	VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, `1`);
2870
2871	goto reactivate_page;
2872	}
2873	}
2874	/*
2875	* If it's being used, reactivate.
2876	* (Fictitious pages are either busy or absent.)
2877	* First, update the reference and dirty bits
2878	* to make sure the page is unreferenced.
2879	*/
2880	refmod_state = -`1`;
2881
2882	if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
2883	refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2884
2885	if (refmod_state & VM_MEM_REFERENCED)
2886	m->vmp_reference = TRUE;
2887	if (refmod_state & VM_MEM_MODIFIED) {
2888	SET_PAGE_DIRTY(m, FALSE);
2889	}
2890	}
2891
2892	if (m->vmp_reference \|\| m->vmp_dirty) {
2893	/ deal with a rogue "reusable" page /
2894	VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
2895	}
2896	divisor = vm_pageout_state.vm_page_xpmapped_min_divisor;
2897
2898	if (divisor == `0`)
2899	vm_pageout_state.vm_page_xpmapped_min = `0`;
2900	else
2901	vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * `10`) / divisor;
2902
2903	if (!m->vmp_no_cache &&
2904	#if CONFIG_BACKGROUND_QUEUE
2905	page_from_bg_q == FALSE &&
2906	#endif
2907	(m->vmp_reference \|\| (m->vmp_xpmapped && !object->internal &&
2908	(vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
2909	/*
2910	* The page we pulled off the inactive list has
2911	* been referenced. It is possible for other
2912	* processors to be touching pages faster than we
2913	* can clear the referenced bit and traverse the
2914	* inactive queue, so we limit the number of
2915	* reactivations.
2916	*/
2917	if (++reactivated_this_call >= reactivate_limit) {
2918	vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++;
2919	} else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2920	vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++;
2921	} else {
2922	uint32_t isinuse;
2923
2924	if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2925	VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, `1`);
2926
2927	vm_pageout_vminfo.vm_pageout_inactive_referenced++;
2928	reactivate_page:
2929	if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2930	vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2931	/*
2932	* no explicit mappings of this object exist
2933	* and it's not open via the filesystem
2934	*/
2935	vm_page_deactivate(m);
2936	VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, `1`);
2937	} else {
2938	must_activate_page:
2939	/*
2940	* The page was/is being used, so put back on active list.
2941	*/
2942	vm_page_activate(m);
2943	VM_STAT_INCR(reactivations);
2944	inactive_burst_count = `0`;
2945	}
2946	#if CONFIG_BACKGROUND_QUEUE
2947	#if DEVELOPMENT \|\| DEBUG
2948	if (page_from_bg_q == TRUE) {
2949	if (m_object->internal)
2950	vm_pageout_rejected_bq_internal++;
2951	else
2952	vm_pageout_rejected_bq_external++;
2953	}
2954	#endif
2955	#endif
2956	if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2957	VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, `1`);
2958	vm_pageout_state.vm_pageout_inactive_used++;
2959
2960	goto done_with_inactivepage;
2961	}
2962	/*
2963	* Make sure we call pmap_get_refmod() if it
2964	* wasn't already called just above, to update
2965	* the dirty bit.
2966	*/
2967	if ((refmod_state == -`1`) && !m->vmp_dirty && m->vmp_pmapped) {
2968	refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2969	if (refmod_state & VM_MEM_MODIFIED) {
2970	SET_PAGE_DIRTY(m, FALSE);
2971	}
2972	}
2973	}
2974
2975	XPR(XPR_VM_PAGEOUT,
2976	"vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2977	object, m->vmp_offset, m, `0`,`0`);
2978
2979	/*
2980	* we've got a candidate page to steal...
2981	*
2982	* m->vmp_dirty is up to date courtesy of the
2983	* preceding check for m->vmp_reference... if
2984	* we get here, then m->vmp_reference had to be
2985	* FALSE (or possibly "reactivate_limit" was
2986	* exceeded), but in either case we called
2987	* pmap_get_refmod() and updated both
2988	* m->vmp_reference and m->vmp_dirty
2989	*
2990	* if it's dirty or precious we need to
2991	* see if the target queue is throttled
2992	* if it is, we need to skip over it by moving it back
2993	* to the end of the inactive queue
2994	*/
2995
2996	inactive_throttled = FALSE;
2997
2998	if (m->vmp_dirty \|\| m->vmp_precious) {
2999	if (object->internal) {
3000	if (VM_PAGE_Q_THROTTLED(iq))
3001	inactive_throttled = TRUE;
3002	} else if (VM_PAGE_Q_THROTTLED(eq)) {
3003	inactive_throttled = TRUE;
3004	}
3005	}
3006	throttle_inactive:
3007	if (!VM_DYNAMIC_PAGING_ENABLED() &&
3008	object->internal && m->vmp_dirty &&
3009	(object->purgable == VM_PURGABLE_DENY \|\|
3010	object->purgable == VM_PURGABLE_NONVOLATILE \|\|
3011	object->purgable == VM_PURGABLE_VOLATILE)) {
3012	vm_page_check_pageable_safe(m);
3013	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3014	vm_page_queue_enter(&vm_page_queue_throttled, m,
3015	vm_page_t, vmp_pageq);
3016	m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
3017	vm_page_throttled_count++;
3018
3019	VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, `1`);
3020
3021	inactive_burst_count = `0`;
3022	goto done_with_inactivepage;
3023	}
3024	if (inactive_throttled == TRUE) {
3025
3026	if (object->internal == FALSE) {
3027	/*
3028	* we need to break up the following potential deadlock case...
3029	* a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3030	* b) The thread doing the writing is waiting for pages while holding the truncate lock
3031	* c) Most of the pages in the inactive queue belong to this file.
3032	*
3033	* we are potentially in this deadlock because...
3034	* a) the external pageout queue is throttled
3035	* b) we're done with the active queue and moved on to the inactive queue
3036	* c) we've got a dirty external page
3037	*
3038	* since we don't know the reason for the external pageout queue being throttled we
3039	* must suspect that we are deadlocked, so move the current page onto the active queue
3040	* in an effort to cause a page from the active queue to 'age' to the inactive queue
3041	*
3042	* if we don't have jetsam configured (i.e. we have a dynamic pager), set
3043	* 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3044	* pool the next time we select a victim page... if we can make enough new free pages,
3045	* the deadlock will break, the external pageout queue will empty and it will no longer
3046	* be throttled
3047	*
3048	* if we have jetsam configured, keep a count of the pages reactivated this way so
3049	* that we can try to find clean pages in the active/inactive queues before
3050	* deciding to jetsam a process
3051	*/
3052	vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
3053
3054	vm_page_check_pageable_safe(m);
3055	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3056	vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, vmp_pageq);
3057	m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
3058	vm_page_active_count++;
3059	vm_page_pageable_external_count++;
3060
3061	vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3062
3063	#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3064	vm_pageout_inactive_external_forced_reactivate_limit--;
3065
3066	if (vm_pageout_inactive_external_forced_reactivate_limit <= `0`) {
3067	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3068	/*
3069	* Possible deadlock scenario so request jetsam action
3070	*/
3071	assert(object);
3072	vm_object_unlock(object);
3073	object = VM_OBJECT_NULL;
3074	vm_page_unlock_queues();
3075
3076	VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3077	vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3078
3079	/ Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. /
3080	if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
3081	VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, `1`);
3082	}
3083
3084	VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
3085	vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3086
3087	vm_page_lock_queues();
3088	delayed_unlock = `1`;
3089	}
3090	#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3091	force_anonymous = TRUE;
3092	#endif
3093	inactive_burst_count = `0`;
3094	goto done_with_inactivepage;
3095	} else {
3096	goto must_activate_page;
3097	}
3098	}
3099
3100	/*
3101	* we've got a page that we can steal...
3102	* eliminate all mappings and make sure
3103	* we have the up-to-date modified state
3104	*
3105	* if we need to do a pmap_disconnect then we
3106	* need to re-evaluate m->vmp_dirty since the pmap_disconnect
3107	* provides the true state atomically... the
3108	* page was still mapped up to the pmap_disconnect
3109	* and may have been dirtied at the last microsecond
3110	*
3111	* Note that if 'pmapped' is FALSE then the page is not
3112	* and has not been in any map, so there is no point calling
3113	* pmap_disconnect(). m->vmp_dirty could have been set in anticipation
3114	* of likely usage of the page.
3115	*/
3116	if (m->vmp_pmapped == TRUE) {
3117	int pmap_options;
3118
3119	/*
3120	* Don't count this page as going into the compressor
3121	* if any of these are true:
3122	* 1) compressed pager isn't enabled
3123	* 2) Freezer enabled device with compressed pager
3124	* backend (exclusive use) i.e. most of the VM system
3125	* (including vm_pageout_scan) has no knowledge of
3126	* the compressor
3127	* 3) This page belongs to a file and hence will not be
3128	* sent into the compressor
3129	*/
3130	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE \|\|
3131	object->internal == FALSE) {
3132	pmap_options = `0`;
3133	} else if (m->vmp_dirty \|\| m->vmp_precious) {
3134	/*
3135	* VM knows that this page is dirty (or
3136	* precious) and needs to be compressed
3137	* rather than freed.
3138	* Tell the pmap layer to count this page
3139	* as "compressed".
3140	*/
3141	pmap_options = PMAP_OPTIONS_COMPRESSOR;
3142	} else {
3143	/*
3144	* VM does not know if the page needs to
3145	* be preserved but the pmap layer might tell
3146	* us if any mapping has "modified" it.
3147	* Let the pmap layer count this page
3148	* as compressed if and only if it has been
3149	* modified.
3150	*/
3151	pmap_options =
3152	PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3153	}
3154	refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3155	pmap_options,
3156	NULL);
3157	if (refmod_state & VM_MEM_MODIFIED) {
3158	SET_PAGE_DIRTY(m, FALSE);
3159	}
3160	}
3161
3162	/*
3163	* reset our count of pages that have been reclaimed
3164	* since the last page was 'stolen'
3165	*/
3166	inactive_reclaim_run = `0`;
3167
3168	/*
3169	* If it's clean and not precious, we can free the page.
3170	*/
3171	if (!m->vmp_dirty && !m->vmp_precious) {
3172
3173	vm_pageout_state.vm_pageout_inactive_clean++;
3174
3175	/*
3176	* OK, at this point we have found a page we are going to free.
3177	*/
3178	#if CONFIG_PHANTOM_CACHE
3179	if (!object->internal)
3180	vm_phantom_cache_add_ghost(m);
3181	#endif
3182	goto reclaim_page;
3183	}
3184
3185	/*
3186	* The page may have been dirtied since the last check
3187	* for a throttled target queue (which may have been skipped
3188	* if the page was clean then). With the dirty page
3189	* disconnected here, we can make one final check.
3190	*/
3191	if (object->internal) {
3192	if (VM_PAGE_Q_THROTTLED(iq))
3193	inactive_throttled = TRUE;
3194	} else if (VM_PAGE_Q_THROTTLED(eq)) {
3195	inactive_throttled = TRUE;
3196	}
3197
3198	if (inactive_throttled == TRUE)
3199	goto throttle_inactive;
3200
3201	#if VM_PRESSURE_EVENTS
3202	#if CONFIG_JETSAM
3203
3204	/*
3205	* If Jetsam is enabled, then the sending
3206	* of memory pressure notifications is handled
3207	* from the same thread that takes care of high-water
3208	* and other jetsams i.e. the memorystatus_thread.
3209	*/
3210
3211	#else /* CONFIG_JETSAM */
3212
3213	vm_pressure_response();
3214
3215	#endif /* CONFIG_JETSAM */
3216	#endif /* VM_PRESSURE_EVENTS */
3217
3218	if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3219	VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, `1`);
3220
3221	if (object->internal)
3222	vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++;
3223	else
3224	vm_pageout_vminfo.vm_pageout_inactive_dirty_external++;
3225
3226	/*
3227	* internal pages will go to the compressor...
3228	* external pages will go to the appropriate pager to be cleaned
3229	* and upon completion will end up on 'vm_page_queue_cleaned' which
3230	* is a preferred queue to steal from
3231	*/
3232	vm_pageout_cluster(m);
3233	inactive_burst_count = `0`;
3234
3235	done_with_inactivepage:
3236
3237	if (delayed_unlock++ > delayed_unlock_limit) {
3238	int freed = local_freed;
3239
3240	vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3241	VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3242	if (freed == `0`)
3243	lck_mtx_yield(&vm_page_queue_lock);
3244	} else if (vm_pageout_scan_wants_object) {
3245	vm_page_unlock_queues();
3246	mutex_pause(`0`);
3247	vm_page_lock_queues();
3248	}
3249	/*
3250	* back to top of pageout scan loop
3251	*/
3252	}
3253	}
3254
3255
3256	void
3257	vm_page_free_reserve(
3258	int pages)
3259	{
3260	int free_after_reserve;
3261
3262	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3263
3264	if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3265	vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3266	else
3267	vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3268
3269	} else {
3270	if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3271	vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3272	else
3273	vm_page_free_reserved += pages;
3274	}
3275	free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved;
3276
3277	vm_page_free_min = vm_page_free_reserved +
3278	VM_PAGE_FREE_MIN(free_after_reserve);
3279
3280	if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3281	vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3282
3283	vm_page_free_target = vm_page_free_reserved +
3284	VM_PAGE_FREE_TARGET(free_after_reserve);
3285
3286	if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3287	vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3288
3289	if (vm_page_free_target < vm_page_free_min + `5`)
3290	vm_page_free_target = vm_page_free_min + `5`;
3291
3292	vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / `2`);
3293	}
3294
3295	/*
3296	* vm_pageout is the high level pageout daemon.
3297	*/
3298
3299	void
3300	vm_pageout_continue(void)
3301	{
3302	DTRACE_VM2(pgrrun, int, `1`, (uint64_t *), NULL);
3303	VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, `1`);
3304
3305	#if !CONFIG_EMBEDDED
3306	lck_mtx_lock(&vm_page_queue_free_lock);
3307	vm_pageout_running = TRUE;
3308	lck_mtx_unlock(&vm_page_queue_free_lock);
3309	#endif /* CONFIG_EMBEDDED */
3310
3311	vm_pageout_scan();
3312	/*
3313	* we hold both the vm_page_queue_free_lock
3314	* and the vm_page_queues_lock at this point
3315	*/
3316	assert(vm_page_free_wanted == `0`);
3317	assert(vm_page_free_wanted_privileged == `0`);
3318	assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3319
3320	#if !CONFIG_EMBEDDED
3321	vm_pageout_running = FALSE;
3322	if (vm_pageout_waiter) {
3323	vm_pageout_waiter = FALSE;
3324	thread_wakeup((event_t)&vm_pageout_waiter);
3325	}
3326	#endif /* !CONFIG_EMBEDDED */
3327
3328	lck_mtx_unlock(&vm_page_queue_free_lock);
3329	vm_page_unlock_queues();
3330
3331	counter(c_vm_pageout_block++);
3332	thread_block((thread_continue_t)vm_pageout_continue);
3333	/NOTREACHED/
3334	}
3335
3336	#if !CONFIG_EMBEDDED
3337	kern_return_t
3338	vm_pageout_wait(uint64_t deadline)
3339	{
3340	kern_return_t kr;
3341
3342	lck_mtx_lock(&vm_page_queue_free_lock);
3343	for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3344	vm_pageout_waiter = TRUE;
3345	if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3346	&vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3347	(event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3348	kr = KERN_OPERATION_TIMED_OUT;
3349	}
3350	}
3351	lck_mtx_unlock(&vm_page_queue_free_lock);
3352
3353	return (kr);
3354	}
3355	#endif /* !CONFIG_EMBEDDED */
3356
3357
3358	static void
3359	vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3360	{
3361	vm_page_t m = NULL;
3362	vm_object_t object;
3363	vm_object_offset_t offset;
3364	memory_object_t pager;
3365
3366	/ On systems with a compressor, the external IO thread clears its*
3367	* VM privileged bit to accommodate large allocations (e.g. bulk UPL
3368	* creation)
3369	*/
3370	if (vm_pageout_state.vm_pageout_internal_iothread != THREAD_NULL)
3371	current_thread()->options &= ~TH_OPT_VMPRIV;
3372
3373	vm_page_lockspin_queues();
3374
3375	while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3376
3377	q->pgo_busy = TRUE;
3378	vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3379
3380	assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3381	VM_PAGE_CHECK(m);
3382	/*
3383	* grab a snapshot of the object and offset this
3384	* page is tabled in so that we can relookup this
3385	* page after we've taken the object lock - these
3386	* fields are stable while we hold the page queues lock
3387	* but as soon as we drop it, there is nothing to keep
3388	* this page in this object... we hold an activity_in_progress
3389	* on this object which will keep it from terminating
3390	*/
3391	object = VM_PAGE_OBJECT(m);
3392	offset = m->vmp_offset;
3393
3394	m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3395	VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3396
3397	vm_page_unlock_queues();
3398
3399	vm_object_lock(object);
3400
3401	m = vm_page_lookup(object, offset);
3402
3403	if (m == NULL \|\|
3404	m->vmp_busy \|\| m->vmp_cleaning \|\| !m->vmp_laundry \|\| (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) {
3405	/*
3406	* it's either the same page that someone else has
3407	* started cleaning (or it's finished cleaning or
3408	* been put back on the pageout queue), or
3409	* the page has been freed or we have found a
3410	* new page at this offset... in all of these cases
3411	* we merely need to release the activity_in_progress
3412	* we took when we put the page on the pageout queue
3413	*/
3414	vm_object_activity_end(object);
3415	vm_object_unlock(object);
3416
3417	vm_page_lockspin_queues();
3418	continue;
3419	}
3420	pager = object->pager;
3421
3422	if (pager == MEMORY_OBJECT_NULL) {
3423	/*
3424	* This pager has been destroyed by either
3425	* memory_object_destroy or vm_object_destroy, and
3426	* so there is nowhere for the page to go.
3427	*/
3428	if (m->vmp_free_when_done) {
3429	/*
3430	* Just free the page... VM_PAGE_FREE takes
3431	* care of cleaning up all the state...
3432	* including doing the vm_pageout_throttle_up
3433	*/
3434	VM_PAGE_FREE(m);
3435	} else {
3436	vm_page_lockspin_queues();
3437
3438	vm_pageout_throttle_up(m);
3439	vm_page_activate(m);
3440
3441	vm_page_unlock_queues();
3442
3443	/*
3444	* And we are done with it.
3445	*/
3446	}
3447	vm_object_activity_end(object);
3448	vm_object_unlock(object);
3449
3450	vm_page_lockspin_queues();
3451	continue;
3452	}
3453	#if 0
3454	/*
3455	* we don't hold the page queue lock
3456	* so this check isn't safe to make
3457	*/
3458	VM_PAGE_CHECK(m);
3459	#endif
3460	/*
3461	* give back the activity_in_progress reference we
3462	* took when we queued up this page and replace it
3463	* it with a paging_in_progress reference that will
3464	* also hold the paging offset from changing and
3465	* prevent the object from terminating
3466	*/
3467	vm_object_activity_end(object);
3468	vm_object_paging_begin(object);
3469	vm_object_unlock(object);
3470
3471	/*
3472	* Send the data to the pager.
3473	* any pageout clustering happens there
3474	*/
3475	memory_object_data_return(pager,
3476	m->vmp_offset + object->paging_offset,
3477	PAGE_SIZE,
3478	NULL,
3479	NULL,
3480	FALSE,
3481	FALSE,
3482	`0`);
3483
3484	vm_object_lock(object);
3485	vm_object_paging_end(object);
3486	vm_object_unlock(object);
3487
3488	vm_pageout_io_throttle();
3489
3490	vm_page_lockspin_queues();
3491	}
3492	q->pgo_busy = FALSE;
3493	q->pgo_idle = TRUE;
3494
3495	assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3496	vm_page_unlock_queues();
3497
3498	thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3499	/NOTREACHED/
3500	}
3501
3502
/* number of compressed pages accumulated locally before they are freed in one batch */
#define	MAX_FREE_BATCH		32

/*
 * Set via sysctl to record time accrued by
 * this thread.
 */
uint32_t vm_compressor_time_thread;
3507
3508
3509	void
3510	vm_pageout_iothread_internal_continue(struct cq *);
3511	void
3512	vm_pageout_iothread_internal_continue(struct cq *cq)
3513	{
3514	struct vm_pageout_queue *q;
3515	vm_page_t m = NULL;
3516	boolean_t pgo_draining;
3517	vm_page_t local_q;
3518	int local_cnt;
3519	vm_page_t local_freeq = NULL;
3520	int local_freed = `0`;
3521	int local_batch_size;
3522	#if DEVELOPMENT \|\| DEBUG
3523	int ncomps = `0`;
3524	boolean_t marked_active = FALSE;
3525	#endif
3526	KERNEL_DEBUG(`0xe040000c` \| DBG_FUNC_END, `0`, `0`, `0`, `0`, `0`);
3527
3528	q = cq->q;
3529	local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * `2`);
3530
3531	#if RECORD_THE_COMPRESSED_DATA
3532	if (q->pgo_laundry)
3533	c_compressed_record_init();
3534	#endif
3535	while (TRUE) {
3536	int pages_left_on_q = `0`;
3537
3538	local_cnt = `0`;
3539	local_q = NULL;
3540
3541	KERNEL_DEBUG(`0xe0400014` \| DBG_FUNC_START, `0`, `0`, `0`, `0`, `0`);
3542
3543	vm_page_lock_queues();
3544	#if DEVELOPMENT \|\| DEBUG
3545	if (marked_active == FALSE) {
3546	vmct_active++;
3547	vmct_state[cq->id] = VMCT_ACTIVE;
3548	marked_active = TRUE;
3549	if (vmct_active == `1`) {
3550	vm_compressor_epoch_start = mach_absolute_time();
3551	}
3552	}
3553	#endif
3554	KERNEL_DEBUG(`0xe0400014` \| DBG_FUNC_END, `0`, `0`, `0`, `0`, `0`);
3555
3556	KERNEL_DEBUG(`0xe0400018` \| DBG_FUNC_START, q->pgo_laundry, `0`, `0`, `0`, `0`);
3557
3558	while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3559
3560	vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3561	assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3562	VM_PAGE_CHECK(m);
3563
3564	m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3565	VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3566	m->vmp_laundry = FALSE;
3567
3568	m->vmp_snext = local_q;
3569	local_q = m;
3570	local_cnt++;
3571	}
3572	if (local_q == NULL)
3573	break;
3574
3575	q->pgo_busy = TRUE;
3576
3577	if ((pgo_draining = q->pgo_draining) == FALSE) {
3578	vm_pageout_throttle_up_batch(q, local_cnt);
3579	pages_left_on_q = q->pgo_laundry;
3580	} else
3581	pages_left_on_q = q->pgo_laundry - local_cnt;
3582
3583	vm_page_unlock_queues();
3584
3585	#if !RECORD_THE_COMPRESSED_DATA
3586	if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - `1`)) {
3587	thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + `1`));
3588	}
3589	#endif
3590	KERNEL_DEBUG(`0xe0400018` \| DBG_FUNC_END, q->pgo_laundry, `0`, `0`, `0`, `0`);
3591
3592	while (local_q) {
3593
3594	KERNEL_DEBUG(`0xe0400024` \| DBG_FUNC_START, local_cnt, `0`, `0`, `0`, `0`);
3595
3596	m = local_q;
3597	local_q = m->vmp_snext;
3598	m->vmp_snext = NULL;
3599
3600	if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m) == KERN_SUCCESS) {
3601	#if DEVELOPMENT \|\| DEBUG
3602	ncomps++;
3603	#endif
3604	KERNEL_DEBUG(`0xe0400024` \| DBG_FUNC_END, local_cnt, `0`, `0`, `0`, `0`);
3605
3606	m->vmp_snext = local_freeq;
3607	local_freeq = m;
3608	local_freed++;
3609
3610	if (local_freed >= MAX_FREE_BATCH) {
3611
3612	OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3613
3614	vm_page_free_list(local_freeq, TRUE);
3615
3616	local_freeq = NULL;
3617	local_freed = `0`;
3618	}
3619	}
3620	#if !CONFIG_JETSAM
3621	while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3622	kern_return_t wait_result;
3623	int need_wakeup = `0`;
3624
3625	if (local_freeq) {
3626	OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3627
3628	vm_page_free_list(local_freeq, TRUE);
3629	local_freeq = NULL;
3630	local_freed = `0`;
3631
3632	continue;
3633	}
3634	lck_mtx_lock_spin(&vm_page_queue_free_lock);
3635
3636	if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3637
3638	if (vm_page_free_wanted_privileged++ == `0`)
3639	need_wakeup = `1`;
3640	wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3641
3642	lck_mtx_unlock(&vm_page_queue_free_lock);
3643
3644	if (need_wakeup)
3645	thread_wakeup((event_t)&vm_page_free_wanted);
3646
3647	if (wait_result == THREAD_WAITING)
3648
3649	thread_block(THREAD_CONTINUE_NULL);
3650	} else
3651	lck_mtx_unlock(&vm_page_queue_free_lock);
3652	}
3653	#endif
3654	}
3655	if (local_freeq) {
3656	OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3657
3658	vm_page_free_list(local_freeq, TRUE);
3659	local_freeq = NULL;
3660	local_freed = `0`;
3661	}
3662	if (pgo_draining == TRUE) {
3663	vm_page_lockspin_queues();
3664	vm_pageout_throttle_up_batch(q, local_cnt);
3665	vm_page_unlock_queues();
3666	}
3667	}
3668	KERNEL_DEBUG(`0xe040000c` \| DBG_FUNC_START, `0`, `0`, `0`, `0`, `0`);
3669
3670	/*
3671	* queue lock is held and our q is empty
3672	*/
3673	q->pgo_busy = FALSE;
3674	q->pgo_idle = TRUE;
3675
3676	assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3677	#if DEVELOPMENT \|\| DEBUG
3678	if (marked_active == TRUE) {
3679	vmct_active--;
3680	vmct_state[cq->id] = VMCT_IDLE;
3681
3682	if (vmct_active == `0`) {
3683	vm_compressor_epoch_stop = mach_absolute_time();
3684	assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start,
3685	"Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
3686	vm_compressor_epoch_start, vm_compressor_epoch_stop);
3687	/ This interval includes intervals where one or more*
3688	* compressor threads were pre-empted
3689	*/
3690	vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start;
3691	}
3692	}
3693	#endif
3694	vm_page_unlock_queues();
3695	#if DEVELOPMENT \|\| DEBUG
3696	if (__improbable(vm_compressor_time_thread)) {
3697	vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
3698	vmct_stats.vmct_pages[cq->id] += ncomps;
3699	vmct_stats.vmct_iterations[cq->id]++;
3700	if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
3701	vmct_stats.vmct_maxpages[cq->id] = ncomps;
3702	}
3703	if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
3704	vmct_stats.vmct_minpages[cq->id] = ncomps;
3705	}
3706	}
3707	#endif
3708
3709	KERNEL_DEBUG(`0xe0400018` \| DBG_FUNC_END, `0`, `0`, `0`, `0`, `0`);
3710
3711	thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3712	/NOTREACHED/
3713	}
3714
3715
3716	kern_return_t
3717	vm_pageout_compress_page(void *current_chead, char* *scratch_buf, vm_page_t m)
3718	{
3719	vm_object_t object;
3720	memory_object_t pager;
3721	int compressed_count_delta;
3722	kern_return_t retval;
3723
3724	object = VM_PAGE_OBJECT(m);
3725
3726	assert(!m->vmp_free_when_done);
3727	assert(!m->vmp_laundry);
3728
3729	pager = object->pager;
3730
3731	if (!object->pager_initialized \|\| pager == MEMORY_OBJECT_NULL) {
3732
3733	KERNEL_DEBUG(`0xe0400010` \| DBG_FUNC_START, object, pager, `0`, `0`, `0`);
3734
3735	vm_object_lock(object);
3736
3737	/*
3738	* If there is no memory object for the page, create
3739	* one and hand it to the compression pager.
3740	*/
3741
3742	if (!object->pager_initialized)
3743	vm_object_collapse(object, (vm_object_offset_t) `0`, TRUE);
3744	if (!object->pager_initialized)
3745	vm_object_compressor_pager_create(object);
3746
3747	pager = object->pager;
3748
3749	if (!object->pager_initialized \|\| pager == MEMORY_OBJECT_NULL) {
3750	/*
3751	* Still no pager for the object,
3752	* or the pager has been destroyed.
3753	* Reactivate the page.
3754	*
3755	* Should only happen if there is no
3756	* compression pager
3757	*/
3758	PAGE_WAKEUP_DONE(m);
3759
3760	vm_page_lockspin_queues();
3761	vm_page_activate(m);
3762	VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, `1`);
3763	vm_page_unlock_queues();
3764
3765	/*
3766	* And we are done with it.
3767	*/
3768	vm_object_activity_end(object);
3769	vm_object_unlock(object);
3770
3771	return KERN_FAILURE;
3772	}
3773	vm_object_unlock(object);
3774
3775	KERNEL_DEBUG(`0xe0400010` \| DBG_FUNC_END, object, pager, `0`, `0`, `0`);
3776	}
3777	assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3778	assert(object->activity_in_progress > `0`);
3779
3780	retval = vm_compressor_pager_put(
3781	pager,
3782	m->vmp_offset + object->paging_offset,
3783	VM_PAGE_GET_PHYS_PAGE(m),
3784	current_chead,
3785	scratch_buf,
3786	&compressed_count_delta);
3787
3788	vm_object_lock(object);
3789
3790	assert(object->activity_in_progress > `0`);
3791	assert(VM_PAGE_OBJECT(m) == object);
3792	assert( !VM_PAGE_WIRED(m));
3793
3794	vm_compressor_pager_count(pager,
3795	compressed_count_delta,
3796	FALSE, / shared_lock /
3797	object);
3798
3799	if (retval == KERN_SUCCESS) {
3800	/*
3801	* If the object is purgeable, its owner's
3802	* purgeable ledgers will be updated in
3803	* vm_page_remove() but the page still
3804	* contributes to the owner's memory footprint,
3805	* so account for it as such.
3806	*/
3807	if ((object->purgable != VM_PURGABLE_DENY \|\|
3808	object->vo_ledger_tag) &&
3809	object->vo_owner != NULL) {
3810	/ one more compressed purgeable/tagged page /
3811	vm_object_owner_compressed_update(object,
3812	+`1`);
3813	}
3814	VM_STAT_INCR(compressions);
3815
3816	if (m->vmp_tabled)
3817	vm_page_remove(m, TRUE);
3818
3819	} else {
3820	PAGE_WAKEUP_DONE(m);
3821
3822	vm_page_lockspin_queues();
3823
3824	vm_page_activate(m);
3825	vm_pageout_vminfo.vm_compressor_failed++;
3826
3827	vm_page_unlock_queues();
3828	}
3829	vm_object_activity_end(object);
3830	vm_object_unlock(object);
3831
3832	return retval;
3833	}
3834
3835
3836	static void
3837	vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3838	{
3839	uint32_t policy;
3840
3841	if (hibernate_cleaning_in_progress == TRUE)
3842	req_lowpriority = FALSE;
3843
3844	if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
3845
3846	vm_page_unlock_queues();
3847
3848	if (req_lowpriority == TRUE) {
3849	policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
3850	DTRACE_VM(laundrythrottle);
3851	} else {
3852	policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
3853	DTRACE_VM(laundryunthrottle);
3854	}
3855	proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
3856	TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3857
3858	eq->pgo_lowpriority = req_lowpriority;
3859
3860	vm_page_lock_queues();
3861	}
3862	}
3863
3864
3865	static void
3866	vm_pageout_iothread_external(void)
3867	{
3868	thread_t self = current_thread();
3869
3870	self->options \|= TH_OPT_VMPRIV;
3871
3872	DTRACE_VM2(laundrythrottle, int, `1`, (uint64_t *), NULL);
3873
3874	proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
3875	TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3876
3877	vm_page_lock_queues();
3878
3879	vm_pageout_queue_external.pgo_tid = self->thread_id;
3880	vm_pageout_queue_external.pgo_lowpriority = TRUE;
3881	vm_pageout_queue_external.pgo_inited = TRUE;
3882
3883	vm_page_unlock_queues();
3884
3885	vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3886
3887	/NOTREACHED/
3888	}
3889
3890
3891	static void
3892	vm_pageout_iothread_internal(struct cq *cq)
3893	{
3894	thread_t self = current_thread();
3895
3896	self->options \|= TH_OPT_VMPRIV;
3897
3898	vm_page_lock_queues();
3899
3900	vm_pageout_queue_internal.pgo_tid = self->thread_id;
3901	vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3902	vm_pageout_queue_internal.pgo_inited = TRUE;
3903
3904	vm_page_unlock_queues();
3905
3906	if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
3907	thread_vm_bind_group_add();
3908
3909
3910	thread_set_thread_name(current_thread(), "VM_compressor");
3911	#if DEVELOPMENT \|\| DEBUG
3912	vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
3913	#endif
3914	vm_pageout_iothread_internal_continue(cq);
3915
3916	/NOTREACHED/
3917	}
3918
3919	kern_return_t
3920	vm_set_buffer_cleanup_callout(boolean_t (func)(int*))
3921	{
3922	if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3923	return KERN_SUCCESS;
3924	} else {
3925	return KERN_FAILURE; / Already set /
3926	}
3927	}
3928
3929	extern boolean_t memorystatus_manual_testing_on;
3930	extern unsigned int memorystatus_level;
3931
3932
3933	#if VM_PRESSURE_EVENTS
3934
3935	boolean_t vm_pressure_events_enabled = FALSE;
3936
3937	void
3938	vm_pressure_response(void)
3939	{
3940
3941	vm_pressure_level_t old_level = kVMPressureNormal;
3942	int new_level = -`1`;
3943	unsigned int total_pages;
3944	uint64_t available_memory = `0`;
3945
3946	if (vm_pressure_events_enabled == FALSE)
3947	return;
3948
3949	#if CONFIG_EMBEDDED
3950
3951	available_memory = (uint64_t) memorystatus_available_pages;
3952
3953	#else /* CONFIG_EMBEDDED */
3954
3955	available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3956	memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3957
3958	#endif /* CONFIG_EMBEDDED */
3959
3960	total_pages = (unsigned int) atop_64(max_mem);
3961	#if CONFIG_SECLUDED_MEMORY
3962	total_pages -= vm_page_secluded_count;
3963	#endif /* CONFIG_SECLUDED_MEMORY */
3964	memorystatus_level = (unsigned int) ((available_memory * `100`) / total_pages);
3965
3966	if (memorystatus_manual_testing_on) {
3967	return;
3968	}
3969
3970	old_level = memorystatus_vm_pressure_level;
3971
3972	switch (memorystatus_vm_pressure_level) {
3973
3974	case kVMPressureNormal:
3975	{
3976	if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3977	new_level = kVMPressureCritical;
3978	} else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
3979	new_level = kVMPressureWarning;
3980	}
3981	break;
3982	}
3983
3984	case kVMPressureWarning:
3985	case kVMPressureUrgent:
3986	{
3987	if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3988	new_level = kVMPressureNormal;
3989	} else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3990	new_level = kVMPressureCritical;
3991	}
3992	break;
3993	}
3994
3995	case kVMPressureCritical:
3996	{
3997	if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3998	new_level = kVMPressureNormal;
3999	} else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4000	new_level = kVMPressureWarning;
4001	}
4002	break;
4003	}
4004
4005	default:
4006	return;
4007	}
4008
4009	if (new_level != -`1`) {
4010	memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4011
4012	if (new_level != old_level) {
4013	VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
4014	new_level, old_level, `0`, `0`);
4015	}
4016
4017	if ((memorystatus_vm_pressure_level != kVMPressureNormal) \|\| (old_level != memorystatus_vm_pressure_level)) {
4018	if (vm_pageout_state.vm_pressure_thread_running == FALSE) {
4019	thread_wakeup(&vm_pressure_thread);
4020	}
4021
4022	if (old_level != memorystatus_vm_pressure_level) {
4023	thread_wakeup(&vm_pageout_state.vm_pressure_changed);
4024	}
4025	}
4026	}
4027
4028	}
4029	#endif /* VM_PRESSURE_EVENTS */
4030
4031	kern_return_t
4032	mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4033
4034	#if CONFIG_EMBEDDED
4035
4036	return KERN_FAILURE;
4037
4038	#elif !VM_PRESSURE_EVENTS
4039
4040	return KERN_FAILURE;
4041
4042	#else /* VM_PRESSURE_EVENTS */
4043
4044	kern_return_t kr = KERN_SUCCESS;
4045
4046	if (pressure_level != NULL) {
4047
4048	vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4049
4050	if (wait_for_pressure == TRUE) {
4051	wait_result_t wr = `0`;
4052
4053	while (old_level == *pressure_level) {
4054	wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
4055	THREAD_INTERRUPTIBLE);
4056	if (wr == THREAD_WAITING) {
4057	wr = thread_block(THREAD_CONTINUE_NULL);
4058	}
4059	if (wr == THREAD_INTERRUPTED) {
4060	return KERN_ABORTED;
4061	}
4062	if (wr == THREAD_AWAKENED) {
4063
4064	old_level = memorystatus_vm_pressure_level;
4065
4066	if (old_level != *pressure_level) {
4067	break;
4068	}
4069	}
4070	}
4071	}
4072
4073	*pressure_level = old_level;
4074	kr = KERN_SUCCESS;
4075	} else {
4076	kr = KERN_INVALID_ARGUMENT;
4077	}
4078
4079	return kr;
4080	#endif /* VM_PRESSURE_EVENTS */
4081	}
4082
4083	#if VM_PRESSURE_EVENTS
4084	void
4085	vm_pressure_thread(void) {
4086	static boolean_t thread_initialized = FALSE;
4087
4088	if (thread_initialized == TRUE) {
4089	vm_pageout_state.vm_pressure_thread_running = TRUE;
4090	consider_vm_pressure_events();
4091	vm_pageout_state.vm_pressure_thread_running = FALSE;
4092	}
4093
4094	thread_set_thread_name(current_thread(), "VM_pressure");
4095	thread_initialized = TRUE;
4096	assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4097	thread_block((thread_continue_t)vm_pressure_thread);
4098	}
4099	#endif /* VM_PRESSURE_EVENTS */
4100
4101
4102	/*
4103	* called once per-second via "compute_averages"
4104	*/
4105	void
4106	compute_pageout_gc_throttle(__unused void *arg)
4107	{
4108	if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) {
4109
4110	vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page;
4111
4112	thread_wakeup((event_t) &vm_pageout_garbage_collect);
4113	}
4114	}
4115
4116	/*
4117	* vm_pageout_garbage_collect can also be called when the zone allocator needs
4118	* to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4119	* jetsams. We need to check if the zone map size is above its jetsam limit to
4120	* decide if this was indeed the case.
4121	*
4122	* We need to do this on a different thread because of the following reasons:
4123	*
4124	* 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4125	* itself causing the system to hang. We perform synchronous jetsams if we're
4126	* leaking in the VM map entries zone, so the leaking process could be doing a
4127	* zalloc for a VM map entry while holding its vm_map lock, when it decides to
4128	* jetsam itself. We also need the vm_map lock on the process termination path,
4129	* which would now lead the dying process to deadlock against itself.
4130	*
4131	* 2. The jetsam path might need to allocate zone memory itself. We could try
4132	* using the non-blocking variant of zalloc for this path, but we can still
4133	* end up trying to do a kernel_memory_allocate when the zone_map is almost
4134	* full.
4135	*/
4136
4137	extern boolean_t is_zone_map_nearing_exhaustion(void);
4138
4139	void
4140	vm_pageout_garbage_collect(int collect)
4141	{
4142	if (collect) {
4143	if (is_zone_map_nearing_exhaustion()) {
4144	/*
4145	* Woken up by the zone allocator for zone-map-exhaustion jetsams.
4146	*
4147	* Bail out after calling zone_gc (which triggers the
4148	* zone-map-exhaustion jetsams). If we fall through, the subsequent
4149	* operations that clear out a bunch of caches might allocate zone
4150	* memory themselves (for eg. vm_map operations would need VM map
4151	* entries). Since the zone map is almost full at this point, we
4152	* could end up with a panic. We just need to quickly jetsam a
4153	* process and exit here.
4154	*
4155	* It could so happen that we were woken up to relieve memory
4156	* pressure and the zone map also happened to be near its limit at
4157	* the time, in which case we'll skip out early. But that should be
4158	* ok; if memory pressure persists, the thread will simply be woken
4159	* up again.
4160	*/
4161	consider_zone_gc(TRUE);
4162
4163	} else {
4164	/ Woken up by vm_pageout_scan or compute_pageout_gc_throttle. /
4165	boolean_t buf_large_zfree = FALSE;
4166	boolean_t first_try = TRUE;
4167
4168	stack_collect();
4169
4170	consider_machine_collect();
4171	mbuf_drain(FALSE);
4172
4173	do {
4174	if (consider_buffer_cache_collect != NULL) {
4175	buf_large_zfree = (*consider_buffer_cache_collect)(`0`);
4176	}
4177	if (first_try == TRUE \|\| buf_large_zfree == TRUE) {
4178	/*
4179	* consider_zone_gc should be last, because the other operations
4180	* might return memory to zones.
4181	*/
4182	consider_zone_gc(FALSE);
4183	}
4184	first_try = FALSE;
4185
4186	} while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4187
4188	consider_machine_adjust();
4189	}
4190	}
4191
4192	assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4193
4194	thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)`1`);
4195	/NOTREACHED/
4196	}
4197
4198
#if VM_PAGE_BUCKETS_CHECK && VM_PAGE_FAKE_BUCKETS
/* bounds of the fake bucket range; declared only when both debug options are on */
extern vm_map_offset_t vm_page_fake_buckets_start;
extern vm_map_offset_t vm_page_fake_buckets_end;
#endif /* VM_PAGE_BUCKETS_CHECK && VM_PAGE_FAKE_BUCKETS */
4204
4205
4206
4207	void
4208	vm_set_restrictions()
4209	{
4210	host_basic_info_data_t hinfo;
4211	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4212
4213	#define BSD_HOST 1
4214	host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4215
4216	assert(hinfo.max_cpus > `0`);
4217
4218	if (hinfo.max_cpus <= `3`) {
4219	/*
4220	* on systems with a limited number of CPUS, bind the
4221	* 4 major threads that can free memory and that tend to use
4222	* a fair bit of CPU under pressured conditions to a single processor.
4223	* This insures that these threads don't hog all of the available CPUs
4224	* (important for camera launch), while allowing them to run independently
4225	* w/r to locks... the 4 threads are
4226	* vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4227	* vm_compressor_swap_trigger_thread (minor and major compactions),
4228	* memorystatus_thread (jetsams).
4229	*
4230	* the first time the thread is run, it is responsible for checking the
4231	* state of vm_restricted_to_single_processor, and if TRUE it calls
4232	* thread_bind_master... someday this should be replaced with a group
4233	* scheduling mechanism and KPI.
4234	*/
4235	vm_pageout_state.vm_restricted_to_single_processor = TRUE;
4236	} else
4237	vm_pageout_state.vm_restricted_to_single_processor = FALSE;
4238	}
4239
4240	void
4241	vm_pageout(void)
4242	{
4243	thread_t self = current_thread();
4244	thread_t thread;
4245	kern_return_t result;
4246	spl_t s;
4247
4248	/*
4249	* Set thread privileges.
4250	*/
4251	s = splsched();
4252
4253	thread_lock(self);
4254	self->options \|= TH_OPT_VMPRIV;
4255	sched_set_thread_base_priority(self, BASEPRI_VM);
4256	thread_unlock(self);
4257
4258	if (!self->reserved_stack)
4259	self->reserved_stack = self->kernel_stack;
4260
4261	if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
4262	thread_vm_bind_group_add();
4263
4264	splx(s);
4265
4266	thread_set_thread_name(current_thread(), "VM_pageout_scan");
4267
4268	/*
4269	* Initialize some paging parameters.
4270	*/
4271
4272	vm_pageout_state.vm_pressure_thread_running = FALSE;
4273	vm_pageout_state.vm_pressure_changed = FALSE;
4274	vm_pageout_state.memorystatus_purge_on_warning = `2`;
4275	vm_pageout_state.memorystatus_purge_on_urgent = `5`;
4276	vm_pageout_state.memorystatus_purge_on_critical = `8`;
4277	vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
4278	vm_pageout_state.vm_page_speculative_percentage = `5`;
4279	vm_pageout_state.vm_page_speculative_target = `0`;
4280
4281	vm_pageout_state.vm_pageout_external_iothread = THREAD_NULL;
4282	vm_pageout_state.vm_pageout_internal_iothread = THREAD_NULL;
4283
4284	vm_pageout_state.vm_pageout_swap_wait = `0`;
4285	vm_pageout_state.vm_pageout_idle_wait = `0`;
4286	vm_pageout_state.vm_pageout_empty_wait = `0`;
4287	vm_pageout_state.vm_pageout_burst_wait = `0`;
4288	vm_pageout_state.vm_pageout_deadlock_wait = `0`;
4289	vm_pageout_state.vm_pageout_deadlock_relief = `0`;
4290	vm_pageout_state.vm_pageout_burst_inactive_throttle = `0`;
4291
4292	vm_pageout_state.vm_pageout_inactive = `0`;
4293	vm_pageout_state.vm_pageout_inactive_used = `0`;
4294	vm_pageout_state.vm_pageout_inactive_clean = `0`;
4295
4296	vm_pageout_state.vm_memory_pressure = `0`;
4297	vm_pageout_state.vm_page_filecache_min = `0`;
4298	#if CONFIG_JETSAM
4299	vm_pageout_state.vm_page_filecache_min_divisor = `70`;
4300	vm_pageout_state.vm_page_xpmapped_min_divisor = `40`;
4301	#else
4302	vm_pageout_state.vm_page_filecache_min_divisor = `27`;
4303	vm_pageout_state.vm_page_xpmapped_min_divisor = `36`;
4304	#endif
4305	vm_pageout_state.vm_page_free_count_init = vm_page_free_count;
4306
4307	vm_pageout_state.vm_pageout_considered_page_last = `0`;
4308
4309	if (vm_pageout_state.vm_pageout_swap_wait == `0`)
4310	vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4311
4312	if (vm_pageout_state.vm_pageout_idle_wait == `0`)
4313	vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4314
4315	if (vm_pageout_state.vm_pageout_burst_wait == `0`)
4316	vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4317
4318	if (vm_pageout_state.vm_pageout_empty_wait == `0`)
4319	vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4320
4321	if (vm_pageout_state.vm_pageout_deadlock_wait == `0`)
4322	vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4323
4324	if (vm_pageout_state.vm_pageout_deadlock_relief == `0`)
4325	vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4326
4327	if (vm_pageout_state.vm_pageout_burst_inactive_throttle == `0`)
4328	vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4329	/*
4330	* even if we've already called vm_page_free_reserve
4331	* call it again here to insure that the targets are
4332	* accurately calculated (it uses vm_page_free_count_init)
4333	* calling it with an arg of 0 will not change the reserve
4334	* but will re-calculate free_min and free_target
4335	*/
4336	if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4337	vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4338	} else
4339	vm_page_free_reserve(`0`);
4340
4341
4342	vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4343	vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4344	vm_pageout_queue_external.pgo_laundry = `0`;
4345	vm_pageout_queue_external.pgo_idle = FALSE;
4346	vm_pageout_queue_external.pgo_busy = FALSE;
4347	vm_pageout_queue_external.pgo_throttled = FALSE;
4348	vm_pageout_queue_external.pgo_draining = FALSE;
4349	vm_pageout_queue_external.pgo_lowpriority = FALSE;
4350	vm_pageout_queue_external.pgo_tid = -`1`;
4351	vm_pageout_queue_external.pgo_inited = FALSE;
4352
4353	vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4354	vm_pageout_queue_internal.pgo_maxlaundry = `0`;
4355	vm_pageout_queue_internal.pgo_laundry = `0`;
4356	vm_pageout_queue_internal.pgo_idle = FALSE;
4357	vm_pageout_queue_internal.pgo_busy = FALSE;
4358	vm_pageout_queue_internal.pgo_throttled = FALSE;
4359	vm_pageout_queue_internal.pgo_draining = FALSE;
4360	vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4361	vm_pageout_queue_internal.pgo_tid = -`1`;
4362	vm_pageout_queue_internal.pgo_inited = FALSE;
4363
4364	/ internal pageout thread started when default pager registered first time /
4365	/ external pageout and garbage collection threads started here /
4366
4367	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4368	BASEPRI_VM,
4369	&vm_pageout_state.vm_pageout_external_iothread);
4370	if (result != KERN_SUCCESS)
4371	panic("vm_pageout_iothread_external: create failed");
4372
4373	thread_deallocate(vm_pageout_state.vm_pageout_external_iothread);
4374
4375	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4376	BASEPRI_DEFAULT,
4377	&thread);
4378	if (result != KERN_SUCCESS)
4379	panic("vm_pageout_garbage_collect: create failed");
4380
4381	thread_deallocate(thread);
4382
4383	#if VM_PRESSURE_EVENTS
4384	result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4385	BASEPRI_DEFAULT,
4386	&thread);
4387
4388	if (result != KERN_SUCCESS)
4389	panic("vm_pressure_thread: create failed");
4390
4391	thread_deallocate(thread);
4392	#endif
4393
4394	vm_object_reaper_init();
4395
4396
4397	bzero(&vm_config, sizeof(vm_config));
4398
4399	switch(vm_compressor_mode) {
4400
4401	case VM_PAGER_DEFAULT:
4402	printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4403
4404	case VM_PAGER_COMPRESSOR_WITH_SWAP:
4405	vm_config.compressor_is_present = TRUE;
4406	vm_config.swap_is_present = TRUE;
4407	vm_config.compressor_is_active = TRUE;
4408	vm_config.swap_is_active = TRUE;
4409	break;
4410
4411	case VM_PAGER_COMPRESSOR_NO_SWAP:
4412	vm_config.compressor_is_present = TRUE;
4413	vm_config.swap_is_present = TRUE;
4414	vm_config.compressor_is_active = TRUE;
4415	break;
4416
4417	case VM_PAGER_FREEZER_DEFAULT:
4418	printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4419
4420	case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4421	vm_config.compressor_is_present = TRUE;
4422	vm_config.swap_is_present = TRUE;
4423	break;
4424
4425	case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4426	vm_config.compressor_is_present = TRUE;
4427	vm_config.swap_is_present = TRUE;
4428	vm_config.compressor_is_active = TRUE;
4429	vm_config.freezer_swap_is_active = TRUE;
4430	break;
4431
4432	case VM_PAGER_NOT_CONFIGURED:
4433	break;
4434
4435	default:
4436	printf("unknown compressor mode - %x\n", vm_compressor_mode);
4437	break;
4438	}
4439	if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4440	vm_compressor_pager_init();
4441
4442	#if VM_PRESSURE_EVENTS
4443	vm_pressure_events_enabled = TRUE;
4444	#endif /* VM_PRESSURE_EVENTS */
4445
4446	#if CONFIG_PHANTOM_CACHE
4447	vm_phantom_cache_init();
4448	#endif
4449	#if VM_PAGE_BUCKETS_CHECK
4450	#if VM_PAGE_FAKE_BUCKETS
4451	printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4452	(uint64_t) vm_page_fake_buckets_start,
4453	(uint64_t) vm_page_fake_buckets_end);
4454	pmap_protect(kernel_pmap,
4455	vm_page_fake_buckets_start,
4456	vm_page_fake_buckets_end,
4457	VM_PROT_READ);
4458	// (char ) vm_page_fake_buckets_start = 'x'; / panic! /
4459	#endif /* VM_PAGE_FAKE_BUCKETS */
4460	#endif /* VM_PAGE_BUCKETS_CHECK */
4461
4462	#if VM_OBJECT_TRACKING
4463	vm_object_tracking_init();
4464	#endif /* VM_OBJECT_TRACKING */
4465
4466	vm_tests();
4467
4468	vm_pageout_continue();
4469
4470	/*
4471	* Unreached code!
4472	*
4473	* The vm_pageout_continue() call above never returns, so the code below is never
4474	* executed. We take advantage of this to declare several DTrace VM related probe
4475	* points that our kernel doesn't have an analog for. These are probe points that
4476	* exist in Solaris and are in the DTrace documentation, so people may have written
4477	* scripts that use them. Declaring the probe points here means their scripts will
4478	* compile and execute which we want for portability of the scripts, but since this
4479	* section of code is never reached, the probe points will simply never fire. Yes,
4480	* this is basically a hack. The problem is the DTrace probe points were chosen with
4481	* Solaris specific VM events in mind, not portability to different VM implementations.
4482	*/
4483
4484	DTRACE_VM2(execfree, int, `1`, (uint64_t *), NULL);
4485	DTRACE_VM2(execpgin, int, `1`, (uint64_t *), NULL);
4486	DTRACE_VM2(execpgout, int, `1`, (uint64_t *), NULL);
4487	DTRACE_VM2(pgswapin, int, `1`, (uint64_t *), NULL);
4488	DTRACE_VM2(pgswapout, int, `1`, (uint64_t *), NULL);
4489	DTRACE_VM2(swapin, int, `1`, (uint64_t *), NULL);
4490	DTRACE_VM2(swapout, int, `1`, (uint64_t *), NULL);
4491	/NOTREACHED/
4492	}
4493
4494
4495
4496	kern_return_t
4497	vm_pageout_internal_start(void)
4498	{
4499	kern_return_t result;
4500	int i;
4501	host_basic_info_data_t hinfo;
4502
4503	assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4504
4505	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4506	#define BSD_HOST 1
4507	host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4508
4509	assert(hinfo.max_cpus > `0`);
4510
4511	#if CONFIG_EMBEDDED
4512	vm_pageout_state.vm_compressor_thread_count = `1`;
4513	#else
4514	if (hinfo.max_cpus > `4`)
4515	vm_pageout_state.vm_compressor_thread_count = `2`;
4516	else
4517	vm_pageout_state.vm_compressor_thread_count = `1`;
4518	#endif
4519	PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
4520	sizeof(vm_pageout_state.vm_compressor_thread_count));
4521
4522	if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus)
4523	vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - `1`;
4524	if (vm_pageout_state.vm_compressor_thread_count <= `0`)
4525	vm_pageout_state.vm_compressor_thread_count = `1`;
4526	else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4527	vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4528
4529	vm_pageout_queue_internal.pgo_maxlaundry = (vm_pageout_state.vm_compressor_thread_count * `4`) * VM_PAGE_LAUNDRY_MAX;
4530
4531	PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4532
4533	for (i = `0`; i < vm_pageout_state.vm_compressor_thread_count; i++) {
4534	ciq[i].id = i;
4535	ciq[i].q = &vm_pageout_queue_internal;
4536	ciq[i].current_chead = NULL;
4537	ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4538
4539	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i],
4540	BASEPRI_VM, &vm_pageout_state.vm_pageout_internal_iothread);
4541
4542	if (result == KERN_SUCCESS)
4543	thread_deallocate(vm_pageout_state.vm_pageout_internal_iothread);
4544	else
4545	break;
4546	}
4547	return result;
4548	}
4549
4550	#if CONFIG_IOSCHED
4551	/*
4552	* To support I/O Expedite for compressed files we mark the upls with special flags.
4553	* The way decmpfs works is that we create a big upl which marks all the pages needed to
4554	* represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4555	* then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4556	* being held in the big original UPL. We mark each of these smaller UPLs with the flag
4557	* UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4558	* decmp_io_upl field (in the upl structure). This link is protected in the forward direction
 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
4560	* unless the real I/O upl is being destroyed).
4561	*/
4562
4563
4564	static void
4565	upl_set_decmp_info(upl_t upl, upl_t src_upl)
4566	{
4567	assert((src_upl->flags & UPL_DECMP_REQ) != `0`);
4568
4569	upl_lock(src_upl);
4570	if (src_upl->decmp_io_upl) {
4571	/*
4572	* If there is already an alive real I/O UPL, ignore this new UPL.
4573	* This case should rarely happen and even if it does, it just means
4574	* that we might issue a spurious expedite which the driver is expected
4575	* to handle.
4576	*/
4577	upl_unlock(src_upl);
4578	return;
4579	}
4580	src_upl->decmp_io_upl = (void *)upl;
4581	src_upl->ref_count++;
4582
4583	upl->flags \|= UPL_DECMP_REAL_IO;
4584	upl->decmp_io_upl = (void *)src_upl;
4585	upl_unlock(src_upl);
4586	}
4587	#endif /* CONFIG_IOSCHED */
4588
/* Non-zero when UPL debugging is on; the initial value follows UPL_DEBUG. */
int	upl_debug_enabled =
#if UPL_DEBUG
	1
#else
	0
#endif
	;
4594
4595	static upl_t
4596	upl_create(int type, int flags, upl_size_t size)
4597	{
4598	upl_t upl;
4599	vm_size_t page_field_size = `0`;
4600	int upl_flags = `0`;
4601	vm_size_t upl_size = sizeof(struct upl);
4602
4603	size = round_page_32(size);
4604
4605	if (type & UPL_CREATE_LITE) {
4606	page_field_size = (atop(size) + `7`) >> `3`;
4607	page_field_size = (page_field_size + `3`) & `0xFFFFFFFC`;
4608
4609	upl_flags \|= UPL_LITE;
4610	}
4611	if (type & UPL_CREATE_INTERNAL) {
4612	upl_size += sizeof(struct upl_page_info) * atop(size);
4613
4614	upl_flags \|= UPL_INTERNAL;
4615	}
4616	upl = (upl_t)kalloc(upl_size + page_field_size);
4617
4618	if (page_field_size)
4619	bzero((char *)upl + upl_size, page_field_size);
4620
4621	upl->flags = upl_flags \| flags;
4622	upl->kaddr = (vm_offset_t)`0`;
4623	upl->size = `0`;
4624	upl->map_object = NULL;
4625	upl->ref_count = `1`;
4626	upl->ext_ref_count = `0`;
4627	upl->highest_page = `0`;
4628	upl_lock_init(upl);
4629	upl->vector_upl = NULL;
4630	upl->associated_upl = NULL;
4631	upl->upl_iodone = NULL;
4632	#if CONFIG_IOSCHED
4633	if (type & UPL_CREATE_IO_TRACKING) {
4634	upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4635	}
4636
4637	upl->upl_reprio_info = `0`;
4638	upl->decmp_io_upl = `0`;
4639	if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4640	/ Only support expedite on internal UPLs /
4641	thread_t curthread = current_thread();
4642	upl->upl_reprio_info = (uint64_t )kalloc(sizeof(uint64_t) atop(size));
4643	bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4644	upl->flags \|= UPL_EXPEDITE_SUPPORTED;
4645	if (curthread->decmp_upl != NULL)
4646	upl_set_decmp_info(upl, curthread->decmp_upl);
4647	}
4648	#endif
4649	#if CONFIG_IOSCHED \|\| UPL_DEBUG
4650	if ((type & UPL_CREATE_IO_TRACKING) \|\| upl_debug_enabled) {
4651	upl->upl_creator = current_thread();
4652	upl->uplq.next = `0`;
4653	upl->uplq.prev = `0`;
4654	upl->flags \|= UPL_TRACKED_BY_OBJECT;
4655	}
4656	#endif
4657
4658	#if UPL_DEBUG
4659	upl->ubc_alias1 = `0`;
4660	upl->ubc_alias2 = `0`;
4661
4662	upl->upl_state = `0`;
4663	upl->upl_commit_index = `0`;
4664	bzero(&upl->upl_commit_records[`0`], sizeof(upl->upl_commit_records));
4665
4666	(void) OSBacktrace(&upl->upl_create_retaddr[`0`], UPL_DEBUG_STACK_FRAMES);
4667	#endif /* UPL_DEBUG */
4668
4669	return(upl);
4670	}
4671
4672	static void
4673	upl_destroy(upl_t upl)
4674	{
4675	int page_field_size; / bit field in word size buf /
4676	int size;
4677
4678	if (upl->ext_ref_count) {
4679	panic("upl(%p) ext_ref_count", upl);
4680	}
4681
4682	#if CONFIG_IOSCHED
4683	if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4684	upl_t src_upl;
4685	src_upl = upl->decmp_io_upl;
4686	assert((src_upl->flags & UPL_DECMP_REQ) != `0`);
4687	upl_lock(src_upl);
4688	src_upl->decmp_io_upl = NULL;
4689	upl_unlock(src_upl);
4690	upl_deallocate(src_upl);
4691	}
4692	#endif /* CONFIG_IOSCHED */
4693
4694	#if CONFIG_IOSCHED \|\| UPL_DEBUG
4695	if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4696	vm_object_t object;
4697
4698	if (upl->flags & UPL_SHADOWED) {
4699	object = upl->map_object->shadow;
4700	} else {
4701	object = upl->map_object;
4702	}
4703
4704	vm_object_lock(object);
4705	queue_remove(&object->uplq, upl, upl_t, uplq);
4706	vm_object_activity_end(object);
4707	vm_object_collapse(object, `0`, TRUE);
4708	vm_object_unlock(object);
4709	}
4710	#endif
4711	/*
4712	* drop a reference on the map_object whether or
4713	* not a pageout object is inserted
4714	*/
4715	if (upl->flags & UPL_SHADOWED)
4716	vm_object_deallocate(upl->map_object);
4717
4718	if (upl->flags & UPL_DEVICE_MEMORY)
4719	size = PAGE_SIZE;
4720	else
4721	size = upl->size;
4722	page_field_size = `0`;
4723
4724	if (upl->flags & UPL_LITE) {
4725	page_field_size = ((size/PAGE_SIZE) + `7`) >> `3`;
4726	page_field_size = (page_field_size + `3`) & `0xFFFFFFFC`;
4727	}
4728	upl_lock_destroy(upl);
4729	upl->vector_upl = (vector_upl_t) `0xfeedbeef`;
4730
4731	#if CONFIG_IOSCHED
4732	if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4733	kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4734	#endif
4735
4736	if (upl->flags & UPL_INTERNAL) {
4737	kfree(upl,
4738	sizeof(struct upl) +
4739	(sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4740	+ page_field_size);
4741	} else {
4742	kfree(upl, sizeof(struct upl) + page_field_size);
4743	}
4744	}
4745
4746	void
4747	upl_deallocate(upl_t upl)
4748	{
4749	upl_lock(upl);
4750
4751	if (--upl->ref_count == `0`) {
4752	if(vector_upl_is_valid(upl))
4753	vector_upl_deallocate(upl);
4754	upl_unlock(upl);
4755
4756	if (upl->upl_iodone)
4757	upl_callout_iodone(upl);
4758
4759	upl_destroy(upl);
4760	} else
4761	upl_unlock(upl);
4762	}
4763
4764	#if CONFIG_IOSCHED
4765	void
4766	upl_mark_decmp(upl_t upl)
4767	{
4768	if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4769	upl->flags \|= UPL_DECMP_REQ;
4770	upl->upl_creator->decmp_upl = (void *)upl;
4771	}
4772	}
4773
4774	void
4775	upl_unmark_decmp(upl_t upl)
4776	{
4777	if(upl && (upl->flags & UPL_DECMP_REQ)) {
4778	upl->upl_creator->decmp_upl = NULL;
4779	}
4780	}
4781
4782	#endif /* CONFIG_IOSCHED */
4783
4784	#define VM_PAGE_Q_BACKING_UP(q) \
4785	((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4786
4787	boolean_t must_throttle_writes(void);
4788
4789	boolean_t
4790	must_throttle_writes()
4791	{
4792	if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4793	vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * `6`) / `10`)
4794	return (TRUE);
4795
4796	return (FALSE);
4797	}
4798
4799
4800	/*
4801	* Routine: vm_object_upl_request
4802	* Purpose:
4803	* Cause the population of a portion of a vm_object.
4804	* Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
4806	* A page list structure, listing the physical pages
4807	* will be returned upon request.
4808	* This function is called by the file system or any other
4809	* supplier of backing store to a pager.
4810	* IMPORTANT NOTE: The caller must still respect the relationship
4811	* between the vm_object and its backing memory object. The
4812	* caller MUST NOT substitute changes in the backing file
4813	* without first doing a memory_object_lock_request on the
 *		target range unless it is known that the pages are not
4815	* shared with another entity at the pager level.
4816	* Copy_in_to:
4817	* if a page list structure is present
4818	* return the mapped physical pages, where a
4819	* page is not present, return a non-initialized
4820	* one. If the no_sync bit is turned on, don't
4821	* call the pager unlock to synchronize with other
4822	* possible copies of the page. Leave pages busy
4823	* in the original object, if a page list structure
4824	* was specified. When a commit of the page list
4825	* pages is done, the dirty bit will be set for each one.
4826	* Copy_out_from:
4827	* If a page list structure is present, return
4828	* all mapped pages. Where a page does not exist
4829	* map a zero filled one. Leave pages busy in
4830	* the original object. If a page list structure
4831	* is not specified, this call is a no-op.
4832	*
4833	* Note: access of default pager objects has a rather interesting
4834	* twist. The caller of this routine, presumably the file system
4835	* page cache handling code, will never actually make a request
4836	* against a default pager backed object. Only the default
4837	* pager will make requests on backing store related vm_objects
4838	* In this way the default pager can maintain the relationship
4839	* between backing store files (abstract memory objects) and
4840	* the vm_objects (cache objects), they support.
4841	*
4842	*/
4843
4844	__private_extern__ kern_return_t
4845	vm_object_upl_request(
4846	vm_object_t object,
4847	vm_object_offset_t offset,
4848	upl_size_t size,
4849	upl_t *upl_ptr,
4850	upl_page_info_array_t user_page_list,
4851	unsigned int *page_list_count,
4852	upl_control_flags_t cntrl_flags,
4853	vm_tag_t tag)
4854	{
4855	vm_page_t dst_page = VM_PAGE_NULL;
4856	vm_object_offset_t dst_offset;
4857	upl_size_t xfer_size;
4858	unsigned int size_in_pages;
4859	boolean_t dirty;
4860	boolean_t hw_dirty;
4861	upl_t upl = NULL;
4862	unsigned int entry;
4863	vm_page_t alias_page = NULL;
4864	int refmod_state = `0`;
4865	wpl_array_t lite_list = NULL;
4866	vm_object_t last_copy_object;
4867	struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
4868	struct vm_page_delayed_work *dwp;
4869	int dw_count;
4870	int dw_limit;
4871	int io_tracking_flag = `0`;
4872	int grab_options;
4873	int page_grab_count = `0`;
4874	ppnum_t phys_page;
4875	pmap_flush_context pmap_flush_context_storage;
4876	boolean_t pmap_flushes_delayed = FALSE;
4877
4878	if (cntrl_flags & ~UPL_VALID_FLAGS) {
4879	/*
4880	* For forward compatibility's sake,
4881	* reject any unknown flag.
4882	*/
4883	return KERN_INVALID_VALUE;
4884	}
4885	if ( (!object->internal) && (object->paging_offset != `0`) )
4886	panic("vm_object_upl_request: external object with non-zero paging offset\n");
4887	if (object->phys_contiguous)
4888	panic("vm_object_upl_request: contiguous object specified\n");
4889
4890	VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, `0`, `0`);
4891
4892	if (size > MAX_UPL_SIZE_BYTES)
4893	size = MAX_UPL_SIZE_BYTES;
4894
4895	if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
4896	*page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
4897
4898	#if CONFIG_IOSCHED \|\| UPL_DEBUG
4899	if (object->io_tracking \|\| upl_debug_enabled)
4900	io_tracking_flag \|= UPL_CREATE_IO_TRACKING;
4901	#endif
4902	#if CONFIG_IOSCHED
4903	if (object->io_tracking)
4904	io_tracking_flag \|= UPL_CREATE_EXPEDITE_SUP;
4905	#endif
4906
4907	if (cntrl_flags & UPL_SET_INTERNAL) {
4908	if (cntrl_flags & UPL_SET_LITE) {
4909
4910	upl = upl_create(UPL_CREATE_INTERNAL \| UPL_CREATE_LITE \| io_tracking_flag, `0`, size);
4911
4912	user_page_list = (upl_page_info_t ) (((uintptr_t)upl) + sizeof(struct* upl));
4913	lite_list = (wpl_array_t)
4914	(((uintptr_t)user_page_list) +
4915	((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4916	if (size == `0`) {
4917	user_page_list = NULL;
4918	lite_list = NULL;
4919	}
4920	} else {
4921	upl = upl_create(UPL_CREATE_INTERNAL \| io_tracking_flag, `0`, size);
4922
4923	user_page_list = (upl_page_info_t ) (((uintptr_t)upl) + sizeof(struct* upl));
4924	if (size == `0`) {
4925	user_page_list = NULL;
4926	}
4927	}
4928	} else {
4929	if (cntrl_flags & UPL_SET_LITE) {
4930
4931	upl = upl_create(UPL_CREATE_EXTERNAL \| UPL_CREATE_LITE \| io_tracking_flag, `0`, size);
4932
4933	lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4934	if (size == `0`) {
4935	lite_list = NULL;
4936	}
4937	} else {
4938	upl = upl_create(UPL_CREATE_EXTERNAL \| io_tracking_flag, `0`, size);
4939	}
4940	}
4941	*upl_ptr = upl;
4942
4943	if (user_page_list)
4944	user_page_list[`0`].device = FALSE;
4945
4946	if (cntrl_flags & UPL_SET_LITE) {
4947	upl->map_object = object;
4948	} else {
4949	upl->map_object = vm_object_allocate(size);
4950	/*
4951	* No neeed to lock the new object: nobody else knows
4952	* about it yet, so it's all ours so far.
4953	*/
4954	upl->map_object->shadow = object;
4955	upl->map_object->pageout = TRUE;
4956	upl->map_object->can_persist = FALSE;
4957	upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4958	upl->map_object->vo_shadow_offset = offset;
4959	upl->map_object->wimg_bits = object->wimg_bits;
4960
4961	VM_PAGE_GRAB_FICTITIOUS(alias_page);
4962
4963	upl->flags \|= UPL_SHADOWED;
4964	}
4965	if (cntrl_flags & UPL_FOR_PAGEOUT)
4966	upl->flags \|= UPL_PAGEOUT;
4967
4968	vm_object_lock(object);
4969	vm_object_activity_begin(object);
4970
4971	grab_options = `0`;
4972	#if CONFIG_SECLUDED_MEMORY
4973	if (object->can_grab_secluded) {
4974	grab_options \|= VM_PAGE_GRAB_SECLUDED;
4975	}
4976	#endif /* CONFIG_SECLUDED_MEMORY */
4977
4978	/*
4979	* we can lock in the paging_offset once paging_in_progress is set
4980	*/
4981	upl->size = size;
4982	upl->offset = offset + object->paging_offset;
4983
4984	#if CONFIG_IOSCHED \|\| UPL_DEBUG
4985	if (object->io_tracking \|\| upl_debug_enabled) {
4986	vm_object_activity_begin(object);
4987	queue_enter(&object->uplq, upl, upl_t, uplq);
4988	}
4989	#endif
4990	if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
4991	/*
4992	* Honor copy-on-write obligations
4993	*
4994	* The caller is gathering these pages and
4995	* might modify their contents. We need to
4996	* make sure that the copy object has its own
4997	* private copies of these pages before we let
4998	* the caller modify them.
4999	*/
5000	vm_object_update(object,
5001	offset,
5002	size,
5003	NULL,
5004	NULL,
5005	FALSE, / should_return /
5006	MEMORY_OBJECT_COPY_SYNC,
5007	VM_PROT_NO_CHANGE);
5008
5009	VM_PAGEOUT_DEBUG(upl_cow, `1`);
5010	VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT));
5011	}
5012	/*
5013	* remember which copy object we synchronized with
5014	*/
5015	last_copy_object = object->copy;
5016	entry = `0`;
5017
5018	xfer_size = size;
5019	dst_offset = offset;
5020	size_in_pages = size / PAGE_SIZE;
5021
5022	dwp = &dw_array[`0`];
5023	dw_count = `0`;
5024	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5025
5026	if (vm_page_free_count > (vm_page_free_target + size_in_pages) \|\|
5027	object->resident_page_count < ((MAX_UPL_SIZE_BYTES * `2`) >> PAGE_SHIFT))
5028	object->scan_collisions = `0`;
5029
5030	if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5031	boolean_t isSSD = FALSE;
5032
5033	#if CONFIG_EMBEDDED
5034	isSSD = TRUE;
5035	#else
5036	vnode_pager_get_isSSD(object->pager, &isSSD);
5037	#endif
5038	vm_object_unlock(object);
5039
5040	OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5041
5042	if (isSSD == TRUE)
5043	delay(`1000` * size_in_pages);
5044	else
5045	delay(`5000` * size_in_pages);
5046	OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5047
5048	vm_object_lock(object);
5049	}
5050
5051	while (xfer_size) {
5052
5053	dwp->dw_mask = `0`;
5054
5055	if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5056	vm_object_unlock(object);
5057	VM_PAGE_GRAB_FICTITIOUS(alias_page);
5058	vm_object_lock(object);
5059	}
5060	if (cntrl_flags & UPL_COPYOUT_FROM) {
5061	upl->flags \|= UPL_PAGE_SYNC_DONE;
5062
5063	if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) \|\|
5064	dst_page->vmp_fictitious \|\|
5065	dst_page->vmp_absent \|\|
5066	dst_page->vmp_error \|\|
5067	dst_page->vmp_cleaning \|\|
5068	(VM_PAGE_WIRED(dst_page))) {
5069
5070	if (user_page_list)
5071	user_page_list[entry].phys_addr = `0`;
5072
5073	goto try_next_page;
5074	}
5075	phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5076
5077	/*
5078	* grab this up front...
5079	* a high percentange of the time we're going to
5080	* need the hardware modification state a bit later
5081	* anyway... so we can eliminate an extra call into
5082	* the pmap layer by grabbing it here and recording it
5083	*/
5084	if (dst_page->vmp_pmapped)
5085	refmod_state = pmap_get_refmod(phys_page);
5086	else
5087	refmod_state = `0`;
5088
5089	if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5090	/*
5091	* page is on inactive list and referenced...
5092	* reactivate it now... this gets it out of the
5093	* way of vm_pageout_scan which would have to
5094	* reactivate it upon tripping over it
5095	*/
5096	dwp->dw_mask \|= DW_vm_page_activate;
5097	}
5098	if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5099	/*
5100	* we're only asking for DIRTY pages to be returned
5101	*/
5102	if (dst_page->vmp_laundry \|\| !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5103	/*
5104	* if we were the page stolen by vm_pageout_scan to be
5105	* cleaned (as opposed to a buddy being clustered in
5106	* or this request is not being driven by a PAGEOUT cluster
5107	* then we only need to check for the page being dirty or
5108	* precious to decide whether to return it
5109	*/
5110	if (dst_page->vmp_dirty \|\| dst_page->vmp_precious \|\| (refmod_state & VM_MEM_MODIFIED))
5111	goto check_busy;
5112	goto dont_return;
5113	}
5114	/*
5115	* this is a request for a PAGEOUT cluster and this page
5116	* is merely along for the ride as a 'buddy'... not only
5117	* does it have to be dirty to be returned, but it also
5118	* can't have been referenced recently...
5119	*/
5120	if ( (hibernate_cleaning_in_progress == TRUE \|\|
5121	(!((refmod_state & VM_MEM_REFERENCED) \|\| dst_page->vmp_reference) \|\|
5122	(dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5123	((refmod_state & VM_MEM_MODIFIED) \|\| dst_page->vmp_dirty \|\| dst_page->vmp_precious) ) {
5124	goto check_busy;
5125	}
5126	dont_return:
5127	/*
5128	* if we reach here, we're not to return
5129	* the page... go on to the next one
5130	*/
5131	if (dst_page->vmp_laundry == TRUE) {
5132	/*
5133	* if we get here, the page is not 'cleaning' (filtered out above).
5134	* since it has been referenced, remove it from the laundry
5135	* so we don't pay the cost of an I/O to clean a page
5136	* we're just going to take back
5137	*/
5138	vm_page_lockspin_queues();
5139
5140	vm_pageout_steal_laundry(dst_page, TRUE);
5141	vm_page_activate(dst_page);
5142
5143	vm_page_unlock_queues();
5144	}
5145	if (user_page_list)
5146	user_page_list[entry].phys_addr = `0`;
5147
5148	goto try_next_page;
5149	}
5150	check_busy:
5151	if (dst_page->vmp_busy) {
5152	if (cntrl_flags & UPL_NOBLOCK) {
5153	if (user_page_list)
5154	user_page_list[entry].phys_addr = `0`;
5155	dwp->dw_mask = `0`;
5156
5157	goto try_next_page;
5158	}
5159	/*
5160	* someone else is playing with the
5161	* page. We will have to wait.
5162	*/
5163	PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5164
5165	continue;
5166	}
5167	if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5168
5169	vm_page_lockspin_queues();
5170
5171	if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5172	/*
5173	* we've buddied up a page for a clustered pageout
5174	* that has already been moved to the pageout
5175	* queue by pageout_scan... we need to remove
5176	* it from the queue and drop the laundry count
5177	* on that queue
5178	*/
5179	vm_pageout_throttle_up(dst_page);
5180	}
5181	vm_page_unlock_queues();
5182	}
5183	hw_dirty = refmod_state & VM_MEM_MODIFIED;
5184	dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5185
5186	if (phys_page > upl->highest_page)
5187	upl->highest_page = phys_page;
5188
5189	assert (!pmap_is_noencrypt(phys_page));
5190
5191	if (cntrl_flags & UPL_SET_LITE) {
5192	unsigned int pg_num;
5193
5194	pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5195	assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5196	lite_list[pg_num>>`5`] \|= `1` << (pg_num & `31`);
5197
5198	if (hw_dirty) {
5199	if (pmap_flushes_delayed == FALSE) {
5200	pmap_flush_context_init(&pmap_flush_context_storage);
5201	pmap_flushes_delayed = TRUE;
5202	}
5203	pmap_clear_refmod_options(phys_page,
5204	VM_MEM_MODIFIED,
5205	PMAP_OPTIONS_NOFLUSH \| PMAP_OPTIONS_CLEAR_WRITE,
5206	&pmap_flush_context_storage);
5207	}
5208
5209	/*
5210	* Mark original page as cleaning
5211	* in place.
5212	*/
5213	dst_page->vmp_cleaning = TRUE;
5214	dst_page->vmp_precious = FALSE;
5215	} else {
5216	/*
5217	* use pageclean setup, it is more
5218	* convenient even for the pageout
5219	* cases here
5220	*/
5221	vm_object_lock(upl->map_object);
5222	vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5223	vm_object_unlock(upl->map_object);
5224
5225	alias_page->vmp_absent = FALSE;
5226	alias_page = NULL;
5227	}
5228	if (dirty) {
5229	SET_PAGE_DIRTY(dst_page, FALSE);
5230	} else {
5231	dst_page->vmp_dirty = FALSE;
5232	}
5233
5234	if (!dirty)
5235	dst_page->vmp_precious = TRUE;
5236
5237	if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5238	if ( !VM_PAGE_WIRED(dst_page))
5239	dst_page->vmp_free_when_done = TRUE;
5240	}
5241	} else {
5242	if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5243	/*
5244	* Honor copy-on-write obligations
5245	*
5246	* The copy object has changed since we
5247	* last synchronized for copy-on-write.
5248	* Another copy object might have been
5249	* inserted while we released the object's
5250	* lock. Since someone could have seen the
5251	* original contents of the remaining pages
5252	* through that new object, we have to
5253	* synchronize with it again for the remaining
5254	* pages only. The previous pages are "busy"
5255	* so they can not be seen through the new
5256	* mapping. The new mapping will see our
5257	* upcoming changes for those previous pages,
5258	* but that's OK since they couldn't see what
5259	* was there before. It's just a race anyway
5260	* and there's no guarantee of consistency or
5261	* atomicity. We just don't want new mappings
5262	* to see both the before and after pages.
5263	*/
5264	if (object->copy != VM_OBJECT_NULL) {
5265	vm_object_update(
5266	object,
5267	dst_offset,/ current offset /
5268	xfer_size, / remaining size /
5269	NULL,
5270	NULL,
5271	FALSE, / should_return /
5272	MEMORY_OBJECT_COPY_SYNC,
5273	VM_PROT_NO_CHANGE);
5274
5275	VM_PAGEOUT_DEBUG(upl_cow_again, `1`);
5276	VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT));
5277	}
5278	/*
5279	* remember the copy object we synced with
5280	*/
5281	last_copy_object = object->copy;
5282	}
5283	dst_page = vm_page_lookup(object, dst_offset);
5284
5285	if (dst_page != VM_PAGE_NULL) {
5286
5287	if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5288	/*
5289	* skip over pages already present in the cache
5290	*/
5291	if (user_page_list)
5292	user_page_list[entry].phys_addr = `0`;
5293
5294	goto try_next_page;
5295	}
5296	if (dst_page->vmp_fictitious) {
5297	panic("need corner case for fictitious page");
5298	}
5299
5300	if (dst_page->vmp_busy \|\| dst_page->vmp_cleaning) {
5301	/*
5302	* someone else is playing with the
5303	* page. We will have to wait.
5304	*/
5305	PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5306
5307	continue;
5308	}
5309	if (dst_page->vmp_laundry)
5310	vm_pageout_steal_laundry(dst_page, FALSE);
5311	} else {
5312	if (object->private) {
5313	/*
5314	* This is a nasty wrinkle for users
5315	* of upl who encounter device or
5316	* private memory however, it is
5317	* unavoidable, only a fault can
5318	* resolve the actual backing
5319	* physical page by asking the
5320	* backing device.
5321	*/
5322	if (user_page_list)
5323	user_page_list[entry].phys_addr = `0`;
5324
5325	goto try_next_page;
5326	}
5327	if (object->scan_collisions) {
5328	/*
5329	* the pageout_scan thread is trying to steal
5330	* pages from this object, but has run into our
5331	* lock... grab 2 pages from the head of the object...
5332	* the first is freed on behalf of pageout_scan, the
5333	* 2nd is for our own use... we use vm_object_page_grab
5334	* in both cases to avoid taking pages from the free
5335	* list since we are under memory pressure and our
5336	* lock on this object is getting in the way of
5337	* relieving it
5338	*/
5339	dst_page = vm_object_page_grab(object);
5340
5341	if (dst_page != VM_PAGE_NULL)
5342	vm_page_release(dst_page,
5343	FALSE);
5344
5345	dst_page = vm_object_page_grab(object);
5346	}
5347	if (dst_page == VM_PAGE_NULL) {
5348	/*
5349	* need to allocate a page
5350	*/
5351	dst_page = vm_page_grab_options(grab_options);
5352	if (dst_page != VM_PAGE_NULL)
5353	page_grab_count++;
5354	}
5355	if (dst_page == VM_PAGE_NULL) {
5356	if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT \| UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT \| UPL_NOBLOCK)) {
5357	/*
5358	* we don't want to stall waiting for pages to come onto the free list
5359	* while we're already holding absent pages in this UPL
5360	* the caller will deal with the empty slots
5361	*/
5362	if (user_page_list)
5363	user_page_list[entry].phys_addr = `0`;
5364
5365	goto try_next_page;
5366	}
5367	/*
5368	* no pages available... wait
5369	* then try again for the same
5370	* offset...
5371	*/
5372	vm_object_unlock(object);
5373
5374	OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5375
5376	VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, `0`, `0`, `0`);
5377
5378	VM_PAGE_WAIT();
5379	OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5380
5381	VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, `0`, `0`, `0`);
5382
5383	vm_object_lock(object);
5384
5385	continue;
5386	}
5387	vm_page_insert(dst_page, object, dst_offset);
5388
5389	dst_page->vmp_absent = TRUE;
5390	dst_page->vmp_busy = FALSE;
5391
5392	if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5393	/*
5394	* if UPL_RET_ONLY_ABSENT was specified,
5395	* than we're definitely setting up a
5396	* upl for a clustered read/pagein
5397	* operation... mark the pages as clustered
5398	* so upl_commit_range can put them on the
5399	* speculative list
5400	*/
5401	dst_page->vmp_clustered = TRUE;
5402
5403	if ( !(cntrl_flags & UPL_FILE_IO))
5404	VM_STAT_INCR(pageins);
5405	}
5406	}
5407	phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5408
5409	dst_page->vmp_overwriting = TRUE;
5410
5411	if (dst_page->vmp_pmapped) {
5412	if ( !(cntrl_flags & UPL_FILE_IO))
5413	/*
5414	* eliminate all mappings from the
5415	* original object and its prodigy
5416	*/
5417	refmod_state = pmap_disconnect(phys_page);
5418	else
5419	refmod_state = pmap_get_refmod(phys_page);
5420	} else
5421	refmod_state = `0`;
5422
5423	hw_dirty = refmod_state & VM_MEM_MODIFIED;
5424	dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5425
5426	if (cntrl_flags & UPL_SET_LITE) {
5427	unsigned int pg_num;
5428
5429	pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5430	assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5431	lite_list[pg_num>>`5`] \|= `1` << (pg_num & `31`);
5432
5433	if (hw_dirty)
5434	pmap_clear_modify(phys_page);
5435
5436	/*
5437	* Mark original page as cleaning
5438	* in place.
5439	*/
5440	dst_page->vmp_cleaning = TRUE;
5441	dst_page->vmp_precious = FALSE;
5442	} else {
5443	/*
5444	* use pageclean setup, it is more
5445	* convenient even for the pageout
5446	* cases here
5447	*/
5448	vm_object_lock(upl->map_object);
5449	vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5450	vm_object_unlock(upl->map_object);
5451
5452	alias_page->vmp_absent = FALSE;
5453	alias_page = NULL;
5454	}
5455
5456	if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5457	upl->flags &= ~UPL_CLEAR_DIRTY;
5458	upl->flags \|= UPL_SET_DIRTY;
5459	dirty = TRUE;
5460	upl->flags \|= UPL_SET_DIRTY;
5461	} else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5462	/*
5463	* clean in place for read implies
5464	* that a write will be done on all
5465	* the pages that are dirty before
5466	* a upl commit is done. The caller
5467	* is obligated to preserve the
5468	* contents of all pages marked dirty
5469	*/
5470	upl->flags \|= UPL_CLEAR_DIRTY;
5471	}
5472	dst_page->vmp_dirty = dirty;
5473
5474	if (!dirty)
5475	dst_page->vmp_precious = TRUE;
5476
5477	if ( !VM_PAGE_WIRED(dst_page)) {
5478	/*
5479	* deny access to the target page while
5480	* it is being worked on
5481	*/
5482	dst_page->vmp_busy = TRUE;
5483	} else
5484	dwp->dw_mask \|= DW_vm_page_wire;
5485
5486	/*
5487	* We might be about to satisfy a fault which has been
5488	* requested. So no need for the "restart" bit.
5489	*/
5490	dst_page->vmp_restart = FALSE;
5491	if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5492	/*
5493	* expect the page to be used
5494	*/
5495	dwp->dw_mask \|= DW_set_reference;
5496	}
5497	if (cntrl_flags & UPL_PRECIOUS) {
5498	if (object->internal) {
5499	SET_PAGE_DIRTY(dst_page, FALSE);
5500	dst_page->vmp_precious = FALSE;
5501	} else {
5502	dst_page->vmp_precious = TRUE;
5503	}
5504	} else {
5505	dst_page->vmp_precious = FALSE;
5506	}
5507	}
5508	if (dst_page->vmp_busy)
5509	upl->flags \|= UPL_HAS_BUSY;
5510
5511	if (phys_page > upl->highest_page)
5512	upl->highest_page = phys_page;
5513	assert (!pmap_is_noencrypt(phys_page));
5514	if (user_page_list) {
5515	user_page_list[entry].phys_addr = phys_page;
5516	user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
5517	user_page_list[entry].absent = dst_page->vmp_absent;
5518	user_page_list[entry].dirty = dst_page->vmp_dirty;
5519	user_page_list[entry].precious = dst_page->vmp_precious;
5520	user_page_list[entry].device = FALSE;
5521	user_page_list[entry].needed = FALSE;
5522	if (dst_page->vmp_clustered == TRUE)
5523	user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5524	else
5525	user_page_list[entry].speculative = FALSE;
5526	user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
5527	user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
5528	user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
5529	user_page_list[entry].mark = FALSE;
5530	}
5531	/*
5532	* if UPL_RET_ONLY_ABSENT is set, then
5533	* we are working with a fresh page and we've
5534	* just set the clustered flag on it to
5535	* indicate that it was drug in as part of a
5536	* speculative cluster... so leave it alone
5537	*/
5538	if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5539	/*
5540	* someone is explicitly grabbing this page...
5541	* update clustered and speculative state
5542	*
5543	*/
5544	if (dst_page->vmp_clustered)
5545	VM_PAGE_CONSUME_CLUSTERED(dst_page);
5546	}
5547	try_next_page:
5548	if (dwp->dw_mask) {
5549	if (dwp->dw_mask & DW_vm_page_activate)
5550	VM_STAT_INCR(reactivations);
5551
5552	VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5553
5554	if (dw_count >= dw_limit) {
5555	vm_page_do_delayed_work(object, tag, &dw_array[`0`], dw_count);
5556
5557	dwp = &dw_array[`0`];
5558	dw_count = `0`;
5559	}
5560	}
5561	entry++;
5562	dst_offset += PAGE_SIZE_64;
5563	xfer_size -= PAGE_SIZE;
5564	}
5565	if (dw_count)
5566	vm_page_do_delayed_work(object, tag, &dw_array[`0`], dw_count);
5567
5568	if (alias_page != NULL) {
5569	VM_PAGE_FREE(alias_page);
5570	}
5571	if (pmap_flushes_delayed == TRUE)
5572	pmap_flush(&pmap_flush_context_storage);
5573
5574	if (page_list_count != NULL) {
5575	if (upl->flags & UPL_INTERNAL)
5576	*page_list_count = `0`;
5577	else if (*page_list_count > entry)
5578	*page_list_count = entry;
5579	}
5580	#if UPL_DEBUG
5581	upl->upl_state = `1`;
5582	#endif
5583	vm_object_unlock(object);
5584
5585	VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, `0`, `0`, `0`);
5586
5587	return KERN_SUCCESS;
5588	}
5589
5590	/*
5591	* Routine: vm_object_super_upl_request
5592	* Purpose:
5593	* Cause the population of a portion of a vm_object
5594	* in much the same way as memory_object_upl_request.
5595	* Depending on the nature of the request, the pages
5596	* returned may be contain valid data or be uninitialized.
5597	* However, the region may be expanded up to the super
5598	* cluster size provided.
5599	*/
5600
5601	__private_extern__ kern_return_t
5602	vm_object_super_upl_request(
5603	vm_object_t object,
5604	vm_object_offset_t offset,
5605	upl_size_t size,
5606	upl_size_t super_cluster,
5607	upl_t *upl,
5608	upl_page_info_t *user_page_list,
5609	unsigned int *page_list_count,
5610	upl_control_flags_t cntrl_flags,
5611	vm_tag_t tag)
5612	{
5613	if (object->paging_offset > offset \|\| ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5614	return KERN_FAILURE;
5615
5616	assert(object->paging_in_progress);
5617	offset = offset - object->paging_offset;
5618
5619	if (super_cluster > size) {
5620
5621	vm_object_offset_t base_offset;
5622	upl_size_t super_size;
5623	vm_object_size_t super_size_64;
5624
5625	base_offset = (offset & ~((vm_object_offset_t) super_cluster - `1`));
5626	super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<`1` : super_cluster;
5627	super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5628	super_size = (upl_size_t) super_size_64;
5629	assert(super_size == super_size_64);
5630
5631	if (offset > (base_offset + super_size)) {
5632	panic("vm_object_super_upl_request: Missed target pageout"
5633	" %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5634	offset, base_offset, super_size, super_cluster,
5635	size, object->paging_offset);
5636	}
5637	/*
5638	* apparently there is a case where the vm requests a
5639	* page to be written out who's offset is beyond the
5640	* object size
5641	*/
5642	if ((offset + size) > (base_offset + super_size)) {
5643	super_size_64 = (offset + size) - base_offset;
5644	super_size = (upl_size_t) super_size_64;
5645	assert(super_size == super_size_64);
5646	}
5647
5648	offset = base_offset;
5649	size = super_size;
5650	}
5651	return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5652	}
5653
#if CONFIG_EMBEDDED
/*
 * Only built when CONFIG_EMBEDDED is set: an int initialised to zero
 * (NOTE(review): looks like a counter or tunable -- confirm at its
 * users) and prototypes for two helpers defined outside this file.
 */
int cs_executable_create_upl = 0;
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
#endif /* CONFIG_EMBEDDED */
5659
5660	kern_return_t
5661	vm_map_create_upl(
5662	vm_map_t map,
5663	vm_map_address_t offset,
5664	upl_size_t *upl_size,
5665	upl_t *upl,
5666	upl_page_info_array_t page_list,
5667	unsigned int *count,
5668	upl_control_flags_t *flags,
5669	vm_tag_t tag)
5670	{
5671	vm_map_entry_t entry;
5672	upl_control_flags_t caller_flags;
5673	int force_data_sync;
5674	int sync_cow_data;
5675	vm_object_t local_object;
5676	vm_map_offset_t local_offset;
5677	vm_map_offset_t local_start;
5678	kern_return_t ret;
5679
5680	assert(page_aligned(offset));
5681
5682	caller_flags = *flags;
5683
5684	if (caller_flags & ~UPL_VALID_FLAGS) {
5685	/*
5686	* For forward compatibility's sake,
5687	* reject any unknown flag.
5688	*/
5689	return KERN_INVALID_VALUE;
5690	}
5691	force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5692	sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5693
5694	if (upl == NULL)
5695	return KERN_INVALID_ARGUMENT;
5696
5697	REDISCOVER_ENTRY:
5698	vm_map_lock_read(map);
5699
5700	if (!vm_map_lookup_entry(map, offset, &entry)) {
5701	vm_map_unlock_read(map);
5702	return KERN_FAILURE;
5703	}
5704
5705	if ((entry->vme_end - offset) < *upl_size) {
5706	*upl_size = (upl_size_t) (entry->vme_end - offset);
5707	assert(*upl_size == entry->vme_end - offset);
5708	}
5709
5710	if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5711	*flags = `0`;
5712
5713	if (!entry->is_sub_map &&
5714	VME_OBJECT(entry) != VM_OBJECT_NULL) {
5715	if (VME_OBJECT(entry)->private)
5716	*flags = UPL_DEV_MEMORY;
5717
5718	if (VME_OBJECT(entry)->phys_contiguous)
5719	*flags \|= UPL_PHYS_CONTIG;
5720	}
5721	vm_map_unlock_read(map);
5722	return KERN_SUCCESS;
5723	}
5724
5725	if (VME_OBJECT(entry) == VM_OBJECT_NULL \|\|
5726	!VME_OBJECT(entry)->phys_contiguous) {
5727	if (*upl_size > MAX_UPL_SIZE_BYTES)
5728	*upl_size = MAX_UPL_SIZE_BYTES;
5729	}
5730
5731	/*
5732	* Create an object if necessary.
5733	*/
5734	if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5735
5736	if (vm_map_lock_read_to_write(map))
5737	goto REDISCOVER_ENTRY;
5738
5739	VME_OBJECT_SET(entry,
5740	vm_object_allocate((vm_size_t)
5741	(entry->vme_end -
5742	entry->vme_start)));
5743	VME_OFFSET_SET(entry, `0`);
5744	assert(entry->use_pmap);
5745
5746	vm_map_lock_write_to_read(map);
5747	}
5748
5749	if (!(caller_flags & UPL_COPYOUT_FROM) &&
5750	!entry->is_sub_map &&
5751	!(entry->protection & VM_PROT_WRITE)) {
5752	vm_map_unlock_read(map);
5753	return KERN_PROTECTION_FAILURE;
5754	}
5755
5756	#if CONFIG_EMBEDDED
5757	if (map->pmap != kernel_pmap &&
5758	(caller_flags & UPL_COPYOUT_FROM) &&
5759	(entry->protection & VM_PROT_EXECUTE) &&
5760	!(entry->protection & VM_PROT_WRITE)) {
5761	vm_offset_t kaddr;
5762	vm_size_t ksize;
5763
5764	/*
5765	* We're about to create a read-only UPL backed by
5766	* memory from an executable mapping.
5767	* Wiring the pages would result in the pages being copied
5768	* (due to the "MAP_PRIVATE" mapping) and no longer
5769	* code-signed, so no longer eligible for execution.
5770	* Instead, let's copy the data into a kernel buffer and
5771	* create the UPL from this kernel buffer.
5772	* The kernel buffer is then freed, leaving the UPL holding
5773	* the last reference on the VM object, so the memory will
5774	* be released when the UPL is committed.
5775	*/
5776
5777	vm_map_unlock_read(map);
5778	/ allocate kernel buffer /
5779	ksize = round_page(*upl_size);
5780	kaddr = `0`;
5781	ret = kmem_alloc_pageable(kernel_map,
5782	&kaddr,
5783	ksize,
5784	tag);
5785	if (ret == KERN_SUCCESS) {
5786	/ copyin the user data /
5787	assert(page_aligned(offset));
5788	ret = copyinmap(map, offset, (void )kaddr, upl_size);
5789	}
5790	if (ret == KERN_SUCCESS) {
5791	if (ksize > *upl_size) {
5792	/ zero out the extra space in kernel buffer /
5793	memset((void )(kaddr + upl_size),
5794	`0`,
5795	ksize - *upl_size);
5796	}
5797	/ create the UPL from the kernel buffer /
5798	ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5799	upl, page_list, count, flags, tag);
5800	}
5801	if (kaddr != `0`) {
5802	/ free the kernel buffer /
5803	kmem_free(kernel_map, kaddr, ksize);
5804	kaddr = `0`;
5805	ksize = `0`;
5806	}
5807	#if DEVELOPMENT \|\| DEBUG
5808	DTRACE_VM4(create_upl_from_executable,
5809	vm_map_t, map,
5810	vm_map_address_t, offset,
5811	upl_size_t, *upl_size,
5812	kern_return_t, ret);
5813	#endif /* DEVELOPMENT \|\| DEBUG */
5814	return ret;
5815	}
5816	#endif /* CONFIG_EMBEDDED */
5817
5818	local_object = VME_OBJECT(entry);
5819	assert(local_object != VM_OBJECT_NULL);
5820
5821	if (!entry->is_sub_map &&
5822	!entry->needs_copy &&
5823	*upl_size != `0` &&
5824	local_object->vo_size > upl_size && /* partial UPL /
5825	entry->wired_count == `0` && / No COW for entries that are wired /
5826	(map->pmap != kernel_pmap) && / alias checks /
5827	(vm_map_entry_should_cow_for_true_share(entry) / case 1 /
5828	\|\|
5829	(/ case 2 /
5830	local_object->internal &&
5831	(local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5832	local_object->ref_count > `1`))) {
5833	vm_prot_t prot;
5834
5835	/*
5836	* Case 1:
5837	* Set up the targeted range for copy-on-write to avoid
5838	* applying true_share/copy_delay to the entire object.
5839	*
5840	* Case 2:
5841	* This map entry covers only part of an internal
5842	* object. There could be other map entries covering
5843	* other areas of this object and some of these map
5844	* entries could be marked as "needs_copy", which
5845	* assumes that the object is COPY_SYMMETRIC.
5846	* To avoid marking this object as COPY_DELAY and
5847	* "true_share", let's shadow it and mark the new
5848	* (smaller) object as "true_share" and COPY_DELAY.
5849	*/
5850
5851	if (vm_map_lock_read_to_write(map)) {
5852	goto REDISCOVER_ENTRY;
5853	}
5854	vm_map_lock_assert_exclusive(map);
5855	assert(VME_OBJECT(entry) == local_object);
5856
5857	vm_map_clip_start(map,
5858	entry,
5859	vm_map_trunc_page(offset,
5860	VM_MAP_PAGE_MASK(map)));
5861	vm_map_clip_end(map,
5862	entry,
5863	vm_map_round_page(offset + *upl_size,
5864	VM_MAP_PAGE_MASK(map)));
5865	if ((entry->vme_end - offset) < *upl_size) {
5866	*upl_size = (upl_size_t) (entry->vme_end - offset);
5867	assert(*upl_size == entry->vme_end - offset);
5868	}
5869
5870	prot = entry->protection & ~VM_PROT_WRITE;
5871	if (override_nx(map, VME_ALIAS(entry)) && prot)
5872	prot \|= VM_PROT_EXECUTE;
5873	vm_object_pmap_protect(local_object,
5874	VME_OFFSET(entry),
5875	entry->vme_end - entry->vme_start,
5876	((entry->is_shared \|\|
5877	map->mapped_in_other_pmaps)
5878	? PMAP_NULL
5879	: map->pmap),
5880	entry->vme_start,
5881	prot);
5882
5883	assert(entry->wired_count == `0`);
5884
5885	/*
5886	* Lock the VM object and re-check its status: if it's mapped
5887	* in another address space, we could still be racing with
5888	* another thread holding that other VM map exclusively.
5889	*/
5890	vm_object_lock(local_object);
5891	if (local_object->true_share) {
5892	/ object is already in proper state: no COW needed /
5893	assert(local_object->copy_strategy !=
5894	MEMORY_OBJECT_COPY_SYMMETRIC);
5895	} else {
5896	/ not true_share: ask for copy-on-write below /
5897	assert(local_object->copy_strategy ==
5898	MEMORY_OBJECT_COPY_SYMMETRIC);
5899	entry->needs_copy = TRUE;
5900	}
5901	vm_object_unlock(local_object);
5902
5903	vm_map_lock_write_to_read(map);
5904	}
5905
5906	if (entry->needs_copy) {
5907	/*
5908	* Honor copy-on-write for COPY_SYMMETRIC
5909	* strategy.
5910	*/
5911	vm_map_t local_map;
5912	vm_object_t object;
5913	vm_object_offset_t new_offset;
5914	vm_prot_t prot;
5915	boolean_t wired;
5916	vm_map_version_t version;
5917	vm_map_t real_map;
5918	vm_prot_t fault_type;
5919
5920	local_map = map;
5921
5922	if (caller_flags & UPL_COPYOUT_FROM) {
5923	fault_type = VM_PROT_READ \| VM_PROT_COPY;
5924	vm_counters.create_upl_extra_cow++;
5925	vm_counters.create_upl_extra_cow_pages +=
5926	(entry->vme_end - entry->vme_start) / PAGE_SIZE;
5927	} else {
5928	fault_type = VM_PROT_WRITE;
5929	}
5930	if (vm_map_lookup_locked(&local_map,
5931	offset, fault_type,
5932	OBJECT_LOCK_EXCLUSIVE,
5933	&version, &object,
5934	&new_offset, &prot, &wired,
5935	NULL,
5936	&real_map) != KERN_SUCCESS) {
5937	if (fault_type == VM_PROT_WRITE) {
5938	vm_counters.create_upl_lookup_failure_write++;
5939	} else {
5940	vm_counters.create_upl_lookup_failure_copy++;
5941	}
5942	vm_map_unlock_read(local_map);
5943	return KERN_FAILURE;
5944	}
5945	if (real_map != map)
5946	vm_map_unlock(real_map);
5947	vm_map_unlock_read(local_map);
5948
5949	vm_object_unlock(object);
5950
5951	goto REDISCOVER_ENTRY;
5952	}
5953
5954	if (entry->is_sub_map) {
5955	vm_map_t submap;
5956
5957	submap = VME_SUBMAP(entry);
5958	local_start = entry->vme_start;
5959	local_offset = VME_OFFSET(entry);
5960
5961	vm_map_reference(submap);
5962	vm_map_unlock_read(map);
5963
5964	ret = vm_map_create_upl(submap,
5965	local_offset + (offset - local_start),
5966	upl_size, upl, page_list, count, flags, tag);
5967	vm_map_deallocate(submap);
5968
5969	return ret;
5970	}
5971
5972	if (sync_cow_data &&
5973	(VME_OBJECT(entry)->shadow \|\|
5974	VME_OBJECT(entry)->copy)) {
5975	local_object = VME_OBJECT(entry);
5976	local_start = entry->vme_start;
5977	local_offset = VME_OFFSET(entry);
5978
5979	vm_object_reference(local_object);
5980	vm_map_unlock_read(map);
5981
5982	if (local_object->shadow && local_object->copy) {
5983	vm_object_lock_request(local_object->shadow,
5984	((vm_object_offset_t)
5985	((offset - local_start) +
5986	local_offset) +
5987	local_object->vo_shadow_offset),
5988	*upl_size, FALSE,
5989	MEMORY_OBJECT_DATA_SYNC,
5990	VM_PROT_NO_CHANGE);
5991	}
5992	sync_cow_data = FALSE;
5993	vm_object_deallocate(local_object);
5994
5995	goto REDISCOVER_ENTRY;
5996	}
5997	if (force_data_sync) {
5998	local_object = VME_OBJECT(entry);
5999	local_start = entry->vme_start;
6000	local_offset = VME_OFFSET(entry);
6001
6002	vm_object_reference(local_object);
6003	vm_map_unlock_read(map);
6004
6005	vm_object_lock_request(local_object,
6006	((vm_object_offset_t)
6007	((offset - local_start) +
6008	local_offset)),
6009	(vm_object_size_t)*upl_size,
6010	FALSE,
6011	MEMORY_OBJECT_DATA_SYNC,
6012	VM_PROT_NO_CHANGE);
6013
6014	force_data_sync = FALSE;
6015	vm_object_deallocate(local_object);
6016
6017	goto REDISCOVER_ENTRY;
6018	}
6019	if (VME_OBJECT(entry)->private)
6020	*flags = UPL_DEV_MEMORY;
6021	else
6022	*flags = `0`;
6023
6024	if (VME_OBJECT(entry)->phys_contiguous)
6025	*flags \|= UPL_PHYS_CONTIG;
6026
6027	local_object = VME_OBJECT(entry);
6028	local_offset = VME_OFFSET(entry);
6029	local_start = entry->vme_start;
6030
6031	#if CONFIG_EMBEDDED
6032	/*
6033	* Wiring will copy the pages to the shadow object.
6034	* The shadow object will not be code-signed so
6035	* attempting to execute code from these copied pages
6036	* would trigger a code-signing violation.
6037	*/
6038	if (entry->protection & VM_PROT_EXECUTE) {
6039	#if MACH_ASSERT
6040	printf("pid %d[%s] create_upl out of executable range from "
6041	"0x%llx to 0x%llx: side effects may include "
6042	"code-signing violations later on\n",
6043	proc_selfpid(),
6044	(current_task()->bsd_info
6045	? proc_name_address(current_task()->bsd_info)
6046	: "?"),
6047	(uint64_t) entry->vme_start,
6048	(uint64_t) entry->vme_end);
6049	#endif /* MACH_ASSERT */
6050	DTRACE_VM2(cs_executable_create_upl,
6051	uint64_t, (uint64_t)entry->vme_start,
6052	uint64_t, (uint64_t)entry->vme_end);
6053	cs_executable_create_upl++;
6054	}
6055	#endif /* CONFIG_EMBEDDED */
6056
6057	vm_object_lock(local_object);
6058
6059	/*
6060	* Ensure that this object is "true_share" and "copy_delay" now,
6061	* while we're still holding the VM map lock. After we unlock the map,
6062	* anything could happen to that mapping, including some copy-on-write
6063	* activity. We need to make sure that the IOPL will point at the
6064	* same memory as the mapping.
6065	*/
6066	if (local_object->true_share) {
6067	assert(local_object->copy_strategy !=
6068	MEMORY_OBJECT_COPY_SYMMETRIC);
6069	} else if (local_object != kernel_object &&
6070	local_object != compressor_object &&
6071	!local_object->phys_contiguous) {
6072	#if VM_OBJECT_TRACKING_OP_TRUESHARE
6073	if (!local_object->true_share &&
6074	vm_object_tracking_inited) {
6075	void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6076	int num = `0`;
6077	num = OSBacktrace(bt,
6078	VM_OBJECT_TRACKING_BTDEPTH);
6079	btlog_add_entry(vm_object_tracking_btlog,
6080	local_object,
6081	VM_OBJECT_TRACKING_OP_TRUESHARE,
6082	bt,
6083	num);
6084	}
6085	#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6086	local_object->true_share = TRUE;
6087	if (local_object->copy_strategy ==
6088	MEMORY_OBJECT_COPY_SYMMETRIC) {
6089	local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6090	}
6091	}
6092
6093	vm_object_reference_locked(local_object);
6094	vm_object_unlock(local_object);
6095
6096	vm_map_unlock_read(map);
6097
6098	ret = vm_object_iopl_request(local_object,
6099	((vm_object_offset_t)
6100	((offset - local_start) + local_offset)),
6101	*upl_size,
6102	upl,
6103	page_list,
6104	count,
6105	caller_flags,
6106	tag);
6107	vm_object_deallocate(local_object);
6108
6109	return ret;
6110	}
6111
6112	/*
6113	* Internal routine to enter a UPL into a VM map.
6114	*
6115	* JMM - This should just be doable through the standard
6116	* vm_map_enter() API.
6117	*/
6118	kern_return_t
6119	vm_map_enter_upl(
6120	vm_map_t map,
6121	upl_t upl,
6122	vm_map_offset_t *dst_addr)
6123	{
6124	vm_map_size_t size;
6125	vm_object_offset_t offset;
6126	vm_map_offset_t addr;
6127	vm_page_t m;
6128	kern_return_t kr;
6129	int isVectorUPL = `0`, curr_upl=`0`;
6130	upl_t vector_upl = NULL;
6131	vm_offset_t vector_upl_dst_addr = `0`;
6132	vm_map_t vector_upl_submap = NULL;
6133	upl_offset_t subupl_offset = `0`;
6134	upl_size_t subupl_size = `0`;
6135
6136	if (upl == UPL_NULL)
6137	return KERN_INVALID_ARGUMENT;
6138
6139	if((isVectorUPL = vector_upl_is_valid(upl))) {
6140	int mapped=`0`,valid_upls=`0`;
6141	vector_upl = upl;
6142
6143	upl_lock(vector_upl);
6144	for(curr_upl=`0`; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6145	upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6146	if(upl == NULL)
6147	continue;
6148	valid_upls++;
6149	if (UPL_PAGE_LIST_MAPPED & upl->flags)
6150	mapped++;
6151	}
6152
6153	if(mapped) {
6154	if(mapped != valid_upls)
6155	panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6156	else {
6157	upl_unlock(vector_upl);
6158	return KERN_FAILURE;
6159	}
6160	}
6161
6162	kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6163	VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6164	&vector_upl_submap);
6165	if( kr != KERN_SUCCESS )
6166	panic("Vector UPL submap allocation failed\n");
6167	map = vector_upl_submap;
6168	vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6169	curr_upl=`0`;
6170	}
6171	else
6172	upl_lock(upl);
6173
6174	process_upl_to_enter:
6175	if(isVectorUPL){
6176	if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6177	*dst_addr = vector_upl_dst_addr;
6178	upl_unlock(vector_upl);
6179	return KERN_SUCCESS;
6180	}
6181	upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6182	if(upl == NULL)
6183	goto process_upl_to_enter;
6184
6185	vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6186	*dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6187	} else {
6188	/*
6189	* check to see if already mapped
6190	*/
6191	if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6192	upl_unlock(upl);
6193	return KERN_FAILURE;
6194	}
6195	}
6196	if ((!(upl->flags & UPL_SHADOWED)) &&
6197	((upl->flags & UPL_HAS_BUSY) \|\|
6198	!((upl->flags & (UPL_DEVICE_MEMORY \| UPL_IO_WIRE)) \|\| (upl->map_object->phys_contiguous)))) {
6199
6200	vm_object_t object;
6201	vm_page_t alias_page;
6202	vm_object_offset_t new_offset;
6203	unsigned int pg_num;
6204	wpl_array_t lite_list;
6205
6206	if (upl->flags & UPL_INTERNAL) {
6207	lite_list = (wpl_array_t)
6208	((((uintptr_t)upl) + sizeof(struct upl))
6209	+ ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6210	} else {
6211	lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6212	}
6213	object = upl->map_object;
6214	upl->map_object = vm_object_allocate(upl->size);
6215
6216	vm_object_lock(upl->map_object);
6217
6218	upl->map_object->shadow = object;
6219	upl->map_object->pageout = TRUE;
6220	upl->map_object->can_persist = FALSE;
6221	upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6222	upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6223	upl->map_object->wimg_bits = object->wimg_bits;
6224	offset = upl->map_object->vo_shadow_offset;
6225	new_offset = `0`;
6226	size = upl->size;
6227
6228	upl->flags \|= UPL_SHADOWED;
6229
6230	while (size) {
6231	pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6232	assert(pg_num == new_offset / PAGE_SIZE);
6233
6234	if (lite_list[pg_num>>`5`] & (`1` << (pg_num & `31`))) {
6235
6236	VM_PAGE_GRAB_FICTITIOUS(alias_page);
6237
6238	vm_object_lock(object);
6239
6240	m = vm_page_lookup(object, offset);
6241	if (m == VM_PAGE_NULL) {
6242	panic("vm_upl_map: page missing\n");
6243	}
6244
6245	/*
6246	* Convert the fictitious page to a private
6247	* shadow of the real page.
6248	*/
6249	assert(alias_page->vmp_fictitious);
6250	alias_page->vmp_fictitious = FALSE;
6251	alias_page->vmp_private = TRUE;
6252	alias_page->vmp_free_when_done = TRUE;
6253	/*
6254	* since m is a page in the upl it must
6255	* already be wired or BUSY, so it's
6256	* safe to assign the underlying physical
6257	* page to the alias
6258	*/
6259	VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6260
6261	vm_object_unlock(object);
6262
6263	vm_page_lockspin_queues();
6264	vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6265	vm_page_unlock_queues();
6266
6267	vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6268
6269	assert(!alias_page->vmp_wanted);
6270	alias_page->vmp_busy = FALSE;
6271	alias_page->vmp_absent = FALSE;
6272	}
6273	size -= PAGE_SIZE;
6274	offset += PAGE_SIZE_64;
6275	new_offset += PAGE_SIZE_64;
6276	}
6277	vm_object_unlock(upl->map_object);
6278	}
6279	if (upl->flags & UPL_SHADOWED)
6280	offset = `0`;
6281	else
6282	offset = upl->offset - upl->map_object->paging_offset;
6283
6284	size = upl->size;
6285
6286	vm_object_reference(upl->map_object);
6287
6288	if(!isVectorUPL) {
6289	*dst_addr = `0`;
6290	/*
6291	* NEED A UPL_MAP ALIAS
6292	*/
6293	kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) `0`,
6294	VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6295	upl->map_object, offset, FALSE,
6296	VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6297
6298	if (kr != KERN_SUCCESS) {
6299	vm_object_deallocate(upl->map_object);
6300	upl_unlock(upl);
6301	return(kr);
6302	}
6303	}
6304	else {
6305	kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) `0`,
6306	VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6307	upl->map_object, offset, FALSE,
6308	VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6309	if(kr)
6310	panic("vm_map_enter failed for a Vector UPL\n");
6311	}
6312	vm_object_lock(upl->map_object);
6313
6314	for (addr = *dst_addr; size > `0`; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6315	m = vm_page_lookup(upl->map_object, offset);
6316
6317	if (m) {
6318	m->vmp_pmapped = TRUE;
6319
6320	/ CODE SIGNING ENFORCEMENT: page has been wpmapped,*
6321	* but only in kernel space. If this was on a user map,
6322	* we'd have to set the wpmapped bit. */
6323	/ m->vmp_wpmapped = TRUE; /
6324	assert(map->pmap == kernel_pmap);
6325
6326	PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, `0`, TRUE, kr);
6327
6328	assert(kr == KERN_SUCCESS);
6329	#if KASAN
6330	kasan_notify_address(addr, PAGE_SIZE_64);
6331	#endif
6332	}
6333	offset += PAGE_SIZE_64;
6334	}
6335	vm_object_unlock(upl->map_object);
6336
6337	/*
6338	* hold a reference for the mapping
6339	*/
6340	upl->ref_count++;
6341	upl->flags \|= UPL_PAGE_LIST_MAPPED;
6342	upl->kaddr = (vm_offset_t) *dst_addr;
6343	assert(upl->kaddr == *dst_addr);
6344
6345	if(isVectorUPL)
6346	goto process_upl_to_enter;
6347
6348	upl_unlock(upl);
6349
6350	return KERN_SUCCESS;
6351	}
6352
6353	/*
6354	* Internal routine to remove a UPL mapping from a VM map.
6355	*
6356	* XXX - This should just be doable through a standard
6357	* vm_map_remove() operation. Otherwise, implicit clean-up
6358	* of the target map won't be able to correctly remove
6359	* these (and release the reference on the UPL). Having
6360	* to do this means we can't map these into user-space
6361	* maps yet.
6362	*/
6363	kern_return_t
6364	vm_map_remove_upl(
6365	vm_map_t map,
6366	upl_t upl)
6367	{
6368	vm_address_t addr;
6369	upl_size_t size;
6370	int isVectorUPL = `0`, curr_upl = `0`;
6371	upl_t vector_upl = NULL;
6372
6373	if (upl == UPL_NULL)
6374	return KERN_INVALID_ARGUMENT;
6375
6376	if((isVectorUPL = vector_upl_is_valid(upl))) {
6377	int unmapped=`0`, valid_upls=`0`;
6378	vector_upl = upl;
6379	upl_lock(vector_upl);
6380	for(curr_upl=`0`; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6381	upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6382	if(upl == NULL)
6383	continue;
6384	valid_upls++;
6385	if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6386	unmapped++;
6387	}
6388
6389	if(unmapped) {
6390	if(unmapped != valid_upls)
6391	panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6392	else {
6393	upl_unlock(vector_upl);
6394	return KERN_FAILURE;
6395	}
6396	}
6397	curr_upl=`0`;
6398	}
6399	else
6400	upl_lock(upl);
6401
6402	process_upl_to_remove:
6403	if(isVectorUPL) {
6404	if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6405	vm_map_t v_upl_submap;
6406	vm_offset_t v_upl_submap_dst_addr;
6407	vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6408
6409	vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_REMOVE_NO_FLAGS);
6410	vm_map_deallocate(v_upl_submap);
6411	upl_unlock(vector_upl);
6412	return KERN_SUCCESS;
6413	}
6414
6415	upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6416	if(upl == NULL)
6417	goto process_upl_to_remove;
6418	}
6419
6420	if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6421	addr = upl->kaddr;
6422	size = upl->size;
6423
6424	assert(upl->ref_count > `1`);
6425	upl->ref_count--; / removing mapping ref /
6426
6427	upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6428	upl->kaddr = (vm_offset_t) `0`;
6429
6430	if(!isVectorUPL) {
6431	upl_unlock(upl);
6432
6433	vm_map_remove(
6434	map,
6435	vm_map_trunc_page(addr,
6436	VM_MAP_PAGE_MASK(map)),
6437	vm_map_round_page(addr + size,
6438	VM_MAP_PAGE_MASK(map)),
6439	VM_MAP_REMOVE_NO_FLAGS);
6440	return KERN_SUCCESS;
6441	}
6442	else {
6443	/*
6444	* If it's a Vectored UPL, we'll be removing the entire
6445	* submap anyways, so no need to remove individual UPL
6446	* element mappings from within the submap
6447	*/
6448	goto process_upl_to_remove;
6449	}
6450	}
6451	upl_unlock(upl);
6452
6453	return KERN_FAILURE;
6454	}
6455
6456
6457	kern_return_t
6458	upl_commit_range(
6459	upl_t upl,
6460	upl_offset_t offset,
6461	upl_size_t size,
6462	int flags,
6463	upl_page_info_t *page_list,
6464	mach_msg_type_number_t count,
6465	boolean_t *empty)
6466	{
6467	upl_size_t xfer_size, subupl_size = size;
6468	vm_object_t shadow_object;
6469	vm_object_t object;
6470	vm_object_t m_object;
6471	vm_object_offset_t target_offset;
6472	upl_offset_t subupl_offset = offset;
6473	int entry;
6474	wpl_array_t lite_list;
6475	int occupied;
6476	int clear_refmod = `0`;
6477	int pgpgout_count = `0`;
6478	struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6479	struct vm_page_delayed_work *dwp;
6480	int dw_count;
6481	int dw_limit;
6482	int isVectorUPL = `0`;
6483	upl_t vector_upl = NULL;
6484	boolean_t should_be_throttled = FALSE;
6485
6486	vm_page_t nxt_page = VM_PAGE_NULL;
6487	int fast_path_possible = `0`;
6488	int fast_path_full_commit = `0`;
6489	int throttle_page = `0`;
6490	int unwired_count = `0`;
6491	int local_queue_count = `0`;
6492	vm_page_t first_local, last_local;
6493
6494	*empty = FALSE;
6495
6496	if (upl == UPL_NULL)
6497	return KERN_INVALID_ARGUMENT;
6498
6499	if (count == `0`)
6500	page_list = NULL;
6501
6502	if((isVectorUPL = vector_upl_is_valid(upl))) {
6503	vector_upl = upl;
6504	upl_lock(vector_upl);
6505	}
6506	else
6507	upl_lock(upl);
6508
6509	process_upl_to_commit:
6510
6511	if(isVectorUPL) {
6512	size = subupl_size;
6513	offset = subupl_offset;
6514	if(size == `0`) {
6515	upl_unlock(vector_upl);
6516	return KERN_SUCCESS;
6517	}
6518	upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6519	if(upl == NULL) {
6520	upl_unlock(vector_upl);
6521	return KERN_FAILURE;
6522	}
6523	page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6524	subupl_size -= size;
6525	subupl_offset += size;
6526	}
6527
6528	#if UPL_DEBUG
6529	if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6530	(void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[`0`], UPL_DEBUG_STACK_FRAMES);
6531
6532	upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6533	upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6534
6535	upl->upl_commit_index++;
6536	}
6537	#endif
6538	if (upl->flags & UPL_DEVICE_MEMORY)
6539	xfer_size = `0`;
6540	else if ((offset + size) <= upl->size)
6541	xfer_size = size;
6542	else {
6543	if(!isVectorUPL)
6544	upl_unlock(upl);
6545	else {
6546	upl_unlock(vector_upl);
6547	}
6548	return KERN_FAILURE;
6549	}
6550	if (upl->flags & UPL_SET_DIRTY)
6551	flags \|= UPL_COMMIT_SET_DIRTY;
6552	if (upl->flags & UPL_CLEAR_DIRTY)
6553	flags \|= UPL_COMMIT_CLEAR_DIRTY;
6554
6555	if (upl->flags & UPL_INTERNAL)
6556	lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6557	+ ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6558	else
6559	lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6560
6561	object = upl->map_object;
6562
6563	if (upl->flags & UPL_SHADOWED) {
6564	vm_object_lock(object);
6565	shadow_object = object->shadow;
6566	} else {
6567	shadow_object = object;
6568	}
6569	entry = offset/PAGE_SIZE;
6570	target_offset = (vm_object_offset_t)offset;
6571
6572	assert(!(target_offset & PAGE_MASK));
6573	assert(!(xfer_size & PAGE_MASK));
6574
6575	if (upl->flags & UPL_KERNEL_OBJECT)
6576	vm_object_lock_shared(shadow_object);
6577	else
6578	vm_object_lock(shadow_object);
6579
6580	VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6581
6582	if (upl->flags & UPL_ACCESS_BLOCKED) {
6583	assert(shadow_object->blocked_access);
6584	shadow_object->blocked_access = FALSE;
6585	vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6586	}
6587
6588	if (shadow_object->code_signed) {
6589	/*
6590	* CODE SIGNING:
6591	* If the object is code-signed, do not let this UPL tell
6592	* us if the pages are valid or not. Let the pages be
6593	* validated by VM the normal way (when they get mapped or
6594	* copied).
6595	*/
6596	flags &= ~UPL_COMMIT_CS_VALIDATED;
6597	}
6598	if (! page_list) {
6599	/*
6600	* No page list to get the code-signing info from !?
6601	*/
6602	flags &= ~UPL_COMMIT_CS_VALIDATED;
6603	}
6604	if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6605	should_be_throttled = TRUE;
6606
6607	dwp = &dw_array[`0`];
6608	dw_count = `0`;
6609	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6610
6611	if ((upl->flags & UPL_IO_WIRE) &&
6612	!(flags & UPL_COMMIT_FREE_ABSENT) &&
6613	!isVectorUPL &&
6614	shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6615	shadow_object->purgable != VM_PURGABLE_EMPTY) {
6616
6617	if (!vm_page_queue_empty(&shadow_object->memq)) {
6618
6619	if (size == shadow_object->vo_size) {
6620	nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6621	fast_path_full_commit = `1`;
6622	}
6623	fast_path_possible = `1`;
6624
6625	if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6626	(shadow_object->purgable == VM_PURGABLE_DENY \|\|
6627	shadow_object->purgable == VM_PURGABLE_NONVOLATILE \|\|
6628	shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6629	throttle_page = `1`;
6630	}
6631	}
6632	}
6633	first_local = VM_PAGE_NULL;
6634	last_local = VM_PAGE_NULL;
6635
6636	while (xfer_size) {
6637	vm_page_t t, m;
6638
6639	dwp->dw_mask = `0`;
6640	clear_refmod = `0`;
6641
6642	m = VM_PAGE_NULL;
6643
6644	if (upl->flags & UPL_LITE) {
6645	unsigned int pg_num;
6646
6647	if (nxt_page != VM_PAGE_NULL) {
6648	m = nxt_page;
6649	nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
6650	target_offset = m->vmp_offset;
6651	}
6652	pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6653	assert(pg_num == target_offset/PAGE_SIZE);
6654
6655	if (lite_list[pg_num>>`5`] & (`1` << (pg_num & `31`))) {
6656	lite_list[pg_num>>`5`] &= ~(`1` << (pg_num & `31`));
6657
6658	if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6659	m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6660	} else
6661	m = NULL;
6662	}
6663	if (upl->flags & UPL_SHADOWED) {
6664	if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6665
6666	t->vmp_free_when_done = FALSE;
6667
6668	VM_PAGE_FREE(t);
6669
6670	if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6671	m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6672	}
6673	}
6674	if (m == VM_PAGE_NULL)
6675	goto commit_next_page;
6676
6677	m_object = VM_PAGE_OBJECT(m);
6678
6679	if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6680	assert(m->vmp_busy);
6681
6682	dwp->dw_mask \|= (DW_clear_busy \| DW_PAGE_WAKEUP);
6683	goto commit_next_page;
6684	}
6685
6686	if (flags & UPL_COMMIT_CS_VALIDATED) {
6687	/*
6688	* CODE SIGNING:
6689	* Set the code signing bits according to
6690	* what the UPL says they should be.
6691	*/
6692	m->vmp_cs_validated = page_list[entry].cs_validated;
6693	m->vmp_cs_tainted = page_list[entry].cs_tainted;
6694	m->vmp_cs_nx = page_list[entry].cs_nx;
6695	}
6696	if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6697	m->vmp_written_by_kernel = TRUE;
6698
6699	if (upl->flags & UPL_IO_WIRE) {
6700
6701	if (page_list)
6702	page_list[entry].phys_addr = `0`;
6703
6704	if (flags & UPL_COMMIT_SET_DIRTY) {
6705	SET_PAGE_DIRTY(m, FALSE);
6706	} else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6707	m->vmp_dirty = FALSE;
6708
6709	if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6710	m->vmp_cs_validated && !m->vmp_cs_tainted) {
6711	/*
6712	* CODE SIGNING:
6713	* This page is no longer dirty
6714	* but could have been modified,
6715	* so it will need to be
6716	* re-validated.
6717	*/
6718	m->vmp_cs_validated = FALSE;
6719
6720	VM_PAGEOUT_DEBUG(vm_cs_validated_resets, `1`);
6721
6722	pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6723	}
6724	clear_refmod \|= VM_MEM_MODIFIED;
6725	}
6726	if (upl->flags & UPL_ACCESS_BLOCKED) {
6727	/*
6728	* We blocked access to the pages in this UPL.
6729	* Clear the "busy" bit and wake up any waiter
6730	* for this page.
6731	*/
6732	dwp->dw_mask \|= (DW_clear_busy \| DW_PAGE_WAKEUP);
6733	}
6734	if (fast_path_possible) {
6735	assert(m_object->purgable != VM_PURGABLE_EMPTY);
6736	assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6737	if (m->vmp_absent) {
6738	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
6739	assert(m->vmp_wire_count == `0`);
6740	assert(m->vmp_busy);
6741
6742	m->vmp_absent = FALSE;
6743	dwp->dw_mask \|= (DW_clear_busy \| DW_PAGE_WAKEUP);
6744	} else {
6745	if (m->vmp_wire_count == `0`)
6746	panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6747	assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
6748
6749	/*
6750	* XXX FBDP need to update some other
6751	* counters here (purgeable_wired_count)
6752	* (ledgers), ...
6753	*/
6754	assert(m->vmp_wire_count > `0`);
6755	m->vmp_wire_count--;
6756
6757	if (m->vmp_wire_count == `0`) {
6758	m->vmp_q_state = VM_PAGE_NOT_ON_Q;
6759	unwired_count++;
6760	}
6761	}
6762	if (m->vmp_wire_count == `0`) {
6763	assert(m->vmp_pageq.next == `0` && m->vmp_pageq.prev == `0`);
6764
6765	if (last_local == VM_PAGE_NULL) {
6766	assert(first_local == VM_PAGE_NULL);
6767
6768	last_local = m;
6769	first_local = m;
6770	} else {
6771	assert(first_local != VM_PAGE_NULL);
6772
6773	m->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6774	first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6775	first_local = m;
6776	}
6777	local_queue_count++;
6778
6779	if (throttle_page) {
6780	m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
6781	} else {
6782	if (flags & UPL_COMMIT_INACTIVATE) {
6783	if (shadow_object->internal)
6784	m->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6785	else
6786	m->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6787	} else
6788	m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
6789	}
6790	}
6791	} else {
6792	if (flags & UPL_COMMIT_INACTIVATE) {
6793	dwp->dw_mask \|= DW_vm_page_deactivate_internal;
6794	clear_refmod \|= VM_MEM_REFERENCED;
6795	}
6796	if (m->vmp_absent) {
6797	if (flags & UPL_COMMIT_FREE_ABSENT)
6798	dwp->dw_mask \|= DW_vm_page_free;
6799	else {
6800	m->vmp_absent = FALSE;
6801	dwp->dw_mask \|= (DW_clear_busy \| DW_PAGE_WAKEUP);
6802
6803	if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6804	dwp->dw_mask \|= DW_vm_page_activate;
6805	}
6806	} else
6807	dwp->dw_mask \|= DW_vm_page_unwire;
6808	}
6809	goto commit_next_page;
6810	}
6811	assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6812
6813	if (page_list)
6814	page_list[entry].phys_addr = `0`;
6815
6816	/*
6817	* make sure to clear the hardware
6818	* modify or reference bits before
6819	* releasing the BUSY bit on this page
6820	* otherwise we risk losing a legitimate
6821	* change of state
6822	*/
6823	if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6824	m->vmp_dirty = FALSE;
6825
6826	clear_refmod \|= VM_MEM_MODIFIED;
6827	}
6828	if (m->vmp_laundry)
6829	dwp->dw_mask \|= DW_vm_pageout_throttle_up;
6830
6831	if (VM_PAGE_WIRED(m))
6832	m->vmp_free_when_done = FALSE;
6833
6834	if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6835	m->vmp_cs_validated && !m->vmp_cs_tainted) {
6836	/*
6837	* CODE SIGNING:
6838	* This page is no longer dirty
6839	* but could have been modified,
6840	* so it will need to be
6841	* re-validated.
6842	*/
6843	m->vmp_cs_validated = FALSE;
6844
6845	VM_PAGEOUT_DEBUG(vm_cs_validated_resets, `1`);
6846
6847	pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6848	}
6849	if (m->vmp_overwriting) {
6850	/*
6851	* the (COPY_OUT_FROM == FALSE) request_page_list case
6852	*/
6853	if (m->vmp_busy) {
6854	#if CONFIG_PHANTOM_CACHE
6855	if (m->vmp_absent && !m_object->internal)
6856	dwp->dw_mask \|= DW_vm_phantom_cache_update;
6857	#endif
6858	m->vmp_absent = FALSE;
6859
6860	dwp->dw_mask \|= DW_clear_busy;
6861	} else {
6862	/*
6863	* alternate (COPY_OUT_FROM == FALSE) page_list case
6864	* Occurs when the original page was wired
6865	* at the time of the list request
6866	*/
6867	assert(VM_PAGE_WIRED(m));
6868
6869	dwp->dw_mask \|= DW_vm_page_unwire; / reactivates /
6870	}
6871	m->vmp_overwriting = FALSE;
6872	}
6873	m->vmp_cleaning = FALSE;
6874
6875	if (m->vmp_free_when_done) {
6876	/*
6877	* With the clean queue enabled, UPL_PAGEOUT should
6878	* no longer set the pageout bit. It's pages now go
6879	* to the clean queue.
6880	*/
6881	assert(!(flags & UPL_PAGEOUT));
6882	assert(!m_object->internal);
6883
6884	m->vmp_free_when_done = FALSE;
6885
6886	if ((flags & UPL_COMMIT_SET_DIRTY) \|\|
6887	(m->vmp_pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
6888	/*
6889	* page was re-dirtied after we started
6890	* the pageout... reactivate it since
6891	* we don't know whether the on-disk
6892	* copy matches what is now in memory
6893	*/
6894	SET_PAGE_DIRTY(m, FALSE);
6895
6896	dwp->dw_mask \|= DW_vm_page_activate \| DW_PAGE_WAKEUP;
6897
6898	if (upl->flags & UPL_PAGEOUT) {
6899	VM_STAT_INCR(reactivations);
6900	DTRACE_VM2(pgrec, int, `1`, (uint64_t *), NULL);
6901	}
6902	} else {
6903	/*
6904	* page has been successfully cleaned
6905	* go ahead and free it for other use
6906	*/
6907	if (m_object->internal) {
6908	DTRACE_VM2(anonpgout, int, `1`, (uint64_t *), NULL);
6909	} else {
6910	DTRACE_VM2(fspgout, int, `1`, (uint64_t *), NULL);
6911	}
6912	m->vmp_dirty = FALSE;
6913	m->vmp_busy = TRUE;
6914
6915	dwp->dw_mask \|= DW_vm_page_free;
6916	}
6917	goto commit_next_page;
6918	}
6919	/*
6920	* It is a part of the semantic of COPYOUT_FROM
6921	* UPLs that a commit implies cache sync
6922	* between the vm page and the backing store
6923	* this can be used to strip the precious bit
6924	* as well as clean
6925	*/
6926	if ((upl->flags & UPL_PAGE_SYNC_DONE) \|\| (flags & UPL_COMMIT_CLEAR_PRECIOUS))
6927	m->vmp_precious = FALSE;
6928
6929	if (flags & UPL_COMMIT_SET_DIRTY) {
6930	SET_PAGE_DIRTY(m, FALSE);
6931	} else {
6932	m->vmp_dirty = FALSE;
6933	}
6934
6935	/ with the clean queue on, move all cleaned pages to the clean queue /
6936	if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) {
6937	pgpgout_count++;
6938
6939	VM_STAT_INCR(pageouts);
6940	DTRACE_VM2(pgout, int, `1`, (uint64_t *), NULL);
6941
6942	dwp->dw_mask \|= DW_enqueue_cleaned;
6943	} else if (should_be_throttled == TRUE && (m->vmp_q_state == VM_PAGE_NOT_ON_Q)) {
6944	/*
6945	* page coming back in from being 'frozen'...
6946	* it was dirty before it was frozen, so keep it so
6947	* the vm_page_activate will notice that it really belongs
6948	* on the throttle queue and put it there
6949	*/
6950	SET_PAGE_DIRTY(m, FALSE);
6951	dwp->dw_mask \|= DW_vm_page_activate;
6952
6953	} else {
6954	if ((flags & UPL_COMMIT_INACTIVATE) && !m->vmp_clustered && (m->vmp_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
6955	dwp->dw_mask \|= DW_vm_page_deactivate_internal;
6956	clear_refmod \|= VM_MEM_REFERENCED;
6957	} else if ( !VM_PAGE_PAGEABLE(m)) {
6958
6959	if (m->vmp_clustered \|\| (flags & UPL_COMMIT_SPECULATE))
6960	dwp->dw_mask \|= DW_vm_page_speculate;
6961	else if (m->vmp_reference)
6962	dwp->dw_mask \|= DW_vm_page_activate;
6963	else {
6964	dwp->dw_mask \|= DW_vm_page_deactivate_internal;
6965	clear_refmod \|= VM_MEM_REFERENCED;
6966	}
6967	}
6968	}
6969	if (upl->flags & UPL_ACCESS_BLOCKED) {
6970	/*
6971	* We blocked access to the pages in this URL.
6972	* Clear the "busy" bit on this page before we
6973	* wake up any waiter.
6974	*/
6975	dwp->dw_mask \|= DW_clear_busy;
6976	}
6977	/*
6978	* Wakeup any thread waiting for the page to be un-cleaning.
6979	*/
6980	dwp->dw_mask \|= DW_PAGE_WAKEUP;
6981
6982	commit_next_page:
6983	if (clear_refmod)
6984	pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
6985
6986	target_offset += PAGE_SIZE_64;
6987	xfer_size -= PAGE_SIZE;
6988	entry++;
6989
6990	if (dwp->dw_mask) {
6991	if (dwp->dw_mask & ~(DW_clear_busy \| DW_PAGE_WAKEUP)) {
6992	VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
6993
6994	if (dw_count >= dw_limit) {
6995	vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[`0`], dw_count);
6996
6997	dwp = &dw_array[`0`];
6998	dw_count = `0`;
6999	}
7000	} else {
7001	if (dwp->dw_mask & DW_clear_busy)
7002	m->vmp_busy = FALSE;
7003
7004	if (dwp->dw_mask & DW_PAGE_WAKEUP)
7005	PAGE_WAKEUP(m);
7006	}
7007	}
7008	}
7009	if (dw_count)
7010	vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[`0`], dw_count);
7011
7012	if (fast_path_possible) {
7013
7014	assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7015	assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7016
7017	if (local_queue_count \|\| unwired_count) {
7018
7019	if (local_queue_count) {
7020	vm_page_t first_target;
7021	vm_page_queue_head_t *target_queue;
7022
7023	if (throttle_page)
7024	target_queue = &vm_page_queue_throttled;
7025	else {
7026	if (flags & UPL_COMMIT_INACTIVATE) {
7027	if (shadow_object->internal)
7028	target_queue = &vm_page_queue_anonymous;
7029	else
7030	target_queue = &vm_page_queue_inactive;
7031	} else
7032	target_queue = &vm_page_queue_active;
7033	}
7034	/*
7035	* Transfer the entire local queue to a regular LRU page queues.
7036	*/
7037	vm_page_lockspin_queues();
7038
7039	first_target = (vm_page_t) vm_page_queue_first(target_queue);
7040
7041	if (vm_page_queue_empty(target_queue))
7042	target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7043	else
7044	first_target->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7045
7046	target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7047	first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7048	last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
7049
7050	/*
7051	* Adjust the global page counts.
7052	*/
7053	if (throttle_page) {
7054	vm_page_throttled_count += local_queue_count;
7055	} else {
7056	if (flags & UPL_COMMIT_INACTIVATE) {
7057	if (shadow_object->internal)
7058	vm_page_anonymous_count += local_queue_count;
7059	vm_page_inactive_count += local_queue_count;
7060
7061	token_new_pagecount += local_queue_count;
7062	} else
7063	vm_page_active_count += local_queue_count;
7064
7065	if (shadow_object->internal)
7066	vm_page_pageable_internal_count += local_queue_count;
7067	else
7068	vm_page_pageable_external_count += local_queue_count;
7069	}
7070	} else {
7071	vm_page_lockspin_queues();
7072	}
7073	if (unwired_count) {
7074	vm_page_wire_count -= unwired_count;
7075	VM_CHECK_MEMORYSTATUS;
7076	}
7077	vm_page_unlock_queues();
7078
7079	VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7080	}
7081	}
7082	occupied = `1`;
7083
7084	if (upl->flags & UPL_DEVICE_MEMORY) {
7085	occupied = `0`;
7086	} else if (upl->flags & UPL_LITE) {
7087	int pg_num;
7088	int i;
7089
7090	occupied = `0`;
7091
7092	if (!fast_path_full_commit) {
7093	pg_num = upl->size/PAGE_SIZE;
7094	pg_num = (pg_num + `31`) >> `5`;
7095
7096	for (i = `0`; i < pg_num; i++) {
7097	if (lite_list[i] != `0`) {
7098	occupied = `1`;
7099	break;
7100	}
7101	}
7102	}
7103	} else {
7104	if (vm_page_queue_empty(&upl->map_object->memq))
7105	occupied = `0`;
7106	}
7107	if (occupied == `0`) {
7108	/*
7109	* If this UPL element belongs to a Vector UPL and is
7110	* empty, then this is the right function to deallocate
7111	* it. So go ahead set the *empty variable. The flag
7112	* UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7113	* should be considered relevant for the Vector UPL and not
7114	* the internal UPLs.
7115	*/
7116	if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) \|\| isVectorUPL)
7117	*empty = TRUE;
7118
7119	if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7120	/*
7121	* this is not a paging object
7122	* so we need to drop the paging reference
7123	* that was taken when we created the UPL
7124	* against this object
7125	*/
7126	vm_object_activity_end(shadow_object);
7127	vm_object_collapse(shadow_object, `0`, TRUE);
7128	} else {
7129	/*
7130	* we dontated the paging reference to
7131	* the map object... vm_pageout_object_terminate
7132	* will drop this reference
7133	*/
7134	}
7135	}
7136	VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7137	vm_object_unlock(shadow_object);
7138	if (object != shadow_object)
7139	vm_object_unlock(object);
7140
7141	if(!isVectorUPL)
7142	upl_unlock(upl);
7143	else {
7144	/*
7145	* If we completed our operations on an UPL that is
7146	* part of a Vectored UPL and if empty is TRUE, then
7147	* we should go ahead and deallocate this UPL element.
7148	* Then we check if this was the last of the UPL elements
7149	* within that Vectored UPL. If so, set empty to TRUE
7150	* so that in ubc_upl_commit_range or ubc_upl_commit, we
7151	* can go ahead and deallocate the Vector UPL too.
7152	*/
7153	if(*empty==TRUE) {
7154	*empty = vector_upl_set_subupl(vector_upl, upl, `0`);
7155	upl_deallocate(upl);
7156	}
7157	goto process_upl_to_commit;
7158	}
7159	if (pgpgout_count) {
7160	DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7161	}
7162
7163	return KERN_SUCCESS;
7164	}
7165
7166	kern_return_t
7167	upl_abort_range(
7168	upl_t upl,
7169	upl_offset_t offset,
7170	upl_size_t size,
7171	int error,
7172	boolean_t *empty)
7173	{
7174	upl_page_info_t *user_page_list = NULL;
7175	upl_size_t xfer_size, subupl_size = size;
7176	vm_object_t shadow_object;
7177	vm_object_t object;
7178	vm_object_offset_t target_offset;
7179	upl_offset_t subupl_offset = offset;
7180	int entry;
7181	wpl_array_t lite_list;
7182	int occupied;
7183	struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7184	struct vm_page_delayed_work *dwp;
7185	int dw_count;
7186	int dw_limit;
7187	int isVectorUPL = `0`;
7188	upl_t vector_upl = NULL;
7189
7190	*empty = FALSE;
7191
7192	if (upl == UPL_NULL)
7193	return KERN_INVALID_ARGUMENT;
7194
7195	if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7196	return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, `0`, empty);
7197
7198	if((isVectorUPL = vector_upl_is_valid(upl))) {
7199	vector_upl = upl;
7200	upl_lock(vector_upl);
7201	}
7202	else
7203	upl_lock(upl);
7204
7205	process_upl_to_abort:
7206	if(isVectorUPL) {
7207	size = subupl_size;
7208	offset = subupl_offset;
7209	if(size == `0`) {
7210	upl_unlock(vector_upl);
7211	return KERN_SUCCESS;
7212	}
7213	upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7214	if(upl == NULL) {
7215	upl_unlock(vector_upl);
7216	return KERN_FAILURE;
7217	}
7218	subupl_size -= size;
7219	subupl_offset += size;
7220	}
7221
7222	*empty = FALSE;
7223
7224	#if UPL_DEBUG
7225	if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7226	(void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[`0`], UPL_DEBUG_STACK_FRAMES);
7227
7228	upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7229	upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7230	upl->upl_commit_records[upl->upl_commit_index].c_aborted = `1`;
7231
7232	upl->upl_commit_index++;
7233	}
7234	#endif
7235	if (upl->flags & UPL_DEVICE_MEMORY)
7236	xfer_size = `0`;
7237	else if ((offset + size) <= upl->size)
7238	xfer_size = size;
7239	else {
7240	if(!isVectorUPL)
7241	upl_unlock(upl);
7242	else {
7243	upl_unlock(vector_upl);
7244	}
7245
7246	return KERN_FAILURE;
7247	}
7248	if (upl->flags & UPL_INTERNAL) {
7249	lite_list = (wpl_array_t)
7250	((((uintptr_t)upl) + sizeof(struct upl))
7251	+ ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7252
7253	user_page_list = (upl_page_info_t ) (((uintptr_t)upl) + sizeof(struct* upl));
7254	} else {
7255	lite_list = (wpl_array_t)
7256	(((uintptr_t)upl) + sizeof(struct upl));
7257	}
7258	object = upl->map_object;
7259
7260	if (upl->flags & UPL_SHADOWED) {
7261	vm_object_lock(object);
7262	shadow_object = object->shadow;
7263	} else
7264	shadow_object = object;
7265
7266	entry = offset/PAGE_SIZE;
7267	target_offset = (vm_object_offset_t)offset;
7268
7269	assert(!(target_offset & PAGE_MASK));
7270	assert(!(xfer_size & PAGE_MASK));
7271
7272	if (upl->flags & UPL_KERNEL_OBJECT)
7273	vm_object_lock_shared(shadow_object);
7274	else
7275	vm_object_lock(shadow_object);
7276
7277	if (upl->flags & UPL_ACCESS_BLOCKED) {
7278	assert(shadow_object->blocked_access);
7279	shadow_object->blocked_access = FALSE;
7280	vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7281	}
7282
7283	dwp = &dw_array[`0`];
7284	dw_count = `0`;
7285	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7286
7287	if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7288	panic("upl_abort_range: kernel_object being DUMPED");
7289
7290	while (xfer_size) {
7291	vm_page_t t, m;
7292	unsigned int pg_num;
7293	boolean_t needed;
7294
7295	pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7296	assert(pg_num == target_offset/PAGE_SIZE);
7297
7298	needed = FALSE;
7299
7300	if (user_page_list)
7301	needed = user_page_list[pg_num].needed;
7302
7303	dwp->dw_mask = `0`;
7304	m = VM_PAGE_NULL;
7305
7306	if (upl->flags & UPL_LITE) {
7307
7308	if (lite_list[pg_num>>`5`] & (`1` << (pg_num & `31`))) {
7309	lite_list[pg_num>>`5`] &= ~(`1` << (pg_num & `31`));
7310
7311	if ( !(upl->flags & UPL_KERNEL_OBJECT))
7312	m = vm_page_lookup(shadow_object, target_offset +
7313	(upl->offset - shadow_object->paging_offset));
7314	}
7315	}
7316	if (upl->flags & UPL_SHADOWED) {
7317	if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7318	t->vmp_free_when_done = FALSE;
7319
7320	VM_PAGE_FREE(t);
7321
7322	if (m == VM_PAGE_NULL)
7323	m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7324	}
7325	}
7326	if ((upl->flags & UPL_KERNEL_OBJECT))
7327	goto abort_next_page;
7328
7329	if (m != VM_PAGE_NULL) {
7330
7331	assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7332
7333	if (m->vmp_absent) {
7334	boolean_t must_free = TRUE;
7335
7336	/*
7337	* COPYOUT = FALSE case
7338	* check for error conditions which must
7339	* be passed back to the pages customer
7340	*/
7341	if (error & UPL_ABORT_RESTART) {
7342	m->vmp_restart = TRUE;
7343	m->vmp_absent = FALSE;
7344	m->vmp_unusual = TRUE;
7345	must_free = FALSE;
7346	} else if (error & UPL_ABORT_UNAVAILABLE) {
7347	m->vmp_restart = FALSE;
7348	m->vmp_unusual = TRUE;
7349	must_free = FALSE;
7350	} else if (error & UPL_ABORT_ERROR) {
7351	m->vmp_restart = FALSE;
7352	m->vmp_absent = FALSE;
7353	m->vmp_error = TRUE;
7354	m->vmp_unusual = TRUE;
7355	must_free = FALSE;
7356	}
7357	if (m->vmp_clustered && needed == FALSE) {
7358	/*
7359	* This page was a part of a speculative
7360	* read-ahead initiated by the kernel
7361	* itself. No one is expecting this
7362	* page and no one will clean up its
7363	* error state if it ever becomes valid
7364	* in the future.
7365	* We have to free it here.
7366	*/
7367	must_free = TRUE;
7368	}
7369	m->vmp_cleaning = FALSE;
7370
7371	if (m->vmp_overwriting && !m->vmp_busy) {
7372	/*
7373	* this shouldn't happen since
7374	* this is an 'absent' page, but
7375	* it doesn't hurt to check for
7376	* the 'alternate' method of
7377	* stabilizing the page...
7378	* we will mark 'busy' to be cleared
7379	* in the following code which will
7380	* take care of the primary stabilzation
7381	* method (i.e. setting 'busy' to TRUE)
7382	*/
7383	dwp->dw_mask \|= DW_vm_page_unwire;
7384	}
7385	m->vmp_overwriting = FALSE;
7386
7387	dwp->dw_mask \|= (DW_clear_busy \| DW_PAGE_WAKEUP);
7388
7389	if (must_free == TRUE)
7390	dwp->dw_mask \|= DW_vm_page_free;
7391	else
7392	dwp->dw_mask \|= DW_vm_page_activate;
7393	} else {
7394	/*
7395	* Handle the trusted pager throttle.
7396	*/
7397	if (m->vmp_laundry)
7398	dwp->dw_mask \|= DW_vm_pageout_throttle_up;
7399
7400	if (upl->flags & UPL_ACCESS_BLOCKED) {
7401	/*
7402	* We blocked access to the pages in this UPL.
7403	* Clear the "busy" bit and wake up any waiter
7404	* for this page.
7405	*/
7406	dwp->dw_mask \|= DW_clear_busy;
7407	}
7408	if (m->vmp_overwriting) {
7409	if (m->vmp_busy)
7410	dwp->dw_mask \|= DW_clear_busy;
7411	else {
7412	/*
7413	* deal with the 'alternate' method
7414	* of stabilizing the page...
7415	* we will either free the page
7416	* or mark 'busy' to be cleared
7417	* in the following code which will
7418	* take care of the primary stabilzation
7419	* method (i.e. setting 'busy' to TRUE)
7420	*/
7421	dwp->dw_mask \|= DW_vm_page_unwire;
7422	}
7423	m->vmp_overwriting = FALSE;
7424	}
7425	m->vmp_free_when_done = FALSE;
7426	m->vmp_cleaning = FALSE;
7427
7428	if (error & UPL_ABORT_DUMP_PAGES) {
7429	pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7430
7431	dwp->dw_mask \|= DW_vm_page_free;
7432	} else {
7433	if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7434	if (error & UPL_ABORT_REFERENCE) {
7435	/*
7436	* we've been told to explictly
7437	* reference this page... for
7438	* file I/O, this is done by
7439	* implementing an LRU on the inactive q
7440	*/
7441	dwp->dw_mask \|= DW_vm_page_lru;
7442
7443	} else if ( !VM_PAGE_PAGEABLE(m))
7444	dwp->dw_mask \|= DW_vm_page_deactivate_internal;
7445	}
7446	dwp->dw_mask \|= DW_PAGE_WAKEUP;
7447	}
7448	}
7449	}
7450	abort_next_page:
7451	target_offset += PAGE_SIZE_64;
7452	xfer_size -= PAGE_SIZE;
7453	entry++;
7454
7455	if (dwp->dw_mask) {
7456	if (dwp->dw_mask & ~(DW_clear_busy \| DW_PAGE_WAKEUP)) {
7457	VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7458
7459	if (dw_count >= dw_limit) {
7460	vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[`0`], dw_count);
7461
7462	dwp = &dw_array[`0`];
7463	dw_count = `0`;
7464	}
7465	} else {
7466	if (dwp->dw_mask & DW_clear_busy)
7467	m->vmp_busy = FALSE;
7468
7469	if (dwp->dw_mask & DW_PAGE_WAKEUP)
7470	PAGE_WAKEUP(m);
7471	}
7472	}
7473	}
7474	if (dw_count)
7475	vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[`0`], dw_count);
7476
7477	occupied = `1`;
7478
7479	if (upl->flags & UPL_DEVICE_MEMORY) {
7480	occupied = `0`;
7481	} else if (upl->flags & UPL_LITE) {
7482	int pg_num;
7483	int i;
7484
7485	pg_num = upl->size/PAGE_SIZE;
7486	pg_num = (pg_num + `31`) >> `5`;
7487	occupied = `0`;
7488
7489	for (i = `0`; i < pg_num; i++) {
7490	if (lite_list[i] != `0`) {
7491	occupied = `1`;
7492	break;
7493	}
7494	}
7495	} else {
7496	if (vm_page_queue_empty(&upl->map_object->memq))
7497	occupied = `0`;
7498	}
7499	if (occupied == `0`) {
7500	/*
7501	* If this UPL element belongs to a Vector UPL and is
7502	* empty, then this is the right function to deallocate
7503	* it. So go ahead set the *empty variable. The flag
7504	* UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7505	* should be considered relevant for the Vector UPL and
7506	* not the internal UPLs.
7507	*/
7508	if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) \|\| isVectorUPL)
7509	*empty = TRUE;
7510
7511	if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7512	/*
7513	* this is not a paging object
7514	* so we need to drop the paging reference
7515	* that was taken when we created the UPL
7516	* against this object
7517	*/
7518	vm_object_activity_end(shadow_object);
7519	vm_object_collapse(shadow_object, `0`, TRUE);
7520	} else {
7521	/*
7522	* we dontated the paging reference to
7523	* the map object... vm_pageout_object_terminate
7524	* will drop this reference
7525	*/
7526	}
7527	}
7528	vm_object_unlock(shadow_object);
7529	if (object != shadow_object)
7530	vm_object_unlock(object);
7531
7532	if(!isVectorUPL)
7533	upl_unlock(upl);
7534	else {
7535	/*
7536	* If we completed our operations on an UPL that is
7537	* part of a Vectored UPL and if empty is TRUE, then
7538	* we should go ahead and deallocate this UPL element.
7539	* Then we check if this was the last of the UPL elements
7540	* within that Vectored UPL. If so, set empty to TRUE
7541	* so that in ubc_upl_abort_range or ubc_upl_abort, we
7542	* can go ahead and deallocate the Vector UPL too.
7543	*/
7544	if(*empty == TRUE) {
7545	*empty = vector_upl_set_subupl(vector_upl, upl,`0`);
7546	upl_deallocate(upl);
7547	}
7548	goto process_upl_to_abort;
7549	}
7550
7551	return KERN_SUCCESS;
7552	}
7553
7554
7555	kern_return_t
7556	upl_abort(
7557	upl_t upl,
7558	int error)
7559	{
7560	boolean_t empty;
7561
7562	if (upl == UPL_NULL)
7563	return KERN_INVALID_ARGUMENT;
7564
7565	return upl_abort_range(upl, `0`, upl->size, error, &empty);
7566	}
7567
7568
/* an option on commit should be wire */
7570	kern_return_t
7571	upl_commit(
7572	upl_t upl,
7573	upl_page_info_t *page_list,
7574	mach_msg_type_number_t count)
7575	{
7576	boolean_t empty;
7577
7578	if (upl == UPL_NULL)
7579	return KERN_INVALID_ARGUMENT;
7580
7581	return upl_commit_range(upl, `0`, upl->size, `0`, page_list, count, &empty);
7582	}
7583
7584
7585	void
7586	iopl_valid_data(
7587	upl_t upl,
7588	vm_tag_t tag)
7589	{
7590	vm_object_t object;
7591	vm_offset_t offset;
7592	vm_page_t m, nxt_page = VM_PAGE_NULL;
7593	upl_size_t size;
7594	int wired_count = `0`;
7595
7596	if (upl == NULL)
7597	panic("iopl_valid_data: NULL upl");
7598	if (vector_upl_is_valid(upl))
7599	panic("iopl_valid_data: vector upl");
7600	if ((upl->flags & (UPL_DEVICE_MEMORY\|UPL_SHADOWED\|UPL_ACCESS_BLOCKED\|UPL_IO_WIRE\|UPL_INTERNAL)) != UPL_IO_WIRE)
7601	panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7602
7603	object = upl->map_object;
7604
7605	if (object == kernel_object \|\| object == compressor_object)
7606	panic("iopl_valid_data: object == kernel or compressor");
7607
7608	if (object->purgable == VM_PURGABLE_VOLATILE \|\|
7609	object->purgable == VM_PURGABLE_EMPTY)
7610	panic("iopl_valid_data: object %p purgable %d",
7611	object, object->purgable);
7612
7613	size = upl->size;
7614
7615	vm_object_lock(object);
7616	VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7617
7618	if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7619	nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7620	else
7621	offset = `0` + upl->offset - object->paging_offset;
7622
7623	while (size) {
7624
7625	if (nxt_page != VM_PAGE_NULL) {
7626	m = nxt_page;
7627	nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
7628	} else {
7629	m = vm_page_lookup(object, offset);
7630	offset += PAGE_SIZE;
7631
7632	if (m == VM_PAGE_NULL)
7633	panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7634	}
7635	if (m->vmp_busy) {
7636	if (!m->vmp_absent)
7637	panic("iopl_valid_data: busy page w/o absent");
7638
7639	if (m->vmp_pageq.next \|\| m->vmp_pageq.prev)
7640	panic("iopl_valid_data: busy+absent page on page queue");
7641	if (m->vmp_reusable) {
7642	panic("iopl_valid_data: %p is reusable", m);
7643	}
7644
7645	m->vmp_absent = FALSE;
7646	m->vmp_dirty = TRUE;
7647	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
7648	assert(m->vmp_wire_count == `0`);
7649	m->vmp_wire_count++;
7650	assert(m->vmp_wire_count);
7651	if (m->vmp_wire_count == `1`) {
7652	m->vmp_q_state = VM_PAGE_IS_WIRED;
7653	wired_count++;
7654	} else {
7655	panic("iopl_valid_data: %p already wired\n", m);
7656	}
7657
7658	PAGE_WAKEUP_DONE(m);
7659	}
7660	size -= PAGE_SIZE;
7661	}
7662	if (wired_count) {
7663
7664	VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7665	assert(object->resident_page_count >= object->wired_page_count);
7666
7667	/ no need to adjust purgeable accounting for this object: /
7668	assert(object->purgable != VM_PURGABLE_VOLATILE);
7669	assert(object->purgable != VM_PURGABLE_EMPTY);
7670
7671	vm_page_lockspin_queues();
7672	vm_page_wire_count += wired_count;
7673	vm_page_unlock_queues();
7674	}
7675	VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7676	vm_object_unlock(object);
7677	}
7678
7679
7680	void
7681	vm_object_set_pmap_cache_attr(
7682	vm_object_t object,
7683	upl_page_info_array_t user_page_list,
7684	unsigned int num_pages,
7685	boolean_t batch_pmap_op)
7686	{
7687	unsigned int cache_attr = `0`;
7688
7689	cache_attr = object->wimg_bits & VM_WIMG_MASK;
7690	assert(user_page_list);
7691	if (cache_attr != VM_WIMG_USE_DEFAULT) {
7692	PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7693	}
7694	}
7695
7696
7697	boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
7698	kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t , int, int**);
7699
7700
7701
7702	boolean_t
7703	vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7704	wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7705	{
7706	vm_page_t dst_page;
7707	unsigned int entry;
7708	int page_count;
7709	int delayed_unlock = `0`;
7710	boolean_t retval = TRUE;
7711	ppnum_t phys_page;
7712
7713	vm_object_lock_assert_exclusive(object);
7714	assert(object->purgable != VM_PURGABLE_VOLATILE);
7715	assert(object->purgable != VM_PURGABLE_EMPTY);
7716	assert(object->pager == NULL);
7717	assert(object->copy == NULL);
7718	assert(object->shadow == NULL);
7719
7720	page_count = object->resident_page_count;
7721	dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7722
7723	vm_page_lock_queues();
7724
7725	while (page_count--) {
7726
7727	if (dst_page->vmp_busy \|\|
7728	dst_page->vmp_fictitious \|\|
7729	dst_page->vmp_absent \|\|
7730	dst_page->vmp_error \|\|
7731	dst_page->vmp_cleaning \|\|
7732	dst_page->vmp_restart \|\|
7733	dst_page->vmp_laundry) {
7734	retval = FALSE;
7735	goto done;
7736	}
7737	if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
7738	retval = FALSE;
7739	goto done;
7740	}
7741	dst_page->vmp_reference = TRUE;
7742
7743	vm_page_wire(dst_page, tag, FALSE);
7744
7745	if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7746	SET_PAGE_DIRTY(dst_page, FALSE);
7747	}
7748	entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
7749	assert(entry >= `0` && entry < object->resident_page_count);
7750	lite_list[entry>>`5`] \|= `1` << (entry & `31`);
7751
7752	phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7753
7754	if (phys_page > upl->highest_page)
7755	upl->highest_page = phys_page;
7756
7757	if (user_page_list) {
7758	user_page_list[entry].phys_addr = phys_page;
7759	user_page_list[entry].absent = dst_page->vmp_absent;
7760	user_page_list[entry].dirty = dst_page->vmp_dirty;
7761	user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
7762	user_page_list[entry].precious = dst_page->vmp_precious;
7763	user_page_list[entry].device = FALSE;
7764	user_page_list[entry].speculative = FALSE;
7765	user_page_list[entry].cs_validated = FALSE;
7766	user_page_list[entry].cs_tainted = FALSE;
7767	user_page_list[entry].cs_nx = FALSE;
7768	user_page_list[entry].needed = FALSE;
7769	user_page_list[entry].mark = FALSE;
7770	}
7771	if (delayed_unlock++ > `256`) {
7772	delayed_unlock = `0`;
7773	lck_mtx_yield(&vm_page_queue_lock);
7774
7775	VM_CHECK_MEMORYSTATUS;
7776	}
7777	dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq);
7778	}
7779	done:
7780	vm_page_unlock_queues();
7781
7782	VM_CHECK_MEMORYSTATUS;
7783
7784	return (retval);
7785	}
7786
7787
7788	kern_return_t
7789	vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7790	wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset,
7791	int page_count, int* page_grab_count)
7792	{
7793	vm_page_t dst_page;
7794	boolean_t no_zero_fill = FALSE;
7795	int interruptible;
7796	int pages_wired = `0`;
7797	int pages_inserted = `0`;
7798	int entry = `0`;
7799	uint64_t delayed_ledger_update = `0`;
7800	kern_return_t ret = KERN_SUCCESS;
7801	int grab_options;
7802	ppnum_t phys_page;
7803
7804	vm_object_lock_assert_exclusive(object);
7805	assert(object->purgable != VM_PURGABLE_VOLATILE);
7806	assert(object->purgable != VM_PURGABLE_EMPTY);
7807	assert(object->pager == NULL);
7808	assert(object->copy == NULL);
7809	assert(object->shadow == NULL);
7810
7811	if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7812	interruptible = THREAD_ABORTSAFE;
7813	else
7814	interruptible = THREAD_UNINT;
7815
7816	if (cntrl_flags & (UPL_NOZEROFILL \| UPL_NOZEROFILLIO))
7817	no_zero_fill = TRUE;
7818
7819	grab_options = `0`;
7820	#if CONFIG_SECLUDED_MEMORY
7821	if (object->can_grab_secluded) {
7822	grab_options \|= VM_PAGE_GRAB_SECLUDED;
7823	}
7824	#endif /* CONFIG_SECLUDED_MEMORY */
7825
7826	while (page_count--) {
7827
7828	while ((dst_page = vm_page_grab_options(grab_options))
7829	== VM_PAGE_NULL) {
7830
7831	OSAddAtomic(page_count, &vm_upl_wait_for_pages);
7832
7833	VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, `0`, `0`, `0`);
7834
7835	if (vm_page_wait(interruptible) == FALSE) {
7836	/*
7837	* interrupted case
7838	*/
7839	OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7840
7841	VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, `0`, `0`, -`1`);
7842
7843	ret = MACH_SEND_INTERRUPTED;
7844	goto done;
7845	}
7846	OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7847
7848	VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, `0`, `0`, `0`);
7849	}
7850	if (no_zero_fill == FALSE)
7851	vm_page_zero_fill(dst_page);
7852	else
7853	dst_page->vmp_absent = TRUE;
7854
7855	dst_page->vmp_reference = TRUE;
7856
7857	if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7858	SET_PAGE_DIRTY(dst_page, FALSE);
7859	}
7860	if (dst_page->vmp_absent == FALSE) {
7861	assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q);
7862	assert(dst_page->vmp_wire_count == `0`);
7863	dst_page->vmp_wire_count++;
7864	dst_page->vmp_q_state = VM_PAGE_IS_WIRED;
7865	assert(dst_page->vmp_wire_count);
7866	pages_wired++;
7867	PAGE_WAKEUP_DONE(dst_page);
7868	}
7869	pages_inserted++;
7870
7871	vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
7872
7873	lite_list[entry>>`5`] \|= `1` << (entry & `31`);
7874
7875	phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7876
7877	if (phys_page > upl->highest_page)
7878	upl->highest_page = phys_page;
7879
7880	if (user_page_list) {
7881	user_page_list[entry].phys_addr = phys_page;
7882	user_page_list[entry].absent = dst_page->vmp_absent;
7883	user_page_list[entry].dirty = dst_page->vmp_dirty;
7884	user_page_list[entry].free_when_done = FALSE;
7885	user_page_list[entry].precious = FALSE;
7886	user_page_list[entry].device = FALSE;
7887	user_page_list[entry].speculative = FALSE;
7888	user_page_list[entry].cs_validated = FALSE;
7889	user_page_list[entry].cs_tainted = FALSE;
7890	user_page_list[entry].cs_nx = FALSE;
7891	user_page_list[entry].needed = FALSE;
7892	user_page_list[entry].mark = FALSE;
7893	}
7894	entry++;
7895	*dst_offset += PAGE_SIZE_64;
7896	}
7897	done:
7898	if (pages_wired) {
7899	vm_page_lockspin_queues();
7900	vm_page_wire_count += pages_wired;
7901	vm_page_unlock_queues();
7902	}
7903	if (pages_inserted) {
7904	if (object->internal) {
7905	OSAddAtomic(pages_inserted, &vm_page_internal_count);
7906	} else {
7907	OSAddAtomic(pages_inserted, &vm_page_external_count);
7908	}
7909	}
7910	if (delayed_ledger_update) {
7911	task_t owner;
7912	int ledger_idx_volatile;
7913	int ledger_idx_nonvolatile;
7914	int ledger_idx_volatile_compressed;
7915	int ledger_idx_nonvolatile_compressed;
7916	boolean_t do_footprint;
7917
7918	owner = VM_OBJECT_OWNER(object);
7919	assert(owner);
7920
7921	vm_object_ledger_tag_ledgers(object,
7922	&ledger_idx_volatile,
7923	&ledger_idx_nonvolatile,
7924	&ledger_idx_volatile_compressed,
7925	&ledger_idx_nonvolatile_compressed,
7926	&do_footprint);
7927
7928	/ more non-volatile bytes /
7929	ledger_credit(owner->ledger,
7930	ledger_idx_nonvolatile,
7931	delayed_ledger_update);
7932	if (do_footprint) {
7933	/ more footprint /
7934	ledger_credit(owner->ledger,
7935	task_ledgers.phys_footprint,
7936	delayed_ledger_update);
7937	}
7938	}
7939
7940	assert(page_grab_count);
7941	*page_grab_count = pages_inserted;
7942
7943	return (ret);
7944	}
7945
7946
7947
7948	kern_return_t
7949	vm_object_iopl_request(
7950	vm_object_t object,
7951	vm_object_offset_t offset,
7952	upl_size_t size,
7953	upl_t *upl_ptr,
7954	upl_page_info_array_t user_page_list,
7955	unsigned int *page_list_count,
7956	upl_control_flags_t cntrl_flags,
7957	vm_tag_t tag)
7958	{
7959	vm_page_t dst_page;
7960	vm_object_offset_t dst_offset;
7961	upl_size_t xfer_size;
7962	upl_t upl = NULL;
7963	unsigned int entry;
7964	wpl_array_t lite_list = NULL;
7965	int no_zero_fill = FALSE;
7966	unsigned int size_in_pages;
7967	int page_grab_count = `0`;
7968	u_int32_t psize;
7969	kern_return_t ret;
7970	vm_prot_t prot;
7971	struct vm_object_fault_info fault_info = {};
7972	struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7973	struct vm_page_delayed_work *dwp;
7974	int dw_count;
7975	int dw_limit;
7976	int dw_index;
7977	boolean_t caller_lookup;
7978	int io_tracking_flag = `0`;
7979	int interruptible;
7980	ppnum_t phys_page;
7981
7982	boolean_t set_cache_attr_needed = FALSE;
7983	boolean_t free_wired_pages = FALSE;
7984	boolean_t fast_path_empty_req = FALSE;
7985	boolean_t fast_path_full_req = FALSE;
7986
7987	if (cntrl_flags & ~UPL_VALID_FLAGS) {
7988	/*
7989	* For forward compatibility's sake,
7990	* reject any unknown flag.
7991	*/
7992	return KERN_INVALID_VALUE;
7993	}
7994	if (vm_lopage_needed == FALSE)
7995	cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
7996
7997	if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
7998	if ( (cntrl_flags & (UPL_SET_IO_WIRE \| UPL_SET_LITE)) != (UPL_SET_IO_WIRE \| UPL_SET_LITE))
7999	return KERN_INVALID_VALUE;
8000
8001	if (object->phys_contiguous) {
8002	if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8003	return KERN_INVALID_ADDRESS;
8004
8005	if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8006	return KERN_INVALID_ADDRESS;
8007	}
8008	}
8009	if (cntrl_flags & (UPL_NOZEROFILL \| UPL_NOZEROFILLIO))
8010	no_zero_fill = TRUE;
8011
8012	if (cntrl_flags & UPL_COPYOUT_FROM)
8013	prot = VM_PROT_READ;
8014	else
8015	prot = VM_PROT_READ \| VM_PROT_WRITE;
8016
8017	if ((!object->internal) && (object->paging_offset != `0`))
8018	panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8019
8020	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, `0`);
8021
8022	#if CONFIG_IOSCHED \|\| UPL_DEBUG
8023	if ((object->io_tracking && object != kernel_object) \|\| upl_debug_enabled)
8024	io_tracking_flag \|= UPL_CREATE_IO_TRACKING;
8025	#endif
8026
8027	#if CONFIG_IOSCHED
8028	if (object->io_tracking) {
8029	/ Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs /
8030	if (object != kernel_object)
8031	io_tracking_flag \|= UPL_CREATE_EXPEDITE_SUP;
8032	}
8033	#endif
8034
8035	if (object->phys_contiguous)
8036	psize = PAGE_SIZE;
8037	else
8038	psize = size;
8039
8040	if (cntrl_flags & UPL_SET_INTERNAL) {
8041	upl = upl_create(UPL_CREATE_INTERNAL \| UPL_CREATE_LITE \| io_tracking_flag, UPL_IO_WIRE, psize);
8042
8043	user_page_list = (upl_page_info_t ) (((uintptr_t)upl) + sizeof(struct* upl));
8044	lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8045	((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8046	if (size == `0`) {
8047	user_page_list = NULL;
8048	lite_list = NULL;
8049	}
8050	} else {
8051	upl = upl_create(UPL_CREATE_LITE \| io_tracking_flag, UPL_IO_WIRE, psize);
8052
8053	lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8054	if (size == `0`) {
8055	lite_list = NULL;
8056	}
8057	}
8058	if (user_page_list)
8059	user_page_list[`0`].device = FALSE;
8060	*upl_ptr = upl;
8061
8062	if (cntrl_flags & UPL_NOZEROFILLIO) {
8063	DTRACE_VM4(upl_nozerofillio,
8064	vm_object_t, object,
8065	vm_object_offset_t, offset,
8066	upl_size_t, size,
8067	upl_t, upl);
8068	}
8069
8070	upl->map_object = object;
8071	upl->size = size;
8072
8073	size_in_pages = size / PAGE_SIZE;
8074
8075	if (object == kernel_object &&
8076	!(cntrl_flags & (UPL_NEED_32BIT_ADDR \| UPL_BLOCK_ACCESS))) {
8077	upl->flags \|= UPL_KERNEL_OBJECT;
8078	#if UPL_DEBUG
8079	vm_object_lock(object);
8080	#else
8081	vm_object_lock_shared(object);
8082	#endif
8083	} else {
8084	vm_object_lock(object);
8085	vm_object_activity_begin(object);
8086	}
8087	/*
8088	* paging in progress also protects the paging_offset
8089	*/
8090	upl->offset = offset + object->paging_offset;
8091
8092	if (cntrl_flags & UPL_BLOCK_ACCESS) {
8093	/*
8094	* The user requested that access to the pages in this UPL
8095	* be blocked until the UPL is commited or aborted.
8096	*/
8097	upl->flags \|= UPL_ACCESS_BLOCKED;
8098	}
8099
8100	#if CONFIG_IOSCHED \|\| UPL_DEBUG
8101	if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8102	vm_object_activity_begin(object);
8103	queue_enter(&object->uplq, upl, upl_t, uplq);
8104	}
8105	#endif
8106
8107	if (object->phys_contiguous) {
8108
8109	if (upl->flags & UPL_ACCESS_BLOCKED) {
8110	assert(!object->blocked_access);
8111	object->blocked_access = TRUE;
8112	}
8113
8114	vm_object_unlock(object);
8115
8116	/*
8117	* don't need any shadow mappings for this one
8118	* since it is already I/O memory
8119	*/
8120	upl->flags \|= UPL_DEVICE_MEMORY;
8121
8122	upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - `1`)>>PAGE_SHIFT);
8123
8124	if (user_page_list) {
8125	user_page_list[`0`].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8126	user_page_list[`0`].device = TRUE;
8127	}
8128	if (page_list_count != NULL) {
8129	if (upl->flags & UPL_INTERNAL)
8130	*page_list_count = `0`;
8131	else
8132	*page_list_count = `1`;
8133	}
8134
8135	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, `0`, `0`);
8136	return KERN_SUCCESS;
8137	}
8138	if (object != kernel_object && object != compressor_object) {
8139	/*
8140	* Protect user space from future COW operations
8141	*/
8142	#if VM_OBJECT_TRACKING_OP_TRUESHARE
8143	if (!object->true_share &&
8144	vm_object_tracking_inited) {
8145	void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8146	int num = `0`;
8147
8148	num = OSBacktrace(bt,
8149	VM_OBJECT_TRACKING_BTDEPTH);
8150	btlog_add_entry(vm_object_tracking_btlog,
8151	object,
8152	VM_OBJECT_TRACKING_OP_TRUESHARE,
8153	bt,
8154	num);
8155	}
8156	#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8157
8158	vm_object_lock_assert_exclusive(object);
8159	object->true_share = TRUE;
8160
8161	if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8162	object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8163	}
8164
8165	if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8166	object->copy != VM_OBJECT_NULL) {
8167	/*
8168	* Honor copy-on-write obligations
8169	*
8170	* The caller is gathering these pages and
8171	* might modify their contents. We need to
8172	* make sure that the copy object has its own
8173	* private copies of these pages before we let
8174	* the caller modify them.
8175	*
8176	* NOTE: someone else could map the original object
8177	* after we've done this copy-on-write here, and they
8178	* could then see an inconsistent picture of the memory
8179	* while it's being modified via the UPL. To prevent this,
8180	* we would have to block access to these pages until the
8181	* UPL is released. We could use the UPL_BLOCK_ACCESS
8182	* code path for that...
8183	*/
8184	vm_object_update(object,
8185	offset,
8186	size,
8187	NULL,
8188	NULL,
8189	FALSE, / should_return /
8190	MEMORY_OBJECT_COPY_SYNC,
8191	VM_PROT_NO_CHANGE);
8192	VM_PAGEOUT_DEBUG(iopl_cow, `1`);
8193	VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT));
8194	}
8195	if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR \| UPL_BLOCK_ACCESS)) &&
8196	object->purgable != VM_PURGABLE_VOLATILE &&
8197	object->purgable != VM_PURGABLE_EMPTY &&
8198	object->copy == NULL &&
8199	size == object->vo_size &&
8200	offset == `0` &&
8201	object->shadow == NULL &&
8202	object->pager == NULL)
8203	{
8204	if (object->resident_page_count == size_in_pages)
8205	{
8206	assert(object != compressor_object);
8207	assert(object != kernel_object);
8208	fast_path_full_req = TRUE;
8209	}
8210	else if (object->resident_page_count == `0`)
8211	{
8212	assert(object != compressor_object);
8213	assert(object != kernel_object);
8214	fast_path_empty_req = TRUE;
8215	set_cache_attr_needed = TRUE;
8216	}
8217	}
8218
8219	if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8220	interruptible = THREAD_ABORTSAFE;
8221	else
8222	interruptible = THREAD_UNINT;
8223
8224	entry = `0`;
8225
8226	xfer_size = size;
8227	dst_offset = offset;
8228	dw_count = `0`;
8229
8230	if (fast_path_full_req) {
8231
8232	if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8233	goto finish;
8234	/*
8235	* we couldn't complete the processing of this request on the fast path
8236	* so fall through to the slow path and finish up
8237	*/
8238
8239	} else if (fast_path_empty_req) {
8240
8241	if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8242	ret = KERN_MEMORY_ERROR;
8243	goto return_err;
8244	}
8245	ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
8246
8247	if (ret) {
8248	free_wired_pages = TRUE;
8249	goto return_err;
8250	}
8251	goto finish;
8252	}
8253
8254	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8255	fault_info.lo_offset = offset;
8256	fault_info.hi_offset = offset + xfer_size;
8257	fault_info.mark_zf_absent = TRUE;
8258	fault_info.interruptible = interruptible;
8259	fault_info.batch_pmap_op = TRUE;
8260
8261	dwp = &dw_array[`0`];
8262	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8263
8264	while (xfer_size) {
8265	vm_fault_return_t result;
8266
8267	dwp->dw_mask = `0`;
8268
8269	if (fast_path_full_req) {
8270	/*
8271	* if we get here, it means that we ran into a page
8272	* state we couldn't handle in the fast path and
8273	* bailed out to the slow path... since the order
8274	* we look at pages is different between the 2 paths,
8275	* the following check is needed to determine whether
8276	* this page was already processed in the fast path
8277	*/
8278	if (lite_list[entry>>`5`] & (`1` << (entry & `31`)))
8279	goto skip_page;
8280	}
8281	dst_page = vm_page_lookup(object, dst_offset);
8282
8283	if (dst_page == VM_PAGE_NULL \|\|
8284	dst_page->vmp_busy \|\|
8285	dst_page->vmp_error \|\|
8286	dst_page->vmp_restart \|\|
8287	dst_page->vmp_absent \|\|
8288	dst_page->vmp_fictitious) {
8289
8290	if (object == kernel_object)
8291	panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8292	if (object == compressor_object)
8293	panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8294
8295	if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8296	ret = KERN_MEMORY_ERROR;
8297	goto return_err;
8298	}
8299	set_cache_attr_needed = TRUE;
8300
8301	/*
8302	* We just looked up the page and the result remains valid
8303	* until the object lock is release, so send it to
8304	* vm_fault_page() (as "dst_page"), to avoid having to
8305	* look it up again there.
8306	*/
8307	caller_lookup = TRUE;
8308
8309	do {
8310	vm_page_t top_page;
8311	kern_return_t error_code;
8312
8313	fault_info.cluster_size = xfer_size;
8314
8315	vm_object_paging_begin(object);
8316
8317	result = vm_fault_page(object, dst_offset,
8318	prot \| VM_PROT_WRITE, FALSE,
8319	caller_lookup,
8320	&prot, &dst_page, &top_page,
8321	(int *)`0`,
8322	&error_code, no_zero_fill,
8323	FALSE, &fault_info);
8324
8325	/ our lookup is no longer valid at this point /
8326	caller_lookup = FALSE;
8327
8328	switch (result) {
8329
8330	case VM_FAULT_SUCCESS:
8331	page_grab_count++;
8332
8333	if ( !dst_page->vmp_absent) {
8334	PAGE_WAKEUP_DONE(dst_page);
8335	} else {
8336	/*
8337	* we only get back an absent page if we
8338	* requested that it not be zero-filled
8339	* because we are about to fill it via I/O
8340	*
8341	* absent pages should be left BUSY
8342	* to prevent them from being faulted
8343	* into an address space before we've
8344	* had a chance to complete the I/O on
8345	* them since they may contain info that
8346	* shouldn't be seen by the faulting task
8347	*/
8348	}
8349	/*
8350	* Release paging references and
8351	* top-level placeholder page, if any.
8352	*/
8353	if (top_page != VM_PAGE_NULL) {
8354	vm_object_t local_object;
8355
8356	local_object = VM_PAGE_OBJECT(top_page);
8357
8358	/*
8359	* comparing 2 packed pointers
8360	*/
8361	if (top_page->vmp_object != dst_page->vmp_object) {
8362	vm_object_lock(local_object);
8363	VM_PAGE_FREE(top_page);
8364	vm_object_paging_end(local_object);
8365	vm_object_unlock(local_object);
8366	} else {
8367	VM_PAGE_FREE(top_page);
8368	vm_object_paging_end(local_object);
8369	}
8370	}
8371	vm_object_paging_end(object);
8372	break;
8373
8374	case VM_FAULT_RETRY:
8375	vm_object_lock(object);
8376	break;
8377
8378	case VM_FAULT_MEMORY_SHORTAGE:
8379	OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8380
8381	VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, `0`, `0`, `0`);
8382
8383	if (vm_page_wait(interruptible)) {
8384	OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8385
8386	VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, `0`, `0`, `0`);
8387	vm_object_lock(object);
8388
8389	break;
8390	}
8391	OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8392
8393	VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, `0`, `0`, -`1`);
8394
8395	/ fall thru /
8396
8397	case VM_FAULT_INTERRUPTED:
8398	error_code = MACH_SEND_INTERRUPTED;
8399	case VM_FAULT_MEMORY_ERROR:
8400	memory_error:
8401	ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8402
8403	vm_object_lock(object);
8404	goto return_err;
8405
8406	case VM_FAULT_SUCCESS_NO_VM_PAGE:
8407	/ success but no page: fail /
8408	vm_object_paging_end(object);
8409	vm_object_unlock(object);
8410	goto memory_error;
8411
8412	default:
8413	panic("vm_object_iopl_request: unexpected error"
8414	" 0x%x from vm_fault_page()\n", result);
8415	}
8416	} while (result != VM_FAULT_SUCCESS);
8417
8418	}
8419	phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8420
8421	if (upl->flags & UPL_KERNEL_OBJECT)
8422	goto record_phys_addr;
8423
8424	if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8425	dst_page->vmp_busy = TRUE;
8426	goto record_phys_addr;
8427	}
8428
8429	if (dst_page->vmp_cleaning) {
8430	/*
8431	* Someone else is cleaning this page in place.
8432	* In theory, we should be able to proceed and use this
8433	* page but they'll probably end up clearing the "busy"
8434	* bit on it in upl_commit_range() but they didn't set
8435	* it, so they would clear our "busy" bit and open
8436	* us to race conditions.
8437	* We'd better wait for the cleaning to complete and
8438	* then try again.
8439	*/
8440	VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, `1`);
8441	PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8442	continue;
8443	}
8444	if (dst_page->vmp_laundry)
8445	vm_pageout_steal_laundry(dst_page, FALSE);
8446
8447	if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8448	phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8449	vm_page_t low_page;
8450	int refmod;
8451
8452	/*
8453	* support devices that can't DMA above 32 bits
8454	* by substituting pages from a pool of low address
8455	* memory for any pages we find above the 4G mark
8456	* can't substitute if the page is already wired because
8457	* we don't know whether that physical address has been
8458	* handed out to some other 64 bit capable DMA device to use
8459	*/
8460	if (VM_PAGE_WIRED(dst_page)) {
8461	ret = KERN_PROTECTION_FAILURE;
8462	goto return_err;
8463	}
8464	low_page = vm_page_grablo();
8465
8466	if (low_page == VM_PAGE_NULL) {
8467	ret = KERN_RESOURCE_SHORTAGE;
8468	goto return_err;
8469	}
8470	/*
8471	* from here until the vm_page_replace completes
8472	* we musn't drop the object lock... we don't
8473	* want anyone refaulting this page in and using
8474	* it after we disconnect it... we want the fault
8475	* to find the new page being substituted.
8476	*/
8477	if (dst_page->vmp_pmapped)
8478	refmod = pmap_disconnect(phys_page);
8479	else
8480	refmod = `0`;
8481
8482	if (!dst_page->vmp_absent)
8483	vm_page_copy(dst_page, low_page);
8484
8485	low_page->vmp_reference = dst_page->vmp_reference;
8486	low_page->vmp_dirty = dst_page->vmp_dirty;
8487	low_page->vmp_absent = dst_page->vmp_absent;
8488
8489	if (refmod & VM_MEM_REFERENCED)
8490	low_page->vmp_reference = TRUE;
8491	if (refmod & VM_MEM_MODIFIED) {
8492	SET_PAGE_DIRTY(low_page, FALSE);
8493	}
8494
8495	vm_page_replace(low_page, object, dst_offset);
8496
8497	dst_page = low_page;
8498	/*
8499	* vm_page_grablo returned the page marked
8500	* BUSY... we don't need a PAGE_WAKEUP_DONE
8501	* here, because we've never dropped the object lock
8502	*/
8503	if ( !dst_page->vmp_absent)
8504	dst_page->vmp_busy = FALSE;
8505
8506	phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8507	}
8508	if ( !dst_page->vmp_busy)
8509	dwp->dw_mask \|= DW_vm_page_wire;
8510
8511	if (cntrl_flags & UPL_BLOCK_ACCESS) {
8512	/*
8513	* Mark the page "busy" to block any future page fault
8514	* on this page in addition to wiring it.
8515	* We'll also remove the mapping
8516	* of all these pages before leaving this routine.
8517	*/
8518	assert(!dst_page->vmp_fictitious);
8519	dst_page->vmp_busy = TRUE;
8520	}
8521	/*
8522	* expect the page to be used
8523	* page queues lock must be held to set 'reference'
8524	*/
8525	dwp->dw_mask \|= DW_set_reference;
8526
8527	if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8528	SET_PAGE_DIRTY(dst_page, TRUE);
8529	}
8530	if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
8531	pmap_sync_page_attributes_phys(phys_page);
8532	dst_page->vmp_written_by_kernel = FALSE;
8533	}
8534
8535	record_phys_addr:
8536	if (dst_page->vmp_busy)
8537	upl->flags \|= UPL_HAS_BUSY;
8538
8539	lite_list[entry>>`5`] \|= `1` << (entry & `31`);
8540
8541	if (phys_page > upl->highest_page)
8542	upl->highest_page = phys_page;
8543
8544	if (user_page_list) {
8545	user_page_list[entry].phys_addr = phys_page;
8546	user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
8547	user_page_list[entry].absent = dst_page->vmp_absent;
8548	user_page_list[entry].dirty = dst_page->vmp_dirty;
8549	user_page_list[entry].precious = dst_page->vmp_precious;
8550	user_page_list[entry].device = FALSE;
8551	user_page_list[entry].needed = FALSE;
8552	if (dst_page->vmp_clustered == TRUE)
8553	user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8554	else
8555	user_page_list[entry].speculative = FALSE;
8556	user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
8557	user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
8558	user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
8559	user_page_list[entry].mark = FALSE;
8560	}
8561	if (object != kernel_object && object != compressor_object) {
8562	/*
8563	* someone is explicitly grabbing this page...
8564	* update clustered and speculative state
8565	*
8566	*/
8567	if (dst_page->vmp_clustered)
8568	VM_PAGE_CONSUME_CLUSTERED(dst_page);
8569	}
8570	skip_page:
8571	entry++;
8572	dst_offset += PAGE_SIZE_64;
8573	xfer_size -= PAGE_SIZE;
8574
8575	if (dwp->dw_mask) {
8576	VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8577
8578	if (dw_count >= dw_limit) {
8579	vm_page_do_delayed_work(object, tag, &dw_array[`0`], dw_count);
8580
8581	dwp = &dw_array[`0`];
8582	dw_count = `0`;
8583	}
8584	}
8585	}
8586	assert(entry == size_in_pages);
8587
8588	if (dw_count)
8589	vm_page_do_delayed_work(object, tag, &dw_array[`0`], dw_count);
8590	finish:
8591	if (user_page_list && set_cache_attr_needed == TRUE)
8592	vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8593
8594	if (page_list_count != NULL) {
8595	if (upl->flags & UPL_INTERNAL)
8596	*page_list_count = `0`;
8597	else if (*page_list_count > size_in_pages)
8598	*page_list_count = size_in_pages;
8599	}
8600	vm_object_unlock(object);
8601
8602	if (cntrl_flags & UPL_BLOCK_ACCESS) {
8603	/*
8604	* We've marked all the pages "busy" so that future
8605	* page faults will block.
8606	* Now remove the mapping for these pages, so that they
8607	* can't be accessed without causing a page fault.
8608	*/
8609	vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8610	PMAP_NULL, `0`, VM_PROT_NONE);
8611	assert(!object->blocked_access);
8612	object->blocked_access = TRUE;
8613	}
8614
8615	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, `0`, `0`);
8616	return KERN_SUCCESS;
8617
8618	return_err:
8619	dw_index = `0`;
8620
8621	for (; offset < dst_offset; offset += PAGE_SIZE) {
8622	boolean_t need_unwire;
8623
8624	dst_page = vm_page_lookup(object, offset);
8625
8626	if (dst_page == VM_PAGE_NULL)
8627	panic("vm_object_iopl_request: Wired page missing. \n");
8628
8629	/*
8630	* if we've already processed this page in an earlier
8631	* dw_do_work, we need to undo the wiring... we will
8632	* leave the dirty and reference bits on if they
8633	* were set, since we don't have a good way of knowing
8634	* what the previous state was and we won't get here
8635	* under any normal circumstances... we will always
8636	* clear BUSY and wakeup any waiters via vm_page_free
8637	* or PAGE_WAKEUP_DONE
8638	*/
8639	need_unwire = TRUE;
8640
8641	if (dw_count) {
8642	if (dw_array[dw_index].dw_m == dst_page) {
8643	/*
8644	* still in the deferred work list
8645	* which means we haven't yet called
8646	* vm_page_wire on this page
8647	*/
8648	need_unwire = FALSE;
8649
8650	dw_index++;
8651	dw_count--;
8652	}
8653	}
8654	vm_page_lock_queues();
8655
8656	if (dst_page->vmp_absent \|\| free_wired_pages == TRUE) {
8657	vm_page_free(dst_page);
8658
8659	need_unwire = FALSE;
8660	} else {
8661	if (need_unwire == TRUE)
8662	vm_page_unwire(dst_page, TRUE);
8663
8664	PAGE_WAKEUP_DONE(dst_page);
8665	}
8666	vm_page_unlock_queues();
8667
8668	if (need_unwire == TRUE)
8669	VM_STAT_INCR(reactivations);
8670	}
8671	#if UPL_DEBUG
8672	upl->upl_state = `2`;
8673	#endif
8674	if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8675	vm_object_activity_end(object);
8676	vm_object_collapse(object, `0`, TRUE);
8677	}
8678	vm_object_unlock(object);
8679	upl_destroy(upl);
8680
8681	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, `0`, `0`);
8682	return ret;
8683	}
8684
8685	kern_return_t
8686	upl_transpose(
8687	upl_t upl1,
8688	upl_t upl2)
8689	{
8690	kern_return_t retval;
8691	boolean_t upls_locked;
8692	vm_object_t object1, object2;
8693
8694	if (upl1 == UPL_NULL \|\| upl2 == UPL_NULL \|\| upl1 == upl2 \|\| ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) \|\| ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8695	return KERN_INVALID_ARGUMENT;
8696	}
8697
8698	upls_locked = FALSE;
8699
8700	/*
8701	* Since we need to lock both UPLs at the same time,
8702	* avoid deadlocks by always taking locks in the same order.
8703	*/
8704	if (upl1 < upl2) {
8705	upl_lock(upl1);
8706	upl_lock(upl2);
8707	} else {
8708	upl_lock(upl2);
8709	upl_lock(upl1);
8710	}
8711	upls_locked = TRUE; / the UPLs will need to be unlocked /
8712
8713	object1 = upl1->map_object;
8714	object2 = upl2->map_object;
8715
8716	if (upl1->offset != `0` \|\| upl2->offset != `0` \|\|
8717	upl1->size != upl2->size) {
8718	/*
8719	* We deal only with full objects, not subsets.
8720	* That's because we exchange the entire backing store info
8721	* for the objects: pager, resident pages, etc... We can't do
8722	* only part of it.
8723	*/
8724	retval = KERN_INVALID_VALUE;
8725	goto done;
8726	}
8727
8728	/*
8729	* Tranpose the VM objects' backing store.
8730	*/
8731	retval = vm_object_transpose(object1, object2,
8732	(vm_object_size_t) upl1->size);
8733
8734	if (retval == KERN_SUCCESS) {
8735	/*
8736	* Make each UPL point to the correct VM object, i.e. the
8737	* object holding the pages that the UPL refers to...
8738	*/
8739	#if CONFIG_IOSCHED \|\| UPL_DEBUG
8740	if ((upl1->flags & UPL_TRACKED_BY_OBJECT) \|\| (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8741	vm_object_lock(object1);
8742	vm_object_lock(object2);
8743	}
8744	if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8745	queue_remove(&object1->uplq, upl1, upl_t, uplq);
8746	if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8747	queue_remove(&object2->uplq, upl2, upl_t, uplq);
8748	#endif
8749	upl1->map_object = object2;
8750	upl2->map_object = object1;
8751
8752	#if CONFIG_IOSCHED \|\| UPL_DEBUG
8753	if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8754	queue_enter(&object2->uplq, upl1, upl_t, uplq);
8755	if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8756	queue_enter(&object1->uplq, upl2, upl_t, uplq);
8757	if ((upl1->flags & UPL_TRACKED_BY_OBJECT) \|\| (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8758	vm_object_unlock(object2);
8759	vm_object_unlock(object1);
8760	}
8761	#endif
8762	}
8763
8764	done:
8765	/*
8766	* Cleanup.
8767	*/
8768	if (upls_locked) {
8769	upl_unlock(upl1);
8770	upl_unlock(upl2);
8771	upls_locked = FALSE;
8772	}
8773
8774	return retval;
8775	}
8776
8777	void
8778	upl_range_needed(
8779	upl_t upl,
8780	int index,
8781	int count)
8782	{
8783	upl_page_info_t *user_page_list;
8784	int size_in_pages;
8785
8786	if ( !(upl->flags & UPL_INTERNAL) \|\| count <= `0`)
8787	return;
8788
8789	size_in_pages = upl->size / PAGE_SIZE;
8790
8791	user_page_list = (upl_page_info_t ) (((uintptr_t)upl) + sizeof(struct* upl));
8792
8793	while (count-- && index < size_in_pages)
8794	user_page_list[index++].needed = TRUE;
8795	}
8796
8797
/*
 * Reserved pool of virtual addresses in the kernel address space.
 * We need to map the physical pages in the kernel, so that we
 * can call the code-signing or slide routines with a kernel
 * virtual address.  We keep this pool of pre-allocated kernel
 * virtual addresses so that we don't have to scan the kernel's
 * virtual address space each time we need to work with
 * a physical page.
 */
8807	decl_simple_lock_data(,vm_paging_lock)
8808	#define VM_PAGING_NUM_PAGES 64
8809	vm_map_offset_t vm_paging_base_address = `0`;
8810	boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8811	int vm_paging_max_index = `0`;
8812	int vm_paging_page_waiter = `0`;
8813	int vm_paging_page_waiter_total = `0`;
8814
8815	unsigned long vm_paging_no_kernel_page = `0`;
8816	unsigned long vm_paging_objects_mapped = `0`;
8817	unsigned long vm_paging_pages_mapped = `0`;
8818	unsigned long vm_paging_objects_mapped_slow = `0`;
8819	unsigned long vm_paging_pages_mapped_slow = `0`;
8820
8821	void
8822	vm_paging_map_init(void)
8823	{
8824	kern_return_t kr;
8825	vm_map_offset_t page_map_offset;
8826	vm_map_entry_t map_entry;
8827
8828	assert(vm_paging_base_address == `0`);
8829
8830	/*
8831	* Initialize our pool of pre-allocated kernel
8832	* virtual addresses.
8833	*/
8834	page_map_offset = `0`;
8835	kr = vm_map_find_space(kernel_map,
8836	&page_map_offset,
8837	VM_PAGING_NUM_PAGES * PAGE_SIZE,
8838	`0`,
8839	`0`,
8840	VM_MAP_KERNEL_FLAGS_NONE,
8841	VM_KERN_MEMORY_NONE,
8842	&map_entry);
8843	if (kr != KERN_SUCCESS) {
8844	panic("vm_paging_map_init: kernel_map full\n");
8845	}
8846	VME_OBJECT_SET(map_entry, kernel_object);
8847	VME_OFFSET_SET(map_entry, page_map_offset);
8848	map_entry->protection = VM_PROT_NONE;
8849	map_entry->max_protection = VM_PROT_NONE;
8850	map_entry->permanent = TRUE;
8851	vm_object_reference(kernel_object);
8852	vm_map_unlock(kernel_map);
8853
8854	assert(vm_paging_base_address == `0`);
8855	vm_paging_base_address = page_map_offset;
8856	}
8857
8858	/*
8859	* vm_paging_map_object:
8860	* Maps part of a VM object's pages in the kernel
8861	* virtual address space, using the pre-allocated
8862	* kernel virtual addresses, if possible.
8863	* Context:
8864	* The VM object is locked. This lock will get
8865	* dropped and re-acquired though, so the caller
8866	* must make sure the VM object is kept alive
8867	* (by holding a VM map that has a reference
8868	* on it, for example, or taking an extra reference).
8869	* The page should also be kept busy to prevent
8870	* it from being reclaimed.
8871	*/
8872	kern_return_t
8873	vm_paging_map_object(
8874	vm_page_t page,
8875	vm_object_t object,
8876	vm_object_offset_t offset,
8877	vm_prot_t protection,
8878	boolean_t can_unlock_object,
8879	vm_map_size_t size, /* IN/OUT /
8880	vm_map_offset_t address, /* OUT /
8881	boolean_t need_unmap) /* OUT /
8882	{
8883	kern_return_t kr;
8884	vm_map_offset_t page_map_offset;
8885	vm_map_size_t map_size;
8886	vm_object_offset_t object_offset;
8887	int i;
8888
8889	if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
8890	/ use permanent 1-to-1 kernel mapping of physical memory ? /
8891	#if __x86_64__
8892	*address = (vm_map_offset_t)
8893	PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
8894	PAGE_SHIFT);
8895	*need_unmap = FALSE;
8896	return KERN_SUCCESS;
8897	#elif __arm__ \|\| __arm64__
8898	*address = (vm_map_offset_t)
8899	phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
8900	*need_unmap = FALSE;
8901	return KERN_SUCCESS;
8902	#else
8903	#warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
8904	#endif
8905
8906	assert(page->vmp_busy);
8907	/*
8908	* Use one of the pre-allocated kernel virtual addresses
8909	* and just enter the VM page in the kernel address space
8910	* at that virtual address.
8911	*/
8912	simple_lock(&vm_paging_lock);
8913
8914	/*
8915	* Try and find an available kernel virtual address
8916	* from our pre-allocated pool.
8917	*/
8918	page_map_offset = `0`;
8919	for (;;) {
8920	for (i = `0`; i < VM_PAGING_NUM_PAGES; i++) {
8921	if (vm_paging_page_inuse[i] == FALSE) {
8922	page_map_offset =
8923	vm_paging_base_address +
8924	(i * PAGE_SIZE);
8925	break;
8926	}
8927	}
8928	if (page_map_offset != `0`) {
8929	/ found a space to map our page ! /
8930	break;
8931	}
8932
8933	if (can_unlock_object) {
8934	/*
8935	* If we can afford to unlock the VM object,
8936	* let's take the slow path now...
8937	*/
8938	break;
8939	}
8940	/*
8941	* We can't afford to unlock the VM object, so
8942	* let's wait for a space to become available...
8943	*/
8944	vm_paging_page_waiter_total++;
8945	vm_paging_page_waiter++;
8946	kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
8947	if (kr == THREAD_WAITING) {
8948	simple_unlock(&vm_paging_lock);
8949	kr = thread_block(THREAD_CONTINUE_NULL);
8950	simple_lock(&vm_paging_lock);
8951	}
8952	vm_paging_page_waiter--;
8953	/ ... and try again /
8954	}
8955
8956	if (page_map_offset != `0`) {
8957	/*
8958	* We found a kernel virtual address;
8959	* map the physical page to that virtual address.
8960	*/
8961	if (i > vm_paging_max_index) {
8962	vm_paging_max_index = i;
8963	}
8964	vm_paging_page_inuse[i] = TRUE;
8965	simple_unlock(&vm_paging_lock);
8966
8967	page->vmp_pmapped = TRUE;
8968
8969	/*
8970	* Keep the VM object locked over the PMAP_ENTER
8971	* and the actual use of the page by the kernel,
8972	* or this pmap mapping might get undone by a
8973	* vm_object_pmap_protect() call...
8974	*/
8975	PMAP_ENTER(kernel_pmap,
8976	page_map_offset,
8977	page,
8978	protection,
8979	VM_PROT_NONE,
8980	`0`,
8981	TRUE,
8982	kr);
8983	assert(kr == KERN_SUCCESS);
8984	vm_paging_objects_mapped++;
8985	vm_paging_pages_mapped++;
8986	*address = page_map_offset;
8987	*need_unmap = TRUE;
8988
8989	#if KASAN
8990	kasan_notify_address(page_map_offset, PAGE_SIZE);
8991	#endif
8992
8993	/ all done and mapped, ready to use ! /
8994	return KERN_SUCCESS;
8995	}
8996
8997	/*
8998	* We ran out of pre-allocated kernel virtual
8999	* addresses. Just map the page in the kernel
9000	* the slow and regular way.
9001	*/
9002	vm_paging_no_kernel_page++;
9003	simple_unlock(&vm_paging_lock);
9004	}
9005
9006	if (! can_unlock_object) {
9007	*address = `0`;
9008	*size = `0`;
9009	*need_unmap = FALSE;
9010	return KERN_NOT_SUPPORTED;
9011	}
9012
9013	object_offset = vm_object_trunc_page(offset);
9014	map_size = vm_map_round_page(*size,
9015	VM_MAP_PAGE_MASK(kernel_map));
9016
9017	/*
9018	* Try and map the required range of the object
9019	* in the kernel_map
9020	*/
9021
9022	vm_object_reference_locked(object); / for the map entry /
9023	vm_object_unlock(object);
9024
9025	kr = vm_map_enter(kernel_map,
9026	address,
9027	map_size,
9028	`0`,
9029	VM_FLAGS_ANYWHERE,
9030	VM_MAP_KERNEL_FLAGS_NONE,
9031	VM_KERN_MEMORY_NONE,
9032	object,
9033	object_offset,
9034	FALSE,
9035	protection,
9036	VM_PROT_ALL,
9037	VM_INHERIT_NONE);
9038	if (kr != KERN_SUCCESS) {
9039	*address = `0`;
9040	*size = `0`;
9041	*need_unmap = FALSE;
9042	vm_object_deallocate(object); / for the map entry /
9043	vm_object_lock(object);
9044	return kr;
9045	}
9046
9047	*size = map_size;
9048
9049	/*
9050	* Enter the mapped pages in the page table now.
9051	*/
9052	vm_object_lock(object);
9053	/*
9054	* VM object must be kept locked from before PMAP_ENTER()
9055	* until after the kernel is done accessing the page(s).
9056	* Otherwise, the pmap mappings in the kernel could be
9057	* undone by a call to vm_object_pmap_protect().
9058	*/
9059
9060	for (page_map_offset = `0`;
9061	map_size != `0`;
9062	map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9063
9064	page = vm_page_lookup(object, offset + page_map_offset);
9065	if (page == VM_PAGE_NULL) {
9066	printf("vm_paging_map_object: no page !?");
9067	vm_object_unlock(object);
9068	kr = vm_map_remove(kernel_map, address, size,
9069	VM_MAP_REMOVE_NO_FLAGS);
9070	assert(kr == KERN_SUCCESS);
9071	*address = `0`;
9072	*size = `0`;
9073	*need_unmap = FALSE;
9074	vm_object_lock(object);
9075	return KERN_MEMORY_ERROR;
9076	}
9077	page->vmp_pmapped = TRUE;
9078
9079	//assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9080	PMAP_ENTER(kernel_pmap,
9081	*address + page_map_offset,
9082	page,
9083	protection,
9084	VM_PROT_NONE,
9085	`0`,
9086	TRUE,
9087	kr);
9088	assert(kr == KERN_SUCCESS);
9089	#if KASAN
9090	kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9091	#endif
9092	}
9093
9094	vm_paging_objects_mapped_slow++;
9095	vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9096
9097	*need_unmap = TRUE;
9098
9099	return KERN_SUCCESS;
9100	}
9101
9102	/*
9103	* vm_paging_unmap_object:
9104	* Unmaps part of a VM object's pages from the kernel
9105	* virtual address space.
9106	* Context:
9107	* The VM object is locked. This lock will get
9108	* dropped and re-acquired though.
9109	*/
9110	void
9111	vm_paging_unmap_object(
9112	vm_object_t object,
9113	vm_map_offset_t start,
9114	vm_map_offset_t end)
9115	{
9116	kern_return_t kr;
9117	int i;
9118
9119	if ((vm_paging_base_address == `0`) \|\|
9120	(start < vm_paging_base_address) \|\|
9121	(end > (vm_paging_base_address
9122	+ (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9123	/*
9124	* We didn't use our pre-allocated pool of
9125	* kernel virtual address. Deallocate the
9126	* virtual memory.
9127	*/
9128	if (object != VM_OBJECT_NULL) {
9129	vm_object_unlock(object);
9130	}
9131	kr = vm_map_remove(kernel_map, start, end,
9132	VM_MAP_REMOVE_NO_FLAGS);
9133	if (object != VM_OBJECT_NULL) {
9134	vm_object_lock(object);
9135	}
9136	assert(kr == KERN_SUCCESS);
9137	} else {
9138	/*
9139	* We used a kernel virtual address from our
9140	* pre-allocated pool. Put it back in the pool
9141	* for next time.
9142	*/
9143	assert(end - start == PAGE_SIZE);
9144	i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9145	assert(i >= `0` && i < VM_PAGING_NUM_PAGES);
9146
9147	/ undo the pmap mapping /
9148	pmap_remove(kernel_pmap, start, end);
9149
9150	simple_lock(&vm_paging_lock);
9151	vm_paging_page_inuse[i] = FALSE;
9152	if (vm_paging_page_waiter) {
9153	thread_wakeup(&vm_paging_page_waiter);
9154	}
9155	simple_unlock(&vm_paging_lock);
9156	}
9157	}
9158
9159
9160	/*
9161	* page->vmp_object must be locked
9162	*/
9163	void
9164	vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9165	{
9166	if (!queues_locked) {
9167	vm_page_lockspin_queues();
9168	}
9169
9170	page->vmp_free_when_done = FALSE;
9171	/*
9172	* need to drop the laundry count...
9173	* we may also need to remove it
9174	* from the I/O paging queue...
9175	* vm_pageout_throttle_up handles both cases
9176	*
9177	* the laundry and pageout_queue flags are cleared...
9178	*/
9179	vm_pageout_throttle_up(page);
9180
9181	if (!queues_locked) {
9182	vm_page_unlock_queues();
9183	}
9184	}
9185
9186	upl_t
9187	vector_upl_create(vm_offset_t upl_offset)
9188	{
9189	int vector_upl_size = sizeof(struct _vector_upl);
9190	int i=`0`;
9191	upl_t upl;
9192	vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9193
9194	upl = upl_create(`0`,UPL_VECTOR,`0`);
9195	upl->vector_upl = vector_upl;
9196	upl->offset = upl_offset;
9197	vector_upl->size = `0`;
9198	vector_upl->offset = upl_offset;
9199	vector_upl->invalid_upls=`0`;
9200	vector_upl->num_upls=`0`;
9201	vector_upl->pagelist = NULL;
9202
9203	for(i=`0`; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9204	vector_upl->upl_iostates[i].size = `0`;
9205	vector_upl->upl_iostates[i].offset = `0`;
9206
9207	}
9208	return upl;
9209	}
9210
9211	void
9212	vector_upl_deallocate(upl_t upl)
9213	{
9214	if(upl) {
9215	vector_upl_t vector_upl = upl->vector_upl;
9216	if(vector_upl) {
9217	if(vector_upl->invalid_upls != vector_upl->num_upls)
9218	panic("Deallocating non-empty Vectored UPL\n");
9219	kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9220	vector_upl->invalid_upls=`0`;
9221	vector_upl->num_upls = `0`;
9222	vector_upl->pagelist = NULL;
9223	vector_upl->size = `0`;
9224	vector_upl->offset = `0`;
9225	kfree(vector_upl, sizeof(struct _vector_upl));
9226	vector_upl = (vector_upl_t)`0xfeedfeed`;
9227	}
9228	else
9229	panic("vector_upl_deallocate was passed a non-vectored upl\n");
9230	}
9231	else
9232	panic("vector_upl_deallocate was passed a NULL upl\n");
9233	}
9234
9235	boolean_t
9236	vector_upl_is_valid(upl_t upl)
9237	{
9238	if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9239	vector_upl_t vector_upl = upl->vector_upl;
9240	if(vector_upl == NULL \|\| vector_upl == (vector_upl_t)`0xfeedfeed` \|\| vector_upl == (vector_upl_t)`0xfeedbeef`)
9241	return FALSE;
9242	else
9243	return TRUE;
9244	}
9245	return FALSE;
9246	}
9247
9248	boolean_t
9249	vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9250	{
9251	if(vector_upl_is_valid(upl)) {
9252	vector_upl_t vector_upl = upl->vector_upl;
9253
9254	if(vector_upl) {
9255	if(subupl) {
9256	if(io_size) {
9257	if(io_size < PAGE_SIZE)
9258	io_size = PAGE_SIZE;
9259	subupl->vector_upl = (void*)vector_upl;
9260	vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9261	vector_upl->size += io_size;
9262	upl->size += io_size;
9263	}
9264	else {
9265	uint32_t i=`0`,invalid_upls=`0`;
9266	for(i = `0`; i < vector_upl->num_upls; i++) {
9267	if(vector_upl->upl_elems[i] == subupl)
9268	break;
9269	}
9270	if(i == vector_upl->num_upls)
9271	panic("Trying to remove sub-upl when none exists");
9272
9273	vector_upl->upl_elems[i] = NULL;
9274	invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, `1`);
9275	if(invalid_upls == vector_upl->num_upls)
9276	return TRUE;
9277	else
9278	return FALSE;
9279	}
9280	}
9281	else
9282	panic("vector_upl_set_subupl was passed a NULL upl element\n");
9283	}
9284	else
9285	panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9286	}
9287	else
9288	panic("vector_upl_set_subupl was passed a NULL upl\n");
9289
9290	return FALSE;
9291	}
9292
9293	void
9294	vector_upl_set_pagelist(upl_t upl)
9295	{
9296	if(vector_upl_is_valid(upl)) {
9297	uint32_t i=`0`;
9298	vector_upl_t vector_upl = upl->vector_upl;
9299
9300	if(vector_upl) {
9301	vm_offset_t pagelist_size=`0`, cur_upl_pagelist_size=`0`;
9302
9303	vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9304
9305	for(i=`0`; i < vector_upl->num_upls; i++) {
9306	cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9307	bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9308	pagelist_size += cur_upl_pagelist_size;
9309	if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9310	upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9311	}
9312	assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9313	}
9314	else
9315	panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9316	}
9317	else
9318	panic("vector_upl_set_pagelist was passed a NULL upl\n");
9319
9320	}
9321
9322	upl_t
9323	vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9324	{
9325	if(vector_upl_is_valid(upl)) {
9326	vector_upl_t vector_upl = upl->vector_upl;
9327	if(vector_upl) {
9328	if(index < vector_upl->num_upls)
9329	return vector_upl->upl_elems[index];
9330	}
9331	else
9332	panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9333	}
9334	return NULL;
9335	}
9336
9337	upl_t
9338	vector_upl_subupl_byoffset(upl_t upl, upl_offset_t upl_offset, upl_size_t upl_size)
9339	{
9340	if(vector_upl_is_valid(upl)) {
9341	uint32_t i=`0`;
9342	vector_upl_t vector_upl = upl->vector_upl;
9343
9344	if(vector_upl) {
9345	upl_t subupl = NULL;
9346	vector_upl_iostates_t subupl_state;
9347
9348	for(i=`0`; i < vector_upl->num_upls; i++) {
9349	subupl = vector_upl->upl_elems[i];
9350	subupl_state = vector_upl->upl_iostates[i];
9351	if( *upl_offset <= (subupl_state.offset + subupl_state.size - `1`)) {
9352	/ We could have been passed an offset/size pair that belongs*
9353	* to an UPL element that has already been committed/aborted.
9354	* If so, return NULL.
9355	*/
9356	if(subupl == NULL)
9357	return NULL;
9358	if((subupl_state.offset + subupl_state.size) < (upl_offset + upl_size)) {
9359	upl_size = (subupl_state.offset + subupl_state.size) - upl_offset;
9360	if(*upl_size > subupl_state.size)
9361	*upl_size = subupl_state.size;
9362	}
9363	if(*upl_offset >= subupl_state.offset)
9364	*upl_offset -= subupl_state.offset;
9365	else if(i)
9366	panic("Vector UPL offset miscalculation\n");
9367	return subupl;
9368	}
9369	}
9370	}
9371	else
9372	panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9373	}
9374	return NULL;
9375	}
9376
9377	void
9378	vector_upl_get_submap(upl_t upl, vm_map_t v_upl_submap, vm_offset_t submap_dst_addr)
9379	{
9380	*v_upl_submap = NULL;
9381
9382	if(vector_upl_is_valid(upl)) {
9383	vector_upl_t vector_upl = upl->vector_upl;
9384	if(vector_upl) {
9385	*v_upl_submap = vector_upl->submap;
9386	*submap_dst_addr = vector_upl->submap_dst_addr;
9387	}
9388	else
9389	panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9390	}
9391	else
9392	panic("vector_upl_get_submap was passed a null UPL\n");
9393	}
9394
9395	void
9396	vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9397	{
9398	if(vector_upl_is_valid(upl)) {
9399	vector_upl_t vector_upl = upl->vector_upl;
9400	if(vector_upl) {
9401	vector_upl->submap = submap;
9402	vector_upl->submap_dst_addr = submap_dst_addr;
9403	}
9404	else
9405	panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9406	}
9407	else
9408	panic("vector_upl_get_submap was passed a NULL UPL\n");
9409	}
9410
9411	void
9412	vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9413	{
9414	if(vector_upl_is_valid(upl)) {
9415	uint32_t i = `0`;
9416	vector_upl_t vector_upl = upl->vector_upl;
9417
9418	if(vector_upl) {
9419	for(i = `0`; i < vector_upl->num_upls; i++) {
9420	if(vector_upl->upl_elems[i] == subupl)
9421	break;
9422	}
9423
9424	if(i == vector_upl->num_upls)
9425	panic("setting sub-upl iostate when none exists");
9426
9427	vector_upl->upl_iostates[i].offset = offset;
9428	if(size < PAGE_SIZE)
9429	size = PAGE_SIZE;
9430	vector_upl->upl_iostates[i].size = size;
9431	}
9432	else
9433	panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9434	}
9435	else
9436	panic("vector_upl_set_iostate was passed a NULL UPL\n");
9437	}
9438
9439	void
9440	vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9441	{
9442	if(vector_upl_is_valid(upl)) {
9443	uint32_t i = `0`;
9444	vector_upl_t vector_upl = upl->vector_upl;
9445
9446	if(vector_upl) {
9447	for(i = `0`; i < vector_upl->num_upls; i++) {
9448	if(vector_upl->upl_elems[i] == subupl)
9449	break;
9450	}
9451
9452	if(i == vector_upl->num_upls)
9453	panic("getting sub-upl iostate when none exists");
9454
9455	*offset = vector_upl->upl_iostates[i].offset;
9456	*size = vector_upl->upl_iostates[i].size;
9457	}
9458	else
9459	panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9460	}
9461	else
9462	panic("vector_upl_get_iostate was passed a NULL UPL\n");
9463	}
9464
9465	void
9466	vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t offset, upl_size_t size)
9467	{
9468	if(vector_upl_is_valid(upl)) {
9469	vector_upl_t vector_upl = upl->vector_upl;
9470	if(vector_upl) {
9471	if(index < vector_upl->num_upls) {
9472	*offset = vector_upl->upl_iostates[index].offset;
9473	*size = vector_upl->upl_iostates[index].size;
9474	}
9475	else
9476	offset = size = `0`;
9477	}
9478	else
9479	panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9480	}
9481	else
9482	panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9483	}
9484
9485	upl_page_info_t *
9486	upl_get_internal_vectorupl_pagelist(upl_t upl)
9487	{
9488	return ((vector_upl_t)(upl->vector_upl))->pagelist;
9489	}
9490
9491	void *
9492	upl_get_internal_vectorupl(upl_t upl)
9493	{
9494	return upl->vector_upl;
9495	}
9496
9497	vm_size_t
9498	upl_get_internal_pagelist_offset(void)
9499	{
9500	return sizeof(struct upl);
9501	}
9502
9503	void
9504	upl_clear_dirty(
9505	upl_t upl,
9506	boolean_t value)
9507	{
9508	if (value) {
9509	upl->flags \|= UPL_CLEAR_DIRTY;
9510	} else {
9511	upl->flags &= ~UPL_CLEAR_DIRTY;
9512	}
9513	}
9514
9515	void
9516	upl_set_referenced(
9517	upl_t upl,
9518	boolean_t value)
9519	{
9520	upl_lock(upl);
9521	if (value) {
9522	upl->ext_ref_count++;
9523	} else {
9524	if (!upl->ext_ref_count) {
9525	panic("upl_set_referenced not %p\n", upl);
9526	}
9527	upl->ext_ref_count--;
9528	}
9529	upl_unlock(upl);
9530	}
9531
#if CONFIG_IOSCHED
/*
 * upl_set_blkno:
 *
 * For a UPL with UPL_EXPEDITE_SUPPORTED set, record ("blkno",
 * "io_size") as reprioritization info for every page of the range that
 * starts at byte "upl_offset" and spans "io_size" bytes.  Does nothing
 * for other UPLs.
 */
void
upl_set_blkno(
	upl_t		upl,
	vm_offset_t	upl_offset,
	int		io_size,
	int64_t		blkno)
{
	int	page_index;
	int	bytes_done;

	if (!(upl->flags & UPL_EXPEDITE_SUPPORTED)) {
		return;
	}

	assert(upl->upl_reprio_info != 0);

	page_index = (int)(upl_offset / PAGE_SIZE);
	for (bytes_done = 0; bytes_done < io_size; bytes_done += PAGE_SIZE) {
		UPL_SET_REPRIO_INFO(upl, page_index, blkno, io_size);
		page_index++;
	}
}
#endif
9550
9551	void inline memoryshot(unsigned int event, unsigned int control)
9552	{
9553	if (vm_debug_events) {
9554	KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) \| control,
9555	vm_page_active_count, vm_page_inactive_count,
9556	vm_page_free_count, vm_page_speculative_count,
9557	vm_page_throttled_count);
9558	} else {
9559	(void) event;
9560	(void) control;
9561	}
9562
9563	}
9564
9565	#ifdef MACH_BSD
9566
9567	boolean_t upl_device_page(upl_page_info_t *upl)
9568	{
9569	return(UPL_DEVICE_PAGE(upl));
9570	}
9571	boolean_t upl_page_present(upl_page_info_t upl, int* index)
9572	{
9573	return(UPL_PAGE_PRESENT(upl, index));
9574	}
9575	boolean_t upl_speculative_page(upl_page_info_t upl, int* index)
9576	{
9577	return(UPL_SPECULATIVE_PAGE(upl, index));
9578	}
9579	boolean_t upl_dirty_page(upl_page_info_t upl, int* index)
9580	{
9581	return(UPL_DIRTY_PAGE(upl, index));
9582	}
9583	boolean_t upl_valid_page(upl_page_info_t upl, int* index)
9584	{
9585	return(UPL_VALID_PAGE(upl, index));
9586	}
9587	ppnum_t upl_phys_page(upl_page_info_t upl, int* index)
9588	{
9589	return(UPL_PHYS_PAGE(upl, index));
9590	}
9591
9592	void upl_page_set_mark(upl_page_info_t upl, int* index, boolean_t v)
9593	{
9594	upl[index].mark = v;
9595	}
9596
9597	boolean_t upl_page_get_mark(upl_page_info_t upl, int* index)
9598	{
9599	return upl[index].mark;
9600	}
9601
9602	void
9603	vm_countdirtypages(void)
9604	{
9605	vm_page_t m;
9606	int dpages;
9607	int pgopages;
9608	int precpages;
9609
9610
9611	dpages=`0`;
9612	pgopages=`0`;
9613	precpages=`0`;
9614
9615	vm_page_lock_queues();
9616	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9617	do {
9618	if (m ==(vm_page_t )`0`) break;
9619
9620	if(m->vmp_dirty) dpages++;
9621	if(m->vmp_free_when_done) pgopages++;
9622	if(m->vmp_precious) precpages++;
9623
9624	assert(VM_PAGE_OBJECT(m) != kernel_object);
9625	m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9626	if (m ==(vm_page_t )`0`) break;
9627
9628	} while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9629	vm_page_unlock_queues();
9630
9631	vm_page_lock_queues();
9632	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9633	do {
9634	if (m ==(vm_page_t )`0`) break;
9635
9636	dpages++;
9637	assert(m->vmp_dirty);
9638	assert(!m->vmp_free_when_done);
9639	assert(VM_PAGE_OBJECT(m) != kernel_object);
9640	m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9641	if (m ==(vm_page_t )`0`) break;
9642
9643	} while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9644	vm_page_unlock_queues();
9645
9646	vm_page_lock_queues();
9647	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9648	do {
9649	if (m ==(vm_page_t )`0`) break;
9650
9651	if(m->vmp_dirty) dpages++;
9652	if(m->vmp_free_when_done) pgopages++;
9653	if(m->vmp_precious) precpages++;
9654
9655	assert(VM_PAGE_OBJECT(m) != kernel_object);
9656	m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9657	if (m ==(vm_page_t )`0`) break;
9658
9659	} while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9660	vm_page_unlock_queues();
9661
9662	printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9663
9664	dpages=`0`;
9665	pgopages=`0`;
9666	precpages=`0`;
9667
9668	vm_page_lock_queues();
9669	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9670
9671	do {
9672	if(m == (vm_page_t )`0`) break;
9673	if(m->vmp_dirty) dpages++;
9674	if(m->vmp_free_when_done) pgopages++;
9675	if(m->vmp_precious) precpages++;
9676
9677	assert(VM_PAGE_OBJECT(m) != kernel_object);
9678	m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9679	if(m == (vm_page_t )`0`) break;
9680
9681	} while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9682	vm_page_unlock_queues();
9683
9684	printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9685
9686	}
9687	#endif /* MACH_BSD */
9688
9689
9690	#if CONFIG_IOSCHED
9691	int upl_get_cached_tier(upl_t upl)
9692	{
9693	assert(upl);
9694	if (upl->flags & UPL_TRACKED_BY_OBJECT)
9695	return (upl->upl_priority);
9696	return (-`1`);
9697	}
9698	#endif /* CONFIG_IOSCHED */
9699
9700
9701	void upl_callout_iodone(upl_t upl)
9702	{
9703	struct upl_io_completion *upl_ctx = upl->upl_iodone;
9704
9705	if (upl_ctx) {
9706	void (iodone_func)(void* , int*) = upl_ctx->io_done;
9707
9708	assert(upl_ctx->io_done);
9709
9710	(*iodone_func)(upl_ctx->io_context, upl_ctx->io_error);
9711	}
9712	}
9713
9714	void upl_set_iodone(upl_t upl, void *upl_iodone)
9715	{
9716	upl->upl_iodone = (struct upl_io_completion *)upl_iodone;
9717	}
9718
9719	void upl_set_iodone_error(upl_t upl, int error)
9720	{
9721	struct upl_io_completion *upl_ctx = upl->upl_iodone;
9722
9723	if (upl_ctx)
9724	upl_ctx->io_error = error;
9725	}
9726
9727
9728	ppnum_t upl_get_highest_page(
9729	upl_t upl)
9730	{
9731	return upl->highest_page;
9732	}
9733
9734	upl_size_t upl_get_size(
9735	upl_t upl)
9736	{
9737	return upl->size;
9738	}
9739
9740	upl_t upl_associated_upl(upl_t upl)
9741	{
9742	return upl->associated_upl;
9743	}
9744
9745	void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
9746	{
9747	upl->associated_upl = associated_upl;
9748	}
9749
9750	struct vnode * upl_lookup_vnode(upl_t upl)
9751	{
9752	if (!upl->map_object->internal)
9753	return vnode_pager_lookup_vnode(upl->map_object->pager);
9754	else
9755	return NULL;
9756	}
9757
9758	#if UPL_DEBUG
9759	kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
9760	{
9761	upl->ubc_alias1 = alias1;
9762	upl->ubc_alias2 = alias2;
9763	return KERN_SUCCESS;
9764	}
9765	int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
9766	{
9767	if(al)
9768	*al = upl->ubc_alias1;
9769	if(al2)
9770	*al2 = upl->ubc_alias2;
9771	return KERN_SUCCESS;
9772	}
9773	#endif /* UPL_DEBUG */
9774
9775	#if VM_PRESSURE_EVENTS
9776	/*
9777	* Upward trajectory.
9778	*/
9779	extern boolean_t vm_compressor_low_on_space(void);
9780
9781	boolean_t
9782	VM_PRESSURE_NORMAL_TO_WARNING(void) {
9783
9784	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9785
9786	/ Available pages below our threshold /
9787	if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
9788	/ No frozen processes to kill /
9789	if (memorystatus_frozen_count == `0`) {
9790	/ Not enough suspended processes available. /
9791	if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
9792	return TRUE;
9793	}
9794	}
9795	}
9796	return FALSE;
9797
9798	} else {
9799	return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? `1` : `0`);
9800	}
9801	}
9802
9803	boolean_t
9804	VM_PRESSURE_WARNING_TO_CRITICAL(void) {
9805
9806	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9807
9808	/ Available pages below our threshold /
9809	if (memorystatus_available_pages < memorystatus_available_pages_critical) {
9810	return TRUE;
9811	}
9812	return FALSE;
9813	} else {
9814	return (vm_compressor_low_on_space() \|\| (AVAILABLE_NON_COMPRESSED_MEMORY < ((`12` * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / `10`)) ? `1` : `0`);
9815	}
9816	}
9817
9818	/*
9819	* Downward trajectory.
9820	*/
9821	boolean_t
9822	VM_PRESSURE_WARNING_TO_NORMAL(void) {
9823
9824	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9825
9826	/ Available pages above our threshold /
9827	unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((`15` * memorystatus_available_pages_pressure) / `100`));
9828	if (memorystatus_available_pages > target_threshold) {
9829	return TRUE;
9830	}
9831	return FALSE;
9832	} else {
9833	return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((`12` * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / `10`)) ? `1` : `0`);
9834	}
9835	}
9836
9837	boolean_t
9838	VM_PRESSURE_CRITICAL_TO_WARNING(void) {
9839
9840	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9841
9842	/ Available pages above our threshold /
9843	unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((`15` * memorystatus_available_pages_critical) / `100`));
9844	if (memorystatus_available_pages > target_threshold) {
9845	return TRUE;
9846	}
9847	return FALSE;
9848	} else {
9849	return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((`14` * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / `10`)) ? `1` : `0`);
9850	}
9851	}
9852	#endif /* VM_PRESSURE_EVENTS */
9853
9854
9855
/*
 * Build switches for the self-tests invoked from vm_tests().
 * A test body is compiled only when its switch is non-zero; otherwise
 * the corresponding vm_test_*() name is defined as an empty macro.
 */
#define VM_TEST_COLLAPSE_COMPRESSOR		0
#define VM_TEST_WIRE_AND_EXTRACT		0
#define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC	0
#if __arm64__
#define VM_TEST_KERNEL_OBJECT_FAULT		0
#endif /* __arm64__ */
/* the device pager transpose test follows DEVELOPMENT / DEBUG */
#define VM_TEST_DEVICE_PAGER_TRANSPOSE		(DEVELOPMENT || DEBUG)
9863
#if VM_TEST_COLLAPSE_COMPRESSOR
extern boolean_t vm_object_collapse_compressor_allowed;
#include <IOKit/IOLib.h>
/*
 * vm_test_collapse_compressor:
 *
 * Self-test for collapsing a shadow chain whose objects hold a mix of
 * resident, compressed and absent pages.
 *
 * Builds a 15-page "backing" object and a 9-page "top" object, gives
 * each some compressed and some resident pages, links top -> backing
 * with a shadow offset of 0x3000 (and a backing paging_offset of
 * 0x3000), collapses the chain, and compares the first byte of each of
 * the 9 pages seen through the top mapping with the expected pattern.
 *
 * Panics on mismatch, or when the collapse did not happen although
 * vm_object_collapse_compressor_allowed is set.
 */
static void
vm_test_collapse_compressor(void)
{
	vm_object_size_t	backing_size, top_size;
	vm_object_t		backing_object, top_object;
	vm_map_offset_t		backing_offset, top_offset;
	unsigned char		*backing_address, *top_address;
	kern_return_t		kr;

	printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");

	/* create backing object */
	backing_size = 15 * PAGE_SIZE;
	backing_object = vm_object_allocate(backing_size);
	assert(backing_object != VM_OBJECT_NULL);
	printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
	    backing_object);
	/* map backing object */
	backing_offset = 0;
	/* same argument list as the vm_map_enter() call in vm_test_device_pager_transpose() */
	kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
	    backing_object, 0, FALSE,
	    VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	backing_address = (unsigned char *) backing_offset;
	printf("VM_TEST_COLLAPSE_COMPRESSOR: "
	    "mapped backing object %p at 0x%llx\n",
	    backing_object, (uint64_t) backing_offset);
	/* populate with pages to be compressed in backing object */
	backing_address[0x1*PAGE_SIZE] = 0xB1;
	backing_address[0x4*PAGE_SIZE] = 0xB4;
	backing_address[0x7*PAGE_SIZE] = 0xB7;
	backing_address[0xa*PAGE_SIZE] = 0xBA;
	backing_address[0xd*PAGE_SIZE] = 0xBD;
	printf("VM_TEST_COLLAPSE_COMPRESSOR: "
	    "populated pages to be compressed in "
	    "backing_object %p\n", backing_object);
	/* compress backing object */
	vm_object_pageout(backing_object);
	printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
	    backing_object);
	/* wait for all the pages to be gone */
	while (*(volatile int *)&backing_object->resident_page_count != 0) {
		IODelay(10);
	}
	printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
	    backing_object);
	/* populate with pages to be resident in backing object */
	backing_address[0x0*PAGE_SIZE] = 0xB0;
	backing_address[0x3*PAGE_SIZE] = 0xB3;
	backing_address[0x6*PAGE_SIZE] = 0xB6;
	backing_address[0x9*PAGE_SIZE] = 0xB9;
	backing_address[0xc*PAGE_SIZE] = 0xBC;
	printf("VM_TEST_COLLAPSE_COMPRESSOR: "
	    "populated pages to be resident in "
	    "backing_object %p\n", backing_object);
	/* leave the other pages absent */
	/* mess with the paging_offset of the backing_object */
	assert(backing_object->paging_offset == 0);
	backing_object->paging_offset = 0x3000;

	/* create top object */
	top_size = 9 * PAGE_SIZE;
	top_object = vm_object_allocate(top_size);
	assert(top_object != VM_OBJECT_NULL);
	printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
	    top_object);
	/* map top object */
	top_offset = 0;
	kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
	    top_object, 0, FALSE,
	    VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	top_address = (unsigned char *) top_offset;
	printf("VM_TEST_COLLAPSE_COMPRESSOR: "
	    "mapped top object %p at 0x%llx\n",
	    top_object, (uint64_t) top_offset);
	/* populate with pages to be compressed in top object */
	top_address[0x3*PAGE_SIZE] = 0xA3;
	top_address[0x4*PAGE_SIZE] = 0xA4;
	top_address[0x5*PAGE_SIZE] = 0xA5;
	printf("VM_TEST_COLLAPSE_COMPRESSOR: "
	    "populated pages to be compressed in "
	    "top_object %p\n", top_object);
	/* compress top object */
	vm_object_pageout(top_object);
	printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
	    top_object);
	/*
	 * wait for all the pages to be gone; use the same volatile
	 * read as for the backing object so the count is re-read on
	 * every iteration
	 */
	while (*(volatile int *)&top_object->resident_page_count != 0) {
		IODelay(10);
	}
	printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
	    top_object);
	/* populate with pages to be resident in top object */
	top_address[0x0*PAGE_SIZE] = 0xA0;
	top_address[0x1*PAGE_SIZE] = 0xA1;
	top_address[0x2*PAGE_SIZE] = 0xA2;
	printf("VM_TEST_COLLAPSE_COMPRESSOR: "
	    "populated pages to be resident in "
	    "top_object %p\n", top_object);
	/* leave the other pages absent */

	/* link the 2 objects */
	vm_object_reference(backing_object);
	top_object->shadow = backing_object;
	top_object->vo_shadow_offset = 0x3000;
	printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
	    top_object, backing_object);

	/* unmap backing object */
	vm_map_remove(kernel_map,
	    backing_offset,
	    backing_offset + backing_size,
	    VM_MAP_REMOVE_NO_FLAGS);
	printf("VM_TEST_COLLAPSE_COMPRESSOR: "
	    "unmapped backing_object %p [0x%llx:0x%llx]\n",
	    backing_object,
	    (uint64_t) backing_offset,
	    (uint64_t) (backing_offset + backing_size));

	/* collapse */
	printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
	vm_object_lock(top_object);
	vm_object_collapse(top_object, 0, FALSE);
	vm_object_unlock(top_object);
	printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);

	/* did it work? */
	if (top_object->shadow != VM_OBJECT_NULL) {
		printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
		printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
		if (vm_object_collapse_compressor_allowed) {
			panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
		}
	} else {
		/* check the contents of the mapping */
		unsigned char expect[9] =
			{ 0xA0, 0xA1, 0xA2,	/* resident in top */
			  0xA3, 0xA4, 0xA5,	/* compressed in top */
			  0xB9,	/* resident in backing + shadow_offset */
			  0xBD,	/* compressed in backing + shadow_offset + paging_offset */
			  0x00 };		/* absent in both */
		unsigned char actual[9];
		unsigned int i, errors;

		errors = 0;
		for (i = 0; i < sizeof (actual); i++) {
			actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
			if (actual[i] != expect[i]) {
				errors++;
			}
		}
		printf("VM_TEST_COLLAPSE_COMPRESSOR: "
		    "actual [%x %x %x %x %x %x %x %x %x] "
		    "expect [%x %x %x %x %x %x %x %x %x] "
		    "%d errors\n",
		    actual[0], actual[1], actual[2], actual[3],
		    actual[4], actual[5], actual[6], actual[7],
		    actual[8],
		    expect[0], expect[1], expect[2], expect[3],
		    expect[4], expect[5], expect[6], expect[7],
		    expect[8],
		    errors);
		if (errors) {
			panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
		} else {
			printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
		}
	}
}
#else /* VM_TEST_COLLAPSE_COMPRESSOR */
#define vm_test_collapse_compressor()
#endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10040
#if VM_TEST_WIRE_AND_EXTRACT
extern ledger_template_t task_ledger_template;
#include <mach/mach_vm.h>
extern ppnum_t vm_map_get_phys_page(vm_map_t map,
    vm_offset_t offset);
/*
 * vm_test_wire_and_extract:
 *
 * Self-test for vm_map_wire_and_extract().
 *
 * Creates a "user" map (with its own pmap) and a pmap-less "wire" map,
 * allocates 0x10000 bytes in the user map and remaps them into the
 * wire map.  Every page is then wired through the wire map and the
 * physical page number handed back is compared with what
 * vm_map_get_phys_page() reports for the user map.  The last page is
 * finally wired a second time and checked again.
 *
 * Panics on any mismatch or failure.
 */
static void
vm_test_wire_and_extract(void)
{
	ledger_t		ledger;
	vm_map_t		user_map, wire_map;
	mach_vm_address_t	user_addr, wire_addr;
	mach_vm_size_t		user_size, wire_size;
	mach_vm_offset_t	cur_offset;
	vm_prot_t		cur_prot, max_prot;
	ppnum_t			user_ppnum, wire_ppnum;
	kern_return_t		kr;

	/* set up the two maps */
	ledger = ledger_instantiate(task_ledger_template,
	    LEDGER_CREATE_ACTIVE_ENTRIES);
	user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
	    0x100000000ULL,
	    0x200000000ULL,
	    TRUE);
	wire_map = vm_map_create(NULL,
	    0x100000000ULL,
	    0x200000000ULL,
	    TRUE);

	/* allocate in the user map ... */
	user_addr = 0;
	user_size = 0x10000;
	kr = mach_vm_allocate(user_map,
	    &user_addr,
	    user_size,
	    VM_FLAGS_ANYWHERE);
	assert(kr == KERN_SUCCESS);

	/* ... and make the same range visible in the wire map */
	wire_addr = 0;
	wire_size = user_size;
	kr = mach_vm_remap(wire_map,
	    &wire_addr,
	    wire_size,
	    0,
	    VM_FLAGS_ANYWHERE,
	    user_map,
	    user_addr,
	    FALSE,
	    &cur_prot,
	    &max_prot,
	    VM_INHERIT_NONE);
	assert(kr == KERN_SUCCESS);

	/* wire page by page and compare physical page numbers */
	cur_offset = 0;
	while (cur_offset < wire_size) {
		kr = vm_map_wire_and_extract(wire_map,
		    wire_addr + cur_offset,
		    VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
		    TRUE,
		    &wire_ppnum);
		assert(kr == KERN_SUCCESS);
		user_ppnum = vm_map_get_phys_page(user_map,
		    user_addr + cur_offset);
		printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
		    "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
		    kr,
		    user_map, user_addr + cur_offset, user_ppnum,
		    wire_map, wire_addr + cur_offset, wire_ppnum);
		if (kr != KERN_SUCCESS ||
		    wire_ppnum == 0 ||
		    wire_ppnum != user_ppnum) {
			panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
		}
		cur_offset += PAGE_SIZE;
	}

	/* step back to the last page and wire it once more */
	cur_offset -= PAGE_SIZE;
	kr = vm_map_wire_and_extract(wire_map,
	    wire_addr + cur_offset,
	    VM_PROT_DEFAULT,
	    TRUE,
	    &wire_ppnum);
	assert(kr == KERN_SUCCESS);
	/* user_ppnum still holds the value looked up for this page above */
	printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
	    "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
	    kr,
	    user_map, user_addr + cur_offset, user_ppnum,
	    wire_map, wire_addr + cur_offset, wire_ppnum);
	if (kr != KERN_SUCCESS ||
	    wire_ppnum == 0 ||
	    wire_ppnum != user_ppnum) {
		panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
	}

	printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
}
#else /* VM_TEST_WIRE_AND_EXTRACT */
#define vm_test_wire_and_extract()
#endif /* VM_TEST_WIRE_AND_EXTRACT */
10134
#if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
/*
 * vm_test_page_wire_overflow_panic:
 *
 * Self-test that is expected to panic inside vm_page_wire(): a single
 * page is wired over and over until its wire count would wrap around
 * to 0.  If the loop ever terminates, the overflow went undetected and
 * the test itself panics.
 */
static void
vm_test_page_wire_overflow_panic(void)
{
	vm_object_t object;
	vm_page_t page;

	printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");

	object = vm_object_allocate(PAGE_SIZE);
	vm_object_lock(object);
	page = vm_page_alloc(object, 0x0);
	vm_page_lock_queues();
	do {
		vm_page_wire(page, 1, FALSE);
		/*
		 * vm_page fields carry the "vmp_" prefix everywhere else
		 * in this file (vmp_dirty, vmp_free_when_done, ...).
		 */
	} while (page->vmp_wire_count != 0);
	vm_page_unlock_queues();
	vm_object_unlock(object);
	panic("FBDP(%p,%p): wire_count overflow not detected\n",
	    object, page);
}
#else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
#define vm_test_page_wire_overflow_panic()
#endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10159
#if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
/*
 * vm_test_kernel_object_fault:
 *
 * Self-test: allocate a kernel stack with a guard page on each side
 * and call copyinframe() on the address returned by the allocation.
 * A non-zero return from copyinframe() is reported as PASS, zero as
 * FAIL.  The allocation is released afterwards.
 */
static void
vm_test_kernel_object_fault(void)
{
	kern_return_t	kr;
	vm_offset_t	stack;
	uintptr_t	frameb[2];
	int		ret;

	/* stack plus the two guard pages */
	kr = kernel_memory_allocate(kernel_map, &stack,
	    kernel_stack_size + (2*PAGE_SIZE),
	    0,
	    (KMA_KSTACK | KMA_KOBJECT |
	    KMA_GUARD_FIRST | KMA_GUARD_LAST),
	    VM_KERN_MEMORY_STACK);
	if (kr != KERN_SUCCESS) {
		panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
	}

	ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
	if (ret == 0) {
		printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
	} else {
		printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
	}

	vm_map_remove(kernel_map,
	    stack,
	    stack + kernel_stack_size + (2*PAGE_SIZE),
	    VM_MAP_REMOVE_KUNWIRE);
	stack = 0;
}
#else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
#define vm_test_kernel_object_fault()
#endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10194
#if VM_TEST_DEVICE_PAGER_TRANSPOSE
/*
 * vm_test_device_pager_transpose:
 *
 * Self-test for vm_object_transpose() between an anonymous VM object
 * and the VM object behind a device pager.
 *
 * Both objects are 3 pages long and mapped into the kernel map.  Each
 * is marked busy (activity in progress, access blocked), the two are
 * transposed, and the assertions then check that the reference count
 * and "named" state have switched sides.  Both mappings are removed at
 * the end.
 */
static void
vm_test_device_pager_transpose(void)
{
	memory_object_t	device_pager;
	vm_object_t	anon_object, device_object;
	vm_size_t	size;
	vm_map_offset_t	anon_mapping, device_mapping;
	kern_return_t	kr;

	size = 3 * PAGE_SIZE;

	/* the anonymous side */
	anon_object = vm_object_allocate(size);
	assert(anon_object != VM_OBJECT_NULL);

	/* the device pager side */
	device_pager = device_pager_setup(NULL, 0, size, 0);
	assert(device_pager != NULL);
	device_object = memory_object_to_vm_object(device_pager);
	assert(device_object != VM_OBJECT_NULL);

	/* map both into the kernel map */
	anon_mapping = 0;
	kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
	    anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	device_mapping = 0;
	kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,
	    (void *)device_pager, 0, FALSE,
	    VM_PROT_DEFAULT, VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	memory_object_deallocate(device_pager);

	/* flag both objects as busy and block access to them */
	vm_object_lock(anon_object);
	vm_object_activity_begin(anon_object);
	anon_object->blocked_access = TRUE;
	vm_object_unlock(anon_object);
	vm_object_lock(device_object);
	vm_object_activity_begin(device_object);
	device_object->blocked_access = TRUE;
	vm_object_unlock(device_object);

	/* state before the transpose */
	assert(anon_object->ref_count == 1);
	assert(!anon_object->named);
	assert(device_object->ref_count == 2);
	assert(device_object->named);

	kr = vm_object_transpose(device_object, anon_object, size);
	assert(kr == KERN_SUCCESS);

	/* undo the busy / blocked state */
	vm_object_lock(anon_object);
	vm_object_activity_end(anon_object);
	anon_object->blocked_access = FALSE;
	vm_object_unlock(anon_object);
	vm_object_lock(device_object);
	vm_object_activity_end(device_object);
	device_object->blocked_access = FALSE;
	vm_object_unlock(device_object);

	/* ref_count and "named" must have switched sides */
	assert(anon_object->ref_count == 2);
	assert(anon_object->named);
	kr = vm_deallocate(kernel_map, anon_mapping, size);
	assert(kr == KERN_SUCCESS);
	assert(device_object->ref_count == 1);
	assert(!device_object->named);
	kr = vm_deallocate(kernel_map, device_mapping, size);
	assert(kr == KERN_SUCCESS);

	printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
}
#else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
#define vm_test_device_pager_transpose()
#endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10269
/*
 * vm_tests:
 *
 * Run the VM self-tests, in a fixed order.  A test whose VM_TEST_*
 * switch is off expands to nothing here.
 */
void
vm_tests(void)
{
	vm_test_collapse_compressor();
	vm_test_wire_and_extract();
	vm_test_page_wire_overflow_panic();
	vm_test_kernel_object_fault();
	vm_test_device_pager_transpose();
}
10279

Browse the source code of xnu/osfmk/vm/vm_pageout.c