1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66#include <stdint.h>
67
68#include <debug.h>
69#include <mach_pagemap.h>
70#include <mach_cluster_stats.h>
71
72#include <mach/mach_types.h>
73#include <mach/memory_object.h>
74#include <mach/memory_object_default.h>
75#include <mach/memory_object_control_server.h>
76#include <mach/mach_host_server.h>
77#include <mach/upl.h>
78#include <mach/vm_map.h>
79#include <mach/vm_param.h>
80#include <mach/vm_statistics.h>
81#include <mach/sdt.h>
82
83#include <kern/kern_types.h>
84#include <kern/counters.h>
85#include <kern/host_statistics.h>
86#include <kern/machine.h>
87#include <kern/misc_protos.h>
88#include <kern/sched.h>
89#include <kern/thread.h>
90#include <kern/xpr.h>
91#include <kern/kalloc.h>
92#include <kern/policy_internal.h>
93#include <kern/thread_group.h>
94
95#include <machine/vm_tuning.h>
96#include <machine/commpage.h>
97
98#include <vm/pmap.h>
99#include <vm/vm_compressor_pager.h>
100#include <vm/vm_fault.h>
101#include <vm/vm_map.h>
102#include <vm/vm_object.h>
103#include <vm/vm_page.h>
104#include <vm/vm_pageout.h>
105#include <vm/vm_protos.h> /* must be last */
106#include <vm/memory_object.h>
107#include <vm/vm_purgeable_internal.h>
108#include <vm/vm_shared_region.h>
109#include <vm/vm_compressor.h>
110
111#include <san/kasan.h>
112
113#if CONFIG_PHANTOM_CACHE
114#include <vm/vm_phantom_cache.h>
115#endif
116
117#if UPL_DEBUG
118#include <libkern/OSDebug.h>
119#endif
120
121extern int cs_debug;
122
123extern void mbuf_drain(boolean_t);
124
125#if VM_PRESSURE_EVENTS
126#if CONFIG_JETSAM
127extern unsigned int memorystatus_available_pages;
128extern unsigned int memorystatus_available_pages_pressure;
129extern unsigned int memorystatus_available_pages_critical;
130#else /* CONFIG_JETSAM */
131extern uint64_t memorystatus_available_pages;
132extern uint64_t memorystatus_available_pages_pressure;
133extern uint64_t memorystatus_available_pages_critical;
134#endif /* CONFIG_JETSAM */
135
136extern unsigned int memorystatus_frozen_count;
137extern unsigned int memorystatus_suspended_count;
138extern vm_pressure_level_t memorystatus_vm_pressure_level;
139
140void vm_pressure_response(void);
141extern void consider_vm_pressure_events(void);
142
143#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
144#endif /* VM_PRESSURE_EVENTS */
145
146
147#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
148#ifdef CONFIG_EMBEDDED
149#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
150#else
151#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
152#endif
153#endif
154
155#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
156#define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
157#endif
158
159#ifndef VM_PAGE_LAUNDRY_MAX
160#define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
#endif /* VM_PAGE_LAUNDRY_MAX */
162
163#ifndef VM_PAGEOUT_BURST_WAIT
164#define VM_PAGEOUT_BURST_WAIT 1 /* milliseconds */
165#endif /* VM_PAGEOUT_BURST_WAIT */
166
167#ifndef VM_PAGEOUT_EMPTY_WAIT
168#define VM_PAGEOUT_EMPTY_WAIT 50 /* milliseconds */
169#endif /* VM_PAGEOUT_EMPTY_WAIT */
170
171#ifndef VM_PAGEOUT_DEADLOCK_WAIT
172#define VM_PAGEOUT_DEADLOCK_WAIT 100 /* milliseconds */
173#endif /* VM_PAGEOUT_DEADLOCK_WAIT */
174
175#ifndef VM_PAGEOUT_IDLE_WAIT
176#define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
177#endif /* VM_PAGEOUT_IDLE_WAIT */
178
179#ifndef VM_PAGEOUT_SWAP_WAIT
180#define VM_PAGEOUT_SWAP_WAIT 10 /* milliseconds */
181#endif /* VM_PAGEOUT_SWAP_WAIT */
182
183
184#ifndef VM_PAGE_SPECULATIVE_TARGET
185#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage))
186#endif /* VM_PAGE_SPECULATIVE_TARGET */
187
188
189/*
190 * To obtain a reasonable LRU approximation, the inactive queue
191 * needs to be large enough to give pages on it a chance to be
192 * referenced a second time. This macro defines the fraction
193 * of active+inactive pages that should be inactive.
194 * The pageout daemon uses it to update vm_page_inactive_target.
195 *
196 * If vm_page_free_count falls below vm_page_free_target and
197 * vm_page_inactive_count is below vm_page_inactive_target,
198 * then the pageout daemon starts running.
199 */
200
201#ifndef VM_PAGE_INACTIVE_TARGET
202#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
203#endif /* VM_PAGE_INACTIVE_TARGET */
204
205/*
206 * Once the pageout daemon starts running, it keeps going
207 * until vm_page_free_count meets or exceeds vm_page_free_target.
208 */
209
210#ifndef VM_PAGE_FREE_TARGET
211#ifdef CONFIG_EMBEDDED
212#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
213#else
214#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
215#endif
216#endif /* VM_PAGE_FREE_TARGET */
217
218
219/*
220 * The pageout daemon always starts running once vm_page_free_count
221 * falls below vm_page_free_min.
222 */
223
224#ifndef VM_PAGE_FREE_MIN
225#ifdef CONFIG_EMBEDDED
226#define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
227#else
228#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
229#endif
230#endif /* VM_PAGE_FREE_MIN */
231
232#ifdef CONFIG_EMBEDDED
233#define VM_PAGE_FREE_RESERVED_LIMIT 100
234#define VM_PAGE_FREE_MIN_LIMIT 1500
235#define VM_PAGE_FREE_TARGET_LIMIT 2000
236#else
237#define VM_PAGE_FREE_RESERVED_LIMIT 1700
238#define VM_PAGE_FREE_MIN_LIMIT 3500
239#define VM_PAGE_FREE_TARGET_LIMIT 4000
240#endif
241
242/*
243 * When vm_page_free_count falls below vm_page_free_reserved,
244 * only vm-privileged threads can allocate pages. vm-privilege
245 * allows the pageout daemon and default pager (and any other
246 * associated threads needed for default pageout) to continue
247 * operation by dipping into the reserved pool of pages.
248 */
249
250#ifndef VM_PAGE_FREE_RESERVED
251#define VM_PAGE_FREE_RESERVED(n) \
252 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
253#endif /* VM_PAGE_FREE_RESERVED */
254
255/*
256 * When we dequeue pages from the inactive list, they are
257 * reactivated (ie, put back on the active queue) if referenced.
258 * However, it is possible to starve the free list if other
259 * processors are referencing pages faster than we can turn off
260 * the referenced bit. So we limit the number of reactivations
261 * we will make per call of vm_pageout_scan().
262 */
263#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
264
265#ifndef VM_PAGE_REACTIVATE_LIMIT
266#ifdef CONFIG_EMBEDDED
267#define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
268#else
269#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
270#endif
271#endif /* VM_PAGE_REACTIVATE_LIMIT */
272#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
273
274extern boolean_t hibernate_cleaning_in_progress;
275
276/*
277 * Forward declarations for internal routines.
278 */
/*
 * Per-thread context for the internal pageout I/O threads: a pointer to
 * one of these is the sole argument of vm_pageout_iothread_internal().
 * One slot is reserved for each possible compressor thread.
 *
 * NOTE(review): the per-field descriptions below are inferred from the
 * field names only -- confirm against the iothread implementation.
 */
struct cq {
	struct vm_pageout_queue *q;		/* presumably the pageout queue this thread services */
	void			*current_chead;	/* presumably opaque compressor state for this thread */
	char			*scratch_buf;	/* presumably a per-thread scratch buffer */
	int			id;		/* presumably the slot index within ciq[] */
};

/* One context per compressor thread, up to MAX_COMPRESSOR_THREAD_COUNT. */
struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
287
288
289#if VM_PRESSURE_EVENTS
290void vm_pressure_thread(void);
291
292boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
293boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
294
295boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
296boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
297#endif
298
299void vm_pageout_garbage_collect(int);
300static void vm_pageout_iothread_external(void);
301static void vm_pageout_iothread_internal(struct cq *cq);
302static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
303
304extern void vm_pageout_continue(void);
305extern void vm_pageout_scan(void);
306
307void vm_tests(void); /* forward */
308
309#if !CONFIG_EMBEDDED
310static boolean_t vm_pageout_waiter = FALSE;
311static boolean_t vm_pageout_running = FALSE;
312#endif /* !CONFIG_EMBEDDED */
313
314
315#if DEVELOPMENT || DEBUG
316struct vm_pageout_debug vm_pageout_debug;
317#endif
318struct vm_pageout_vminfo vm_pageout_vminfo;
319struct vm_pageout_state vm_pageout_state;
320struct vm_config vm_config;
321
322struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
323struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
324
325int vm_upl_wait_for_pages = 0;
326vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
327
328boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
329
330int vm_debug_events = 0;
331
332#if CONFIG_MEMORYSTATUS
333extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
334
335uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
336uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
337
338#endif
339
340
341
/*
 *	Routine:	vm_pageout_object_terminate
 *	Purpose:
 *		Destroy the pageout_object, and perform all of the
 *		required cleanup actions.
 *
 *		Every private page still held by the pageout object is
 *		freed, and the page at the corresponding offset in the
 *		shadow object (if it is still resident) is taken out of
 *		its "cleaning" state: it is either freed, reactivated,
 *		or put back on a paging queue.
 *
 *	In/Out conditions:
 *		The object must be locked, and will be returned locked.
 *		The shadow object is locked and unlocked in here.
 */
void
vm_pageout_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;

	/*
	 * Deal with the deallocation (last reference) of a pageout object
	 * (used for cleaning-in-place) by dropping the paging references/
	 * freeing pages in the original object.
	 */

	assert(object->pageout);
	shadow_object = object->shadow;
	vm_object_lock(shadow_object);

	/*
	 * Each iteration frees the first page on the pageout object's
	 * queue, so the loop ends once the object holds no more pages.
	 */
	while (!vm_page_queue_empty(&object->memq)) {
		vm_page_t		p, m;
		vm_object_offset_t	offset;

		p = (vm_page_t) vm_page_queue_first(&object->memq);

		assert(p->vmp_private);
		assert(p->vmp_free_when_done);
		p->vmp_free_when_done = FALSE;
		assert(!p->vmp_cleaning);
		assert(!p->vmp_laundry);

		/*
		 * Remember where the private page lived before freeing it,
		 * so the matching page can be found in the shadow object.
		 */
		offset = p->vmp_offset;
		VM_PAGE_FREE(p);
		p = VM_PAGE_NULL;

		m = vm_page_lookup(shadow_object,
			offset + object->vo_shadow_offset);

		/* nothing to clean up if the original page is gone */
		if(m == VM_PAGE_NULL)
			continue;

		assert((m->vmp_dirty) || (m->vmp_precious) ||
				(m->vmp_busy && m->vmp_cleaning));

		/*
		 * Handle the trusted pager throttle.
		 * Also decrement the burst throttle (if external).
		 */
		vm_page_lock_queues();
		if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
			vm_pageout_throttle_up(m);

		/*
		 * Handle the "target" page(s). These pages are to be freed if
		 * successfully cleaned. Target pages are always busy, and are
		 * wired exactly once. The initial target pages are not mapped,
		 * (so cannot be referenced or modified) but converted target
		 * pages may have been modified between the selection as an
		 * adjacent page and conversion to a target.
		 */
		if (m->vmp_free_when_done) {
			assert(m->vmp_busy);
			assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
			assert(m->vmp_wire_count == 1);
			m->vmp_cleaning = FALSE;
			m->vmp_free_when_done = FALSE;
			/*
			 * Revoke all access to the page. Since the object is
			 * locked, and the page is busy, this prevents the page
			 * from being dirtied after the pmap_disconnect() call
			 * returns.
			 *
			 * Since the page is left "dirty" but "not modified", we
			 * can detect whether the page was redirtied during
			 * pageout by checking the modify state.
			 */
			if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			} else {
				m->vmp_dirty = FALSE;
			}

			/*
			 * A page that is dirty at this point is kept and
			 * reactivated; a clean one is freed.
			 */
			if (m->vmp_dirty) {
				vm_page_unwire(m, TRUE);	/* reactivates */
				VM_STAT_INCR(reactivations);
				PAGE_WAKEUP_DONE(m);
			} else {
				vm_page_free(m);  /* clears busy, etc. */
			}
			vm_page_unlock_queues();
			continue;
		}
		/*
		 * Handle the "adjacent" pages.  These pages were cleaned in
		 * place, and should be left alone.
		 * A non-private page that is not on any paging queue is put
		 * back on one: the active queue if it is marked referenced,
		 * the inactive queue otherwise.
		 */
		if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) {
			if (m->vmp_reference)
				vm_page_activate(m);
			else
				vm_page_deactivate(m);
		}
		if (m->vmp_overwriting) {
			/*
			 * the (COPY_OUT_FROM == FALSE) request_page_list case
			 */
			if (m->vmp_busy) {
				/*
				 * We do not re-set m->vmp_dirty !
				 * The page was busy so no extraneous activity
				 * could have occurred. COPY_INTO is a read into the
				 * new pages. CLEAN_IN_PLACE does actually write
				 * out the pages but handling outside of this code
				 * will take care of resetting dirty. We clear the
				 * modify however for the Programmed I/O case.
				 */
				pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));

				m->vmp_busy = FALSE;
				m->vmp_absent = FALSE;
			} else {
				/*
				 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
				 * Occurs when the original page was wired
				 * at the time of the list request
				 */
				 assert(VM_PAGE_WIRED(m));
				 vm_page_unwire(m, TRUE);	/* reactivates */
			}
			m->vmp_overwriting = FALSE;
		} else {
			/* the page was written out in place, so it is now clean */
			m->vmp_dirty = FALSE;
		}
		m->vmp_cleaning = FALSE;

		/*
		 * Wakeup any thread waiting for the page to be un-cleaning.
		 */
		PAGE_WAKEUP(m);
		vm_page_unlock_queues();
	}
	/*
	 * Account for the paging reference taken in vm_paging_object_allocate.
	 */
	vm_object_activity_end(shadow_object);
	vm_object_unlock(shadow_object);

	/* by now the pageout object must be completely idle and empty */
	assert(object->ref_count == 0);
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);
	assert(object->resident_page_count == 0);
	return;
}
503
/*
 * Routine:	vm_pageclean_setup
 *
 * Purpose:	setup a page to be cleaned (made non-dirty), but not
 *		necessarily flushed from the VM page cache.
 *		This is accomplished by cleaning in place.
 *
 *		The page must not be busy, and new_object
 *		must be locked.
 *
 * Parameters:
 *		m		the real page to be cleaned in place
 *		new_m		a fictitious page; it is converted into a
 *				private page aliasing m's physical page
 *		new_object	object that receives new_m
 *		new_offset	offset of new_m within new_object
 */
static void
vm_pageclean_setup(
	vm_page_t		m,
	vm_page_t		new_m,
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
{
	assert(!m->vmp_busy);
#if 0
	assert(!m->vmp_cleaning);
#endif

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
	    VM_PAGE_OBJECT(m), m->vmp_offset, m,
	    new_m, new_offset);

	/*
	 * Clear the pmap-level modify state; the page itself is marked
	 * dirty again just below.
	 */
	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));

	/*
	 * Mark original page as cleaning in place.
	 */
	m->vmp_cleaning = TRUE;
	SET_PAGE_DIRTY(m, FALSE);
	m->vmp_precious = FALSE;

	/*
	 * Convert the fictitious page to a private shadow of
	 * the real page.
	 */
	assert(new_m->vmp_fictitious);
	assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
	new_m->vmp_fictitious = FALSE;
	new_m->vmp_private = TRUE;
	new_m->vmp_free_when_done = TRUE;
	VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));

	/* wire the private page (under the page queues lock) ... */
	vm_page_lockspin_queues();
	vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
	vm_page_unlock_queues();

	/* ... and only then enter it into new_object at new_offset */
	vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
	assert(!new_m->vmp_wanted);
	new_m->vmp_busy = FALSE;
}
560
/*
 *	Routine:	vm_pageout_initialize_page
 *	Purpose:
 *		Causes the specified page to be initialized in
 *		the appropriate memory object. This routine is used to push
 *		pages into a copy-object when they are modified in the
 *		permanent object.
 *
 *		The page is moved to a temporary object and paged out.
 *
 *	In/out conditions:
 *		The page in question must not be on any pageout queues.
 *		The object to which it belongs must be locked.
 *		The page must be busy, but not hold a paging reference.
 *
 *	Implementation:
 *		Move this page to a completely new object.
 *
 *	NOTE(review): the code below does not move the page to another
 *	object; it marks the page dirty and calls
 *	memory_object_data_initialize() on the owning object's pager.
 *	The "temporary object" wording above looks historical -- confirm.
 *	The object lock is dropped around that call and re-taken before
 *	returning.
 */
void
vm_pageout_initialize_page(
	vm_page_t	m)
{
	vm_object_t		object;
	vm_object_offset_t	paging_offset;
	memory_object_t		pager;

	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_initialize_page, page 0x%X\n",
		m, 0, 0, 0, 0);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	object = VM_PAGE_OBJECT(m);

	assert(m->vmp_busy);
	assert(object->internal);

	/*
	 *	Verify that we really want to clean this page
	 */
	assert(!m->vmp_absent);
	assert(!m->vmp_error);
	assert(m->vmp_dirty);

	/*
	 *	Compute the page's offset as seen by the pager.
	 */
	paging_offset = m->vmp_offset + object->paging_offset;

	if (m->vmp_absent || m->vmp_error || m->vmp_restart || (!m->vmp_dirty && !m->vmp_precious)) {
		panic("reservation without pageout?"); /* alan */

		/*
		 * NOTE(review): presumably panic() does not return, which
		 * would make the cleanup below unreachable -- confirm.
		 */
		VM_PAGE_FREE(m);
		vm_object_unlock(object);

		return;
	}

	/*
	 * If there's no pager, then we can't clean the page.  This should
	 * never happen since this should be a copy object and therefore not
	 * an external object, so the pager should always be there.
	 */

	pager = object->pager;

	if (pager == MEMORY_OBJECT_NULL) {
		panic("missing pager for copy object");

		/*
		 * NOTE(review): presumably unreachable after panic(); unlike
		 * the branch above, this one does not unlock the object.
		 */
		VM_PAGE_FREE(m);
		return;
	}

	/*
	 * set the page for future call to vm_fault_list_request
	 */
	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
	SET_PAGE_DIRTY(m, FALSE);

	/*
	 * keep the object from collapsing or terminating
	 * (this is the paging reference; it is dropped again below)
	 */
	vm_object_paging_begin(object);
	vm_object_unlock(object);

	/*
	 *	Write the data to its pager.
	 *	Note that the data is passed by naming the new object,
	 *	not a virtual address; the pager interface has been
	 *	manipulated to use the "internal memory" data type.
	 *	[The object reference from its allocation is donated
	 *	to the eventual recipient.]
	 */
	memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);

	/* re-take the object lock and drop the paging reference */
	vm_object_lock(object);
	vm_object_paging_end(object);
}
659
660
661/*
662 * vm_pageout_cluster:
663 *
664 * Given a page, queue it to the appropriate I/O thread,
665 * which will page it out and attempt to clean adjacent pages
666 * in the same operation.
667 *
668 * The object and queues must be locked. We will take a
669 * paging reference to prevent deallocation or collapse when we
670 * release the object lock back at the call site. The I/O thread
671 * is responsible for consuming this reference
672 *
673 * The page must not be on any pageout queue.
674 */
675#if DEVELOPMENT || DEBUG
676vmct_stats_t vmct_stats;
677
678int32_t vmct_active = 0;
679uint64_t vm_compressor_epoch_start = 0;
680uint64_t vm_compressor_epoch_stop = 0;
681
682typedef enum vmct_state_t {
683 VMCT_IDLE,
684 VMCT_AWAKENED,
685 VMCT_ACTIVE,
686} vmct_state_t;
687vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
688#endif
689
690
691void
692vm_pageout_cluster(vm_page_t m)
693{
694 vm_object_t object = VM_PAGE_OBJECT(m);
695 struct vm_pageout_queue *q;
696
697
698 XPR(XPR_VM_PAGEOUT,
699 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
700 object, m->vmp_offset, m, 0, 0);
701
702 VM_PAGE_CHECK(m);
703 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
704 vm_object_lock_assert_exclusive(object);
705
706 /*
707 * Only a certain kind of page is appreciated here.
708 */
709 assert((m->vmp_dirty || m->vmp_precious) && (!VM_PAGE_WIRED(m)));
710 assert(!m->vmp_cleaning && !m->vmp_laundry);
711 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
712
713 /*
714 * protect the object from collapse or termination
715 */
716 vm_object_activity_begin(object);
717
718 if (object->internal == TRUE) {
719 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
720
721 m->vmp_busy = TRUE;
722
723 q = &vm_pageout_queue_internal;
724 } else
725 q = &vm_pageout_queue_external;
726
727 /*
728 * pgo_laundry count is tied to the laundry bit
729 */
730 m->vmp_laundry = TRUE;
731 q->pgo_laundry++;
732
733 m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q;
734 vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, vmp_pageq);
735
736 if (q->pgo_idle == TRUE) {
737 q->pgo_idle = FALSE;
738 thread_wakeup((event_t) &q->pgo_pending);
739 }
740 VM_PAGE_CHECK(m);
741}
742
743
744/*
745 * A page is back from laundry or we are stealing it back from
746 * the laundering state. See if there are some pages waiting to
747 * go to laundry and if we can let some of them go now.
748 *
749 * Object and page queues must be locked.
750 */
751void
752vm_pageout_throttle_up(
753 vm_page_t m)
754{
755 struct vm_pageout_queue *q;
756 vm_object_t m_object;
757
758 m_object = VM_PAGE_OBJECT(m);
759
760 assert(m_object != VM_OBJECT_NULL);
761 assert(m_object != kernel_object);
762
763 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
764 vm_object_lock_assert_exclusive(m_object);
765
766 if (m_object->internal == TRUE)
767 q = &vm_pageout_queue_internal;
768 else
769 q = &vm_pageout_queue_external;
770
771 if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
772
773 vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, vmp_pageq);
774 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
775
776 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
777
778 vm_object_activity_end(m_object);
779
780 VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, 1);
781 }
782 if (m->vmp_laundry == TRUE) {
783
784 m->vmp_laundry = FALSE;
785 q->pgo_laundry--;
786
787 if (q->pgo_throttled == TRUE) {
788 q->pgo_throttled = FALSE;
789 thread_wakeup((event_t) &q->pgo_laundry);
790 }
791 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
792 q->pgo_draining = FALSE;
793 thread_wakeup((event_t) (&q->pgo_laundry+1));
794 }
795 VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, 1);
796 }
797}
798
799
800static void
801vm_pageout_throttle_up_batch(
802 struct vm_pageout_queue *q,
803 int batch_cnt)
804{
805 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
806
807 VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt);
808
809 q->pgo_laundry -= batch_cnt;
810
811 if (q->pgo_throttled == TRUE) {
812 q->pgo_throttled = FALSE;
813 thread_wakeup((event_t) &q->pgo_laundry);
814 }
815 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
816 q->pgo_draining = FALSE;
817 thread_wakeup((event_t) (&q->pgo_laundry+1));
818 }
819}
820
821
822
823/*
824 * VM memory pressure monitoring.
825 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stats[vm_pageout_stat_now].
 *
 * record_memory_pressure() is called periodically (the bucket arithmetic
 * below assumes 8 calls per second) and moves "vm_pageout_stat_now" forward,
 * to start accumulating the number of reclaimed pages in a new
 * vm_pageout_stats[] bucket.
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants, up to 30 seconds on DEVELOPMENT/DEBUG kernels and up to 1 second
 * otherwise (see VM_PAGEOUT_STAT_SIZE).
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
839 */
/*
 * Number of buckets in the vm_pageout_stats[] ring: 8 buckets per second
 * of history (30 seconds on DEVELOPMENT/DEBUG kernels, 1 second otherwise)
 * plus one bucket for the interval currently being accumulated.
 *
 * The replacement list is fully parenthesized so that the macro can be
 * used safely inside any expression (e.g. "VM_PAGEOUT_STAT_SIZE * 2").
 */
#if DEVELOPMENT || DEBUG
#define VM_PAGEOUT_STAT_SIZE	((30 * 8) + 1)
#else
#define VM_PAGEOUT_STAT_SIZE	((1 * 8) + 1)
#endif

/*
 * One bucket of pageout statistics.  The "unsigned long" members hold
 * page-count snapshots, the "unsigned int" members hold event counters
 * for the interval the bucket covers.
 */
struct vm_pageout_stat {
	/* page queue snapshots */
	unsigned long vm_page_active_count;
	unsigned long vm_page_speculative_count;
	unsigned long vm_page_inactive_count;
	unsigned long vm_page_anonymous_count;

	unsigned long vm_page_free_count;
	unsigned long vm_page_wire_count;
	unsigned long vm_page_compressor_count;

	unsigned long vm_page_pages_compressed;
	unsigned long vm_page_pageable_internal_count;
	unsigned long vm_page_pageable_external_count;
	unsigned long vm_page_xpmapped_external_count;

	/* allocation / free activity */
	unsigned int pages_grabbed;
	unsigned int pages_freed;

	/* compressor activity */
	unsigned int pages_compressed;
	unsigned int pages_grabbed_by_compressor;
	unsigned int failed_compressions;

	unsigned int pages_evicted;
	unsigned int pages_purged;

	/* pages examined by the pageout scan */
	unsigned int considered;
	unsigned int considered_bq_internal;
	unsigned int considered_bq_external;

	unsigned int skipped_external;
	unsigned int filecache_min_reactivations;

	/* pages reclaimed; these four are summed to report "memory pressure" */
	unsigned int freed_speculative;
	unsigned int freed_cleaned;
	unsigned int freed_internal;
	unsigned int freed_external;

	unsigned int cleaned_dirty_external;
	unsigned int cleaned_dirty_internal;

	unsigned int inactive_referenced;
	unsigned int inactive_nolock;
	unsigned int reactivation_limit_exceeded;
	unsigned int forced_inactive_reclaim;

	unsigned int throttled_internal_q;
	unsigned int throttled_external_q;

	unsigned int phantom_ghosts_found;
	unsigned int phantom_ghosts_added;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, };

/* index of the bucket currently being accumulated */
unsigned int vm_pageout_stat_now = 0;

/* ring-buffer neighbours of bucket "i", wrapping at both ends */
#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
903
904#if VM_PAGE_BUCKETS_CHECK
905int vm_page_buckets_check_interval = 80; /* in eighths of a second */
906#endif /* VM_PAGE_BUCKETS_CHECK */
907
908
909void
910record_memory_pressure(void);
911void
912record_memory_pressure(void)
913{
914 unsigned int vm_pageout_next;
915
916#if VM_PAGE_BUCKETS_CHECK
917 /* check the consistency of VM page buckets at regular interval */
918 static int counter = 0;
919 if ((++counter % vm_page_buckets_check_interval) == 0) {
920 vm_page_buckets_check();
921 }
922#endif /* VM_PAGE_BUCKETS_CHECK */
923
924 vm_pageout_state.vm_memory_pressure =
925 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative +
926 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned +
927 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal +
928 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external;
929
930 commpage_set_memory_pressure( (unsigned int)vm_pageout_state.vm_memory_pressure );
931
932 /* move "now" forward */
933 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
934
935 bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
936
937 vm_pageout_stat_now = vm_pageout_next;
938}
939
940
941/*
942 * IMPORTANT
943 * mach_vm_ctl_page_free_wanted() is called indirectly, via
944 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
945 * it must be safe in the restricted stackshot context. Locks and/or
946 * blocking are not allowable.
947 */
948unsigned int
949mach_vm_ctl_page_free_wanted(void)
950{
951 unsigned int page_free_target, page_free_count, page_free_wanted;
952
953 page_free_target = vm_page_free_target;
954 page_free_count = vm_page_free_count;
955 if (page_free_target > page_free_count) {
956 page_free_wanted = page_free_target - page_free_count;
957 } else {
958 page_free_wanted = 0;
959 }
960
961 return page_free_wanted;
962}
963
964
965/*
 * IMPORTANT:
 * mach_vm_pressure_monitor() is called when taking a stackshot, with
 * wait_for_pressure FALSE, so that code path must remain safe in the
 * restricted stackshot context. No blocking or locks are allowable
 * on that code path.
971 */
972
973kern_return_t
974mach_vm_pressure_monitor(
975 boolean_t wait_for_pressure,
976 unsigned int nsecs_monitored,
977 unsigned int *pages_reclaimed_p,
978 unsigned int *pages_wanted_p)
979{
980 wait_result_t wr;
981 unsigned int vm_pageout_then, vm_pageout_now;
982 unsigned int pages_reclaimed;
983 unsigned int units_of_monitor;
984
985 units_of_monitor = 8 * nsecs_monitored;
986 /*
987 * We don't take the vm_page_queue_lock here because we don't want
988 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
989 * thread when it's trying to reclaim memory. We don't need fully
990 * accurate monitoring anyway...
991 */
992
993 if (wait_for_pressure) {
994 /* wait until there's memory pressure */
995 while (vm_page_free_count >= vm_page_free_target) {
996 wr = assert_wait((event_t) &vm_page_free_wanted,
997 THREAD_INTERRUPTIBLE);
998 if (wr == THREAD_WAITING) {
999 wr = thread_block(THREAD_CONTINUE_NULL);
1000 }
1001 if (wr == THREAD_INTERRUPTED) {
1002 return KERN_ABORTED;
1003 }
1004 if (wr == THREAD_AWAKENED) {
1005 /*
1006 * The memory pressure might have already
1007 * been relieved but let's not block again
1008 * and let's report that there was memory
1009 * pressure at some point.
1010 */
1011 break;
1012 }
1013 }
1014 }
1015
1016 /* provide the number of pages the system wants to reclaim */
1017 if (pages_wanted_p != NULL) {
1018 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1019 }
1020
1021 if (pages_reclaimed_p == NULL) {
1022 return KERN_SUCCESS;
1023 }
1024
1025 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1026 vm_pageout_now = vm_pageout_stat_now;
1027 pages_reclaimed = 0;
1028 for (vm_pageout_then =
1029 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1030 vm_pageout_then != vm_pageout_now &&
1031 units_of_monitor-- != 0;
1032 vm_pageout_then =
1033 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1034 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative;
1035 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned;
1036 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal;
1037 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external;
1038 }
1039 *pages_reclaimed_p = pages_reclaimed;
1040
1041 return KERN_SUCCESS;
1042}
1043
1044
1045
1046#if DEVELOPMENT || DEBUG
1047
1048static void
1049vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1050
/*
 * flag used to make sure there is
 * only a single sweep going on at a time
 */
1055boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1056
1057
1058void
1059vm_pageout_disconnect_all_pages()
1060{
1061 vm_page_lock_queues();
1062
1063 if (vm_pageout_disconnect_all_pages_active == TRUE) {
1064 vm_page_unlock_queues();
1065 return;
1066 }
1067 vm_pageout_disconnect_all_pages_active = TRUE;
1068 vm_page_unlock_queues();
1069
1070 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1071 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1072 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1073
1074 vm_pageout_disconnect_all_pages_active = FALSE;
1075}
1076
1077
1078void
1079vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1080{
1081 vm_page_t m;
1082 vm_object_t t_object = NULL;
1083 vm_object_t l_object = NULL;
1084 vm_object_t m_object = NULL;
1085 int delayed_unlock = 0;
1086 int try_failed_count = 0;
1087 int disconnected_count = 0;
1088 int paused_count = 0;
1089 int object_locked_count = 0;
1090
1091 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1092 q, qcount, 0, 0, 0);
1093
1094 vm_page_lock_queues();
1095
1096 while (qcount && !vm_page_queue_empty(q)) {
1097
1098 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1099
1100 m = (vm_page_t) vm_page_queue_first(q);
1101 m_object = VM_PAGE_OBJECT(m);
1102
1103 /*
1104 * check to see if we currently are working
1105 * with the same object... if so, we've
1106 * already got the lock
1107 */
1108 if (m_object != l_object) {
1109 /*
1110 * the object associated with candidate page is
1111 * different from the one we were just working
1112 * with... dump the lock if we still own it
1113 */
1114 if (l_object != NULL) {
1115 vm_object_unlock(l_object);
1116 l_object = NULL;
1117 }
1118 if (m_object != t_object)
1119 try_failed_count = 0;
1120
1121 /*
1122 * Try to lock object; since we've alread got the
1123 * page queues lock, we can only 'try' for this one.
1124 * if the 'try' fails, we need to do a mutex_pause
1125 * to allow the owner of the object lock a chance to
1126 * run...
1127 */
1128 if ( !vm_object_lock_try_scan(m_object)) {
1129
1130 if (try_failed_count > 20) {
1131 goto reenter_pg_on_q;
1132 }
1133 vm_page_unlock_queues();
1134 mutex_pause(try_failed_count++);
1135 vm_page_lock_queues();
1136 delayed_unlock = 0;
1137
1138 paused_count++;
1139
1140 t_object = m_object;
1141 continue;
1142 }
1143 object_locked_count++;
1144
1145 l_object = m_object;
1146 }
1147 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
1148 /*
1149 * put it back on the head of its queue
1150 */
1151 goto reenter_pg_on_q;
1152 }
1153 if (m->vmp_pmapped == TRUE) {
1154
1155 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1156
1157 disconnected_count++;
1158 }
1159reenter_pg_on_q:
1160 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1161 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1162
1163 qcount--;
1164 try_failed_count = 0;
1165
1166 if (delayed_unlock++ > 128) {
1167
1168 if (l_object != NULL) {
1169 vm_object_unlock(l_object);
1170 l_object = NULL;
1171 }
1172 lck_mtx_yield(&vm_page_queue_lock);
1173 delayed_unlock = 0;
1174 }
1175 }
1176 if (l_object != NULL) {
1177 vm_object_unlock(l_object);
1178 l_object = NULL;
1179 }
1180 vm_page_unlock_queues();
1181
1182 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1183 q, disconnected_count, object_locked_count, paused_count, 0);
1184}
1185
1186#endif
1187
1188
/*
 * Forward declaration of the per-queue worker used by
 * vm_pageout_anonymous_pages(); the definition follows that function.
 */
static void
vm_pageout_page_queue(vm_page_queue_head_t *, int);

/*
 * Flag (a plain boolean, not a wait/wakeup primitive) used to make
 * sure there is only a single sweep going on at a time.
 * vm_pageout_anonymous_pages() tests, sets and clears it while holding
 * the page queues lock; a caller that finds it already TRUE returns
 * without starting a second sweep.
 */
boolean_t	vm_pageout_anonymous_pages_active = FALSE;
1197
1198
1199void
1200vm_pageout_anonymous_pages()
1201{
1202 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1203
1204 vm_page_lock_queues();
1205
1206 if (vm_pageout_anonymous_pages_active == TRUE) {
1207 vm_page_unlock_queues();
1208 return;
1209 }
1210 vm_pageout_anonymous_pages_active = TRUE;
1211 vm_page_unlock_queues();
1212
1213 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1214 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1215 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1216
1217 if (VM_CONFIG_SWAP_IS_PRESENT)
1218 vm_consider_swapping();
1219
1220 vm_page_lock_queues();
1221 vm_pageout_anonymous_pages_active = FALSE;
1222 vm_page_unlock_queues();
1223 }
1224}
1225
1226
/*
 * vm_pageout_page_queue:
 *
 * Visit up to "qcount" pages of page queue "q", stopping early if the
 * queue becomes empty, and try to push each one out:
 *
 *  - pages of non-internal objects, pages whose object can't be locked
 *    after repeated tries, pages in a transient state and recently
 *    referenced pages are simply put back on the queue (referenced
 *    pages have their reference bit cleared first);
 *  - the remaining pages are disconnected from all pmaps;
 *  - pages that are then neither dirty nor precious are freed;
 *  - dirty or precious pages are removed from the page queues and
 *    handed to vm_pageout_cluster(), after making sure their object has
 *    an initialized pager (a compressor pager is created if needed).
 *
 * While the internal pageout queue is throttled, the sweep sleeps
 * before looking at the next page.
 *
 * The page queues lock is taken on entry and released on exit; it is
 * dropped and retaken around blocking operations.
 */
void
vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	t_object = NULL;	/* object whose lock we last failed to get */
	vm_object_t	l_object = NULL;	/* object whose lock we currently hold */
	vm_object_t	m_object = NULL;	/* object owning the current page */
	int		delayed_unlock = 0;	/* pages handled since the page queues lock was last dropped or yielded */
	int		try_failed_count = 0;	/* failed lock tries for the current page */
	int		refmod_state;
	int		pmap_options;
	struct vm_pageout_queue *iq;
	ppnum_t		phys_page;


	iq = &vm_pageout_queue_internal;

	vm_page_lock_queues();

	while (qcount && !vm_page_queue_empty(q)) {

		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

		if (VM_PAGE_Q_THROTTLED(iq)) {
			/*
			 * the internal pageout queue is throttled:
			 * release what we hold and sleep; the throttle
			 * is re-checked from the top once we wake up
			 */

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			iq->pgo_draining = TRUE;

			/*
			 * NOTE(review): (&iq->pgo_laundry + 1) is presumably
			 * the event posted when a draining queue empties --
			 * confirm against the wakeup side.  The wait is
			 * interruptible and the wait result is not examined.
			 */
			assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
			vm_page_unlock_queues();

			thread_block(THREAD_CONTINUE_NULL);

			vm_page_lock_queues();
			delayed_unlock = 0;
			continue;
		}
		m = (vm_page_t) vm_page_queue_first(q);
		m_object = VM_PAGE_OBJECT(m);

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * only pages of internal objects are pushed out
			 * by this sweep; note this test is made before
			 * the new object's lock is taken
			 */
			if ( !m_object->internal)
				goto reenter_pg_on_q;

			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			if (m_object != t_object)
				try_failed_count = 0;

			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				/* too many failures: skip this page */
				if (try_failed_count > 20) {
					goto reenter_pg_on_q;
				}
				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();
				delayed_unlock = 0;

				t_object = m_object;
				continue;
			}
			l_object = m_object;
		}
		if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			goto reenter_pg_on_q;
		}
		phys_page = VM_PAGE_GET_PHYS_PAGE(m);

		/*
		 * fold the hardware referenced/modified bits into the
		 * page's software state before deciding what to do
		 */
		if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
			refmod_state = pmap_get_refmod(phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->vmp_reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}
		if (m->vmp_reference == TRUE) {
			/*
			 * recently referenced: clear the reference
			 * (software and pmap) and leave the page alone
			 * for this pass
			 */
			m->vmp_reference = FALSE;
			pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
			goto reenter_pg_on_q;
		}
		if (m->vmp_pmapped == TRUE) {
			/*
			 * remove all mappings, picking up a "modified"
			 * indication that was only recorded in the pmap
			 */
			if (m->vmp_dirty || m->vmp_precious) {
				pmap_options = PMAP_OPTIONS_COMPRESSOR;
			} else {
				pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
			}
			refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		if ( !m->vmp_dirty && !m->vmp_precious) {
			/*
			 * nothing worth saving: free the page, with the
			 * page queues lock dropped around VM_PAGE_FREE
			 */
			vm_page_unlock_queues();
			VM_PAGE_FREE(m);
			vm_page_lock_queues();
			delayed_unlock = 0;

			goto next_pg;
		}
		if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {

			if (!m_object->pager_initialized) {

				vm_page_unlock_queues();

				vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);

				/* the state is re-tested after the collapse */
				if (!m_object->pager_initialized)
					vm_object_compressor_pager_create(m_object);

				vm_page_lock_queues();
				delayed_unlock = 0;
			}
			if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
				goto reenter_pg_on_q;
			/*
			 * vm_object_compressor_pager_create will drop the object lock
			 * which means 'm' may no longer be valid to use
			 */
			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the vm_page_queues_remove
		 */
		vm_page_queues_remove(m, TRUE);

		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

		vm_pageout_cluster(m);

		goto next_pg;

reenter_pg_on_q:
		/* page stays on "q": take it off the queue and enter it again */
		vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
		vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
next_pg:
		qcount--;
		try_failed_count = 0;

		/* periodically give others a shot at the locks we hold */
		if (delayed_unlock++ > 128) {

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			lck_mtx_yield(&vm_page_queue_lock);
			delayed_unlock = 0;
		}
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_page_unlock_queues();
}
1414
1415
1416
/*
 * function in BSD to apply I/O throttle to the pageout thread
 */
extern void vm_pageout_io_throttle(void);

/*
 * VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj):
 *
 * "m" is a page and "obj" must be the VM object it belongs to (this is
 * asserted).  If the page is marked reusable, or its object is marked
 * all_reusable, call vm_object_reuse_pages() on the one-page range
 * [m->vmp_offset, m->vmp_offset + PAGE_SIZE_64) of the object.
 *
 * Both arguments are evaluated more than once, so they must not have
 * side effects.
 */
#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj)			\
	MACRO_BEGIN							\
	/*								\
	 * If a "reusable" page somehow made it back into		\
	 * the active queue, it's been re-used and is not		\
	 * quite re-usable.						\
	 * If the VM object was "all_reusable", consider it		\
	 * as "all re-used" instead of converting it to			\
	 * "partially re-used", which could be expensive.		\
	 */								\
	assert(VM_PAGE_OBJECT((m)) == (obj));				\
	if ((m)->vmp_reusable ||					\
	    (obj)->all_reusable) {					\
		vm_object_reuse_pages((obj),				\
				      (m)->vmp_offset,			\
				      (m)->vmp_offset + PAGE_SIZE_64,	\
				      FALSE);				\
	}								\
	MACRO_END
1441
1442
/*
 * vm_pageout_scan() computes its "delayed_unlock_limit" as
 * VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages, clamped to
 * VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX.
 */
#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT		64
#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX	1024

/*
 * Values for flow_control.state; vm_pageout_scan() starts out in
 * FCS_IDLE.
 * NOTE(review): the transitions between these states happen further
 * down in vm_pageout_scan() -- see there for their exact meaning.
 */
#define FCS_IDLE		0
#define FCS_DELAYED		1
#define FCS_DEADLOCK_DETECTED	2

/*
 * Flow-control state kept by vm_pageout_scan() (zero-initialized there).
 */
struct flow_control {
	int		state;	/* one of the FCS_* values */
	mach_timespec_t	ts;	/* presumably a deadline associated with "state" -- TODO confirm */
};
1454
1455
/*
 * Counters that only exist when the background queue is configured.
 * NOTE(review): presumably bumped by vm_pageout_scan() when it rejects
 * or skips background-queue pages -- confirm at the points of use.
 */
#if CONFIG_BACKGROUND_QUEUE
uint64_t vm_pageout_rejected_bq_internal = 0;
uint64_t vm_pageout_rejected_bq_external = 0;
uint64_t vm_pageout_skipped_bq_internal = 0;
#endif

/*
 * NOTE(review): presumably the bound for vm_pageout_scan()'s
 * "anons_grabbed" count -- confirm at the point of use.
 */
#define ANONS_GRABBED_LIMIT	2


/* compiled out, like the definition of this function further down */
#if 0
static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
#endif
/*
 * arguments, in order: object, delayed_unlock, local_freeq,
 * local_freed, action (one of the VM_PAGEOUT_PB_* values below)
 */
static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);

/*
 * "action" values for vm_pageout_prepare_to_block(), carried out
 * while it has the page queues lock dropped:
 *   NO_ACTION                          - nothing extra
 *   CONSIDER_WAKING_COMPACTOR_SWAPPER  - call vm_consider_waking_compactor_swapper()
 *   THREAD_YIELD                       - call thread_yield_internal(1)
 */
#define	VM_PAGEOUT_PB_NO_ACTION				0
#define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER	1
#define VM_PAGEOUT_PB_THREAD_YIELD			2
1473
1474
/*
 * vm_pageout_delayed_unlock: compiled out (#if 0), kept for reference.
 *
 * If the caller's local free list (*local_freeq) is not empty, drop the
 * page queues lock, free the list with vm_page_free_list() between a
 * START/END pair of VM_PAGEOUT_FREELIST trace events, reset
 * *local_freeq and *local_freed, and retake the lock.  Otherwise just
 * yield the page queues lock.  Either way *delayed_unlock is reset to 1.
 */
#if 0
static void
vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
{
	if (*local_freeq) {
		vm_page_unlock_queues();

		VM_DEBUG_CONSTANT_EVENT(
			vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
			vm_page_free_count, 0, 0, 1);

		vm_page_free_list(*local_freeq, TRUE);

		VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist,VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					vm_page_free_count, *local_freed, 0, 1);

		*local_freeq = NULL;
		*local_freed = 0;

		vm_page_lock_queues();
	} else {
		/* nothing to free: just give others a chance at the lock */
		lck_mtx_yield(&vm_page_queue_lock);
	}
	*delayed_unlock = 1;
}
#endif
1501
1502
1503static void
1504vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1505 vm_page_t *local_freeq, int *local_freed, int action)
1506{
1507 vm_page_unlock_queues();
1508
1509 if (*object != NULL) {
1510 vm_object_unlock(*object);
1511 *object = NULL;
1512 }
1513 if (*local_freeq) {
1514
1515 vm_page_free_list(*local_freeq, TRUE);
1516
1517 *local_freeq = NULL;
1518 *local_freed = 0;
1519 }
1520 *delayed_unlock = 1;
1521
1522 switch (action) {
1523
1524 case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1525 vm_consider_waking_compactor_swapper();
1526 break;
1527 case VM_PAGEOUT_PB_THREAD_YIELD:
1528 thread_yield_internal(1);
1529 break;
1530 case VM_PAGEOUT_PB_NO_ACTION:
1531 default:
1532 break;
1533 }
1534 vm_page_lock_queues();
1535}
1536
1537
/*
 * Values the cumulative counters in vm_pageout_vminfo had the last time
 * update_vm_info() sampled them; used there to compute how far each
 * counter moved since the previous sample.
 */
static struct vm_pageout_vminfo last;

/* same idea, for the value returned by get_pages_grabbed_count() */
uint64_t last_vm_page_pages_grabbed = 0;

/* defined elsewhere; sampled by update_vm_info() */
extern uint32_t c_segment_pages_compressed;

/* defined elsewhere; not used by update_vm_info() */
extern uint64_t shared_region_pager_reclaimed;
extern struct memory_object_pager_ops shared_region_pager_ops;
1546
1547void update_vm_info(void)
1548{
1549 uint64_t tmp;
1550
1551 vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
1552 vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
1553 vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
1554 vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
1555
1556 vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
1557 vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
1558 vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
1559
1560 vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
1561 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
1562 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
1563 vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
1564
1565
1566 tmp = vm_pageout_vminfo.vm_pageout_considered_page;
1567 vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
1568 last.vm_pageout_considered_page = tmp;
1569
1570 tmp = vm_pageout_vminfo.vm_pageout_compressions;
1571 vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp - last.vm_pageout_compressions);
1572 last.vm_pageout_compressions = tmp;
1573
1574 tmp = vm_pageout_vminfo.vm_compressor_failed;
1575 vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
1576 last.vm_compressor_failed = tmp;
1577
1578 tmp = vm_pageout_vminfo.vm_compressor_pages_grabbed;
1579 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp - last.vm_compressor_pages_grabbed);
1580 last.vm_compressor_pages_grabbed = tmp;
1581
1582 tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
1583 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
1584 last.vm_phantom_cache_found_ghost = tmp;
1585
1586 tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
1587 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
1588 last.vm_phantom_cache_added_ghost = tmp;
1589
1590 tmp = get_pages_grabbed_count();
1591 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp - last_vm_page_pages_grabbed);
1592 last_vm_page_pages_grabbed = tmp;
1593
1594 tmp = vm_pageout_vminfo.vm_page_pages_freed;
1595 vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
1596 last.vm_page_pages_freed = tmp;
1597
1598
1599 if (vm_pageout_stats[vm_pageout_stat_now].considered) {
1600
1601 tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
1602 vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
1603 last.vm_pageout_pages_evicted = tmp;
1604
1605 tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
1606 vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
1607 last.vm_pageout_pages_purged = tmp;
1608
1609 tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
1610 vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
1611 last.vm_pageout_freed_speculative = tmp;
1612
1613 tmp = vm_pageout_vminfo.vm_pageout_freed_external;
1614 vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
1615 last.vm_pageout_freed_external = tmp;
1616
1617 tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
1618 vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
1619 last.vm_pageout_inactive_referenced = tmp;
1620
1621 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
1622 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
1623 last.vm_pageout_scan_inactive_throttled_external = tmp;
1624
1625 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
1626 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
1627 last.vm_pageout_inactive_dirty_external = tmp;
1628
1629 tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
1630 vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
1631 last.vm_pageout_freed_cleaned = tmp;
1632
1633 tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
1634 vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
1635 last.vm_pageout_inactive_nolock = tmp;
1636
1637 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
1638 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
1639 last.vm_pageout_scan_inactive_throttled_internal = tmp;
1640
1641 tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
1642 vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
1643 last.vm_pageout_skipped_external = tmp;
1644
1645 tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
1646 vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
1647 last.vm_pageout_reactivation_limit_exceeded = tmp;
1648
1649 tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
1650 vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
1651 last.vm_pageout_inactive_force_reclaim = tmp;
1652
1653 tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
1654 vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
1655 last.vm_pageout_freed_internal = tmp;
1656
1657 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
1658 vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
1659 last.vm_pageout_considered_bq_internal = tmp;
1660
1661 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
1662 vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
1663 last.vm_pageout_considered_bq_external = tmp;
1664
1665 tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
1666 vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
1667 last.vm_pageout_filecache_min_reactivated = tmp;
1668
1669 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
1670 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
1671 last.vm_pageout_inactive_dirty_internal = tmp;
1672 }
1673
1674 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
1675 vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
1676 vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
1677 vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
1678 vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count,
1679 0);
1680
1681 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
1682 vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
1683 vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
1684 vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count,
1685 0,
1686 0);
1687
1688 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
1689 vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
1690 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
1691 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
1692 vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count,
1693 0);
1694
1695 if (vm_pageout_stats[vm_pageout_stat_now].considered ||
1696 vm_pageout_stats[vm_pageout_stat_now].pages_compressed ||
1697 vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
1698
1699 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
1700 vm_pageout_stats[vm_pageout_stat_now].considered,
1701 vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
1702 vm_pageout_stats[vm_pageout_stat_now].freed_external,
1703 vm_pageout_stats[vm_pageout_stat_now].inactive_referenced,
1704 0);
1705
1706 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
1707 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
1708 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
1709 vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
1710 vm_pageout_stats[vm_pageout_stat_now].inactive_nolock,
1711 0);
1712
1713 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
1714 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
1715 vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
1716 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
1717 vm_pageout_stats[vm_pageout_stat_now].skipped_external,
1718 0);
1719
1720 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
1721 vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
1722 vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
1723 vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
1724 vm_pageout_stats[vm_pageout_stat_now].freed_internal,
1725 0);
1726
1727 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO8)) | DBG_FUNC_NONE,
1728 vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
1729 vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
1730 vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
1731 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal,
1732 0);
1733
1734 }
1735 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO9)) | DBG_FUNC_NONE,
1736 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
1737 vm_pageout_stats[vm_pageout_stat_now].pages_freed,
1738 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
1739 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added,
1740 0);
1741
1742 record_memory_pressure();
1743}
1744
1745
1746void
1747vm_page_balance_inactive(int max_to_move)
1748{
1749 vm_page_t m;
1750
1751 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1752
1753 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1754 vm_page_inactive_count +
1755 vm_page_speculative_count);
1756
1757 while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
1758
1759 VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
1760
1761 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
1762
1763 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
1764 assert(!m->vmp_laundry);
1765 assert(VM_PAGE_OBJECT(m) != kernel_object);
1766 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
1767
1768 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1769
1770 /*
1771 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1772 *
1773 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1774 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1775 * new reference happens. If no futher references happen on the page after that remote TLB flushes
1776 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1777 * by pageout_scan, which is just fine since the last reference would have happened quite far
1778 * in the past (TLB caches don't hang around for very long), and of course could just as easily
1779 * have happened before we moved the page
1780 */
1781 if (m->vmp_pmapped == TRUE)
1782 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1783
1784 /*
1785 * The page might be absent or busy,
1786 * but vm_page_deactivate can handle that.
1787 * FALSE indicates that we don't want a H/W clear reference
1788 */
1789 vm_page_deactivate_internal(m, FALSE);
1790 }
1791}
1792
1793
1794/*
1795 * vm_pageout_scan does the dirty work for the pageout daemon.
1796 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1797 * held and vm_page_free_wanted == 0.
1798 */
1799void
1800vm_pageout_scan(void)
1801{
1802 unsigned int loop_count = 0;
1803 unsigned int inactive_burst_count = 0;
1804 unsigned int reactivated_this_call;
1805 unsigned int reactivate_limit;
1806 vm_page_t local_freeq = NULL;
1807 int local_freed = 0;
1808 int delayed_unlock;
1809 int delayed_unlock_limit = 0;
1810 int refmod_state = 0;
1811 int vm_pageout_deadlock_target = 0;
1812 struct vm_pageout_queue *iq;
1813 struct vm_pageout_queue *eq;
1814 struct vm_speculative_age_q *sq;
1815 struct flow_control flow_control = { 0, { 0, 0 } };
1816 boolean_t inactive_throttled = FALSE;
1817 mach_timespec_t ts;
1818 unsigned int msecs = 0;
1819 vm_object_t object = NULL;
1820 uint32_t inactive_reclaim_run;
1821 boolean_t exceeded_burst_throttle;
1822 boolean_t grab_anonymous = FALSE;
1823 boolean_t force_anonymous = FALSE;
1824 boolean_t force_speculative_aging = FALSE;
1825 int anons_grabbed = 0;
1826 int page_prev_q_state = 0;
1827#if CONFIG_BACKGROUND_QUEUE
1828 boolean_t page_from_bg_q = FALSE;
1829#endif
1830 int cache_evict_throttle = 0;
1831 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1832 uint32_t inactive_external_count;
1833 int force_purge = 0;
1834 int divisor;
1835#define DELAY_SPECULATIVE_AGE 1000
1836 int delay_speculative_age = 0;
1837 vm_object_t m_object = VM_OBJECT_NULL;
1838
1839#if VM_PRESSURE_EVENTS
1840 vm_pressure_level_t pressure_level;
1841#endif /* VM_PRESSURE_EVENTS */
1842
1843 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1844 vm_pageout_vminfo.vm_pageout_freed_speculative,
1845 vm_pageout_state.vm_pageout_inactive_clean,
1846 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
1847 vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
1848
1849 flow_control.state = FCS_IDLE;
1850 iq = &vm_pageout_queue_internal;
1851 eq = &vm_pageout_queue_external;
1852 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1853
1854
1855 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1856
1857 /* Ask the pmap layer to return any pages it no longer needs. */
1858 uint64_t pmap_wired_pages_freed = pmap_release_pages_fast();
1859
1860 vm_page_lock_queues();
1861
1862 vm_page_wire_count -= pmap_wired_pages_freed;
1863
1864 delayed_unlock = 1;
1865
1866 /*
1867 * Calculate the max number of referenced pages on the inactive
1868 * queue that we will reactivate.
1869 */
1870 reactivated_this_call = 0;
1871 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1872 vm_page_inactive_count);
1873 inactive_reclaim_run = 0;
1874
1875 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1876
1877 /*
1878 * We must limit the rate at which we send pages to the pagers
1879 * so that we don't tie up too many pages in the I/O queues.
1880 * We implement a throttling mechanism using the laundry count
1881 * to limit the number of pages outstanding to the default
1882 * and external pagers. We can bypass the throttles and look
1883 * for clean pages if the pageout queues don't drain in a timely
1884 * fashion since this may indicate that the pageout paths are
1885 * stalled waiting for memory, which only we can provide.
1886 */
1887
1888Restart:
1889
1890 assert(object == NULL);
1891 assert(delayed_unlock != 0);
1892
1893 vm_page_anonymous_min = vm_page_inactive_target / 20;
1894
1895 if (vm_pageout_state.vm_page_speculative_percentage > 50)
1896 vm_pageout_state.vm_page_speculative_percentage = 50;
1897 else if (vm_pageout_state.vm_page_speculative_percentage <= 0)
1898 vm_pageout_state.vm_page_speculative_percentage = 1;
1899
1900 vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1901 vm_page_inactive_count);
1902
1903 for (;;) {
1904 vm_page_t m;
1905
1906 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1907
1908 if (vm_upl_wait_for_pages < 0)
1909 vm_upl_wait_for_pages = 0;
1910
1911 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1912
1913 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1914 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1915
1916#if CONFIG_SECLUDED_MEMORY
1917 /*
1918 * Deal with secluded_q overflow.
1919 */
1920 if (vm_page_secluded_count > vm_page_secluded_target) {
1921 vm_page_t secluded_page;
1922
1923 /*
1924 * SECLUDED_AGING_BEFORE_ACTIVE:
1925 * Excess secluded pages go to the active queue and
1926 * will later go to the inactive queue.
1927 */
1928 assert((vm_page_secluded_count_free +
1929 vm_page_secluded_count_inuse) ==
1930 vm_page_secluded_count);
1931 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1932 assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
1933
1934 vm_page_queues_remove(secluded_page, FALSE);
1935 assert(!secluded_page->vmp_fictitious);
1936 assert(!VM_PAGE_WIRED(secluded_page));
1937
1938 if (secluded_page->vmp_object == 0) {
1939 /* transfer to free queue */
1940 assert(secluded_page->vmp_busy);
1941 secluded_page->vmp_snext = local_freeq;
1942 local_freeq = secluded_page;
1943 local_freed++;
1944 } else {
1945 /* transfer to head of active queue */
1946 vm_page_enqueue_active(secluded_page, FALSE);
1947 secluded_page = VM_PAGE_NULL;
1948 }
1949 }
1950#endif /* CONFIG_SECLUDED_MEMORY */
1951
1952 assert(delayed_unlock);
1953
1954 /*
1955 * maintain our balance
1956 */
1957 vm_page_balance_inactive(1);
1958
1959
1960 /**********************************************************************
1961 * above this point we're playing with the active and secluded queues
1962 * below this point we're playing with the throttling mechanisms
1963 * and the inactive queue
1964 **********************************************************************/
1965
1966 if (vm_page_free_count + local_freed >= vm_page_free_target)
1967 {
1968 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1969
1970 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
1971 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
1972 /*
1973 * make sure the pageout I/O threads are running
1974 * throttled in case there are still requests
1975 * in the laundry... since we have met our targets
1976 * we don't need the laundry to be cleaned in a timely
1977 * fashion... so let's avoid interfering with foreground
1978 * activity
1979 */
1980 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
1981
1982 lck_mtx_lock(&vm_page_queue_free_lock);
1983
1984 if ((vm_page_free_count >= vm_page_free_target) &&
1985 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1986 /*
1987 * done - we have met our target *and*
1988 * there is no one waiting for a page.
1989 */
1990return_from_scan:
1991 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1992
1993 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
1994 vm_pageout_state.vm_pageout_inactive,
1995 vm_pageout_state.vm_pageout_inactive_used, 0, 0);
1996 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
1997 vm_pageout_vminfo.vm_pageout_freed_speculative,
1998 vm_pageout_state.vm_pageout_inactive_clean,
1999 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
2000 vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
2001
2002 return;
2003 }
2004 lck_mtx_unlock(&vm_page_queue_free_lock);
2005 }
2006
2007 /*
2008 * Before anything, we check if we have any ripe volatile
2009 * objects around. If so, try to purge the first object.
2010 * If the purge fails, fall through to reclaim a page instead.
2011 * If the purge succeeds, go back to the top and reevaluate
2012 * the new memory situation.
2013 */
2014
2015 assert (available_for_purge>=0);
2016 force_purge = 0; /* no force-purging */
2017
2018#if VM_PRESSURE_EVENTS
2019 pressure_level = memorystatus_vm_pressure_level;
2020
2021 if (pressure_level > kVMPressureNormal) {
2022
2023 if (pressure_level >= kVMPressureCritical) {
2024 force_purge = vm_pageout_state.memorystatus_purge_on_critical;
2025 } else if (pressure_level >= kVMPressureUrgent) {
2026 force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
2027 } else if (pressure_level >= kVMPressureWarning) {
2028 force_purge = vm_pageout_state.memorystatus_purge_on_warning;
2029 }
2030 }
2031#endif /* VM_PRESSURE_EVENTS */
2032
2033 if (available_for_purge || force_purge) {
2034
2035 if (object != NULL) {
2036 vm_object_unlock(object);
2037 object = NULL;
2038 }
2039
2040 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2041
2042 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2043 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2044 VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
2045 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2046 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2047 continue;
2048 }
2049 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2050 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2051 }
2052
2053 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2054 /*
2055 * try to pull pages from the aging bins...
2056 * see vm_page.h for an explanation of how
2057 * this mechanism works
2058 */
2059 struct vm_speculative_age_q *aq;
2060 boolean_t can_steal = FALSE;
2061 int num_scanned_queues;
2062
2063 aq = &vm_page_queue_speculative[speculative_steal_index];
2064
2065 num_scanned_queues = 0;
2066 while (vm_page_queue_empty(&aq->age_q) &&
2067 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2068
2069 speculative_steal_index++;
2070
2071 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2072 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2073
2074 aq = &vm_page_queue_speculative[speculative_steal_index];
2075 }
2076
2077 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2078 /*
2079 * XXX We've scanned all the speculative
2080 * queues but still haven't found one
2081 * that is not empty, even though
2082 * vm_page_speculative_count is not 0.
2083 */
2084 if (!vm_page_queue_empty(&sq->age_q))
2085 continue;
2086#if DEVELOPMENT || DEBUG
2087 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2088#endif
2089 /* readjust... */
2090 vm_page_speculative_count = 0;
2091 /* ... and continue */
2092 continue;
2093 }
2094
2095 if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE)
2096 can_steal = TRUE;
2097 else {
2098 if (!delay_speculative_age) {
2099 mach_timespec_t ts_fully_aged;
2100
2101 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
2102 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
2103 * 1000 * NSEC_PER_USEC;
2104
2105 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2106
2107 clock_sec_t sec;
2108 clock_nsec_t nsec;
2109 clock_get_system_nanotime(&sec, &nsec);
2110 ts.tv_sec = (unsigned int) sec;
2111 ts.tv_nsec = nsec;
2112
2113 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2114 can_steal = TRUE;
2115 else
2116 delay_speculative_age++;
2117 } else {
2118 delay_speculative_age++;
2119 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2120 delay_speculative_age = 0;
2121 }
2122 }
2123 if (can_steal == TRUE)
2124 vm_page_speculate_ageit(aq);
2125 }
2126 force_speculative_aging = FALSE;
2127
2128 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
2129
2130 int pages_evicted;
2131
2132 if (object != NULL) {
2133 vm_object_unlock(object);
2134 object = NULL;
2135 }
2136 KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
2137
2138 pages_evicted = vm_object_cache_evict(100, 10);
2139
2140 KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0);
2141
2142 if (pages_evicted) {
2143
2144 vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
2145
2146 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2147 vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
2148 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2149
2150 /*
2151 * we just freed up to 100 pages,
2152 * so go back to the top of the main loop
2153 * and re-evaluate the memory situation
2154 */
2155 continue;
2156 } else
2157 cache_evict_throttle = 1000;
2158 }
2159 if (cache_evict_throttle)
2160 cache_evict_throttle--;
2161
2162 divisor = vm_pageout_state.vm_page_filecache_min_divisor;
2163
2164#if CONFIG_JETSAM
2165 /*
2166 * don't let the filecache_min fall below 15% of available memory
2167 * on systems with an active compressor that isn't nearing its
2168 * limits w/r to accepting new data
2169 *
2170 * on systems w/o the compressor/swapper, the filecache is always
2171 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2172 * since most (if not all) of the anonymous pages are in the
2173 * throttled queue (which isn't counted as available) which
2174 * effectively disables this filter
2175 */
2176 if (vm_compressor_low_on_space() || divisor == 0)
2177 vm_pageout_state.vm_page_filecache_min = 0;
2178 else
2179 vm_pageout_state.vm_page_filecache_min =
2180 ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2181#else
2182 if (vm_compressor_out_of_space() || divisor == 0)
2183 vm_pageout_state.vm_page_filecache_min = 0;
2184 else {
2185 /*
2186 * don't let the filecache_min fall below the specified critical level
2187 */
2188 vm_pageout_state.vm_page_filecache_min =
2189 ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2190 }
2191#endif
2192 if (vm_page_free_count < (vm_page_free_reserved / 4))
2193 vm_pageout_state.vm_page_filecache_min = 0;
2194
2195 exceeded_burst_throttle = FALSE;
2196 /*
2197 * Sometimes we have to pause:
2198 * 1) No inactive pages - nothing to do.
2199 * 2) Loop control - no acceptable pages found on the inactive queue
2200 * within the last vm_pageout_burst_inactive_throttle iterations
2201 * 3) Flow control - default pageout queue is full
2202 */
2203 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2204 vm_page_queue_empty(&vm_page_queue_anonymous) &&
2205 vm_page_queue_empty(&vm_page_queue_cleaned) &&
2206 vm_page_queue_empty(&sq->age_q)) {
2207 VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
2208 msecs = vm_pageout_state.vm_pageout_empty_wait;
2209 goto vm_pageout_scan_delay;
2210
2211 } else if (inactive_burst_count >=
2212 MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
2213 (vm_page_inactive_count +
2214 vm_page_speculative_count))) {
2215 VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
2216 msecs = vm_pageout_state.vm_pageout_burst_wait;
2217
2218 exceeded_burst_throttle = TRUE;
2219 goto vm_pageout_scan_delay;
2220
2221 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2222 VM_DYNAMIC_PAGING_ENABLED()) {
2223 clock_sec_t sec;
2224 clock_nsec_t nsec;
2225
2226 switch (flow_control.state) {
2227
2228 case FCS_IDLE:
2229 if ((vm_page_free_count + local_freed) < vm_page_free_target &&
2230 vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2231 /*
2232 * since the compressor is running independently of vm_pageout_scan
2233 * let's not wait for it just yet... as long as we have a healthy supply
2234 * of filecache pages to work with, let's keep stealing those.
2235 */
2236 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2237
2238 if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
2239 (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2240 anons_grabbed = ANONS_GRABBED_LIMIT;
2241 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
2242 goto consider_inactive;
2243 }
2244 }
2245reset_deadlock_timer:
2246 ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
2247 ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2248 clock_get_system_nanotime(&sec, &nsec);
2249 flow_control.ts.tv_sec = (unsigned int) sec;
2250 flow_control.ts.tv_nsec = nsec;
2251 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2252
2253 flow_control.state = FCS_DELAYED;
2254 msecs = vm_pageout_state.vm_pageout_deadlock_wait;
2255
2256 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
2257 break;
2258
2259 case FCS_DELAYED:
2260 clock_get_system_nanotime(&sec, &nsec);
2261 ts.tv_sec = (unsigned int) sec;
2262 ts.tv_nsec = nsec;
2263
2264 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2265 /*
2266 * the pageout thread for the default pager is potentially
2267 * deadlocked since the
2268 * default pager queue has been throttled for more than the
2269 * allowable time... we need to move some clean pages or dirty
2270 * pages belonging to the external pagers if they aren't throttled
2271 * vm_page_free_wanted represents the number of threads currently
2272 * blocked waiting for pages... we'll move one page for each of
2273 * these plus a fixed amount to break the logjam... once we're done
2274 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2275 * with a new timeout target since we have no way of knowing
2276 * whether we've broken the deadlock except through observation
2277 * of the queue associated with the default pager... we need to
2278 * stop moving pages and allow the system to run to see what
2279 * state it settles into.
2280 */
2281 vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
2282 vm_page_free_wanted + vm_page_free_wanted_privileged;
2283 VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
2284 flow_control.state = FCS_DEADLOCK_DETECTED;
2285 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2286 goto consider_inactive;
2287 }
2288 /*
2289 * just resniff instead of trying
2290 * to compute a new delay time... we're going to be
2291 * awakened immediately upon a laundry completion,
2292 * so we won't wait any longer than necessary
2293 */
2294 msecs = vm_pageout_state.vm_pageout_idle_wait;
2295 break;
2296
2297 case FCS_DEADLOCK_DETECTED:
2298 if (vm_pageout_deadlock_target)
2299 goto consider_inactive;
2300 goto reset_deadlock_timer;
2301
2302 }
2303vm_pageout_scan_delay:
2304 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2305
2306 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2307 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2308
2309 if (vm_page_free_count >= vm_page_free_target) {
2310 /*
2311 * we're here because
2312 * 1) someone else freed up some pages while we had
2313 * the queues unlocked above
2314 * and we've hit one of the 3 conditions that
2315 * cause us to pause the pageout scan thread
2316 *
2317 * since we already have enough free pages,
2318 * let's avoid stalling and return normally
2319 *
2320 * before we return, make sure the pageout I/O threads
2321 * are running throttled in case there are still requests
2322 * in the laundry... since we have enough free pages
2323 * we don't need the laundry to be cleaned in a timely
2324 * fashion... so let's avoid interfering with foreground
2325 * activity
2326 *
2327 * we don't want to hold vm_page_queue_free_lock when
2328 * calling vm_pageout_adjust_eq_iothrottle (since it
2329 * may cause other locks to be taken), we do the initial
2330 * check outside of the lock. Once we take the lock,
2331 * we recheck the condition since it may have changed.
2332 * if it has, no problem, we will make the threads
2333 * non-throttled before actually blocking
2334 */
2335 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2336 }
2337 lck_mtx_lock(&vm_page_queue_free_lock);
2338
2339 if (vm_page_free_count >= vm_page_free_target &&
2340 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2341 goto return_from_scan;
2342 }
2343 lck_mtx_unlock(&vm_page_queue_free_lock);
2344
2345 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2346 /*
2347 * we're most likely about to block due to one of
2348 * the 3 conditions that cause vm_pageout_scan to
2349 * not be able to make forward progress w/r
2350 * to providing new pages to the free queue,
2351 * so unthrottle the I/O threads in case we
2352 * have laundry to be cleaned... it needs
2353 * to be completed ASAP.
2354 *
2355 * even if we don't block, we want the io threads
2356 * running unthrottled since the sum of free +
2357 * clean pages is still under our free target
2358 */
2359 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2360 }
2361 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2362 /*
2363 * if we get here we're below our free target and
2364 * we're stalling due to a full laundry queue or
2365 * we don't have any inactive pages other than
2366 * those in the clean queue...
2367 * however, we have pages on the clean queue that
2368 * can be moved to the free queue, so let's not
2369 * stall the pageout scan
2370 */
2371 flow_control.state = FCS_IDLE;
2372 goto consider_inactive;
2373 }
2374 if (flow_control.state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
2375 flow_control.state = FCS_IDLE;
2376 goto consider_inactive;
2377 }
2378
2379 VM_CHECK_MEMORYSTATUS;
2380
2381 if (flow_control.state != FCS_IDLE)
2382 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
2383
2384 iq->pgo_throttled = TRUE;
2385 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2386
2387 counter(c_vm_pageout_scan_block++);
2388
2389 vm_page_unlock_queues();
2390
2391 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2392
2393 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2394 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2395 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2396
2397 thread_block(THREAD_CONTINUE_NULL);
2398
2399 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2400 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2401 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2402
2403 vm_page_lock_queues();
2404
2405 iq->pgo_throttled = FALSE;
2406
2407 if (loop_count >= vm_page_inactive_count)
2408 loop_count = 0;
2409 inactive_burst_count = 0;
2410
2411 goto Restart;
2412 /*NOTREACHED*/
2413 }
2414
2415
2416 flow_control.state = FCS_IDLE;
2417consider_inactive:
2418 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2419 vm_pageout_inactive_external_forced_reactivate_limit);
2420 loop_count++;
2421 inactive_burst_count++;
2422 vm_pageout_state.vm_pageout_inactive++;
2423
2424 /*
2425 * Choose a victim.
2426 */
2427 while (1) {
2428
2429#if CONFIG_BACKGROUND_QUEUE
2430 page_from_bg_q = FALSE;
2431#endif /* CONFIG_BACKGROUND_QUEUE */
2432
2433 m = NULL;
2434 m_object = VM_OBJECT_NULL;
2435
2436 if (VM_DYNAMIC_PAGING_ENABLED()) {
2437 assert(vm_page_throttled_count == 0);
2438 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2439 }
2440
2441 /*
2442 * Try for a clean-queue inactive page.
2443 * These are pages that vm_pageout_scan tried to steal earlier, but
2444 * were dirty and had to be cleaned. Pick them up now that they are clean.
2445 */
2446 if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2447 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2448
2449 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2450
2451 break;
2452 }
2453
2454 /*
2455 * The next most eligible pages are ones we paged in speculatively,
2456 * but which have not yet been touched and have been aged out.
2457 */
2458 if (!vm_page_queue_empty(&sq->age_q)) {
2459 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2460
2461 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2462
2463 if (!m->vmp_dirty || force_anonymous == FALSE)
2464 break;
2465 else
2466 m = NULL;
2467 }
2468
2469#if CONFIG_BACKGROUND_QUEUE
2470 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2471 vm_object_t bg_m_object = NULL;
2472
2473 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2474
2475 bg_m_object = VM_PAGE_OBJECT(m);
2476
2477 if (!VM_PAGE_PAGEABLE(m)) {
2478 /*
2479 * This page is on the background queue
2480 * but not on a pageable queue. This is
2481 * likely a transient state and whoever
2482 * took it out of its pageable queue
2483 * will likely put it back on a pageable
2484 * queue soon but we can't deal with it
2485 * at this point, so let's ignore this
2486 * page.
2487 */
2488 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2489
2490 if (bg_m_object->internal &&
2491 (VM_PAGE_Q_THROTTLED(iq) ||
2492 vm_compressor_out_of_space() == TRUE ||
2493 vm_page_free_count < (vm_page_free_reserved / 4))) {
2494
2495 vm_pageout_skipped_bq_internal++;
2496 } else {
2497 page_from_bg_q = TRUE;
2498
2499 if (bg_m_object->internal)
2500 vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
2501 else
2502 vm_pageout_vminfo.vm_pageout_considered_bq_external++;
2503 break;
2504 }
2505 }
2506 }
2507#endif
2508 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2509
2510 if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
2511 (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2512 grab_anonymous = TRUE;
2513 anons_grabbed = 0;
2514
2515 vm_pageout_vminfo.vm_pageout_skipped_external++;
2516 goto want_anonymous;
2517 }
2518 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2519
2520#if CONFIG_JETSAM
2521 /* If the file-backed pool has accumulated
2522 * significantly more pages than the jetsam
2523 * threshold, prefer to reclaim those
2524 * inline to minimise compute overhead of reclaiming
2525 * anonymous pages.
2526 * This calculation does not account for the CPU local
2527 * external page queues, as those are expected to be
2528 * much smaller relative to the global pools.
2529 */
2530 if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2531 if (vm_page_pageable_external_count >
2532 vm_pageout_state.vm_page_filecache_min) {
2533 if ((vm_page_pageable_external_count *
2534 vm_pageout_memorystatus_fb_factor_dr) >
2535 (memorystatus_available_pages_critical *
2536 vm_pageout_memorystatus_fb_factor_nr)) {
2537 grab_anonymous = FALSE;
2538
2539 VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
2540 }
2541 }
2542 if (grab_anonymous) {
2543 VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
2544 }
2545 }
2546#endif /* CONFIG_JETSAM */
2547
2548want_anonymous:
2549 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2550
2551 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2552 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2553
2554 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2555 anons_grabbed = 0;
2556
2557 if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
2558
2559 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2560 if ((++reactivated_this_call % 100)) {
2561 vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
2562 goto must_activate_page;
2563 }
2564 /*
2565 * steal 1% of the file backed pages even if
2566 * we are under the limit that has been set
2567 * for a healthy filecache
2568 */
2569 }
2570 }
2571 break;
2572 }
2573 }
2574 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2575 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2576
2577 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2578 anons_grabbed++;
2579
2580 break;
2581 }
2582
2583 /*
2584 * if we've gotten here, we have no victim page.
2585 * check to see if we've not finished balancing the queues
2586 * or we have a page on the aged speculative queue that we
2587 * skipped due to force_anonymous == TRUE.. or we have
2588 * speculative pages that we can prematurely age... if
2589 * one of these cases we'll keep going, else panic
2590 */
2591 force_anonymous = FALSE;
2592 VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1);
2593
2594 if (!vm_page_queue_empty(&sq->age_q))
2595 goto done_with_inactivepage;
2596
2597 if (vm_page_speculative_count) {
2598 force_speculative_aging = TRUE;
2599 goto done_with_inactivepage;
2600 }
2601 panic("vm_pageout: no victim");
2602
2603 /* NOTREACHED */
2604 }
2605 assert(VM_PAGE_PAGEABLE(m));
2606 m_object = VM_PAGE_OBJECT(m);
2607 force_anonymous = FALSE;
2608
2609 page_prev_q_state = m->vmp_q_state;
2610 /*
2611 * we just found this page on one of our queues...
2612 * it can't also be on the pageout queue, so safe
2613 * to call vm_page_queues_remove
2614 */
2615 vm_page_queues_remove(m, TRUE);
2616
2617 assert(!m->vmp_laundry);
2618 assert(!m->vmp_private);
2619 assert(!m->vmp_fictitious);
2620 assert(m_object != kernel_object);
2621 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2622
2623 vm_pageout_vminfo.vm_pageout_considered_page++;
2624
2625 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2626
2627 /*
2628 * check to see if we currently are working
2629 * with the same object... if so, we've
2630 * already got the lock
2631 */
2632 if (m_object != object) {
2633 /*
2634 * the object associated with candidate page is
2635 * different from the one we were just working
2636 * with... dump the lock if we still own it
2637 */
2638 if (object != NULL) {
2639 vm_object_unlock(object);
2640 object = NULL;
2641 }
2642 /*
2643 * Try to lock object; since we've already got the
2644 * page queues lock, we can only 'try' for this one.
2645 * if the 'try' fails, we need to do a mutex_pause
2646 * to allow the owner of the object lock a chance to
2647 * run... otherwise, we're likely to trip over this
2648 * object in the same state as we work our way through
2649 * the queue... clumps of pages associated with the same
2650 * object are fairly typical on the inactive and active queues
2651 */
2652 if (!vm_object_lock_try_scan(m_object)) {
2653 vm_page_t m_want = NULL;
2654
2655 vm_pageout_vminfo.vm_pageout_inactive_nolock++;
2656
2657 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2658 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
2659
2660 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2661
2662 m->vmp_reference = FALSE;
2663
2664 if ( !m_object->object_is_shared_cache) {
2665 /*
2666 * don't apply this optimization if this is the shared cache
2667 * object, it's too easy to get rid of very hot and important
2668 * pages...
2669 * m->vmp_object must be stable since we hold the page queues lock...
2670 * we can update the scan_collisions field sans the object lock
2671 * since it is a separate field and this is the only spot that does
2672 * a read-modify-write operation and it is never executed concurrently...
2673 * we can asynchronously set this field to 0 when creating a UPL, so it
2674 * is possible for the value to be a bit non-deterministic, but that's ok
2675 * since it's only used as a hint
2676 */
2677 m_object->scan_collisions = 1;
2678 }
2679 if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2680 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2681 else if ( !vm_page_queue_empty(&sq->age_q))
2682 m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2683 else if ( (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT ||
2684 vm_page_queue_empty(&vm_page_queue_anonymous)) &&
2685 !vm_page_queue_empty(&vm_page_queue_inactive))
2686 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2687 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2688 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2689
2690 /*
2691 * this is the next object we're going to be interested in
2692 * try to make sure it's available after the mutex_pause
2693 * returns control
2694 */
2695 if (m_want)
2696 vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2697
2698 goto requeue_page;
2699 }
2700 object = m_object;
2701 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2702 }
2703 assert(m_object == object);
2704 assert(VM_PAGE_OBJECT(m) == m_object);
2705
2706 if (m->vmp_busy) {
2707 /*
2708 * Somebody is already playing with this page.
2709 * Put it back on the appropriate queue
2710 *
2711 */
2712 VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, 1);
2713
2714 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2715 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1);
2716requeue_page:
2717 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2718 vm_page_enqueue_inactive(m, FALSE);
2719 else
2720 vm_page_activate(m);
2721#if CONFIG_BACKGROUND_QUEUE
2722#if DEVELOPMENT || DEBUG
2723 if (page_from_bg_q == TRUE) {
2724 if (m_object->internal)
2725 vm_pageout_rejected_bq_internal++;
2726 else
2727 vm_pageout_rejected_bq_external++;
2728 }
2729#endif
2730#endif
2731 goto done_with_inactivepage;
2732 }
2733
2734 /*
2735 * if (m->vmp_cleaning && !m->vmp_free_when_done)
2736 * If already cleaning this page in place
2737 * just leave it off the paging queues.
2738 * We can leave the page mapped, and upl_commit_range
2739 * will put it on the clean queue.
2740 *
2741 * if (m->vmp_free_when_done && !m->vmp_cleaning)
2742 * an msync INVALIDATE is in progress...
2743 * this page has been marked for destruction
2744 * after it has been cleaned,
2745 * but not yet gathered into a UPL
2746 * where 'cleaning' will be set...
2747 * just leave it off the paging queues
2748 *
2749 * if (m->vmp_free_when_done && m->vmp_cleaning)
2750 * an msync INVALIDATE is in progress
2751 * and the UPL has already gathered this page...
2752 * just leave it off the paging queues
2753 */
2754 if (m->vmp_free_when_done || m->vmp_cleaning) {
2755 goto done_with_inactivepage;
2756 }
2757
2758
2759 /*
2760 * If it's absent, in error or the object is no longer alive,
2761 * we can reclaim the page... in the no longer alive case,
2762 * there are 2 states the page can be in that preclude us
2763 * from reclaiming it - busy or cleaning - that we've already
2764 * dealt with
2765 */
2766 if (m->vmp_absent || m->vmp_error || !object->alive) {
2767
2768 if (m->vmp_absent)
2769 VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, 1);
2770 else if (!object->alive)
2771 VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, 1);
2772 else
2773 VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, 1);
2774reclaim_page:
2775 if (vm_pageout_deadlock_target) {
2776 VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, 1);
2777 vm_pageout_deadlock_target--;
2778 }
2779
2780 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2781
2782 if (object->internal) {
2783 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2784 } else {
2785 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2786 }
2787 assert(!m->vmp_cleaning);
2788 assert(!m->vmp_laundry);
2789
2790 if (!object->internal &&
2791 object->pager != NULL &&
2792 object->pager->mo_pager_ops == &shared_region_pager_ops) {
2793 shared_region_pager_reclaimed++;
2794 }
2795
2796 m->vmp_busy = TRUE;
2797
2798 /*
2799 * remove page from object here since we're already
2800 * behind the object lock... defer the rest of the work
2801 * we'd normally do in vm_page_free_prepare_object
2802 * until 'vm_page_free_list' is called
2803 */
2804 if (m->vmp_tabled)
2805 vm_page_remove(m, TRUE);
2806
2807 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
2808 m->vmp_snext = local_freeq;
2809 local_freeq = m;
2810 local_freed++;
2811
2812 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2813 vm_pageout_vminfo.vm_pageout_freed_speculative++;
2814 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2815 vm_pageout_vminfo.vm_pageout_freed_cleaned++;
2816 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
2817 vm_pageout_vminfo.vm_pageout_freed_internal++;
2818 else
2819 vm_pageout_vminfo.vm_pageout_freed_external++;
2820
2821 inactive_burst_count = 0;
2822 goto done_with_inactivepage;
2823 }
2824 if (object->copy == VM_OBJECT_NULL) {
2825 /*
2826 * No one else can have any interest in this page.
2827 * If this is an empty purgable object, the page can be
2828 * reclaimed even if dirty.
2829 * If the page belongs to a volatile purgable object, we
2830 * reactivate it if the compressor isn't active.
2831 */
2832 if (object->purgable == VM_PURGABLE_EMPTY) {
2833 if (m->vmp_pmapped == TRUE) {
2834 /* unmap the page */
2835 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2836 if (refmod_state & VM_MEM_MODIFIED) {
2837 SET_PAGE_DIRTY(m, FALSE);
2838 }
2839 }
2840 if (m->vmp_dirty || m->vmp_precious) {
2841 /* we saved the cost of cleaning this page ! */
2842 vm_page_purged_count++;
2843 }
2844 goto reclaim_page;
2845 }
2846
2847 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2848 /*
2849 * With the VM compressor, the cost of
2850 * reclaiming a page is much lower (no I/O),
2851 * so if we find a "volatile" page, it's better
2852 * to let it get compressed rather than letting
2853 * it occupy a full page until it gets purged.
2854 * So no need to check for "volatile" here.
2855 */
2856 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2857 /*
2858 * Avoid cleaning a "volatile" page which might
2859 * be purged soon.
2860 */
2861
2862 /* if it's wired, we can't put it on our queue */
2863 assert(!VM_PAGE_WIRED(m));
2864
2865 /* just stick it back on! */
2866 reactivated_this_call++;
2867
2868 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2869 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, 1);
2870
2871 goto reactivate_page;
2872 }
2873 }
2874 /*
2875 * If it's being used, reactivate.
2876 * (Fictitious pages are either busy or absent.)
2877 * First, update the reference and dirty bits
2878 * to make sure the page is unreferenced.
2879 */
2880 refmod_state = -1;
2881
2882 if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
2883 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2884
2885 if (refmod_state & VM_MEM_REFERENCED)
2886 m->vmp_reference = TRUE;
2887 if (refmod_state & VM_MEM_MODIFIED) {
2888 SET_PAGE_DIRTY(m, FALSE);
2889 }
2890 }
2891
2892 if (m->vmp_reference || m->vmp_dirty) {
2893 /* deal with a rogue "reusable" page */
2894 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
2895 }
2896 divisor = vm_pageout_state.vm_page_xpmapped_min_divisor;
2897
2898 if (divisor == 0)
2899 vm_pageout_state.vm_page_xpmapped_min = 0;
2900 else
2901 vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * 10) / divisor;
2902
2903 if (!m->vmp_no_cache &&
2904#if CONFIG_BACKGROUND_QUEUE
2905 page_from_bg_q == FALSE &&
2906#endif
2907 (m->vmp_reference || (m->vmp_xpmapped && !object->internal &&
2908 (vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
2909 /*
2910 * The page we pulled off the inactive list has
2911 * been referenced. It is possible for other
2912 * processors to be touching pages faster than we
2913 * can clear the referenced bit and traverse the
2914 * inactive queue, so we limit the number of
2915 * reactivations.
2916 */
2917 if (++reactivated_this_call >= reactivate_limit) {
2918 vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++;
2919 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2920 vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++;
2921 } else {
2922 uint32_t isinuse;
2923
2924 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2925 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, 1);
2926
2927 vm_pageout_vminfo.vm_pageout_inactive_referenced++;
2928reactivate_page:
2929 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2930 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2931 /*
2932 * no explict mappings of this object exist
2933 * and it's not open via the filesystem
2934 */
2935 vm_page_deactivate(m);
2936 VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1);
2937 } else {
2938must_activate_page:
2939 /*
2940 * The page was/is being used, so put back on active list.
2941 */
2942 vm_page_activate(m);
2943 VM_STAT_INCR(reactivations);
2944 inactive_burst_count = 0;
2945 }
2946#if CONFIG_BACKGROUND_QUEUE
2947#if DEVELOPMENT || DEBUG
2948 if (page_from_bg_q == TRUE) {
2949 if (m_object->internal)
2950 vm_pageout_rejected_bq_internal++;
2951 else
2952 vm_pageout_rejected_bq_external++;
2953 }
2954#endif
2955#endif
2956 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2957 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
2958 vm_pageout_state.vm_pageout_inactive_used++;
2959
2960 goto done_with_inactivepage;
2961 }
2962 /*
2963 * Make sure we call pmap_get_refmod() if it
2964 * wasn't already called just above, to update
2965 * the dirty bit.
2966 */
2967 if ((refmod_state == -1) && !m->vmp_dirty && m->vmp_pmapped) {
2968 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2969 if (refmod_state & VM_MEM_MODIFIED) {
2970 SET_PAGE_DIRTY(m, FALSE);
2971 }
2972 }
2973 }
2974
2975 XPR(XPR_VM_PAGEOUT,
2976 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2977 object, m->vmp_offset, m, 0,0);
2978
2979 /*
2980 * we've got a candidate page to steal...
2981 *
2982 * m->vmp_dirty is up to date courtesy of the
2983 * preceding check for m->vmp_reference... if
2984 * we get here, then m->vmp_reference had to be
2985 * FALSE (or possibly "reactivate_limit" was
2986 * exceeded), but in either case we called
2987 * pmap_get_refmod() and updated both
2988 * m->vmp_reference and m->vmp_dirty
2989 *
2990 * if it's dirty or precious we need to
2991 * see if the target queue is throtttled
2992 * it if is, we need to skip over it by moving it back
2993 * to the end of the inactive queue
2994 */
2995
2996 inactive_throttled = FALSE;
2997
2998 if (m->vmp_dirty || m->vmp_precious) {
2999 if (object->internal) {
3000 if (VM_PAGE_Q_THROTTLED(iq))
3001 inactive_throttled = TRUE;
3002 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3003 inactive_throttled = TRUE;
3004 }
3005 }
3006throttle_inactive:
3007 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3008 object->internal && m->vmp_dirty &&
3009 (object->purgable == VM_PURGABLE_DENY ||
3010 object->purgable == VM_PURGABLE_NONVOLATILE ||
3011 object->purgable == VM_PURGABLE_VOLATILE)) {
3012 vm_page_check_pageable_safe(m);
3013 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3014 vm_page_queue_enter(&vm_page_queue_throttled, m,
3015 vm_page_t, vmp_pageq);
3016 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
3017 vm_page_throttled_count++;
3018
3019 VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1);
3020
3021 inactive_burst_count = 0;
3022 goto done_with_inactivepage;
3023 }
3024 if (inactive_throttled == TRUE) {
3025
3026 if (object->internal == FALSE) {
3027 /*
3028 * we need to break up the following potential deadlock case...
3029 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3030 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3031 * c) Most of the pages in the inactive queue belong to this file.
3032 *
3033 * we are potentially in this deadlock because...
3034 * a) the external pageout queue is throttled
3035 * b) we're done with the active queue and moved on to the inactive queue
3036 * c) we've got a dirty external page
3037 *
3038 * since we don't know the reason for the external pageout queue being throttled we
3039 * must suspect that we are deadlocked, so move the current page onto the active queue
3040 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3041 *
3042 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3043 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3044 * pool the next time we select a victim page... if we can make enough new free pages,
3045 * the deadlock will break, the external pageout queue will empty and it will no longer
3046 * be throttled
3047 *
3048 * if we have jetsam configured, keep a count of the pages reactivated this way so
3049 * that we can try to find clean pages in the active/inactive queues before
3050 * deciding to jetsam a process
3051 */
3052 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
3053
3054 vm_page_check_pageable_safe(m);
3055 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3056 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, vmp_pageq);
3057 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
3058 vm_page_active_count++;
3059 vm_page_pageable_external_count++;
3060
3061 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3062
3063#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3064 vm_pageout_inactive_external_forced_reactivate_limit--;
3065
3066 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3067 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3068 /*
3069 * Possible deadlock scenario so request jetsam action
3070 */
3071 assert(object);
3072 vm_object_unlock(object);
3073 object = VM_OBJECT_NULL;
3074 vm_page_unlock_queues();
3075
3076 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3077 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3078
3079 /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
3080 if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
3081 VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, 1);
3082 }
3083
3084 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
3085 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3086
3087 vm_page_lock_queues();
3088 delayed_unlock = 1;
3089 }
3090#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3091 force_anonymous = TRUE;
3092#endif
3093 inactive_burst_count = 0;
3094 goto done_with_inactivepage;
3095 } else {
3096 goto must_activate_page;
3097 }
3098 }
3099
3100 /*
3101 * we've got a page that we can steal...
3102 * eliminate all mappings and make sure
3103 * we have the up-to-date modified state
3104 *
3105 * if we need to do a pmap_disconnect then we
3106 * need to re-evaluate m->vmp_dirty since the pmap_disconnect
3107 * provides the true state atomically... the
3108 * page was still mapped up to the pmap_disconnect
3109 * and may have been dirtied at the last microsecond
3110 *
3111 * Note that if 'pmapped' is FALSE then the page is not
3112 * and has not been in any map, so there is no point calling
3113 * pmap_disconnect(). m->vmp_dirty could have been set in anticipation
3114 * of likely usage of the page.
3115 */
3116 if (m->vmp_pmapped == TRUE) {
3117 int pmap_options;
3118
3119 /*
3120 * Don't count this page as going into the compressor
3121 * if any of these are true:
3122 * 1) compressed pager isn't enabled
3123 * 2) Freezer enabled device with compressed pager
3124 * backend (exclusive use) i.e. most of the VM system
3125 * (including vm_pageout_scan) has no knowledge of
3126 * the compressor
3127 * 3) This page belongs to a file and hence will not be
3128 * sent into the compressor
3129 */
3130 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3131 object->internal == FALSE) {
3132 pmap_options = 0;
3133 } else if (m->vmp_dirty || m->vmp_precious) {
3134 /*
3135 * VM knows that this page is dirty (or
3136 * precious) and needs to be compressed
3137 * rather than freed.
3138 * Tell the pmap layer to count this page
3139 * as "compressed".
3140 */
3141 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3142 } else {
3143 /*
3144 * VM does not know if the page needs to
3145 * be preserved but the pmap layer might tell
3146 * us if any mapping has "modified" it.
3147 * Let's the pmap layer to count this page
3148 * as compressed if and only if it has been
3149 * modified.
3150 */
3151 pmap_options =
3152 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3153 }
3154 refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3155 pmap_options,
3156 NULL);
3157 if (refmod_state & VM_MEM_MODIFIED) {
3158 SET_PAGE_DIRTY(m, FALSE);
3159 }
3160 }
3161
3162 /*
3163 * reset our count of pages that have been reclaimed
3164 * since the last page was 'stolen'
3165 */
3166 inactive_reclaim_run = 0;
3167
3168 /*
3169 * If it's clean and not precious, we can free the page.
3170 */
3171 if (!m->vmp_dirty && !m->vmp_precious) {
3172
3173 vm_pageout_state.vm_pageout_inactive_clean++;
3174
3175 /*
3176 * OK, at this point we have found a page we are going to free.
3177 */
3178#if CONFIG_PHANTOM_CACHE
3179 if (!object->internal)
3180 vm_phantom_cache_add_ghost(m);
3181#endif
3182 goto reclaim_page;
3183 }
3184
3185 /*
3186 * The page may have been dirtied since the last check
3187 * for a throttled target queue (which may have been skipped
3188 * if the page was clean then). With the dirty page
3189 * disconnected here, we can make one final check.
3190 */
3191 if (object->internal) {
3192 if (VM_PAGE_Q_THROTTLED(iq))
3193 inactive_throttled = TRUE;
3194 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3195 inactive_throttled = TRUE;
3196 }
3197
3198 if (inactive_throttled == TRUE)
3199 goto throttle_inactive;
3200
3201#if VM_PRESSURE_EVENTS
3202#if CONFIG_JETSAM
3203
3204 /*
3205 * If Jetsam is enabled, then the sending
3206 * of memory pressure notifications is handled
3207 * from the same thread that takes care of high-water
3208 * and other jetsams i.e. the memorystatus_thread.
3209 */
3210
3211#else /* CONFIG_JETSAM */
3212
3213 vm_pressure_response();
3214
3215#endif /* CONFIG_JETSAM */
3216#endif /* VM_PRESSURE_EVENTS */
3217
3218 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3219 VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, 1);
3220
3221 if (object->internal)
3222 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++;
3223 else
3224 vm_pageout_vminfo.vm_pageout_inactive_dirty_external++;
3225
3226 /*
3227 * internal pages will go to the compressor...
3228 * external pages will go to the appropriate pager to be cleaned
3229 * and upon completion will end up on 'vm_page_queue_cleaned' which
3230 * is a preferred queue to steal from
3231 */
3232 vm_pageout_cluster(m);
3233 inactive_burst_count = 0;
3234
3235done_with_inactivepage:
3236
3237 if (delayed_unlock++ > delayed_unlock_limit) {
3238 int freed = local_freed;
3239
3240 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3241 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3242 if (freed == 0)
3243 lck_mtx_yield(&vm_page_queue_lock);
3244 } else if (vm_pageout_scan_wants_object) {
3245 vm_page_unlock_queues();
3246 mutex_pause(0);
3247 vm_page_lock_queues();
3248 }
3249 /*
3250 * back to top of pageout scan loop
3251 */
3252 }
3253}
3254
3255
3256void
3257vm_page_free_reserve(
3258 int pages)
3259{
3260 int free_after_reserve;
3261
3262 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3263
3264 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3265 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3266 else
3267 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3268
3269 } else {
3270 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3271 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3272 else
3273 vm_page_free_reserved += pages;
3274 }
3275 free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved;
3276
3277 vm_page_free_min = vm_page_free_reserved +
3278 VM_PAGE_FREE_MIN(free_after_reserve);
3279
3280 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3281 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3282
3283 vm_page_free_target = vm_page_free_reserved +
3284 VM_PAGE_FREE_TARGET(free_after_reserve);
3285
3286 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3287 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3288
3289 if (vm_page_free_target < vm_page_free_min + 5)
3290 vm_page_free_target = vm_page_free_min + 5;
3291
3292 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3293}
3294
/*
 *	vm_pageout_continue:
 *
 *	Body of the high level pageout daemon.  Each invocation runs one
 *	vm_pageout_scan() and then blocks on &vm_page_free_wanted, naming
 *	this function as the continuation handed to thread_block(), so
 *	control never returns to the caller.
 */

void
vm_pageout_continue(void)
{
	DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
	VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1);

#if !CONFIG_EMBEDDED
	/*
	 * Flag that a scan is in progress; vm_pageout_wait() sleeps
	 * for as long as this is set.
	 */
	lck_mtx_lock(&vm_page_queue_free_lock);
	vm_pageout_running = TRUE;
	lck_mtx_unlock(&vm_page_queue_free_lock);
#endif /* !CONFIG_EMBEDDED */

	vm_pageout_scan();
	/*
	 * we hold both the vm_page_queue_free_lock
	 * and the vm_page_queues_lock at this point
	 */
	assert(vm_page_free_wanted == 0);
	assert(vm_page_free_wanted_privileged == 0);
	/* queue the wait before either lock is dropped */
	assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);

#if !CONFIG_EMBEDDED
	/*
	 * Scan finished: clear the flag (still under
	 * vm_page_queue_free_lock) and release any thread
	 * sleeping in vm_pageout_wait().
	 */
	vm_pageout_running = FALSE;
	if (vm_pageout_waiter) {
		vm_pageout_waiter = FALSE;
		thread_wakeup((event_t)&vm_pageout_waiter);
	}
#endif /* !CONFIG_EMBEDDED */

	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();

	counter(c_vm_pageout_block++);
	/* block; the next wakeup re-enters this function from the top */
	thread_block((thread_continue_t)vm_pageout_continue);
	/*NOTREACHED*/
}
3335
3336#if !CONFIG_EMBEDDED
3337kern_return_t
3338vm_pageout_wait(uint64_t deadline)
3339{
3340 kern_return_t kr;
3341
3342 lck_mtx_lock(&vm_page_queue_free_lock);
3343 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3344 vm_pageout_waiter = TRUE;
3345 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3346 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3347 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3348 kr = KERN_OPERATION_TIMED_OUT;
3349 }
3350 }
3351 lck_mtx_unlock(&vm_page_queue_free_lock);
3352
3353 return (kr);
3354}
3355#endif /* !CONFIG_EMBEDDED */
3356
3357
/*
 *	vm_pageout_iothread_external_continue:
 *
 *	Service loop of the external pageout I/O thread.  Drains
 *	q->pgo_pending one page at a time and hands each page that is
 *	still eligible to its object's pager via memory_object_data_return().
 *	Once the queue is empty, the queue is marked idle and the thread
 *	blocks on &q->pgo_pending with this function as its continuation,
 *	so the next wakeup restarts at the top.
 *
 *	The page queues lock is held whenever q->pgo_pending is examined
 *	or modified; it is dropped while the object lock is taken and
 *	while the pager is called.
 */
static void
vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
{
	vm_page_t	m = NULL;
	vm_object_t	object;
	vm_object_offset_t offset;
	memory_object_t	pager;

	/* On systems with a compressor, the external IO thread clears its
	 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
	 * creation)
	 */
	if (vm_pageout_state.vm_pageout_internal_iothread != THREAD_NULL)
		current_thread()->options &= ~TH_OPT_VMPRIV;

	vm_page_lockspin_queues();

	while ( !vm_page_queue_empty(&q->pgo_pending) ) {

		q->pgo_busy = TRUE;
		vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);

		assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
		VM_PAGE_CHECK(m);
		/*
		 * grab a snapshot of the object and offset this
		 * page is tabled in so that we can relookup this
		 * page after we've taken the object lock - these
		 * fields are stable while we hold the page queues lock
		 * but as soon as we drop it, there is nothing to keep
		 * this page in this object... we hold an activity_in_progress
		 * on this object which will keep it from terminating
		 */
		object = VM_PAGE_OBJECT(m);
		offset = m->vmp_offset;

		m->vmp_q_state = VM_PAGE_NOT_ON_Q;
		VM_PAGE_ZERO_PAGEQ_ENTRY(m);

		vm_page_unlock_queues();

		vm_object_lock(object);

		/*
		 * re-find the page under the object lock; from here on
		 * "m" is whatever is tabled at the snapshotted offset now
		 */
		m = vm_page_lookup(object, offset);

		if (m == NULL ||
		    m->vmp_busy || m->vmp_cleaning || !m->vmp_laundry || (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) {
			/*
			 * it's either the same page that someone else has
			 * started cleaning (or it's finished cleaning or
			 * been put back on the pageout queue), or
			 * the page has been freed or we have found a
			 * new page at this offset... in all of these cases
			 * we merely need to release the activity_in_progress
			 * we took when we put the page on the pageout queue
			 */
			vm_object_activity_end(object);
			vm_object_unlock(object);

			/* the loop condition needs the page queues lock held */
			vm_page_lockspin_queues();
			continue;
		}
		pager = object->pager;

		if (pager == MEMORY_OBJECT_NULL) {
			/*
			 * This pager has been destroyed by either
			 * memory_object_destroy or vm_object_destroy, and
			 * so there is nowhere for the page to go.
			 */
			if (m->vmp_free_when_done) {
				/*
				 * Just free the page... VM_PAGE_FREE takes
				 * care of cleaning up all the state...
				 * including doing the vm_pageout_throttle_up
				 */
				VM_PAGE_FREE(m);
			} else {
				vm_page_lockspin_queues();

				vm_pageout_throttle_up(m);
				vm_page_activate(m);

				vm_page_unlock_queues();

				/*
				 * And we are done with it.
				 */
			}
			vm_object_activity_end(object);
			vm_object_unlock(object);

			/* the loop condition needs the page queues lock held */
			vm_page_lockspin_queues();
			continue;
		}
#if 0
		/*
		 * we don't hold the page queue lock
		 * so this check isn't safe to make
		 */
		VM_PAGE_CHECK(m);
#endif
		/*
		 * give back the activity_in_progress reference we
		 * took when we queued up this page and replace
		 * it with a paging_in_progress reference that will
		 * also hold the paging offset from changing and
		 * prevent the object from terminating
		 */
		vm_object_activity_end(object);
		vm_object_paging_begin(object);
		vm_object_unlock(object);

		/*
		 * Send the data to the pager.
		 * any pageout clustering happens there
		 */
		memory_object_data_return(pager,
					  m->vmp_offset + object->paging_offset,
					  PAGE_SIZE,
					  NULL,
					  NULL,
					  FALSE,
					  FALSE,
					  0);

		/* drop the paging_in_progress reference taken above */
		vm_object_lock(object);
		vm_object_paging_end(object);
		vm_object_unlock(object);

		vm_pageout_io_throttle();

		vm_page_lockspin_queues();
	}
	/* queue drained (page queues lock still held): go idle */
	q->pgo_busy = FALSE;
	q->pgo_idle = TRUE;

	/* register the wait before the lock is dropped */
	assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
	vm_page_unlock_queues();

	thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
	/*NOTREACHED*/
}
3501
3502
/*
 * Number of successfully compressed pages a compressor thread gathers on
 * its local free list before handing the batch to vm_page_free_list().
 */
#define MAX_FREE_BATCH 32
uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
				     * this thread.
				     */
3507
3508
/* forward declaration */
void
vm_pageout_iothread_internal_continue(struct cq *);
/*
 *	vm_pageout_iothread_internal_continue:
 *
 *	Service loop of one compressor thread, described by "cq".
 *
 *	Each pass pulls up to local_batch_size pages off cq->q->pgo_pending
 *	under the page queues lock, drops the lock, and feeds the pages one
 *	at a time to vm_pageout_compress_page().  Pages that compress
 *	successfully are collected on a local list and released with
 *	vm_page_free_list() in batches of MAX_FREE_BATCH.
 *
 *	When a pass finds nothing pending, the queue is marked idle and the
 *	thread blocks on an event derived from &q->pgo_pending and cq->id,
 *	with this function as its continuation.
 */
void
vm_pageout_iothread_internal_continue(struct cq *cq)
{
	struct vm_pageout_queue *q;
	vm_page_t	m = NULL;
	boolean_t	pgo_draining;
	vm_page_t   local_q;		/* pages pulled off the queue this pass */
	int	    local_cnt;		/* number of pages on local_q */
	vm_page_t   local_freeq = NULL;	/* compressed pages awaiting vm_page_free_list() */
	int         local_freed = 0;	/* number of pages on local_freeq */
	int	    local_batch_size;
#if DEVELOPMENT || DEBUG
	int       ncomps = 0;		/* successful compressions since this entry */
	boolean_t marked_active = FALSE;
#endif
	KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);

	q = cq->q;
	/* per-pass cap: the queue's max laundry split over twice the thread count */
	local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);

#if RECORD_THE_COMPRESSED_DATA
	if (q->pgo_laundry)
		c_compressed_record_init();
#endif
	while (TRUE) {
		int	pages_left_on_q = 0;

		local_cnt = 0;
		local_q = NULL;

		KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);

		vm_page_lock_queues();
#if DEVELOPMENT || DEBUG
		/*
		 * Count this thread as active once per entry; the first
		 * thread to become active stamps the start of the epoch.
		 */
		if (marked_active == FALSE) {
			vmct_active++;
			vmct_state[cq->id] = VMCT_ACTIVE;
			marked_active = TRUE;
			if (vmct_active == 1) {
				vm_compressor_epoch_start = mach_absolute_time();
			}
		}
#endif
		KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);

		KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);

		/* move up to local_batch_size pending pages onto the private list */
		while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {

			vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
			assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
			VM_PAGE_CHECK(m);

			m->vmp_q_state = VM_PAGE_NOT_ON_Q;
			VM_PAGE_ZERO_PAGEQ_ENTRY(m);
			m->vmp_laundry = FALSE;

			m->vmp_snext = local_q;
			local_q = m;
			local_cnt++;
		}
		/* nothing pending: leave the loop with the page queues lock held */
		if (local_q == NULL)
			break;

		q->pgo_busy = TRUE;

		/*
		 * When the queue is not draining, account for the batch
		 * with vm_pageout_throttle_up_batch() right away; when it
		 * is draining, that call is deferred until the batch has
		 * been processed (end of this pass).
		 */
		if ((pgo_draining = q->pgo_draining) == FALSE) {
			vm_pageout_throttle_up_batch(q, local_cnt);
			pages_left_on_q = q->pgo_laundry;
		} else
			pages_left_on_q = q->pgo_laundry - local_cnt;

		vm_page_unlock_queues();

#if !RECORD_THE_COMPRESSED_DATA
		/*
		 * At least another batch's worth is left and we are not
		 * the last compressor thread: wake the event that the
		 * thread with the next id sleeps on.
		 */
		if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - 1)) {
			thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
		}
#endif
		KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);

		while (local_q) {

			KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);

			/* pop the next page off the private list */
			m = local_q;
			local_q = m->vmp_snext;
			m->vmp_snext = NULL;

			if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m) == KERN_SUCCESS) {
#if DEVELOPMENT || DEBUG
				ncomps++;
#endif
				KERNEL_DEBUG(0xe0400024 | DBG_FUNC_END, local_cnt, 0, 0, 0, 0);

				/* compressed: chain it for batched freeing */
				m->vmp_snext = local_freeq;
				local_freeq = m;
				local_freed++;

				if (local_freed >= MAX_FREE_BATCH) {

					OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);

					vm_page_free_list(local_freeq, TRUE);

					local_freeq = NULL;
					local_freed = 0;
				}
			}
#if !CONFIG_JETSAM
			/*
			 * Free pages are below COMPRESSOR_FREE_RESERVED_LIMIT:
			 * first release whatever we are holding locally; if
			 * that was not enough, wait as a privileged free-page
			 * waiter until the count recovers.
			 */
			while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
				kern_return_t	wait_result;
				int		need_wakeup = 0;

				if (local_freeq) {
					OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);

					vm_page_free_list(local_freeq, TRUE);
					local_freeq = NULL;
					local_freed = 0;

					continue;
				}
				lck_mtx_lock_spin(&vm_page_queue_free_lock);

				/* re-check now that the free-queue lock is held */
				if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {

					/* only the first privileged waiter issues the wakeup below */
					if (vm_page_free_wanted_privileged++ == 0)
						need_wakeup = 1;
					wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);

					lck_mtx_unlock(&vm_page_queue_free_lock);

					if (need_wakeup)
						thread_wakeup((event_t)&vm_page_free_wanted);

					if (wait_result == THREAD_WAITING)

						thread_block(THREAD_CONTINUE_NULL);
				} else
					lck_mtx_unlock(&vm_page_queue_free_lock);
			}
#endif
		}
		/* release any partial batch left over from this pass */
		if (local_freeq) {
			OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);

			vm_page_free_list(local_freeq, TRUE);
			local_freeq = NULL;
			local_freed = 0;
		}
		/* the deferred accounting for a draining queue */
		if (pgo_draining == TRUE) {
			vm_page_lockspin_queues();
			vm_pageout_throttle_up_batch(q, local_cnt);
			vm_page_unlock_queues();
		}
	}
	KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * queue lock is held and our q is empty
	 */
	q->pgo_busy = FALSE;
	q->pgo_idle = TRUE;

	/* each compressor thread waits on its own event: &q->pgo_pending offset by its id */
	assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
#if DEVELOPMENT || DEBUG
	if (marked_active == TRUE) {
		vmct_active--;
		vmct_state[cq->id] = VMCT_IDLE;

		/* the last thread to go idle closes the epoch and accumulates its length */
		if (vmct_active == 0) {
			vm_compressor_epoch_stop = mach_absolute_time();
			assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start,
			    "Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
			    vm_compressor_epoch_start, vm_compressor_epoch_stop);
			/* This interval includes intervals where one or more
			 * compressor threads were pre-empted
			 */
			vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start;
		}
	}
#endif
	vm_page_unlock_queues();
#if DEVELOPMENT || DEBUG
	/* per-thread statistics, recorded only when vm_compressor_time_thread is non-zero */
	if (__improbable(vm_compressor_time_thread)) {
		vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
		vmct_stats.vmct_pages[cq->id] += ncomps;
		vmct_stats.vmct_iterations[cq->id]++;
		if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
			vmct_stats.vmct_maxpages[cq->id] = ncomps;
		}
		if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
			vmct_stats.vmct_minpages[cq->id] = ncomps;
		}
	}
#endif

	KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);

	thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
	/*NOTREACHED*/
}
3714
3715
/*
 *	vm_pageout_compress_page:
 *
 *	Hand page "m" to the compressor pager of its object, first creating
 *	that pager if the object does not have an initialized one yet.
 *
 *	current_chead and scratch_buf are passed through unchanged to
 *	vm_compressor_pager_put().
 *
 *	Returns the result of vm_compressor_pager_put(), or KERN_FAILURE if
 *	no pager could be set up.  On KERN_SUCCESS the page has been removed
 *	from its object (if it was tabled); this function does not free it.
 *	On any failure the page is woken with PAGE_WAKEUP_DONE() and
 *	reactivated with vm_page_activate().
 *
 *	On every path the object's activity_in_progress reference is ended
 *	before returning.  The object lock is taken and dropped internally.
 */
kern_return_t
vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m)
{
	vm_object_t	object;
	memory_object_t	pager;
	int		compressed_count_delta;
	kern_return_t	retval;

	object = VM_PAGE_OBJECT(m);

	assert(!m->vmp_free_when_done);
	assert(!m->vmp_laundry);

	pager = object->pager;

	/* first look is made without the object lock; the slow path below re-checks under it */
	if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {

		KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);

		vm_object_lock(object);

		/*
		 * If there is no memory object for the page, create
		 * one and hand it to the compression pager.
		 */

		if (!object->pager_initialized)
			vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
		if (!object->pager_initialized)
			vm_object_compressor_pager_create(object);

		pager = object->pager;

		if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
			/*
			 * Still no pager for the object,
			 * or the pager has been destroyed.
			 * Reactivate the page.
			 *
			 * Should only happen if there is no
			 * compression pager
			 */
			PAGE_WAKEUP_DONE(m);

			vm_page_lockspin_queues();
			vm_page_activate(m);
			VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, 1);
			vm_page_unlock_queues();

			/*
			 * And we are done with it.
			 */
			vm_object_activity_end(object);
			vm_object_unlock(object);

			return KERN_FAILURE;
		}
		vm_object_unlock(object);

		KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
	}
	assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
	assert(object->activity_in_progress > 0);

	/* the put itself is issued with the object lock dropped */
	retval = vm_compressor_pager_put(
		pager,
		m->vmp_offset + object->paging_offset,
		VM_PAGE_GET_PHYS_PAGE(m),
		current_chead,
		scratch_buf,
		&compressed_count_delta);

	vm_object_lock(object);

	assert(object->activity_in_progress > 0);
	assert(VM_PAGE_OBJECT(m) == object);
	assert( !VM_PAGE_WIRED(m));

	/* report the delta from the put, whether or not the put succeeded */
	vm_compressor_pager_count(pager,
				  compressed_count_delta,
				  FALSE, /* shared_lock */
				  object);

	if (retval == KERN_SUCCESS) {
		/*
		 * If the object is purgeable, its owner's
		 * purgeable ledgers will be updated in
		 * vm_page_remove() but the page still
		 * contributes to the owner's memory footprint,
		 * so account for it as such.
		 */
		if ((object->purgable != VM_PURGABLE_DENY ||
		     object->vo_ledger_tag) &&
		    object->vo_owner != NULL) {
			/* one more compressed purgeable/tagged page */
			vm_object_owner_compressed_update(object,
							  +1);
		}
		VM_STAT_INCR(compressions);

		if (m->vmp_tabled)
			vm_page_remove(m, TRUE);

	} else {
		/* compression failed: wake the page and put it back in circulation */
		PAGE_WAKEUP_DONE(m);

		vm_page_lockspin_queues();

		vm_page_activate(m);
		vm_pageout_vminfo.vm_compressor_failed++;

		vm_page_unlock_queues();
	}
	vm_object_activity_end(object);
	vm_object_unlock(object);

	return retval;
}
3834
3835
3836static void
3837vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3838{
3839 uint32_t policy;
3840
3841 if (hibernate_cleaning_in_progress == TRUE)
3842 req_lowpriority = FALSE;
3843
3844 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
3845
3846 vm_page_unlock_queues();
3847
3848 if (req_lowpriority == TRUE) {
3849 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
3850 DTRACE_VM(laundrythrottle);
3851 } else {
3852 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
3853 DTRACE_VM(laundryunthrottle);
3854 }
3855 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
3856 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3857
3858 eq->pgo_lowpriority = req_lowpriority;
3859
3860 vm_page_lock_queues();
3861 }
3862}
3863
3864
3865static void
3866vm_pageout_iothread_external(void)
3867{
3868 thread_t self = current_thread();
3869
3870 self->options |= TH_OPT_VMPRIV;
3871
3872 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3873
3874 proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
3875 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3876
3877 vm_page_lock_queues();
3878
3879 vm_pageout_queue_external.pgo_tid = self->thread_id;
3880 vm_pageout_queue_external.pgo_lowpriority = TRUE;
3881 vm_pageout_queue_external.pgo_inited = TRUE;
3882
3883 vm_page_unlock_queues();
3884
3885 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3886
3887 /*NOTREACHED*/
3888}
3889
3890
3891static void
3892vm_pageout_iothread_internal(struct cq *cq)
3893{
3894 thread_t self = current_thread();
3895
3896 self->options |= TH_OPT_VMPRIV;
3897
3898 vm_page_lock_queues();
3899
3900 vm_pageout_queue_internal.pgo_tid = self->thread_id;
3901 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3902 vm_pageout_queue_internal.pgo_inited = TRUE;
3903
3904 vm_page_unlock_queues();
3905
3906 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
3907 thread_vm_bind_group_add();
3908
3909
3910 thread_set_thread_name(current_thread(), "VM_compressor");
3911#if DEVELOPMENT || DEBUG
3912 vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
3913#endif
3914 vm_pageout_iothread_internal_continue(cq);
3915
3916 /*NOTREACHED*/
3917}
3918
3919kern_return_t
3920vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
3921{
3922 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3923 return KERN_SUCCESS;
3924 } else {
3925 return KERN_FAILURE; /* Already set */
3926 }
3927}
3928
/*
 * Defined elsewhere.  vm_pressure_response() below recomputes
 * memorystatus_level on every call and then returns early, without
 * changing the pressure level, when memorystatus_manual_testing_on is set.
 */
extern boolean_t memorystatus_manual_testing_on;
extern unsigned int memorystatus_level;


#if VM_PRESSURE_EVENTS

/* vm_pressure_response() returns immediately while this is FALSE. */
boolean_t vm_pressure_events_enabled = FALSE;
3936
3937void
3938vm_pressure_response(void)
3939{
3940
3941 vm_pressure_level_t old_level = kVMPressureNormal;
3942 int new_level = -1;
3943 unsigned int total_pages;
3944 uint64_t available_memory = 0;
3945
3946 if (vm_pressure_events_enabled == FALSE)
3947 return;
3948
3949#if CONFIG_EMBEDDED
3950
3951 available_memory = (uint64_t) memorystatus_available_pages;
3952
3953#else /* CONFIG_EMBEDDED */
3954
3955 available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3956 memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3957
3958#endif /* CONFIG_EMBEDDED */
3959
3960 total_pages = (unsigned int) atop_64(max_mem);
3961#if CONFIG_SECLUDED_MEMORY
3962 total_pages -= vm_page_secluded_count;
3963#endif /* CONFIG_SECLUDED_MEMORY */
3964 memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
3965
3966 if (memorystatus_manual_testing_on) {
3967 return;
3968 }
3969
3970 old_level = memorystatus_vm_pressure_level;
3971
3972 switch (memorystatus_vm_pressure_level) {
3973
3974 case kVMPressureNormal:
3975 {
3976 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3977 new_level = kVMPressureCritical;
3978 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
3979 new_level = kVMPressureWarning;
3980 }
3981 break;
3982 }
3983
3984 case kVMPressureWarning:
3985 case kVMPressureUrgent:
3986 {
3987 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3988 new_level = kVMPressureNormal;
3989 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3990 new_level = kVMPressureCritical;
3991 }
3992 break;
3993 }
3994
3995 case kVMPressureCritical:
3996 {
3997 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3998 new_level = kVMPressureNormal;
3999 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4000 new_level = kVMPressureWarning;
4001 }
4002 break;
4003 }
4004
4005 default:
4006 return;
4007 }
4008
4009 if (new_level != -1) {
4010 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4011
4012 if (new_level != old_level) {
4013 VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
4014 new_level, old_level, 0, 0);
4015 }
4016
4017 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != memorystatus_vm_pressure_level)) {
4018 if (vm_pageout_state.vm_pressure_thread_running == FALSE) {
4019 thread_wakeup(&vm_pressure_thread);
4020 }
4021
4022 if (old_level != memorystatus_vm_pressure_level) {
4023 thread_wakeup(&vm_pageout_state.vm_pressure_changed);
4024 }
4025 }
4026 }
4027
4028}
4029#endif /* VM_PRESSURE_EVENTS */
4030
4031kern_return_t
4032mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4033
4034#if CONFIG_EMBEDDED
4035
4036 return KERN_FAILURE;
4037
4038#elif !VM_PRESSURE_EVENTS
4039
4040 return KERN_FAILURE;
4041
4042#else /* VM_PRESSURE_EVENTS */
4043
4044 kern_return_t kr = KERN_SUCCESS;
4045
4046 if (pressure_level != NULL) {
4047
4048 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4049
4050 if (wait_for_pressure == TRUE) {
4051 wait_result_t wr = 0;
4052
4053 while (old_level == *pressure_level) {
4054 wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
4055 THREAD_INTERRUPTIBLE);
4056 if (wr == THREAD_WAITING) {
4057 wr = thread_block(THREAD_CONTINUE_NULL);
4058 }
4059 if (wr == THREAD_INTERRUPTED) {
4060 return KERN_ABORTED;
4061 }
4062 if (wr == THREAD_AWAKENED) {
4063
4064 old_level = memorystatus_vm_pressure_level;
4065
4066 if (old_level != *pressure_level) {
4067 break;
4068 }
4069 }
4070 }
4071 }
4072
4073 *pressure_level = old_level;
4074 kr = KERN_SUCCESS;
4075 } else {
4076 kr = KERN_INVALID_ARGUMENT;
4077 }
4078
4079 return kr;
4080#endif /* VM_PRESSURE_EVENTS */
4081}
4082
4083#if VM_PRESSURE_EVENTS
4084void
4085vm_pressure_thread(void) {
4086 static boolean_t thread_initialized = FALSE;
4087
4088 if (thread_initialized == TRUE) {
4089 vm_pageout_state.vm_pressure_thread_running = TRUE;
4090 consider_vm_pressure_events();
4091 vm_pageout_state.vm_pressure_thread_running = FALSE;
4092 }
4093
4094 thread_set_thread_name(current_thread(), "VM_pressure");
4095 thread_initialized = TRUE;
4096 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4097 thread_block((thread_continue_t)vm_pressure_thread);
4098}
4099#endif /* VM_PRESSURE_EVENTS */
4100
4101
4102/*
4103 * called once per-second via "compute_averages"
4104 */
4105void
4106compute_pageout_gc_throttle(__unused void *arg)
4107{
4108 if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) {
4109
4110 vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page;
4111
4112 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4113 }
4114}
4115
4116/*
4117 * vm_pageout_garbage_collect can also be called when the zone allocator needs
4118 * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4119 * jetsams. We need to check if the zone map size is above its jetsam limit to
4120 * decide if this was indeed the case.
4121 *
4122 * We need to do this on a different thread because of the following reasons:
4123 *
4124 * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4125 * itself causing the system to hang. We perform synchronous jetsams if we're
4126 * leaking in the VM map entries zone, so the leaking process could be doing a
4127 * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4128 * jetsam itself. We also need the vm_map lock on the process termination path,
4129 * which would now lead the dying process to deadlock against itself.
4130 *
4131 * 2. The jetsam path might need to allocate zone memory itself. We could try
4132 * using the non-blocking variant of zalloc for this path, but we can still
4133 * end up trying to do a kernel_memory_allocate when the zone_map is almost
4134 * full.
4135 */
4136
4137extern boolean_t is_zone_map_nearing_exhaustion(void);
4138
4139void
4140vm_pageout_garbage_collect(int collect)
4141{
4142 if (collect) {
4143 if (is_zone_map_nearing_exhaustion()) {
4144 /*
4145 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4146 *
4147 * Bail out after calling zone_gc (which triggers the
4148 * zone-map-exhaustion jetsams). If we fall through, the subsequent
4149 * operations that clear out a bunch of caches might allocate zone
4150 * memory themselves (for eg. vm_map operations would need VM map
4151 * entries). Since the zone map is almost full at this point, we
4152 * could end up with a panic. We just need to quickly jetsam a
4153 * process and exit here.
4154 *
4155 * It could so happen that we were woken up to relieve memory
4156 * pressure and the zone map also happened to be near its limit at
4157 * the time, in which case we'll skip out early. But that should be
4158 * ok; if memory pressure persists, the thread will simply be woken
4159 * up again.
4160 */
4161 consider_zone_gc(TRUE);
4162
4163 } else {
4164 /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
4165 boolean_t buf_large_zfree = FALSE;
4166 boolean_t first_try = TRUE;
4167
4168 stack_collect();
4169
4170 consider_machine_collect();
4171 mbuf_drain(FALSE);
4172
4173 do {
4174 if (consider_buffer_cache_collect != NULL) {
4175 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4176 }
4177 if (first_try == TRUE || buf_large_zfree == TRUE) {
4178 /*
4179 * consider_zone_gc should be last, because the other operations
4180 * might return memory to zones.
4181 */
4182 consider_zone_gc(FALSE);
4183 }
4184 first_try = FALSE;
4185
4186 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4187
4188 consider_machine_adjust();
4189 }
4190 }
4191
4192 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4193
4194 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4195 /*NOTREACHED*/
4196}
4197
4198
4199#if VM_PAGE_BUCKETS_CHECK
4200#if VM_PAGE_FAKE_BUCKETS
4201extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4202#endif /* VM_PAGE_FAKE_BUCKETS */
4203#endif /* VM_PAGE_BUCKETS_CHECK */
4204
4205
4206
4207void
4208vm_set_restrictions()
4209{
4210 host_basic_info_data_t hinfo;
4211 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4212
4213#define BSD_HOST 1
4214 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4215
4216 assert(hinfo.max_cpus > 0);
4217
4218 if (hinfo.max_cpus <= 3) {
4219 /*
4220 * on systems with a limited number of CPUS, bind the
4221 * 4 major threads that can free memory and that tend to use
4222 * a fair bit of CPU under pressured conditions to a single processor.
4223 * This insures that these threads don't hog all of the available CPUs
4224 * (important for camera launch), while allowing them to run independently
4225 * w/r to locks... the 4 threads are
4226 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4227 * vm_compressor_swap_trigger_thread (minor and major compactions),
4228 * memorystatus_thread (jetsams).
4229 *
4230 * the first time the thread is run, it is responsible for checking the
4231 * state of vm_restricted_to_single_processor, and if TRUE it calls
4232 * thread_bind_master... someday this should be replaced with a group
4233 * scheduling mechanism and KPI.
4234 */
4235 vm_pageout_state.vm_restricted_to_single_processor = TRUE;
4236 } else
4237 vm_pageout_state.vm_restricted_to_single_processor = FALSE;
4238}
4239
4240void
4241vm_pageout(void)
4242{
4243 thread_t self = current_thread();
4244 thread_t thread;
4245 kern_return_t result;
4246 spl_t s;
4247
4248 /*
4249 * Set thread privileges.
4250 */
4251 s = splsched();
4252
4253 thread_lock(self);
4254 self->options |= TH_OPT_VMPRIV;
4255 sched_set_thread_base_priority(self, BASEPRI_VM);
4256 thread_unlock(self);
4257
4258 if (!self->reserved_stack)
4259 self->reserved_stack = self->kernel_stack;
4260
4261 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
4262 thread_vm_bind_group_add();
4263
4264 splx(s);
4265
4266 thread_set_thread_name(current_thread(), "VM_pageout_scan");
4267
4268 /*
4269 * Initialize some paging parameters.
4270 */
4271
4272 vm_pageout_state.vm_pressure_thread_running = FALSE;
4273 vm_pageout_state.vm_pressure_changed = FALSE;
4274 vm_pageout_state.memorystatus_purge_on_warning = 2;
4275 vm_pageout_state.memorystatus_purge_on_urgent = 5;
4276 vm_pageout_state.memorystatus_purge_on_critical = 8;
4277 vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
4278 vm_pageout_state.vm_page_speculative_percentage = 5;
4279 vm_pageout_state.vm_page_speculative_target = 0;
4280
4281 vm_pageout_state.vm_pageout_external_iothread = THREAD_NULL;
4282 vm_pageout_state.vm_pageout_internal_iothread = THREAD_NULL;
4283
4284 vm_pageout_state.vm_pageout_swap_wait = 0;
4285 vm_pageout_state.vm_pageout_idle_wait = 0;
4286 vm_pageout_state.vm_pageout_empty_wait = 0;
4287 vm_pageout_state.vm_pageout_burst_wait = 0;
4288 vm_pageout_state.vm_pageout_deadlock_wait = 0;
4289 vm_pageout_state.vm_pageout_deadlock_relief = 0;
4290 vm_pageout_state.vm_pageout_burst_inactive_throttle = 0;
4291
4292 vm_pageout_state.vm_pageout_inactive = 0;
4293 vm_pageout_state.vm_pageout_inactive_used = 0;
4294 vm_pageout_state.vm_pageout_inactive_clean = 0;
4295
4296 vm_pageout_state.vm_memory_pressure = 0;
4297 vm_pageout_state.vm_page_filecache_min = 0;
4298#if CONFIG_JETSAM
4299 vm_pageout_state.vm_page_filecache_min_divisor = 70;
4300 vm_pageout_state.vm_page_xpmapped_min_divisor = 40;
4301#else
4302 vm_pageout_state.vm_page_filecache_min_divisor = 27;
4303 vm_pageout_state.vm_page_xpmapped_min_divisor = 36;
4304#endif
4305 vm_pageout_state.vm_page_free_count_init = vm_page_free_count;
4306
4307 vm_pageout_state.vm_pageout_considered_page_last = 0;
4308
4309 if (vm_pageout_state.vm_pageout_swap_wait == 0)
4310 vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4311
4312 if (vm_pageout_state.vm_pageout_idle_wait == 0)
4313 vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4314
4315 if (vm_pageout_state.vm_pageout_burst_wait == 0)
4316 vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4317
4318 if (vm_pageout_state.vm_pageout_empty_wait == 0)
4319 vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4320
4321 if (vm_pageout_state.vm_pageout_deadlock_wait == 0)
4322 vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4323
4324 if (vm_pageout_state.vm_pageout_deadlock_relief == 0)
4325 vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4326
4327 if (vm_pageout_state.vm_pageout_burst_inactive_throttle == 0)
4328 vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4329 /*
4330 * even if we've already called vm_page_free_reserve
4331 * call it again here to insure that the targets are
4332 * accurately calculated (it uses vm_page_free_count_init)
4333 * calling it with an arg of 0 will not change the reserve
4334 * but will re-calculate free_min and free_target
4335 */
4336 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4337 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4338 } else
4339 vm_page_free_reserve(0);
4340
4341
4342 vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4343 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4344 vm_pageout_queue_external.pgo_laundry = 0;
4345 vm_pageout_queue_external.pgo_idle = FALSE;
4346 vm_pageout_queue_external.pgo_busy = FALSE;
4347 vm_pageout_queue_external.pgo_throttled = FALSE;
4348 vm_pageout_queue_external.pgo_draining = FALSE;
4349 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4350 vm_pageout_queue_external.pgo_tid = -1;
4351 vm_pageout_queue_external.pgo_inited = FALSE;
4352
4353 vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4354 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4355 vm_pageout_queue_internal.pgo_laundry = 0;
4356 vm_pageout_queue_internal.pgo_idle = FALSE;
4357 vm_pageout_queue_internal.pgo_busy = FALSE;
4358 vm_pageout_queue_internal.pgo_throttled = FALSE;
4359 vm_pageout_queue_internal.pgo_draining = FALSE;
4360 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4361 vm_pageout_queue_internal.pgo_tid = -1;
4362 vm_pageout_queue_internal.pgo_inited = FALSE;
4363
4364 /* internal pageout thread started when default pager registered first time */
4365 /* external pageout and garbage collection threads started here */
4366
4367 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4368 BASEPRI_VM,
4369 &vm_pageout_state.vm_pageout_external_iothread);
4370 if (result != KERN_SUCCESS)
4371 panic("vm_pageout_iothread_external: create failed");
4372
4373 thread_deallocate(vm_pageout_state.vm_pageout_external_iothread);
4374
4375 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4376 BASEPRI_DEFAULT,
4377 &thread);
4378 if (result != KERN_SUCCESS)
4379 panic("vm_pageout_garbage_collect: create failed");
4380
4381 thread_deallocate(thread);
4382
4383#if VM_PRESSURE_EVENTS
4384 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4385 BASEPRI_DEFAULT,
4386 &thread);
4387
4388 if (result != KERN_SUCCESS)
4389 panic("vm_pressure_thread: create failed");
4390
4391 thread_deallocate(thread);
4392#endif
4393
4394 vm_object_reaper_init();
4395
4396
4397 bzero(&vm_config, sizeof(vm_config));
4398
4399 switch(vm_compressor_mode) {
4400
4401 case VM_PAGER_DEFAULT:
4402 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4403
4404 case VM_PAGER_COMPRESSOR_WITH_SWAP:
4405 vm_config.compressor_is_present = TRUE;
4406 vm_config.swap_is_present = TRUE;
4407 vm_config.compressor_is_active = TRUE;
4408 vm_config.swap_is_active = TRUE;
4409 break;
4410
4411 case VM_PAGER_COMPRESSOR_NO_SWAP:
4412 vm_config.compressor_is_present = TRUE;
4413 vm_config.swap_is_present = TRUE;
4414 vm_config.compressor_is_active = TRUE;
4415 break;
4416
4417 case VM_PAGER_FREEZER_DEFAULT:
4418 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4419
4420 case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4421 vm_config.compressor_is_present = TRUE;
4422 vm_config.swap_is_present = TRUE;
4423 break;
4424
4425 case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4426 vm_config.compressor_is_present = TRUE;
4427 vm_config.swap_is_present = TRUE;
4428 vm_config.compressor_is_active = TRUE;
4429 vm_config.freezer_swap_is_active = TRUE;
4430 break;
4431
4432 case VM_PAGER_NOT_CONFIGURED:
4433 break;
4434
4435 default:
4436 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4437 break;
4438 }
4439 if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4440 vm_compressor_pager_init();
4441
4442#if VM_PRESSURE_EVENTS
4443 vm_pressure_events_enabled = TRUE;
4444#endif /* VM_PRESSURE_EVENTS */
4445
4446#if CONFIG_PHANTOM_CACHE
4447 vm_phantom_cache_init();
4448#endif
4449#if VM_PAGE_BUCKETS_CHECK
4450#if VM_PAGE_FAKE_BUCKETS
4451 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4452 (uint64_t) vm_page_fake_buckets_start,
4453 (uint64_t) vm_page_fake_buckets_end);
4454 pmap_protect(kernel_pmap,
4455 vm_page_fake_buckets_start,
4456 vm_page_fake_buckets_end,
4457 VM_PROT_READ);
4458// *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4459#endif /* VM_PAGE_FAKE_BUCKETS */
4460#endif /* VM_PAGE_BUCKETS_CHECK */
4461
4462#if VM_OBJECT_TRACKING
4463 vm_object_tracking_init();
4464#endif /* VM_OBJECT_TRACKING */
4465
4466 vm_tests();
4467
4468 vm_pageout_continue();
4469
4470 /*
4471 * Unreached code!
4472 *
4473 * The vm_pageout_continue() call above never returns, so the code below is never
4474 * executed. We take advantage of this to declare several DTrace VM related probe
4475 * points that our kernel doesn't have an analog for. These are probe points that
4476 * exist in Solaris and are in the DTrace documentation, so people may have written
4477 * scripts that use them. Declaring the probe points here means their scripts will
4478 * compile and execute which we want for portability of the scripts, but since this
4479 * section of code is never reached, the probe points will simply never fire. Yes,
4480 * this is basically a hack. The problem is the DTrace probe points were chosen with
4481 * Solaris specific VM events in mind, not portability to different VM implementations.
4482 */
4483
4484 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4485 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4486 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4487 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4488 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4489 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4490 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4491 /*NOTREACHED*/
4492}
4493
4494
4495
4496kern_return_t
4497vm_pageout_internal_start(void)
4498{
4499 kern_return_t result;
4500 int i;
4501 host_basic_info_data_t hinfo;
4502
4503 assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4504
4505 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4506#define BSD_HOST 1
4507 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4508
4509 assert(hinfo.max_cpus > 0);
4510
4511#if CONFIG_EMBEDDED
4512 vm_pageout_state.vm_compressor_thread_count = 1;
4513#else
4514 if (hinfo.max_cpus > 4)
4515 vm_pageout_state.vm_compressor_thread_count = 2;
4516 else
4517 vm_pageout_state.vm_compressor_thread_count = 1;
4518#endif
4519 PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
4520 sizeof(vm_pageout_state.vm_compressor_thread_count));
4521
4522 if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus)
4523 vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
4524 if (vm_pageout_state.vm_compressor_thread_count <= 0)
4525 vm_pageout_state.vm_compressor_thread_count = 1;
4526 else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4527 vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4528
4529 vm_pageout_queue_internal.pgo_maxlaundry = (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4530
4531 PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4532
4533 for (i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
4534 ciq[i].id = i;
4535 ciq[i].q = &vm_pageout_queue_internal;
4536 ciq[i].current_chead = NULL;
4537 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4538
4539 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i],
4540 BASEPRI_VM, &vm_pageout_state.vm_pageout_internal_iothread);
4541
4542 if (result == KERN_SUCCESS)
4543 thread_deallocate(vm_pageout_state.vm_pageout_internal_iothread);
4544 else
4545 break;
4546 }
4547 return result;
4548}
4549
4550#if CONFIG_IOSCHED
4551/*
4552 * To support I/O Expedite for compressed files we mark the upls with special flags.
4553 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4554 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4555 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4556 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4557 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4558 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
4560 * unless the real I/O upl is being destroyed).
4561 */
4562
4563
4564static void
4565upl_set_decmp_info(upl_t upl, upl_t src_upl)
4566{
4567 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4568
4569 upl_lock(src_upl);
4570 if (src_upl->decmp_io_upl) {
4571 /*
4572 * If there is already an alive real I/O UPL, ignore this new UPL.
4573 * This case should rarely happen and even if it does, it just means
4574 * that we might issue a spurious expedite which the driver is expected
4575 * to handle.
4576 */
4577 upl_unlock(src_upl);
4578 return;
4579 }
4580 src_upl->decmp_io_upl = (void *)upl;
4581 src_upl->ref_count++;
4582
4583 upl->flags |= UPL_DECMP_REAL_IO;
4584 upl->decmp_io_upl = (void *)src_upl;
4585 upl_unlock(src_upl);
4586}
4587#endif /* CONFIG_IOSCHED */
4588
/*
 * Set to 1 when the kernel is built with UPL_DEBUG, 0 otherwise.
 * When non-zero, upl_create() marks every UPL as tracked by its object.
 */
#if !UPL_DEBUG
int	upl_debug_enabled = 0;
#else
int	upl_debug_enabled = 1;
#endif
4594
4595static upl_t
4596upl_create(int type, int flags, upl_size_t size)
4597{
4598 upl_t upl;
4599 vm_size_t page_field_size = 0;
4600 int upl_flags = 0;
4601 vm_size_t upl_size = sizeof(struct upl);
4602
4603 size = round_page_32(size);
4604
4605 if (type & UPL_CREATE_LITE) {
4606 page_field_size = (atop(size) + 7) >> 3;
4607 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4608
4609 upl_flags |= UPL_LITE;
4610 }
4611 if (type & UPL_CREATE_INTERNAL) {
4612 upl_size += sizeof(struct upl_page_info) * atop(size);
4613
4614 upl_flags |= UPL_INTERNAL;
4615 }
4616 upl = (upl_t)kalloc(upl_size + page_field_size);
4617
4618 if (page_field_size)
4619 bzero((char *)upl + upl_size, page_field_size);
4620
4621 upl->flags = upl_flags | flags;
4622 upl->kaddr = (vm_offset_t)0;
4623 upl->size = 0;
4624 upl->map_object = NULL;
4625 upl->ref_count = 1;
4626 upl->ext_ref_count = 0;
4627 upl->highest_page = 0;
4628 upl_lock_init(upl);
4629 upl->vector_upl = NULL;
4630 upl->associated_upl = NULL;
4631 upl->upl_iodone = NULL;
4632#if CONFIG_IOSCHED
4633 if (type & UPL_CREATE_IO_TRACKING) {
4634 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4635 }
4636
4637 upl->upl_reprio_info = 0;
4638 upl->decmp_io_upl = 0;
4639 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4640 /* Only support expedite on internal UPLs */
4641 thread_t curthread = current_thread();
4642 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4643 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4644 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4645 if (curthread->decmp_upl != NULL)
4646 upl_set_decmp_info(upl, curthread->decmp_upl);
4647 }
4648#endif
4649#if CONFIG_IOSCHED || UPL_DEBUG
4650 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4651 upl->upl_creator = current_thread();
4652 upl->uplq.next = 0;
4653 upl->uplq.prev = 0;
4654 upl->flags |= UPL_TRACKED_BY_OBJECT;
4655 }
4656#endif
4657
4658#if UPL_DEBUG
4659 upl->ubc_alias1 = 0;
4660 upl->ubc_alias2 = 0;
4661
4662 upl->upl_state = 0;
4663 upl->upl_commit_index = 0;
4664 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4665
4666 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4667#endif /* UPL_DEBUG */
4668
4669 return(upl);
4670}
4671
4672static void
4673upl_destroy(upl_t upl)
4674{
4675 int page_field_size; /* bit field in word size buf */
4676 int size;
4677
4678 if (upl->ext_ref_count) {
4679 panic("upl(%p) ext_ref_count", upl);
4680 }
4681
4682#if CONFIG_IOSCHED
4683 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4684 upl_t src_upl;
4685 src_upl = upl->decmp_io_upl;
4686 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4687 upl_lock(src_upl);
4688 src_upl->decmp_io_upl = NULL;
4689 upl_unlock(src_upl);
4690 upl_deallocate(src_upl);
4691 }
4692#endif /* CONFIG_IOSCHED */
4693
4694#if CONFIG_IOSCHED || UPL_DEBUG
4695 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4696 vm_object_t object;
4697
4698 if (upl->flags & UPL_SHADOWED) {
4699 object = upl->map_object->shadow;
4700 } else {
4701 object = upl->map_object;
4702 }
4703
4704 vm_object_lock(object);
4705 queue_remove(&object->uplq, upl, upl_t, uplq);
4706 vm_object_activity_end(object);
4707 vm_object_collapse(object, 0, TRUE);
4708 vm_object_unlock(object);
4709 }
4710#endif
4711 /*
4712 * drop a reference on the map_object whether or
4713 * not a pageout object is inserted
4714 */
4715 if (upl->flags & UPL_SHADOWED)
4716 vm_object_deallocate(upl->map_object);
4717
4718 if (upl->flags & UPL_DEVICE_MEMORY)
4719 size = PAGE_SIZE;
4720 else
4721 size = upl->size;
4722 page_field_size = 0;
4723
4724 if (upl->flags & UPL_LITE) {
4725 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4726 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4727 }
4728 upl_lock_destroy(upl);
4729 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4730
4731#if CONFIG_IOSCHED
4732 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4733 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4734#endif
4735
4736 if (upl->flags & UPL_INTERNAL) {
4737 kfree(upl,
4738 sizeof(struct upl) +
4739 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4740 + page_field_size);
4741 } else {
4742 kfree(upl, sizeof(struct upl) + page_field_size);
4743 }
4744}
4745
4746void
4747upl_deallocate(upl_t upl)
4748{
4749 upl_lock(upl);
4750
4751 if (--upl->ref_count == 0) {
4752 if(vector_upl_is_valid(upl))
4753 vector_upl_deallocate(upl);
4754 upl_unlock(upl);
4755
4756 if (upl->upl_iodone)
4757 upl_callout_iodone(upl);
4758
4759 upl_destroy(upl);
4760 } else
4761 upl_unlock(upl);
4762}
4763
4764#if CONFIG_IOSCHED
4765void
4766upl_mark_decmp(upl_t upl)
4767{
4768 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4769 upl->flags |= UPL_DECMP_REQ;
4770 upl->upl_creator->decmp_upl = (void *)upl;
4771 }
4772}
4773
4774void
4775upl_unmark_decmp(upl_t upl)
4776{
4777 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4778 upl->upl_creator->decmp_upl = NULL;
4779 }
4780}
4781
4782#endif /* CONFIG_IOSCHED */
4783
4784#define VM_PAGE_Q_BACKING_UP(q) \
4785 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4786
4787boolean_t must_throttle_writes(void);
4788
4789boolean_t
4790must_throttle_writes()
4791{
4792 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4793 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4794 return (TRUE);
4795
4796 return (FALSE);
4797}
4798
4799
4800/*
4801 * Routine: vm_object_upl_request
4802 * Purpose:
4803 * Cause the population of a portion of a vm_object.
4804 * Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
4806 * A page list structure, listing the physical pages
4807 * will be returned upon request.
4808 * This function is called by the file system or any other
4809 * supplier of backing store to a pager.
4810 * IMPORTANT NOTE: The caller must still respect the relationship
4811 * between the vm_object and its backing memory object. The
4812 * caller MUST NOT substitute changes in the backing file
4813 * without first doing a memory_object_lock_request on the
 *		target range unless it is known that the pages are not
4815 * shared with another entity at the pager level.
4816 * Copy_in_to:
4817 * if a page list structure is present
4818 * return the mapped physical pages, where a
4819 * page is not present, return a non-initialized
4820 * one. If the no_sync bit is turned on, don't
4821 * call the pager unlock to synchronize with other
4822 * possible copies of the page. Leave pages busy
4823 * in the original object, if a page list structure
4824 * was specified. When a commit of the page list
4825 * pages is done, the dirty bit will be set for each one.
4826 * Copy_out_from:
4827 * If a page list structure is present, return
4828 * all mapped pages. Where a page does not exist
4829 * map a zero filled one. Leave pages busy in
4830 * the original object. If a page list structure
4831 * is not specified, this call is a no-op.
4832 *
4833 * Note: access of default pager objects has a rather interesting
4834 * twist. The caller of this routine, presumably the file system
4835 * page cache handling code, will never actually make a request
4836 * against a default pager backed object. Only the default
4837 * pager will make requests on backing store related vm_objects
4838 * In this way the default pager can maintain the relationship
4839 * between backing store files (abstract memory objects) and
4840 * the vm_objects (cache objects), they support.
4841 *
4842 */
4843
4844__private_extern__ kern_return_t
4845vm_object_upl_request(
4846 vm_object_t object,
4847 vm_object_offset_t offset,
4848 upl_size_t size,
4849 upl_t *upl_ptr,
4850 upl_page_info_array_t user_page_list,
4851 unsigned int *page_list_count,
4852 upl_control_flags_t cntrl_flags,
4853 vm_tag_t tag)
4854{
4855 vm_page_t dst_page = VM_PAGE_NULL;
4856 vm_object_offset_t dst_offset;
4857 upl_size_t xfer_size;
4858 unsigned int size_in_pages;
4859 boolean_t dirty;
4860 boolean_t hw_dirty;
4861 upl_t upl = NULL;
4862 unsigned int entry;
4863 vm_page_t alias_page = NULL;
4864 int refmod_state = 0;
4865 wpl_array_t lite_list = NULL;
4866 vm_object_t last_copy_object;
4867 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
4868 struct vm_page_delayed_work *dwp;
4869 int dw_count;
4870 int dw_limit;
4871 int io_tracking_flag = 0;
4872 int grab_options;
4873 int page_grab_count = 0;
4874 ppnum_t phys_page;
4875 pmap_flush_context pmap_flush_context_storage;
4876 boolean_t pmap_flushes_delayed = FALSE;
4877
4878 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4879 /*
4880 * For forward compatibility's sake,
4881 * reject any unknown flag.
4882 */
4883 return KERN_INVALID_VALUE;
4884 }
4885 if ( (!object->internal) && (object->paging_offset != 0) )
4886 panic("vm_object_upl_request: external object with non-zero paging offset\n");
4887 if (object->phys_contiguous)
4888 panic("vm_object_upl_request: contiguous object specified\n");
4889
4890 VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0);
4891
4892 if (size > MAX_UPL_SIZE_BYTES)
4893 size = MAX_UPL_SIZE_BYTES;
4894
4895 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
4896 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
4897
4898#if CONFIG_IOSCHED || UPL_DEBUG
4899 if (object->io_tracking || upl_debug_enabled)
4900 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
4901#endif
4902#if CONFIG_IOSCHED
4903 if (object->io_tracking)
4904 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
4905#endif
4906
4907 if (cntrl_flags & UPL_SET_INTERNAL) {
4908 if (cntrl_flags & UPL_SET_LITE) {
4909
4910 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4911
4912 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4913 lite_list = (wpl_array_t)
4914 (((uintptr_t)user_page_list) +
4915 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4916 if (size == 0) {
4917 user_page_list = NULL;
4918 lite_list = NULL;
4919 }
4920 } else {
4921 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
4922
4923 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4924 if (size == 0) {
4925 user_page_list = NULL;
4926 }
4927 }
4928 } else {
4929 if (cntrl_flags & UPL_SET_LITE) {
4930
4931 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4932
4933 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4934 if (size == 0) {
4935 lite_list = NULL;
4936 }
4937 } else {
4938 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
4939 }
4940 }
4941 *upl_ptr = upl;
4942
4943 if (user_page_list)
4944 user_page_list[0].device = FALSE;
4945
4946 if (cntrl_flags & UPL_SET_LITE) {
4947 upl->map_object = object;
4948 } else {
4949 upl->map_object = vm_object_allocate(size);
4950 /*
		 * No need to lock the new object: nobody else knows
4952 * about it yet, so it's all ours so far.
4953 */
4954 upl->map_object->shadow = object;
4955 upl->map_object->pageout = TRUE;
4956 upl->map_object->can_persist = FALSE;
4957 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4958 upl->map_object->vo_shadow_offset = offset;
4959 upl->map_object->wimg_bits = object->wimg_bits;
4960
4961 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4962
4963 upl->flags |= UPL_SHADOWED;
4964 }
4965 if (cntrl_flags & UPL_FOR_PAGEOUT)
4966 upl->flags |= UPL_PAGEOUT;
4967
4968 vm_object_lock(object);
4969 vm_object_activity_begin(object);
4970
4971 grab_options = 0;
4972#if CONFIG_SECLUDED_MEMORY
4973 if (object->can_grab_secluded) {
4974 grab_options |= VM_PAGE_GRAB_SECLUDED;
4975 }
4976#endif /* CONFIG_SECLUDED_MEMORY */
4977
4978 /*
4979 * we can lock in the paging_offset once paging_in_progress is set
4980 */
4981 upl->size = size;
4982 upl->offset = offset + object->paging_offset;
4983
4984#if CONFIG_IOSCHED || UPL_DEBUG
4985 if (object->io_tracking || upl_debug_enabled) {
4986 vm_object_activity_begin(object);
4987 queue_enter(&object->uplq, upl, upl_t, uplq);
4988 }
4989#endif
4990 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
4991 /*
4992 * Honor copy-on-write obligations
4993 *
4994 * The caller is gathering these pages and
4995 * might modify their contents. We need to
4996 * make sure that the copy object has its own
4997 * private copies of these pages before we let
4998 * the caller modify them.
4999 */
5000 vm_object_update(object,
5001 offset,
5002 size,
5003 NULL,
5004 NULL,
5005 FALSE, /* should_return */
5006 MEMORY_OBJECT_COPY_SYNC,
5007 VM_PROT_NO_CHANGE);
5008
5009 VM_PAGEOUT_DEBUG(upl_cow, 1);
5010 VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT));
5011 }
5012 /*
5013 * remember which copy object we synchronized with
5014 */
5015 last_copy_object = object->copy;
5016 entry = 0;
5017
5018 xfer_size = size;
5019 dst_offset = offset;
5020 size_in_pages = size / PAGE_SIZE;
5021
5022 dwp = &dw_array[0];
5023 dw_count = 0;
5024 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5025
5026 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5027 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5028 object->scan_collisions = 0;
5029
5030 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5031 boolean_t isSSD = FALSE;
5032
5033#if CONFIG_EMBEDDED
5034 isSSD = TRUE;
5035#else
5036 vnode_pager_get_isSSD(object->pager, &isSSD);
5037#endif
5038 vm_object_unlock(object);
5039
5040 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5041
5042 if (isSSD == TRUE)
5043 delay(1000 * size_in_pages);
5044 else
5045 delay(5000 * size_in_pages);
5046 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5047
5048 vm_object_lock(object);
5049 }
5050
5051 while (xfer_size) {
5052
5053 dwp->dw_mask = 0;
5054
5055 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5056 vm_object_unlock(object);
5057 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5058 vm_object_lock(object);
5059 }
5060 if (cntrl_flags & UPL_COPYOUT_FROM) {
5061 upl->flags |= UPL_PAGE_SYNC_DONE;
5062
5063 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5064 dst_page->vmp_fictitious ||
5065 dst_page->vmp_absent ||
5066 dst_page->vmp_error ||
5067 dst_page->vmp_cleaning ||
5068 (VM_PAGE_WIRED(dst_page))) {
5069
5070 if (user_page_list)
5071 user_page_list[entry].phys_addr = 0;
5072
5073 goto try_next_page;
5074 }
5075 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5076
5077 /*
5078 * grab this up front...
			 * a high percentage of the time we're going to
5080 * need the hardware modification state a bit later
5081 * anyway... so we can eliminate an extra call into
5082 * the pmap layer by grabbing it here and recording it
5083 */
5084 if (dst_page->vmp_pmapped)
5085 refmod_state = pmap_get_refmod(phys_page);
5086 else
5087 refmod_state = 0;
5088
5089 if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5090 /*
5091 * page is on inactive list and referenced...
5092 * reactivate it now... this gets it out of the
5093 * way of vm_pageout_scan which would have to
5094 * reactivate it upon tripping over it
5095 */
5096 dwp->dw_mask |= DW_vm_page_activate;
5097 }
5098 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5099 /*
5100 * we're only asking for DIRTY pages to be returned
5101 */
5102 if (dst_page->vmp_laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5103 /*
5104 * if we were the page stolen by vm_pageout_scan to be
5105 * cleaned (as opposed to a buddy being clustered in
5106 * or this request is not being driven by a PAGEOUT cluster
5107 * then we only need to check for the page being dirty or
5108 * precious to decide whether to return it
5109 */
5110 if (dst_page->vmp_dirty || dst_page->vmp_precious || (refmod_state & VM_MEM_MODIFIED))
5111 goto check_busy;
5112 goto dont_return;
5113 }
5114 /*
5115 * this is a request for a PAGEOUT cluster and this page
5116 * is merely along for the ride as a 'buddy'... not only
5117 * does it have to be dirty to be returned, but it also
5118 * can't have been referenced recently...
5119 */
5120 if ( (hibernate_cleaning_in_progress == TRUE ||
5121 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->vmp_reference) ||
5122 (dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5123 ((refmod_state & VM_MEM_MODIFIED) || dst_page->vmp_dirty || dst_page->vmp_precious) ) {
5124 goto check_busy;
5125 }
5126dont_return:
5127 /*
5128 * if we reach here, we're not to return
5129 * the page... go on to the next one
5130 */
5131 if (dst_page->vmp_laundry == TRUE) {
5132 /*
5133 * if we get here, the page is not 'cleaning' (filtered out above).
5134 * since it has been referenced, remove it from the laundry
5135 * so we don't pay the cost of an I/O to clean a page
5136 * we're just going to take back
5137 */
5138 vm_page_lockspin_queues();
5139
5140 vm_pageout_steal_laundry(dst_page, TRUE);
5141 vm_page_activate(dst_page);
5142
5143 vm_page_unlock_queues();
5144 }
5145 if (user_page_list)
5146 user_page_list[entry].phys_addr = 0;
5147
5148 goto try_next_page;
5149 }
5150check_busy:
5151 if (dst_page->vmp_busy) {
5152 if (cntrl_flags & UPL_NOBLOCK) {
5153 if (user_page_list)
5154 user_page_list[entry].phys_addr = 0;
5155 dwp->dw_mask = 0;
5156
5157 goto try_next_page;
5158 }
5159 /*
5160 * someone else is playing with the
5161 * page. We will have to wait.
5162 */
5163 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5164
5165 continue;
5166 }
5167 if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5168
5169 vm_page_lockspin_queues();
5170
5171 if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5172 /*
5173 * we've buddied up a page for a clustered pageout
5174 * that has already been moved to the pageout
5175 * queue by pageout_scan... we need to remove
5176 * it from the queue and drop the laundry count
5177 * on that queue
5178 */
5179 vm_pageout_throttle_up(dst_page);
5180 }
5181 vm_page_unlock_queues();
5182 }
5183 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5184 dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5185
5186 if (phys_page > upl->highest_page)
5187 upl->highest_page = phys_page;
5188
5189 assert (!pmap_is_noencrypt(phys_page));
5190
5191 if (cntrl_flags & UPL_SET_LITE) {
5192 unsigned int pg_num;
5193
5194 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5195 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5196 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5197
5198 if (hw_dirty) {
5199 if (pmap_flushes_delayed == FALSE) {
5200 pmap_flush_context_init(&pmap_flush_context_storage);
5201 pmap_flushes_delayed = TRUE;
5202 }
5203 pmap_clear_refmod_options(phys_page,
5204 VM_MEM_MODIFIED,
5205 PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_CLEAR_WRITE,
5206 &pmap_flush_context_storage);
5207 }
5208
5209 /*
5210 * Mark original page as cleaning
5211 * in place.
5212 */
5213 dst_page->vmp_cleaning = TRUE;
5214 dst_page->vmp_precious = FALSE;
5215 } else {
5216 /*
5217 * use pageclean setup, it is more
5218 * convenient even for the pageout
5219 * cases here
5220 */
5221 vm_object_lock(upl->map_object);
5222 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5223 vm_object_unlock(upl->map_object);
5224
5225 alias_page->vmp_absent = FALSE;
5226 alias_page = NULL;
5227 }
5228 if (dirty) {
5229 SET_PAGE_DIRTY(dst_page, FALSE);
5230 } else {
5231 dst_page->vmp_dirty = FALSE;
5232 }
5233
5234 if (!dirty)
5235 dst_page->vmp_precious = TRUE;
5236
5237 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5238 if ( !VM_PAGE_WIRED(dst_page))
5239 dst_page->vmp_free_when_done = TRUE;
5240 }
5241 } else {
5242 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5243 /*
5244 * Honor copy-on-write obligations
5245 *
5246 * The copy object has changed since we
5247 * last synchronized for copy-on-write.
5248 * Another copy object might have been
5249 * inserted while we released the object's
5250 * lock. Since someone could have seen the
5251 * original contents of the remaining pages
5252 * through that new object, we have to
5253 * synchronize with it again for the remaining
5254 * pages only. The previous pages are "busy"
5255 * so they can not be seen through the new
5256 * mapping. The new mapping will see our
5257 * upcoming changes for those previous pages,
5258 * but that's OK since they couldn't see what
5259 * was there before. It's just a race anyway
5260 * and there's no guarantee of consistency or
5261 * atomicity. We just don't want new mappings
5262 * to see both the *before* and *after* pages.
5263 */
5264 if (object->copy != VM_OBJECT_NULL) {
5265 vm_object_update(
5266 object,
5267 dst_offset,/* current offset */
5268 xfer_size, /* remaining size */
5269 NULL,
5270 NULL,
5271 FALSE, /* should_return */
5272 MEMORY_OBJECT_COPY_SYNC,
5273 VM_PROT_NO_CHANGE);
5274
5275 VM_PAGEOUT_DEBUG(upl_cow_again, 1);
5276 VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT));
5277 }
5278 /*
5279 * remember the copy object we synced with
5280 */
5281 last_copy_object = object->copy;
5282 }
5283 dst_page = vm_page_lookup(object, dst_offset);
5284
5285 if (dst_page != VM_PAGE_NULL) {
5286
5287 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5288 /*
5289 * skip over pages already present in the cache
5290 */
5291 if (user_page_list)
5292 user_page_list[entry].phys_addr = 0;
5293
5294 goto try_next_page;
5295 }
5296 if (dst_page->vmp_fictitious) {
5297 panic("need corner case for fictitious page");
5298 }
5299
5300 if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
5301 /*
5302 * someone else is playing with the
5303 * page. We will have to wait.
5304 */
5305 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5306
5307 continue;
5308 }
5309 if (dst_page->vmp_laundry)
5310 vm_pageout_steal_laundry(dst_page, FALSE);
5311 } else {
5312 if (object->private) {
5313 /*
5314 * This is a nasty wrinkle for users
5315 * of upl who encounter device or
5316 * private memory however, it is
5317 * unavoidable, only a fault can
5318 * resolve the actual backing
5319 * physical page by asking the
5320 * backing device.
5321 */
5322 if (user_page_list)
5323 user_page_list[entry].phys_addr = 0;
5324
5325 goto try_next_page;
5326 }
5327 if (object->scan_collisions) {
5328 /*
5329 * the pageout_scan thread is trying to steal
5330 * pages from this object, but has run into our
5331 * lock... grab 2 pages from the head of the object...
5332 * the first is freed on behalf of pageout_scan, the
5333 * 2nd is for our own use... we use vm_object_page_grab
5334 * in both cases to avoid taking pages from the free
5335 * list since we are under memory pressure and our
5336 * lock on this object is getting in the way of
5337 * relieving it
5338 */
5339 dst_page = vm_object_page_grab(object);
5340
5341 if (dst_page != VM_PAGE_NULL)
5342 vm_page_release(dst_page,
5343 FALSE);
5344
5345 dst_page = vm_object_page_grab(object);
5346 }
5347 if (dst_page == VM_PAGE_NULL) {
5348 /*
5349 * need to allocate a page
5350 */
5351 dst_page = vm_page_grab_options(grab_options);
5352 if (dst_page != VM_PAGE_NULL)
5353 page_grab_count++;
5354 }
5355 if (dst_page == VM_PAGE_NULL) {
5356 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5357 /*
5358 * we don't want to stall waiting for pages to come onto the free list
5359 * while we're already holding absent pages in this UPL
5360 * the caller will deal with the empty slots
5361 */
5362 if (user_page_list)
5363 user_page_list[entry].phys_addr = 0;
5364
5365 goto try_next_page;
5366 }
5367 /*
5368 * no pages available... wait
5369 * then try again for the same
5370 * offset...
5371 */
5372 vm_object_unlock(object);
5373
5374 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5375
5376 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5377
5378 VM_PAGE_WAIT();
5379 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5380
5381 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5382
5383 vm_object_lock(object);
5384
5385 continue;
5386 }
5387 vm_page_insert(dst_page, object, dst_offset);
5388
5389 dst_page->vmp_absent = TRUE;
5390 dst_page->vmp_busy = FALSE;
5391
5392 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5393 /*
5394 * if UPL_RET_ONLY_ABSENT was specified,
					 * then we're definitely setting up a
5396 * upl for a clustered read/pagein
5397 * operation... mark the pages as clustered
5398 * so upl_commit_range can put them on the
5399 * speculative list
5400 */
5401 dst_page->vmp_clustered = TRUE;
5402
5403 if ( !(cntrl_flags & UPL_FILE_IO))
5404 VM_STAT_INCR(pageins);
5405 }
5406 }
5407 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5408
5409 dst_page->vmp_overwriting = TRUE;
5410
5411 if (dst_page->vmp_pmapped) {
5412 if ( !(cntrl_flags & UPL_FILE_IO))
5413 /*
5414 * eliminate all mappings from the
5415 * original object and its prodigy
5416 */
5417 refmod_state = pmap_disconnect(phys_page);
5418 else
5419 refmod_state = pmap_get_refmod(phys_page);
5420 } else
5421 refmod_state = 0;
5422
5423 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5424 dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5425
5426 if (cntrl_flags & UPL_SET_LITE) {
5427 unsigned int pg_num;
5428
5429 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5430 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5431 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5432
5433 if (hw_dirty)
5434 pmap_clear_modify(phys_page);
5435
5436 /*
5437 * Mark original page as cleaning
5438 * in place.
5439 */
5440 dst_page->vmp_cleaning = TRUE;
5441 dst_page->vmp_precious = FALSE;
5442 } else {
5443 /*
5444 * use pageclean setup, it is more
5445 * convenient even for the pageout
5446 * cases here
5447 */
5448 vm_object_lock(upl->map_object);
5449 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5450 vm_object_unlock(upl->map_object);
5451
5452 alias_page->vmp_absent = FALSE;
5453 alias_page = NULL;
5454 }
5455
5456 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5457 upl->flags &= ~UPL_CLEAR_DIRTY;
5458 upl->flags |= UPL_SET_DIRTY;
5459 dirty = TRUE;
5460 upl->flags |= UPL_SET_DIRTY;
5461 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5462 /*
5463 * clean in place for read implies
5464 * that a write will be done on all
5465 * the pages that are dirty before
5466 * a upl commit is done. The caller
5467 * is obligated to preserve the
5468 * contents of all pages marked dirty
5469 */
5470 upl->flags |= UPL_CLEAR_DIRTY;
5471 }
5472 dst_page->vmp_dirty = dirty;
5473
5474 if (!dirty)
5475 dst_page->vmp_precious = TRUE;
5476
5477 if ( !VM_PAGE_WIRED(dst_page)) {
5478 /*
5479 * deny access to the target page while
5480 * it is being worked on
5481 */
5482 dst_page->vmp_busy = TRUE;
5483 } else
5484 dwp->dw_mask |= DW_vm_page_wire;
5485
5486 /*
5487 * We might be about to satisfy a fault which has been
5488 * requested. So no need for the "restart" bit.
5489 */
5490 dst_page->vmp_restart = FALSE;
5491 if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5492 /*
5493 * expect the page to be used
5494 */
5495 dwp->dw_mask |= DW_set_reference;
5496 }
5497 if (cntrl_flags & UPL_PRECIOUS) {
5498 if (object->internal) {
5499 SET_PAGE_DIRTY(dst_page, FALSE);
5500 dst_page->vmp_precious = FALSE;
5501 } else {
5502 dst_page->vmp_precious = TRUE;
5503 }
5504 } else {
5505 dst_page->vmp_precious = FALSE;
5506 }
5507 }
5508 if (dst_page->vmp_busy)
5509 upl->flags |= UPL_HAS_BUSY;
5510
5511 if (phys_page > upl->highest_page)
5512 upl->highest_page = phys_page;
5513 assert (!pmap_is_noencrypt(phys_page));
5514 if (user_page_list) {
5515 user_page_list[entry].phys_addr = phys_page;
5516 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
5517 user_page_list[entry].absent = dst_page->vmp_absent;
5518 user_page_list[entry].dirty = dst_page->vmp_dirty;
5519 user_page_list[entry].precious = dst_page->vmp_precious;
5520 user_page_list[entry].device = FALSE;
5521 user_page_list[entry].needed = FALSE;
5522 if (dst_page->vmp_clustered == TRUE)
5523 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5524 else
5525 user_page_list[entry].speculative = FALSE;
5526 user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
5527 user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
5528 user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
5529 user_page_list[entry].mark = FALSE;
5530 }
5531 /*
5532 * if UPL_RET_ONLY_ABSENT is set, then
5533 * we are working with a fresh page and we've
5534 * just set the clustered flag on it to
5535 * indicate that it was drug in as part of a
5536 * speculative cluster... so leave it alone
5537 */
5538 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5539 /*
5540 * someone is explicitly grabbing this page...
5541 * update clustered and speculative state
5542 *
5543 */
5544 if (dst_page->vmp_clustered)
5545 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5546 }
5547try_next_page:
5548 if (dwp->dw_mask) {
5549 if (dwp->dw_mask & DW_vm_page_activate)
5550 VM_STAT_INCR(reactivations);
5551
5552 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5553
5554 if (dw_count >= dw_limit) {
5555 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5556
5557 dwp = &dw_array[0];
5558 dw_count = 0;
5559 }
5560 }
5561 entry++;
5562 dst_offset += PAGE_SIZE_64;
5563 xfer_size -= PAGE_SIZE;
5564 }
5565 if (dw_count)
5566 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5567
5568 if (alias_page != NULL) {
5569 VM_PAGE_FREE(alias_page);
5570 }
5571 if (pmap_flushes_delayed == TRUE)
5572 pmap_flush(&pmap_flush_context_storage);
5573
5574 if (page_list_count != NULL) {
5575 if (upl->flags & UPL_INTERNAL)
5576 *page_list_count = 0;
5577 else if (*page_list_count > entry)
5578 *page_list_count = entry;
5579 }
5580#if UPL_DEBUG
5581 upl->upl_state = 1;
5582#endif
5583 vm_object_unlock(object);
5584
5585 VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
5586
5587 return KERN_SUCCESS;
5588}
5589
5590/*
5591 * Routine: vm_object_super_upl_request
5592 * Purpose:
5593 * Cause the population of a portion of a vm_object
5594 * in much the same way as memory_object_upl_request.
5595 * Depending on the nature of the request, the pages
5596 * returned may be contain valid data or be uninitialized.
5597 * However, the region may be expanded up to the super
5598 * cluster size provided.
5599 */
5600
5601__private_extern__ kern_return_t
5602vm_object_super_upl_request(
5603 vm_object_t object,
5604 vm_object_offset_t offset,
5605 upl_size_t size,
5606 upl_size_t super_cluster,
5607 upl_t *upl,
5608 upl_page_info_t *user_page_list,
5609 unsigned int *page_list_count,
5610 upl_control_flags_t cntrl_flags,
5611 vm_tag_t tag)
5612{
5613 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5614 return KERN_FAILURE;
5615
5616 assert(object->paging_in_progress);
5617 offset = offset - object->paging_offset;
5618
5619 if (super_cluster > size) {
5620
5621 vm_object_offset_t base_offset;
5622 upl_size_t super_size;
5623 vm_object_size_t super_size_64;
5624
5625 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5626 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5627 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5628 super_size = (upl_size_t) super_size_64;
5629 assert(super_size == super_size_64);
5630
5631 if (offset > (base_offset + super_size)) {
5632 panic("vm_object_super_upl_request: Missed target pageout"
5633 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5634 offset, base_offset, super_size, super_cluster,
5635 size, object->paging_offset);
5636 }
5637 /*
5638 * apparently there is a case where the vm requests a
5639 * page to be written out who's offset is beyond the
5640 * object size
5641 */
5642 if ((offset + size) > (base_offset + super_size)) {
5643 super_size_64 = (offset + size) - base_offset;
5644 super_size = (upl_size_t) super_size_64;
5645 assert(super_size == super_size_64);
5646 }
5647
5648 offset = base_offset;
5649 size = super_size;
5650 }
5651 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5652}
5653
#if CONFIG_EMBEDDED
/*
 * Declarations that exist only on CONFIG_EMBEDDED builds.
 *
 * NOTE(review): cs_executable_create_upl is defined here but is not
 * referenced in the portion of the file visible from this spot; it is
 * presumably a counter or tunable tied to UPL creation against
 * executable mappings -- confirm against its users.
 */
int cs_executable_create_upl = 0;
/*
 * BSD-side accessors declared locally; vm_map_create_upl() below uses
 * them to print the calling process's pid and name when a UPL is
 * created out of an executable range.
 */
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
#endif /* CONFIG_EMBEDDED */
5659
5660kern_return_t
5661vm_map_create_upl(
5662 vm_map_t map,
5663 vm_map_address_t offset,
5664 upl_size_t *upl_size,
5665 upl_t *upl,
5666 upl_page_info_array_t page_list,
5667 unsigned int *count,
5668 upl_control_flags_t *flags,
5669 vm_tag_t tag)
5670{
5671 vm_map_entry_t entry;
5672 upl_control_flags_t caller_flags;
5673 int force_data_sync;
5674 int sync_cow_data;
5675 vm_object_t local_object;
5676 vm_map_offset_t local_offset;
5677 vm_map_offset_t local_start;
5678 kern_return_t ret;
5679
5680 assert(page_aligned(offset));
5681
5682 caller_flags = *flags;
5683
5684 if (caller_flags & ~UPL_VALID_FLAGS) {
5685 /*
5686 * For forward compatibility's sake,
5687 * reject any unknown flag.
5688 */
5689 return KERN_INVALID_VALUE;
5690 }
5691 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5692 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5693
5694 if (upl == NULL)
5695 return KERN_INVALID_ARGUMENT;
5696
5697REDISCOVER_ENTRY:
5698 vm_map_lock_read(map);
5699
5700 if (!vm_map_lookup_entry(map, offset, &entry)) {
5701 vm_map_unlock_read(map);
5702 return KERN_FAILURE;
5703 }
5704
5705 if ((entry->vme_end - offset) < *upl_size) {
5706 *upl_size = (upl_size_t) (entry->vme_end - offset);
5707 assert(*upl_size == entry->vme_end - offset);
5708 }
5709
5710 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5711 *flags = 0;
5712
5713 if (!entry->is_sub_map &&
5714 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5715 if (VME_OBJECT(entry)->private)
5716 *flags = UPL_DEV_MEMORY;
5717
5718 if (VME_OBJECT(entry)->phys_contiguous)
5719 *flags |= UPL_PHYS_CONTIG;
5720 }
5721 vm_map_unlock_read(map);
5722 return KERN_SUCCESS;
5723 }
5724
5725 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
5726 !VME_OBJECT(entry)->phys_contiguous) {
5727 if (*upl_size > MAX_UPL_SIZE_BYTES)
5728 *upl_size = MAX_UPL_SIZE_BYTES;
5729 }
5730
5731 /*
5732 * Create an object if necessary.
5733 */
5734 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5735
5736 if (vm_map_lock_read_to_write(map))
5737 goto REDISCOVER_ENTRY;
5738
5739 VME_OBJECT_SET(entry,
5740 vm_object_allocate((vm_size_t)
5741 (entry->vme_end -
5742 entry->vme_start)));
5743 VME_OFFSET_SET(entry, 0);
5744 assert(entry->use_pmap);
5745
5746 vm_map_lock_write_to_read(map);
5747 }
5748
5749 if (!(caller_flags & UPL_COPYOUT_FROM) &&
5750 !entry->is_sub_map &&
5751 !(entry->protection & VM_PROT_WRITE)) {
5752 vm_map_unlock_read(map);
5753 return KERN_PROTECTION_FAILURE;
5754 }
5755
5756#if CONFIG_EMBEDDED
5757 if (map->pmap != kernel_pmap &&
5758 (caller_flags & UPL_COPYOUT_FROM) &&
5759 (entry->protection & VM_PROT_EXECUTE) &&
5760 !(entry->protection & VM_PROT_WRITE)) {
5761 vm_offset_t kaddr;
5762 vm_size_t ksize;
5763
5764 /*
5765 * We're about to create a read-only UPL backed by
5766 * memory from an executable mapping.
5767 * Wiring the pages would result in the pages being copied
5768 * (due to the "MAP_PRIVATE" mapping) and no longer
5769 * code-signed, so no longer eligible for execution.
5770 * Instead, let's copy the data into a kernel buffer and
5771 * create the UPL from this kernel buffer.
5772 * The kernel buffer is then freed, leaving the UPL holding
5773 * the last reference on the VM object, so the memory will
5774 * be released when the UPL is committed.
5775 */
5776
5777 vm_map_unlock_read(map);
5778 /* allocate kernel buffer */
5779 ksize = round_page(*upl_size);
5780 kaddr = 0;
5781 ret = kmem_alloc_pageable(kernel_map,
5782 &kaddr,
5783 ksize,
5784 tag);
5785 if (ret == KERN_SUCCESS) {
5786 /* copyin the user data */
5787 assert(page_aligned(offset));
5788 ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
5789 }
5790 if (ret == KERN_SUCCESS) {
5791 if (ksize > *upl_size) {
5792 /* zero out the extra space in kernel buffer */
5793 memset((void *)(kaddr + *upl_size),
5794 0,
5795 ksize - *upl_size);
5796 }
5797 /* create the UPL from the kernel buffer */
5798 ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5799 upl, page_list, count, flags, tag);
5800 }
5801 if (kaddr != 0) {
5802 /* free the kernel buffer */
5803 kmem_free(kernel_map, kaddr, ksize);
5804 kaddr = 0;
5805 ksize = 0;
5806 }
5807#if DEVELOPMENT || DEBUG
5808 DTRACE_VM4(create_upl_from_executable,
5809 vm_map_t, map,
5810 vm_map_address_t, offset,
5811 upl_size_t, *upl_size,
5812 kern_return_t, ret);
5813#endif /* DEVELOPMENT || DEBUG */
5814 return ret;
5815 }
5816#endif /* CONFIG_EMBEDDED */
5817
5818 local_object = VME_OBJECT(entry);
5819 assert(local_object != VM_OBJECT_NULL);
5820
5821 if (!entry->is_sub_map &&
5822 !entry->needs_copy &&
5823 *upl_size != 0 &&
5824 local_object->vo_size > *upl_size && /* partial UPL */
5825 entry->wired_count == 0 && /* No COW for entries that are wired */
5826 (map->pmap != kernel_pmap) && /* alias checks */
5827 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
5828 ||
5829 (/* case 2 */
5830 local_object->internal &&
5831 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5832 local_object->ref_count > 1))) {
5833 vm_prot_t prot;
5834
5835 /*
5836 * Case 1:
5837 * Set up the targeted range for copy-on-write to avoid
5838 * applying true_share/copy_delay to the entire object.
5839 *
5840 * Case 2:
5841 * This map entry covers only part of an internal
5842 * object. There could be other map entries covering
5843 * other areas of this object and some of these map
5844 * entries could be marked as "needs_copy", which
5845 * assumes that the object is COPY_SYMMETRIC.
5846 * To avoid marking this object as COPY_DELAY and
5847 * "true_share", let's shadow it and mark the new
5848 * (smaller) object as "true_share" and COPY_DELAY.
5849 */
5850
5851 if (vm_map_lock_read_to_write(map)) {
5852 goto REDISCOVER_ENTRY;
5853 }
5854 vm_map_lock_assert_exclusive(map);
5855 assert(VME_OBJECT(entry) == local_object);
5856
5857 vm_map_clip_start(map,
5858 entry,
5859 vm_map_trunc_page(offset,
5860 VM_MAP_PAGE_MASK(map)));
5861 vm_map_clip_end(map,
5862 entry,
5863 vm_map_round_page(offset + *upl_size,
5864 VM_MAP_PAGE_MASK(map)));
5865 if ((entry->vme_end - offset) < *upl_size) {
5866 *upl_size = (upl_size_t) (entry->vme_end - offset);
5867 assert(*upl_size == entry->vme_end - offset);
5868 }
5869
5870 prot = entry->protection & ~VM_PROT_WRITE;
5871 if (override_nx(map, VME_ALIAS(entry)) && prot)
5872 prot |= VM_PROT_EXECUTE;
5873 vm_object_pmap_protect(local_object,
5874 VME_OFFSET(entry),
5875 entry->vme_end - entry->vme_start,
5876 ((entry->is_shared ||
5877 map->mapped_in_other_pmaps)
5878 ? PMAP_NULL
5879 : map->pmap),
5880 entry->vme_start,
5881 prot);
5882
5883 assert(entry->wired_count == 0);
5884
5885 /*
5886 * Lock the VM object and re-check its status: if it's mapped
5887 * in another address space, we could still be racing with
5888 * another thread holding that other VM map exclusively.
5889 */
5890 vm_object_lock(local_object);
5891 if (local_object->true_share) {
5892 /* object is already in proper state: no COW needed */
5893 assert(local_object->copy_strategy !=
5894 MEMORY_OBJECT_COPY_SYMMETRIC);
5895 } else {
5896 /* not true_share: ask for copy-on-write below */
5897 assert(local_object->copy_strategy ==
5898 MEMORY_OBJECT_COPY_SYMMETRIC);
5899 entry->needs_copy = TRUE;
5900 }
5901 vm_object_unlock(local_object);
5902
5903 vm_map_lock_write_to_read(map);
5904 }
5905
5906 if (entry->needs_copy) {
5907 /*
5908 * Honor copy-on-write for COPY_SYMMETRIC
5909 * strategy.
5910 */
5911 vm_map_t local_map;
5912 vm_object_t object;
5913 vm_object_offset_t new_offset;
5914 vm_prot_t prot;
5915 boolean_t wired;
5916 vm_map_version_t version;
5917 vm_map_t real_map;
5918 vm_prot_t fault_type;
5919
5920 local_map = map;
5921
5922 if (caller_flags & UPL_COPYOUT_FROM) {
5923 fault_type = VM_PROT_READ | VM_PROT_COPY;
5924 vm_counters.create_upl_extra_cow++;
5925 vm_counters.create_upl_extra_cow_pages +=
5926 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
5927 } else {
5928 fault_type = VM_PROT_WRITE;
5929 }
5930 if (vm_map_lookup_locked(&local_map,
5931 offset, fault_type,
5932 OBJECT_LOCK_EXCLUSIVE,
5933 &version, &object,
5934 &new_offset, &prot, &wired,
5935 NULL,
5936 &real_map) != KERN_SUCCESS) {
5937 if (fault_type == VM_PROT_WRITE) {
5938 vm_counters.create_upl_lookup_failure_write++;
5939 } else {
5940 vm_counters.create_upl_lookup_failure_copy++;
5941 }
5942 vm_map_unlock_read(local_map);
5943 return KERN_FAILURE;
5944 }
5945 if (real_map != map)
5946 vm_map_unlock(real_map);
5947 vm_map_unlock_read(local_map);
5948
5949 vm_object_unlock(object);
5950
5951 goto REDISCOVER_ENTRY;
5952 }
5953
5954 if (entry->is_sub_map) {
5955 vm_map_t submap;
5956
5957 submap = VME_SUBMAP(entry);
5958 local_start = entry->vme_start;
5959 local_offset = VME_OFFSET(entry);
5960
5961 vm_map_reference(submap);
5962 vm_map_unlock_read(map);
5963
5964 ret = vm_map_create_upl(submap,
5965 local_offset + (offset - local_start),
5966 upl_size, upl, page_list, count, flags, tag);
5967 vm_map_deallocate(submap);
5968
5969 return ret;
5970 }
5971
5972 if (sync_cow_data &&
5973 (VME_OBJECT(entry)->shadow ||
5974 VME_OBJECT(entry)->copy)) {
5975 local_object = VME_OBJECT(entry);
5976 local_start = entry->vme_start;
5977 local_offset = VME_OFFSET(entry);
5978
5979 vm_object_reference(local_object);
5980 vm_map_unlock_read(map);
5981
5982 if (local_object->shadow && local_object->copy) {
5983 vm_object_lock_request(local_object->shadow,
5984 ((vm_object_offset_t)
5985 ((offset - local_start) +
5986 local_offset) +
5987 local_object->vo_shadow_offset),
5988 *upl_size, FALSE,
5989 MEMORY_OBJECT_DATA_SYNC,
5990 VM_PROT_NO_CHANGE);
5991 }
5992 sync_cow_data = FALSE;
5993 vm_object_deallocate(local_object);
5994
5995 goto REDISCOVER_ENTRY;
5996 }
5997 if (force_data_sync) {
5998 local_object = VME_OBJECT(entry);
5999 local_start = entry->vme_start;
6000 local_offset = VME_OFFSET(entry);
6001
6002 vm_object_reference(local_object);
6003 vm_map_unlock_read(map);
6004
6005 vm_object_lock_request(local_object,
6006 ((vm_object_offset_t)
6007 ((offset - local_start) +
6008 local_offset)),
6009 (vm_object_size_t)*upl_size,
6010 FALSE,
6011 MEMORY_OBJECT_DATA_SYNC,
6012 VM_PROT_NO_CHANGE);
6013
6014 force_data_sync = FALSE;
6015 vm_object_deallocate(local_object);
6016
6017 goto REDISCOVER_ENTRY;
6018 }
6019 if (VME_OBJECT(entry)->private)
6020 *flags = UPL_DEV_MEMORY;
6021 else
6022 *flags = 0;
6023
6024 if (VME_OBJECT(entry)->phys_contiguous)
6025 *flags |= UPL_PHYS_CONTIG;
6026
6027 local_object = VME_OBJECT(entry);
6028 local_offset = VME_OFFSET(entry);
6029 local_start = entry->vme_start;
6030
6031#if CONFIG_EMBEDDED
6032 /*
6033 * Wiring will copy the pages to the shadow object.
6034 * The shadow object will not be code-signed so
6035 * attempting to execute code from these copied pages
6036 * would trigger a code-signing violation.
6037 */
6038 if (entry->protection & VM_PROT_EXECUTE) {
6039#if MACH_ASSERT
6040 printf("pid %d[%s] create_upl out of executable range from "
6041 "0x%llx to 0x%llx: side effects may include "
6042 "code-signing violations later on\n",
6043 proc_selfpid(),
6044 (current_task()->bsd_info
6045 ? proc_name_address(current_task()->bsd_info)
6046 : "?"),
6047 (uint64_t) entry->vme_start,
6048 (uint64_t) entry->vme_end);
6049#endif /* MACH_ASSERT */
6050 DTRACE_VM2(cs_executable_create_upl,
6051 uint64_t, (uint64_t)entry->vme_start,
6052 uint64_t, (uint64_t)entry->vme_end);
6053 cs_executable_create_upl++;
6054 }
6055#endif /* CONFIG_EMBEDDED */
6056
6057 vm_object_lock(local_object);
6058
6059 /*
6060 * Ensure that this object is "true_share" and "copy_delay" now,
6061 * while we're still holding the VM map lock. After we unlock the map,
6062 * anything could happen to that mapping, including some copy-on-write
6063 * activity. We need to make sure that the IOPL will point at the
6064 * same memory as the mapping.
6065 */
6066 if (local_object->true_share) {
6067 assert(local_object->copy_strategy !=
6068 MEMORY_OBJECT_COPY_SYMMETRIC);
6069 } else if (local_object != kernel_object &&
6070 local_object != compressor_object &&
6071 !local_object->phys_contiguous) {
6072#if VM_OBJECT_TRACKING_OP_TRUESHARE
6073 if (!local_object->true_share &&
6074 vm_object_tracking_inited) {
6075 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6076 int num = 0;
6077 num = OSBacktrace(bt,
6078 VM_OBJECT_TRACKING_BTDEPTH);
6079 btlog_add_entry(vm_object_tracking_btlog,
6080 local_object,
6081 VM_OBJECT_TRACKING_OP_TRUESHARE,
6082 bt,
6083 num);
6084 }
6085#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6086 local_object->true_share = TRUE;
6087 if (local_object->copy_strategy ==
6088 MEMORY_OBJECT_COPY_SYMMETRIC) {
6089 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6090 }
6091 }
6092
6093 vm_object_reference_locked(local_object);
6094 vm_object_unlock(local_object);
6095
6096 vm_map_unlock_read(map);
6097
6098 ret = vm_object_iopl_request(local_object,
6099 ((vm_object_offset_t)
6100 ((offset - local_start) + local_offset)),
6101 *upl_size,
6102 upl,
6103 page_list,
6104 count,
6105 caller_flags,
6106 tag);
6107 vm_object_deallocate(local_object);
6108
6109 return ret;
6110}
6111
6112/*
6113 * Internal routine to enter a UPL into a VM map.
6114 *
6115 * JMM - This should just be doable through the standard
6116 * vm_map_enter() API.
6117 */
6118kern_return_t
6119vm_map_enter_upl(
6120 vm_map_t map,
6121 upl_t upl,
6122 vm_map_offset_t *dst_addr)
6123{
6124 vm_map_size_t size;
6125 vm_object_offset_t offset;
6126 vm_map_offset_t addr;
6127 vm_page_t m;
6128 kern_return_t kr;
6129 int isVectorUPL = 0, curr_upl=0;
6130 upl_t vector_upl = NULL;
6131 vm_offset_t vector_upl_dst_addr = 0;
6132 vm_map_t vector_upl_submap = NULL;
6133 upl_offset_t subupl_offset = 0;
6134 upl_size_t subupl_size = 0;
6135
6136 if (upl == UPL_NULL)
6137 return KERN_INVALID_ARGUMENT;
6138
6139 if((isVectorUPL = vector_upl_is_valid(upl))) {
6140 int mapped=0,valid_upls=0;
6141 vector_upl = upl;
6142
6143 upl_lock(vector_upl);
6144 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6145 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6146 if(upl == NULL)
6147 continue;
6148 valid_upls++;
6149 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6150 mapped++;
6151 }
6152
6153 if(mapped) {
6154 if(mapped != valid_upls)
6155 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6156 else {
6157 upl_unlock(vector_upl);
6158 return KERN_FAILURE;
6159 }
6160 }
6161
6162 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6163 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6164 &vector_upl_submap);
6165 if( kr != KERN_SUCCESS )
6166 panic("Vector UPL submap allocation failed\n");
6167 map = vector_upl_submap;
6168 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6169 curr_upl=0;
6170 }
6171 else
6172 upl_lock(upl);
6173
6174process_upl_to_enter:
6175 if(isVectorUPL){
6176 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6177 *dst_addr = vector_upl_dst_addr;
6178 upl_unlock(vector_upl);
6179 return KERN_SUCCESS;
6180 }
6181 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6182 if(upl == NULL)
6183 goto process_upl_to_enter;
6184
6185 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6186 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6187 } else {
6188 /*
6189 * check to see if already mapped
6190 */
6191 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6192 upl_unlock(upl);
6193 return KERN_FAILURE;
6194 }
6195 }
6196 if ((!(upl->flags & UPL_SHADOWED)) &&
6197 ((upl->flags & UPL_HAS_BUSY) ||
6198 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6199
6200 vm_object_t object;
6201 vm_page_t alias_page;
6202 vm_object_offset_t new_offset;
6203 unsigned int pg_num;
6204 wpl_array_t lite_list;
6205
6206 if (upl->flags & UPL_INTERNAL) {
6207 lite_list = (wpl_array_t)
6208 ((((uintptr_t)upl) + sizeof(struct upl))
6209 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6210 } else {
6211 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6212 }
6213 object = upl->map_object;
6214 upl->map_object = vm_object_allocate(upl->size);
6215
6216 vm_object_lock(upl->map_object);
6217
6218 upl->map_object->shadow = object;
6219 upl->map_object->pageout = TRUE;
6220 upl->map_object->can_persist = FALSE;
6221 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6222 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6223 upl->map_object->wimg_bits = object->wimg_bits;
6224 offset = upl->map_object->vo_shadow_offset;
6225 new_offset = 0;
6226 size = upl->size;
6227
6228 upl->flags |= UPL_SHADOWED;
6229
6230 while (size) {
6231 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6232 assert(pg_num == new_offset / PAGE_SIZE);
6233
6234 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6235
6236 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6237
6238 vm_object_lock(object);
6239
6240 m = vm_page_lookup(object, offset);
6241 if (m == VM_PAGE_NULL) {
6242 panic("vm_upl_map: page missing\n");
6243 }
6244
6245 /*
6246 * Convert the fictitious page to a private
6247 * shadow of the real page.
6248 */
6249 assert(alias_page->vmp_fictitious);
6250 alias_page->vmp_fictitious = FALSE;
6251 alias_page->vmp_private = TRUE;
6252 alias_page->vmp_free_when_done = TRUE;
6253 /*
6254 * since m is a page in the upl it must
6255 * already be wired or BUSY, so it's
6256 * safe to assign the underlying physical
6257 * page to the alias
6258 */
6259 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6260
6261 vm_object_unlock(object);
6262
6263 vm_page_lockspin_queues();
6264 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6265 vm_page_unlock_queues();
6266
6267 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6268
6269 assert(!alias_page->vmp_wanted);
6270 alias_page->vmp_busy = FALSE;
6271 alias_page->vmp_absent = FALSE;
6272 }
6273 size -= PAGE_SIZE;
6274 offset += PAGE_SIZE_64;
6275 new_offset += PAGE_SIZE_64;
6276 }
6277 vm_object_unlock(upl->map_object);
6278 }
6279 if (upl->flags & UPL_SHADOWED)
6280 offset = 0;
6281 else
6282 offset = upl->offset - upl->map_object->paging_offset;
6283
6284 size = upl->size;
6285
6286 vm_object_reference(upl->map_object);
6287
6288 if(!isVectorUPL) {
6289 *dst_addr = 0;
6290 /*
6291 * NEED A UPL_MAP ALIAS
6292 */
6293 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6294 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6295 upl->map_object, offset, FALSE,
6296 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6297
6298 if (kr != KERN_SUCCESS) {
6299 vm_object_deallocate(upl->map_object);
6300 upl_unlock(upl);
6301 return(kr);
6302 }
6303 }
6304 else {
6305 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6306 VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6307 upl->map_object, offset, FALSE,
6308 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6309 if(kr)
6310 panic("vm_map_enter failed for a Vector UPL\n");
6311 }
6312 vm_object_lock(upl->map_object);
6313
6314 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6315 m = vm_page_lookup(upl->map_object, offset);
6316
6317 if (m) {
6318 m->vmp_pmapped = TRUE;
6319
6320 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6321 * but only in kernel space. If this was on a user map,
6322 * we'd have to set the wpmapped bit. */
6323 /* m->vmp_wpmapped = TRUE; */
6324 assert(map->pmap == kernel_pmap);
6325
6326 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE, kr);
6327
6328 assert(kr == KERN_SUCCESS);
6329#if KASAN
6330 kasan_notify_address(addr, PAGE_SIZE_64);
6331#endif
6332 }
6333 offset += PAGE_SIZE_64;
6334 }
6335 vm_object_unlock(upl->map_object);
6336
6337 /*
6338 * hold a reference for the mapping
6339 */
6340 upl->ref_count++;
6341 upl->flags |= UPL_PAGE_LIST_MAPPED;
6342 upl->kaddr = (vm_offset_t) *dst_addr;
6343 assert(upl->kaddr == *dst_addr);
6344
6345 if(isVectorUPL)
6346 goto process_upl_to_enter;
6347
6348 upl_unlock(upl);
6349
6350 return KERN_SUCCESS;
6351}
6352
6353/*
6354 * Internal routine to remove a UPL mapping from a VM map.
6355 *
6356 * XXX - This should just be doable through a standard
6357 * vm_map_remove() operation. Otherwise, implicit clean-up
6358 * of the target map won't be able to correctly remove
6359 * these (and release the reference on the UPL). Having
6360 * to do this means we can't map these into user-space
6361 * maps yet.
6362 */
6363kern_return_t
6364vm_map_remove_upl(
6365 vm_map_t map,
6366 upl_t upl)
6367{
6368 vm_address_t addr;
6369 upl_size_t size;
6370 int isVectorUPL = 0, curr_upl = 0;
6371 upl_t vector_upl = NULL;
6372
6373 if (upl == UPL_NULL)
6374 return KERN_INVALID_ARGUMENT;
6375
6376 if((isVectorUPL = vector_upl_is_valid(upl))) {
6377 int unmapped=0, valid_upls=0;
6378 vector_upl = upl;
6379 upl_lock(vector_upl);
6380 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6381 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6382 if(upl == NULL)
6383 continue;
6384 valid_upls++;
6385 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6386 unmapped++;
6387 }
6388
6389 if(unmapped) {
6390 if(unmapped != valid_upls)
6391 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6392 else {
6393 upl_unlock(vector_upl);
6394 return KERN_FAILURE;
6395 }
6396 }
6397 curr_upl=0;
6398 }
6399 else
6400 upl_lock(upl);
6401
6402process_upl_to_remove:
6403 if(isVectorUPL) {
6404 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6405 vm_map_t v_upl_submap;
6406 vm_offset_t v_upl_submap_dst_addr;
6407 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6408
6409 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_REMOVE_NO_FLAGS);
6410 vm_map_deallocate(v_upl_submap);
6411 upl_unlock(vector_upl);
6412 return KERN_SUCCESS;
6413 }
6414
6415 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6416 if(upl == NULL)
6417 goto process_upl_to_remove;
6418 }
6419
6420 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6421 addr = upl->kaddr;
6422 size = upl->size;
6423
6424 assert(upl->ref_count > 1);
6425 upl->ref_count--; /* removing mapping ref */
6426
6427 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6428 upl->kaddr = (vm_offset_t) 0;
6429
6430 if(!isVectorUPL) {
6431 upl_unlock(upl);
6432
6433 vm_map_remove(
6434 map,
6435 vm_map_trunc_page(addr,
6436 VM_MAP_PAGE_MASK(map)),
6437 vm_map_round_page(addr + size,
6438 VM_MAP_PAGE_MASK(map)),
6439 VM_MAP_REMOVE_NO_FLAGS);
6440 return KERN_SUCCESS;
6441 }
6442 else {
6443 /*
6444 * If it's a Vectored UPL, we'll be removing the entire
6445 * submap anyways, so no need to remove individual UPL
6446 * element mappings from within the submap
6447 */
6448 goto process_upl_to_remove;
6449 }
6450 }
6451 upl_unlock(upl);
6452
6453 return KERN_FAILURE;
6454}
6455
6456
6457kern_return_t
6458upl_commit_range(
6459 upl_t upl,
6460 upl_offset_t offset,
6461 upl_size_t size,
6462 int flags,
6463 upl_page_info_t *page_list,
6464 mach_msg_type_number_t count,
6465 boolean_t *empty)
6466{
6467 upl_size_t xfer_size, subupl_size = size;
6468 vm_object_t shadow_object;
6469 vm_object_t object;
6470 vm_object_t m_object;
6471 vm_object_offset_t target_offset;
6472 upl_offset_t subupl_offset = offset;
6473 int entry;
6474 wpl_array_t lite_list;
6475 int occupied;
6476 int clear_refmod = 0;
6477 int pgpgout_count = 0;
6478 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6479 struct vm_page_delayed_work *dwp;
6480 int dw_count;
6481 int dw_limit;
6482 int isVectorUPL = 0;
6483 upl_t vector_upl = NULL;
6484 boolean_t should_be_throttled = FALSE;
6485
6486 vm_page_t nxt_page = VM_PAGE_NULL;
6487 int fast_path_possible = 0;
6488 int fast_path_full_commit = 0;
6489 int throttle_page = 0;
6490 int unwired_count = 0;
6491 int local_queue_count = 0;
6492 vm_page_t first_local, last_local;
6493
6494 *empty = FALSE;
6495
6496 if (upl == UPL_NULL)
6497 return KERN_INVALID_ARGUMENT;
6498
6499 if (count == 0)
6500 page_list = NULL;
6501
6502 if((isVectorUPL = vector_upl_is_valid(upl))) {
6503 vector_upl = upl;
6504 upl_lock(vector_upl);
6505 }
6506 else
6507 upl_lock(upl);
6508
6509process_upl_to_commit:
6510
6511 if(isVectorUPL) {
6512 size = subupl_size;
6513 offset = subupl_offset;
6514 if(size == 0) {
6515 upl_unlock(vector_upl);
6516 return KERN_SUCCESS;
6517 }
6518 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6519 if(upl == NULL) {
6520 upl_unlock(vector_upl);
6521 return KERN_FAILURE;
6522 }
6523 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6524 subupl_size -= size;
6525 subupl_offset += size;
6526 }
6527
6528#if UPL_DEBUG
6529 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6530 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6531
6532 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6533 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6534
6535 upl->upl_commit_index++;
6536 }
6537#endif
6538 if (upl->flags & UPL_DEVICE_MEMORY)
6539 xfer_size = 0;
6540 else if ((offset + size) <= upl->size)
6541 xfer_size = size;
6542 else {
6543 if(!isVectorUPL)
6544 upl_unlock(upl);
6545 else {
6546 upl_unlock(vector_upl);
6547 }
6548 return KERN_FAILURE;
6549 }
6550 if (upl->flags & UPL_SET_DIRTY)
6551 flags |= UPL_COMMIT_SET_DIRTY;
6552 if (upl->flags & UPL_CLEAR_DIRTY)
6553 flags |= UPL_COMMIT_CLEAR_DIRTY;
6554
6555 if (upl->flags & UPL_INTERNAL)
6556 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6557 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6558 else
6559 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6560
6561 object = upl->map_object;
6562
6563 if (upl->flags & UPL_SHADOWED) {
6564 vm_object_lock(object);
6565 shadow_object = object->shadow;
6566 } else {
6567 shadow_object = object;
6568 }
6569 entry = offset/PAGE_SIZE;
6570 target_offset = (vm_object_offset_t)offset;
6571
6572 assert(!(target_offset & PAGE_MASK));
6573 assert(!(xfer_size & PAGE_MASK));
6574
6575 if (upl->flags & UPL_KERNEL_OBJECT)
6576 vm_object_lock_shared(shadow_object);
6577 else
6578 vm_object_lock(shadow_object);
6579
6580 VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6581
6582 if (upl->flags & UPL_ACCESS_BLOCKED) {
6583 assert(shadow_object->blocked_access);
6584 shadow_object->blocked_access = FALSE;
6585 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6586 }
6587
6588 if (shadow_object->code_signed) {
6589 /*
6590 * CODE SIGNING:
6591 * If the object is code-signed, do not let this UPL tell
6592 * us if the pages are valid or not. Let the pages be
6593 * validated by VM the normal way (when they get mapped or
6594 * copied).
6595 */
6596 flags &= ~UPL_COMMIT_CS_VALIDATED;
6597 }
6598 if (! page_list) {
6599 /*
6600 * No page list to get the code-signing info from !?
6601 */
6602 flags &= ~UPL_COMMIT_CS_VALIDATED;
6603 }
6604 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6605 should_be_throttled = TRUE;
6606
6607 dwp = &dw_array[0];
6608 dw_count = 0;
6609 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6610
6611 if ((upl->flags & UPL_IO_WIRE) &&
6612 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6613 !isVectorUPL &&
6614 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6615 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6616
6617 if (!vm_page_queue_empty(&shadow_object->memq)) {
6618
6619 if (size == shadow_object->vo_size) {
6620 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6621 fast_path_full_commit = 1;
6622 }
6623 fast_path_possible = 1;
6624
6625 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6626 (shadow_object->purgable == VM_PURGABLE_DENY ||
6627 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6628 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6629 throttle_page = 1;
6630 }
6631 }
6632 }
6633 first_local = VM_PAGE_NULL;
6634 last_local = VM_PAGE_NULL;
6635
6636 while (xfer_size) {
6637 vm_page_t t, m;
6638
6639 dwp->dw_mask = 0;
6640 clear_refmod = 0;
6641
6642 m = VM_PAGE_NULL;
6643
6644 if (upl->flags & UPL_LITE) {
6645 unsigned int pg_num;
6646
6647 if (nxt_page != VM_PAGE_NULL) {
6648 m = nxt_page;
6649 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
6650 target_offset = m->vmp_offset;
6651 }
6652 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6653 assert(pg_num == target_offset/PAGE_SIZE);
6654
6655 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6656 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6657
6658 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6659 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6660 } else
6661 m = NULL;
6662 }
6663 if (upl->flags & UPL_SHADOWED) {
6664 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6665
6666 t->vmp_free_when_done = FALSE;
6667
6668 VM_PAGE_FREE(t);
6669
6670 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6671 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6672 }
6673 }
6674 if (m == VM_PAGE_NULL)
6675 goto commit_next_page;
6676
6677 m_object = VM_PAGE_OBJECT(m);
6678
6679 if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6680 assert(m->vmp_busy);
6681
6682 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6683 goto commit_next_page;
6684 }
6685
6686 if (flags & UPL_COMMIT_CS_VALIDATED) {
6687 /*
6688 * CODE SIGNING:
6689 * Set the code signing bits according to
6690 * what the UPL says they should be.
6691 */
6692 m->vmp_cs_validated = page_list[entry].cs_validated;
6693 m->vmp_cs_tainted = page_list[entry].cs_tainted;
6694 m->vmp_cs_nx = page_list[entry].cs_nx;
6695 }
6696 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6697 m->vmp_written_by_kernel = TRUE;
6698
6699 if (upl->flags & UPL_IO_WIRE) {
6700
6701 if (page_list)
6702 page_list[entry].phys_addr = 0;
6703
6704 if (flags & UPL_COMMIT_SET_DIRTY) {
6705 SET_PAGE_DIRTY(m, FALSE);
6706 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6707 m->vmp_dirty = FALSE;
6708
6709 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6710 m->vmp_cs_validated && !m->vmp_cs_tainted) {
6711 /*
6712 * CODE SIGNING:
6713 * This page is no longer dirty
6714 * but could have been modified,
6715 * so it will need to be
6716 * re-validated.
6717 */
6718 m->vmp_cs_validated = FALSE;
6719
6720 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6721
6722 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6723 }
6724 clear_refmod |= VM_MEM_MODIFIED;
6725 }
6726 if (upl->flags & UPL_ACCESS_BLOCKED) {
6727 /*
6728 * We blocked access to the pages in this UPL.
6729 * Clear the "busy" bit and wake up any waiter
6730 * for this page.
6731 */
6732 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6733 }
6734 if (fast_path_possible) {
6735 assert(m_object->purgable != VM_PURGABLE_EMPTY);
6736 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6737 if (m->vmp_absent) {
6738 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
6739 assert(m->vmp_wire_count == 0);
6740 assert(m->vmp_busy);
6741
6742 m->vmp_absent = FALSE;
6743 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6744 } else {
6745 if (m->vmp_wire_count == 0)
6746 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6747 assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
6748
6749 /*
6750 * XXX FBDP need to update some other
6751 * counters here (purgeable_wired_count)
6752 * (ledgers), ...
6753 */
6754 assert(m->vmp_wire_count > 0);
6755 m->vmp_wire_count--;
6756
6757 if (m->vmp_wire_count == 0) {
6758 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
6759 unwired_count++;
6760 }
6761 }
6762 if (m->vmp_wire_count == 0) {
6763 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6764
6765 if (last_local == VM_PAGE_NULL) {
6766 assert(first_local == VM_PAGE_NULL);
6767
6768 last_local = m;
6769 first_local = m;
6770 } else {
6771 assert(first_local != VM_PAGE_NULL);
6772
6773 m->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6774 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6775 first_local = m;
6776 }
6777 local_queue_count++;
6778
6779 if (throttle_page) {
6780 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
6781 } else {
6782 if (flags & UPL_COMMIT_INACTIVATE) {
6783 if (shadow_object->internal)
6784 m->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6785 else
6786 m->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6787 } else
6788 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
6789 }
6790 }
6791 } else {
6792 if (flags & UPL_COMMIT_INACTIVATE) {
6793 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6794 clear_refmod |= VM_MEM_REFERENCED;
6795 }
6796 if (m->vmp_absent) {
6797 if (flags & UPL_COMMIT_FREE_ABSENT)
6798 dwp->dw_mask |= DW_vm_page_free;
6799 else {
6800 m->vmp_absent = FALSE;
6801 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6802
6803 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6804 dwp->dw_mask |= DW_vm_page_activate;
6805 }
6806 } else
6807 dwp->dw_mask |= DW_vm_page_unwire;
6808 }
6809 goto commit_next_page;
6810 }
6811 assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6812
6813 if (page_list)
6814 page_list[entry].phys_addr = 0;
6815
6816 /*
6817 * make sure to clear the hardware
6818 * modify or reference bits before
6819 * releasing the BUSY bit on this page
6820 * otherwise we risk losing a legitimate
6821 * change of state
6822 */
6823 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6824 m->vmp_dirty = FALSE;
6825
6826 clear_refmod |= VM_MEM_MODIFIED;
6827 }
6828 if (m->vmp_laundry)
6829 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6830
6831 if (VM_PAGE_WIRED(m))
6832 m->vmp_free_when_done = FALSE;
6833
6834 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6835 m->vmp_cs_validated && !m->vmp_cs_tainted) {
6836 /*
6837 * CODE SIGNING:
6838 * This page is no longer dirty
6839 * but could have been modified,
6840 * so it will need to be
6841 * re-validated.
6842 */
6843 m->vmp_cs_validated = FALSE;
6844
6845 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6846
6847 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6848 }
6849 if (m->vmp_overwriting) {
6850 /*
6851 * the (COPY_OUT_FROM == FALSE) request_page_list case
6852 */
6853 if (m->vmp_busy) {
6854#if CONFIG_PHANTOM_CACHE
6855 if (m->vmp_absent && !m_object->internal)
6856 dwp->dw_mask |= DW_vm_phantom_cache_update;
6857#endif
6858 m->vmp_absent = FALSE;
6859
6860 dwp->dw_mask |= DW_clear_busy;
6861 } else {
6862 /*
6863 * alternate (COPY_OUT_FROM == FALSE) page_list case
6864 * Occurs when the original page was wired
6865 * at the time of the list request
6866 */
6867 assert(VM_PAGE_WIRED(m));
6868
6869 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
6870 }
6871 m->vmp_overwriting = FALSE;
6872 }
6873 m->vmp_cleaning = FALSE;
6874
6875 if (m->vmp_free_when_done) {
6876 /*
6877 * With the clean queue enabled, UPL_PAGEOUT should
			 * no longer set the pageout bit. Its pages now go
6879 * to the clean queue.
6880 */
6881 assert(!(flags & UPL_PAGEOUT));
6882 assert(!m_object->internal);
6883
6884 m->vmp_free_when_done = FALSE;
6885
6886 if ((flags & UPL_COMMIT_SET_DIRTY) ||
6887 (m->vmp_pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
6888 /*
6889 * page was re-dirtied after we started
6890 * the pageout... reactivate it since
6891 * we don't know whether the on-disk
6892 * copy matches what is now in memory
6893 */
6894 SET_PAGE_DIRTY(m, FALSE);
6895
6896 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
6897
6898 if (upl->flags & UPL_PAGEOUT) {
6899 VM_STAT_INCR(reactivations);
6900 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
6901 }
6902 } else {
6903 /*
6904 * page has been successfully cleaned
6905 * go ahead and free it for other use
6906 */
6907 if (m_object->internal) {
6908 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
6909 } else {
6910 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
6911 }
6912 m->vmp_dirty = FALSE;
6913 m->vmp_busy = TRUE;
6914
6915 dwp->dw_mask |= DW_vm_page_free;
6916 }
6917 goto commit_next_page;
6918 }
6919 /*
6920 * It is a part of the semantic of COPYOUT_FROM
6921 * UPLs that a commit implies cache sync
6922 * between the vm page and the backing store
6923 * this can be used to strip the precious bit
6924 * as well as clean
6925 */
6926 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
6927 m->vmp_precious = FALSE;
6928
6929 if (flags & UPL_COMMIT_SET_DIRTY) {
6930 SET_PAGE_DIRTY(m, FALSE);
6931 } else {
6932 m->vmp_dirty = FALSE;
6933 }
6934
6935 /* with the clean queue on, move *all* cleaned pages to the clean queue */
6936 if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) {
6937 pgpgout_count++;
6938
6939 VM_STAT_INCR(pageouts);
6940 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
6941
6942 dwp->dw_mask |= DW_enqueue_cleaned;
6943 } else if (should_be_throttled == TRUE && (m->vmp_q_state == VM_PAGE_NOT_ON_Q)) {
6944 /*
6945 * page coming back in from being 'frozen'...
6946 * it was dirty before it was frozen, so keep it so
6947 * the vm_page_activate will notice that it really belongs
6948 * on the throttle queue and put it there
6949 */
6950 SET_PAGE_DIRTY(m, FALSE);
6951 dwp->dw_mask |= DW_vm_page_activate;
6952
6953 } else {
6954 if ((flags & UPL_COMMIT_INACTIVATE) && !m->vmp_clustered && (m->vmp_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
6955 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6956 clear_refmod |= VM_MEM_REFERENCED;
6957 } else if ( !VM_PAGE_PAGEABLE(m)) {
6958
6959 if (m->vmp_clustered || (flags & UPL_COMMIT_SPECULATE))
6960 dwp->dw_mask |= DW_vm_page_speculate;
6961 else if (m->vmp_reference)
6962 dwp->dw_mask |= DW_vm_page_activate;
6963 else {
6964 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6965 clear_refmod |= VM_MEM_REFERENCED;
6966 }
6967 }
6968 }
6969 if (upl->flags & UPL_ACCESS_BLOCKED) {
6970 /*
			 * We blocked access to the pages in this UPL.
6972 * Clear the "busy" bit on this page before we
6973 * wake up any waiter.
6974 */
6975 dwp->dw_mask |= DW_clear_busy;
6976 }
6977 /*
6978 * Wakeup any thread waiting for the page to be un-cleaning.
6979 */
6980 dwp->dw_mask |= DW_PAGE_WAKEUP;
6981
6982commit_next_page:
6983 if (clear_refmod)
6984 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
6985
6986 target_offset += PAGE_SIZE_64;
6987 xfer_size -= PAGE_SIZE;
6988 entry++;
6989
6990 if (dwp->dw_mask) {
6991 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
6992 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
6993
6994 if (dw_count >= dw_limit) {
6995 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
6996
6997 dwp = &dw_array[0];
6998 dw_count = 0;
6999 }
7000 } else {
7001 if (dwp->dw_mask & DW_clear_busy)
7002 m->vmp_busy = FALSE;
7003
7004 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7005 PAGE_WAKEUP(m);
7006 }
7007 }
7008 }
7009 if (dw_count)
7010 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7011
7012 if (fast_path_possible) {
7013
7014 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7015 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7016
7017 if (local_queue_count || unwired_count) {
7018
7019 if (local_queue_count) {
7020 vm_page_t first_target;
7021 vm_page_queue_head_t *target_queue;
7022
7023 if (throttle_page)
7024 target_queue = &vm_page_queue_throttled;
7025 else {
7026 if (flags & UPL_COMMIT_INACTIVATE) {
7027 if (shadow_object->internal)
7028 target_queue = &vm_page_queue_anonymous;
7029 else
7030 target_queue = &vm_page_queue_inactive;
7031 } else
7032 target_queue = &vm_page_queue_active;
7033 }
7034 /*
7035 * Transfer the entire local queue to a regular LRU page queues.
7036 */
7037 vm_page_lockspin_queues();
7038
7039 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7040
7041 if (vm_page_queue_empty(target_queue))
7042 target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7043 else
7044 first_target->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7045
7046 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7047 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7048 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
7049
7050 /*
7051 * Adjust the global page counts.
7052 */
7053 if (throttle_page) {
7054 vm_page_throttled_count += local_queue_count;
7055 } else {
7056 if (flags & UPL_COMMIT_INACTIVATE) {
7057 if (shadow_object->internal)
7058 vm_page_anonymous_count += local_queue_count;
7059 vm_page_inactive_count += local_queue_count;
7060
7061 token_new_pagecount += local_queue_count;
7062 } else
7063 vm_page_active_count += local_queue_count;
7064
7065 if (shadow_object->internal)
7066 vm_page_pageable_internal_count += local_queue_count;
7067 else
7068 vm_page_pageable_external_count += local_queue_count;
7069 }
7070 } else {
7071 vm_page_lockspin_queues();
7072 }
7073 if (unwired_count) {
7074 vm_page_wire_count -= unwired_count;
7075 VM_CHECK_MEMORYSTATUS;
7076 }
7077 vm_page_unlock_queues();
7078
7079 VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7080 }
7081 }
7082 occupied = 1;
7083
7084 if (upl->flags & UPL_DEVICE_MEMORY) {
7085 occupied = 0;
7086 } else if (upl->flags & UPL_LITE) {
7087 int pg_num;
7088 int i;
7089
7090 occupied = 0;
7091
7092 if (!fast_path_full_commit) {
7093 pg_num = upl->size/PAGE_SIZE;
7094 pg_num = (pg_num + 31) >> 5;
7095
7096 for (i = 0; i < pg_num; i++) {
7097 if (lite_list[i] != 0) {
7098 occupied = 1;
7099 break;
7100 }
7101 }
7102 }
7103 } else {
7104 if (vm_page_queue_empty(&upl->map_object->memq))
7105 occupied = 0;
7106 }
7107 if (occupied == 0) {
7108 /*
7109 * If this UPL element belongs to a Vector UPL and is
7110 * empty, then this is the right function to deallocate
7111 * it. So go ahead set the *empty variable. The flag
7112 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7113 * should be considered relevant for the Vector UPL and not
7114 * the internal UPLs.
7115 */
7116 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7117 *empty = TRUE;
7118
7119 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7120 /*
7121 * this is not a paging object
7122 * so we need to drop the paging reference
7123 * that was taken when we created the UPL
7124 * against this object
7125 */
7126 vm_object_activity_end(shadow_object);
7127 vm_object_collapse(shadow_object, 0, TRUE);
7128 } else {
7129 /*
7130 * we dontated the paging reference to
7131 * the map object... vm_pageout_object_terminate
7132 * will drop this reference
7133 */
7134 }
7135 }
7136 VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7137 vm_object_unlock(shadow_object);
7138 if (object != shadow_object)
7139 vm_object_unlock(object);
7140
7141 if(!isVectorUPL)
7142 upl_unlock(upl);
7143 else {
7144 /*
7145 * If we completed our operations on an UPL that is
7146 * part of a Vectored UPL and if empty is TRUE, then
7147 * we should go ahead and deallocate this UPL element.
7148 * Then we check if this was the last of the UPL elements
7149 * within that Vectored UPL. If so, set empty to TRUE
7150 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7151 * can go ahead and deallocate the Vector UPL too.
7152 */
7153 if(*empty==TRUE) {
7154 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7155 upl_deallocate(upl);
7156 }
7157 goto process_upl_to_commit;
7158 }
7159 if (pgpgout_count) {
7160 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7161 }
7162
7163 return KERN_SUCCESS;
7164}
7165
7166kern_return_t
7167upl_abort_range(
7168 upl_t upl,
7169 upl_offset_t offset,
7170 upl_size_t size,
7171 int error,
7172 boolean_t *empty)
7173{
7174 upl_page_info_t *user_page_list = NULL;
7175 upl_size_t xfer_size, subupl_size = size;
7176 vm_object_t shadow_object;
7177 vm_object_t object;
7178 vm_object_offset_t target_offset;
7179 upl_offset_t subupl_offset = offset;
7180 int entry;
7181 wpl_array_t lite_list;
7182 int occupied;
7183 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7184 struct vm_page_delayed_work *dwp;
7185 int dw_count;
7186 int dw_limit;
7187 int isVectorUPL = 0;
7188 upl_t vector_upl = NULL;
7189
7190 *empty = FALSE;
7191
7192 if (upl == UPL_NULL)
7193 return KERN_INVALID_ARGUMENT;
7194
7195 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7196 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7197
7198 if((isVectorUPL = vector_upl_is_valid(upl))) {
7199 vector_upl = upl;
7200 upl_lock(vector_upl);
7201 }
7202 else
7203 upl_lock(upl);
7204
7205process_upl_to_abort:
7206 if(isVectorUPL) {
7207 size = subupl_size;
7208 offset = subupl_offset;
7209 if(size == 0) {
7210 upl_unlock(vector_upl);
7211 return KERN_SUCCESS;
7212 }
7213 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7214 if(upl == NULL) {
7215 upl_unlock(vector_upl);
7216 return KERN_FAILURE;
7217 }
7218 subupl_size -= size;
7219 subupl_offset += size;
7220 }
7221
7222 *empty = FALSE;
7223
7224#if UPL_DEBUG
7225 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7226 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7227
7228 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7229 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7230 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7231
7232 upl->upl_commit_index++;
7233 }
7234#endif
7235 if (upl->flags & UPL_DEVICE_MEMORY)
7236 xfer_size = 0;
7237 else if ((offset + size) <= upl->size)
7238 xfer_size = size;
7239 else {
7240 if(!isVectorUPL)
7241 upl_unlock(upl);
7242 else {
7243 upl_unlock(vector_upl);
7244 }
7245
7246 return KERN_FAILURE;
7247 }
7248 if (upl->flags & UPL_INTERNAL) {
7249 lite_list = (wpl_array_t)
7250 ((((uintptr_t)upl) + sizeof(struct upl))
7251 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7252
7253 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7254 } else {
7255 lite_list = (wpl_array_t)
7256 (((uintptr_t)upl) + sizeof(struct upl));
7257 }
7258 object = upl->map_object;
7259
7260 if (upl->flags & UPL_SHADOWED) {
7261 vm_object_lock(object);
7262 shadow_object = object->shadow;
7263 } else
7264 shadow_object = object;
7265
7266 entry = offset/PAGE_SIZE;
7267 target_offset = (vm_object_offset_t)offset;
7268
7269 assert(!(target_offset & PAGE_MASK));
7270 assert(!(xfer_size & PAGE_MASK));
7271
7272 if (upl->flags & UPL_KERNEL_OBJECT)
7273 vm_object_lock_shared(shadow_object);
7274 else
7275 vm_object_lock(shadow_object);
7276
7277 if (upl->flags & UPL_ACCESS_BLOCKED) {
7278 assert(shadow_object->blocked_access);
7279 shadow_object->blocked_access = FALSE;
7280 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7281 }
7282
7283 dwp = &dw_array[0];
7284 dw_count = 0;
7285 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7286
7287 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7288 panic("upl_abort_range: kernel_object being DUMPED");
7289
7290 while (xfer_size) {
7291 vm_page_t t, m;
7292 unsigned int pg_num;
7293 boolean_t needed;
7294
7295 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7296 assert(pg_num == target_offset/PAGE_SIZE);
7297
7298 needed = FALSE;
7299
7300 if (user_page_list)
7301 needed = user_page_list[pg_num].needed;
7302
7303 dwp->dw_mask = 0;
7304 m = VM_PAGE_NULL;
7305
7306 if (upl->flags & UPL_LITE) {
7307
7308 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7309 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7310
7311 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7312 m = vm_page_lookup(shadow_object, target_offset +
7313 (upl->offset - shadow_object->paging_offset));
7314 }
7315 }
7316 if (upl->flags & UPL_SHADOWED) {
7317 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7318 t->vmp_free_when_done = FALSE;
7319
7320 VM_PAGE_FREE(t);
7321
7322 if (m == VM_PAGE_NULL)
7323 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7324 }
7325 }
7326 if ((upl->flags & UPL_KERNEL_OBJECT))
7327 goto abort_next_page;
7328
7329 if (m != VM_PAGE_NULL) {
7330
7331 assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7332
7333 if (m->vmp_absent) {
7334 boolean_t must_free = TRUE;
7335
7336 /*
7337 * COPYOUT = FALSE case
7338 * check for error conditions which must
7339 * be passed back to the pages customer
7340 */
7341 if (error & UPL_ABORT_RESTART) {
7342 m->vmp_restart = TRUE;
7343 m->vmp_absent = FALSE;
7344 m->vmp_unusual = TRUE;
7345 must_free = FALSE;
7346 } else if (error & UPL_ABORT_UNAVAILABLE) {
7347 m->vmp_restart = FALSE;
7348 m->vmp_unusual = TRUE;
7349 must_free = FALSE;
7350 } else if (error & UPL_ABORT_ERROR) {
7351 m->vmp_restart = FALSE;
7352 m->vmp_absent = FALSE;
7353 m->vmp_error = TRUE;
7354 m->vmp_unusual = TRUE;
7355 must_free = FALSE;
7356 }
7357 if (m->vmp_clustered && needed == FALSE) {
7358 /*
7359 * This page was a part of a speculative
7360 * read-ahead initiated by the kernel
7361 * itself. No one is expecting this
7362 * page and no one will clean up its
7363 * error state if it ever becomes valid
7364 * in the future.
7365 * We have to free it here.
7366 */
7367 must_free = TRUE;
7368 }
7369 m->vmp_cleaning = FALSE;
7370
7371 if (m->vmp_overwriting && !m->vmp_busy) {
7372 /*
7373 * this shouldn't happen since
7374 * this is an 'absent' page, but
7375 * it doesn't hurt to check for
7376 * the 'alternate' method of
7377 * stabilizing the page...
7378 * we will mark 'busy' to be cleared
7379 * in the following code which will
7380 * take care of the primary stabilzation
7381 * method (i.e. setting 'busy' to TRUE)
7382 */
7383 dwp->dw_mask |= DW_vm_page_unwire;
7384 }
7385 m->vmp_overwriting = FALSE;
7386
7387 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7388
7389 if (must_free == TRUE)
7390 dwp->dw_mask |= DW_vm_page_free;
7391 else
7392 dwp->dw_mask |= DW_vm_page_activate;
7393 } else {
7394 /*
7395 * Handle the trusted pager throttle.
7396 */
7397 if (m->vmp_laundry)
7398 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7399
7400 if (upl->flags & UPL_ACCESS_BLOCKED) {
7401 /*
7402 * We blocked access to the pages in this UPL.
7403 * Clear the "busy" bit and wake up any waiter
7404 * for this page.
7405 */
7406 dwp->dw_mask |= DW_clear_busy;
7407 }
7408 if (m->vmp_overwriting) {
7409 if (m->vmp_busy)
7410 dwp->dw_mask |= DW_clear_busy;
7411 else {
7412 /*
7413 * deal with the 'alternate' method
7414 * of stabilizing the page...
7415 * we will either free the page
7416 * or mark 'busy' to be cleared
7417 * in the following code which will
7418 * take care of the primary stabilzation
7419 * method (i.e. setting 'busy' to TRUE)
7420 */
7421 dwp->dw_mask |= DW_vm_page_unwire;
7422 }
7423 m->vmp_overwriting = FALSE;
7424 }
7425 m->vmp_free_when_done = FALSE;
7426 m->vmp_cleaning = FALSE;
7427
7428 if (error & UPL_ABORT_DUMP_PAGES) {
7429 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7430
7431 dwp->dw_mask |= DW_vm_page_free;
7432 } else {
7433 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7434 if (error & UPL_ABORT_REFERENCE) {
7435 /*
7436 * we've been told to explictly
7437 * reference this page... for
7438 * file I/O, this is done by
7439 * implementing an LRU on the inactive q
7440 */
7441 dwp->dw_mask |= DW_vm_page_lru;
7442
7443 } else if ( !VM_PAGE_PAGEABLE(m))
7444 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7445 }
7446 dwp->dw_mask |= DW_PAGE_WAKEUP;
7447 }
7448 }
7449 }
7450abort_next_page:
7451 target_offset += PAGE_SIZE_64;
7452 xfer_size -= PAGE_SIZE;
7453 entry++;
7454
7455 if (dwp->dw_mask) {
7456 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7457 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7458
7459 if (dw_count >= dw_limit) {
7460 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7461
7462 dwp = &dw_array[0];
7463 dw_count = 0;
7464 }
7465 } else {
7466 if (dwp->dw_mask & DW_clear_busy)
7467 m->vmp_busy = FALSE;
7468
7469 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7470 PAGE_WAKEUP(m);
7471 }
7472 }
7473 }
7474 if (dw_count)
7475 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7476
7477 occupied = 1;
7478
7479 if (upl->flags & UPL_DEVICE_MEMORY) {
7480 occupied = 0;
7481 } else if (upl->flags & UPL_LITE) {
7482 int pg_num;
7483 int i;
7484
7485 pg_num = upl->size/PAGE_SIZE;
7486 pg_num = (pg_num + 31) >> 5;
7487 occupied = 0;
7488
7489 for (i = 0; i < pg_num; i++) {
7490 if (lite_list[i] != 0) {
7491 occupied = 1;
7492 break;
7493 }
7494 }
7495 } else {
7496 if (vm_page_queue_empty(&upl->map_object->memq))
7497 occupied = 0;
7498 }
7499 if (occupied == 0) {
7500 /*
7501 * If this UPL element belongs to a Vector UPL and is
7502 * empty, then this is the right function to deallocate
7503 * it. So go ahead set the *empty variable. The flag
7504 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7505 * should be considered relevant for the Vector UPL and
7506 * not the internal UPLs.
7507 */
7508 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7509 *empty = TRUE;
7510
7511 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7512 /*
7513 * this is not a paging object
7514 * so we need to drop the paging reference
7515 * that was taken when we created the UPL
7516 * against this object
7517 */
7518 vm_object_activity_end(shadow_object);
7519 vm_object_collapse(shadow_object, 0, TRUE);
7520 } else {
7521 /*
7522 * we dontated the paging reference to
7523 * the map object... vm_pageout_object_terminate
7524 * will drop this reference
7525 */
7526 }
7527 }
7528 vm_object_unlock(shadow_object);
7529 if (object != shadow_object)
7530 vm_object_unlock(object);
7531
7532 if(!isVectorUPL)
7533 upl_unlock(upl);
7534 else {
7535 /*
7536 * If we completed our operations on an UPL that is
7537 * part of a Vectored UPL and if empty is TRUE, then
7538 * we should go ahead and deallocate this UPL element.
7539 * Then we check if this was the last of the UPL elements
7540 * within that Vectored UPL. If so, set empty to TRUE
7541 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7542 * can go ahead and deallocate the Vector UPL too.
7543 */
7544 if(*empty == TRUE) {
7545 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7546 upl_deallocate(upl);
7547 }
7548 goto process_upl_to_abort;
7549 }
7550
7551 return KERN_SUCCESS;
7552}
7553
7554
7555kern_return_t
7556upl_abort(
7557 upl_t upl,
7558 int error)
7559{
7560 boolean_t empty;
7561
7562 if (upl == UPL_NULL)
7563 return KERN_INVALID_ARGUMENT;
7564
7565 return upl_abort_range(upl, 0, upl->size, error, &empty);
7566}
7567
7568
7569/* an option on commit should be wire */
7570kern_return_t
7571upl_commit(
7572 upl_t upl,
7573 upl_page_info_t *page_list,
7574 mach_msg_type_number_t count)
7575{
7576 boolean_t empty;
7577
7578 if (upl == UPL_NULL)
7579 return KERN_INVALID_ARGUMENT;
7580
7581 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7582}
7583
7584
7585void
7586iopl_valid_data(
7587 upl_t upl,
7588 vm_tag_t tag)
7589{
7590 vm_object_t object;
7591 vm_offset_t offset;
7592 vm_page_t m, nxt_page = VM_PAGE_NULL;
7593 upl_size_t size;
7594 int wired_count = 0;
7595
7596 if (upl == NULL)
7597 panic("iopl_valid_data: NULL upl");
7598 if (vector_upl_is_valid(upl))
7599 panic("iopl_valid_data: vector upl");
7600 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7601 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7602
7603 object = upl->map_object;
7604
7605 if (object == kernel_object || object == compressor_object)
7606 panic("iopl_valid_data: object == kernel or compressor");
7607
7608 if (object->purgable == VM_PURGABLE_VOLATILE ||
7609 object->purgable == VM_PURGABLE_EMPTY)
7610 panic("iopl_valid_data: object %p purgable %d",
7611 object, object->purgable);
7612
7613 size = upl->size;
7614
7615 vm_object_lock(object);
7616 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7617
7618 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7619 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7620 else
7621 offset = 0 + upl->offset - object->paging_offset;
7622
7623 while (size) {
7624
7625 if (nxt_page != VM_PAGE_NULL) {
7626 m = nxt_page;
7627 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
7628 } else {
7629 m = vm_page_lookup(object, offset);
7630 offset += PAGE_SIZE;
7631
7632 if (m == VM_PAGE_NULL)
7633 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7634 }
7635 if (m->vmp_busy) {
7636 if (!m->vmp_absent)
7637 panic("iopl_valid_data: busy page w/o absent");
7638
7639 if (m->vmp_pageq.next || m->vmp_pageq.prev)
7640 panic("iopl_valid_data: busy+absent page on page queue");
7641 if (m->vmp_reusable) {
7642 panic("iopl_valid_data: %p is reusable", m);
7643 }
7644
7645 m->vmp_absent = FALSE;
7646 m->vmp_dirty = TRUE;
7647 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
7648 assert(m->vmp_wire_count == 0);
7649 m->vmp_wire_count++;
7650 assert(m->vmp_wire_count);
7651 if (m->vmp_wire_count == 1) {
7652 m->vmp_q_state = VM_PAGE_IS_WIRED;
7653 wired_count++;
7654 } else {
7655 panic("iopl_valid_data: %p already wired\n", m);
7656 }
7657
7658 PAGE_WAKEUP_DONE(m);
7659 }
7660 size -= PAGE_SIZE;
7661 }
7662 if (wired_count) {
7663
7664 VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7665 assert(object->resident_page_count >= object->wired_page_count);
7666
7667 /* no need to adjust purgeable accounting for this object: */
7668 assert(object->purgable != VM_PURGABLE_VOLATILE);
7669 assert(object->purgable != VM_PURGABLE_EMPTY);
7670
7671 vm_page_lockspin_queues();
7672 vm_page_wire_count += wired_count;
7673 vm_page_unlock_queues();
7674 }
7675 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7676 vm_object_unlock(object);
7677}
7678
7679
7680void
7681vm_object_set_pmap_cache_attr(
7682 vm_object_t object,
7683 upl_page_info_array_t user_page_list,
7684 unsigned int num_pages,
7685 boolean_t batch_pmap_op)
7686{
7687 unsigned int cache_attr = 0;
7688
7689 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7690 assert(user_page_list);
7691 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7692 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7693 }
7694}
7695
7696
/*
 * Forward declarations of the two IOPL wiring helpers defined below:
 * vm_object_iopl_wire_full() wires the pages already resident in an
 * object, vm_object_iopl_wire_empty() grabs and inserts new pages.
 */
boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t *, int, int*);
7699
7700
7701
7702boolean_t
7703vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7704 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7705{
7706 vm_page_t dst_page;
7707 unsigned int entry;
7708 int page_count;
7709 int delayed_unlock = 0;
7710 boolean_t retval = TRUE;
7711 ppnum_t phys_page;
7712
7713 vm_object_lock_assert_exclusive(object);
7714 assert(object->purgable != VM_PURGABLE_VOLATILE);
7715 assert(object->purgable != VM_PURGABLE_EMPTY);
7716 assert(object->pager == NULL);
7717 assert(object->copy == NULL);
7718 assert(object->shadow == NULL);
7719
7720 page_count = object->resident_page_count;
7721 dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7722
7723 vm_page_lock_queues();
7724
7725 while (page_count--) {
7726
7727 if (dst_page->vmp_busy ||
7728 dst_page->vmp_fictitious ||
7729 dst_page->vmp_absent ||
7730 dst_page->vmp_error ||
7731 dst_page->vmp_cleaning ||
7732 dst_page->vmp_restart ||
7733 dst_page->vmp_laundry) {
7734 retval = FALSE;
7735 goto done;
7736 }
7737 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
7738 retval = FALSE;
7739 goto done;
7740 }
7741 dst_page->vmp_reference = TRUE;
7742
7743 vm_page_wire(dst_page, tag, FALSE);
7744
7745 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7746 SET_PAGE_DIRTY(dst_page, FALSE);
7747 }
7748 entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
7749 assert(entry >= 0 && entry < object->resident_page_count);
7750 lite_list[entry>>5] |= 1 << (entry & 31);
7751
7752 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7753
7754 if (phys_page > upl->highest_page)
7755 upl->highest_page = phys_page;
7756
7757 if (user_page_list) {
7758 user_page_list[entry].phys_addr = phys_page;
7759 user_page_list[entry].absent = dst_page->vmp_absent;
7760 user_page_list[entry].dirty = dst_page->vmp_dirty;
7761 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
7762 user_page_list[entry].precious = dst_page->vmp_precious;
7763 user_page_list[entry].device = FALSE;
7764 user_page_list[entry].speculative = FALSE;
7765 user_page_list[entry].cs_validated = FALSE;
7766 user_page_list[entry].cs_tainted = FALSE;
7767 user_page_list[entry].cs_nx = FALSE;
7768 user_page_list[entry].needed = FALSE;
7769 user_page_list[entry].mark = FALSE;
7770 }
7771 if (delayed_unlock++ > 256) {
7772 delayed_unlock = 0;
7773 lck_mtx_yield(&vm_page_queue_lock);
7774
7775 VM_CHECK_MEMORYSTATUS;
7776 }
7777 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq);
7778 }
7779done:
7780 vm_page_unlock_queues();
7781
7782 VM_CHECK_MEMORYSTATUS;
7783
7784 return (retval);
7785}
7786
7787
7788kern_return_t
7789vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7790 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset,
7791 int page_count, int* page_grab_count)
7792{
7793 vm_page_t dst_page;
7794 boolean_t no_zero_fill = FALSE;
7795 int interruptible;
7796 int pages_wired = 0;
7797 int pages_inserted = 0;
7798 int entry = 0;
7799 uint64_t delayed_ledger_update = 0;
7800 kern_return_t ret = KERN_SUCCESS;
7801 int grab_options;
7802 ppnum_t phys_page;
7803
7804 vm_object_lock_assert_exclusive(object);
7805 assert(object->purgable != VM_PURGABLE_VOLATILE);
7806 assert(object->purgable != VM_PURGABLE_EMPTY);
7807 assert(object->pager == NULL);
7808 assert(object->copy == NULL);
7809 assert(object->shadow == NULL);
7810
7811 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7812 interruptible = THREAD_ABORTSAFE;
7813 else
7814 interruptible = THREAD_UNINT;
7815
7816 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7817 no_zero_fill = TRUE;
7818
7819 grab_options = 0;
7820#if CONFIG_SECLUDED_MEMORY
7821 if (object->can_grab_secluded) {
7822 grab_options |= VM_PAGE_GRAB_SECLUDED;
7823 }
7824#endif /* CONFIG_SECLUDED_MEMORY */
7825
7826 while (page_count--) {
7827
7828 while ((dst_page = vm_page_grab_options(grab_options))
7829 == VM_PAGE_NULL) {
7830
7831 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
7832
7833 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7834
7835 if (vm_page_wait(interruptible) == FALSE) {
7836 /*
7837 * interrupted case
7838 */
7839 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7840
7841 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7842
7843 ret = MACH_SEND_INTERRUPTED;
7844 goto done;
7845 }
7846 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7847
7848 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
7849 }
7850 if (no_zero_fill == FALSE)
7851 vm_page_zero_fill(dst_page);
7852 else
7853 dst_page->vmp_absent = TRUE;
7854
7855 dst_page->vmp_reference = TRUE;
7856
7857 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7858 SET_PAGE_DIRTY(dst_page, FALSE);
7859 }
7860 if (dst_page->vmp_absent == FALSE) {
7861 assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q);
7862 assert(dst_page->vmp_wire_count == 0);
7863 dst_page->vmp_wire_count++;
7864 dst_page->vmp_q_state = VM_PAGE_IS_WIRED;
7865 assert(dst_page->vmp_wire_count);
7866 pages_wired++;
7867 PAGE_WAKEUP_DONE(dst_page);
7868 }
7869 pages_inserted++;
7870
7871 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
7872
7873 lite_list[entry>>5] |= 1 << (entry & 31);
7874
7875 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7876
7877 if (phys_page > upl->highest_page)
7878 upl->highest_page = phys_page;
7879
7880 if (user_page_list) {
7881 user_page_list[entry].phys_addr = phys_page;
7882 user_page_list[entry].absent = dst_page->vmp_absent;
7883 user_page_list[entry].dirty = dst_page->vmp_dirty;
7884 user_page_list[entry].free_when_done = FALSE;
7885 user_page_list[entry].precious = FALSE;
7886 user_page_list[entry].device = FALSE;
7887 user_page_list[entry].speculative = FALSE;
7888 user_page_list[entry].cs_validated = FALSE;
7889 user_page_list[entry].cs_tainted = FALSE;
7890 user_page_list[entry].cs_nx = FALSE;
7891 user_page_list[entry].needed = FALSE;
7892 user_page_list[entry].mark = FALSE;
7893 }
7894 entry++;
7895 *dst_offset += PAGE_SIZE_64;
7896 }
7897done:
7898 if (pages_wired) {
7899 vm_page_lockspin_queues();
7900 vm_page_wire_count += pages_wired;
7901 vm_page_unlock_queues();
7902 }
7903 if (pages_inserted) {
7904 if (object->internal) {
7905 OSAddAtomic(pages_inserted, &vm_page_internal_count);
7906 } else {
7907 OSAddAtomic(pages_inserted, &vm_page_external_count);
7908 }
7909 }
7910 if (delayed_ledger_update) {
7911 task_t owner;
7912 int ledger_idx_volatile;
7913 int ledger_idx_nonvolatile;
7914 int ledger_idx_volatile_compressed;
7915 int ledger_idx_nonvolatile_compressed;
7916 boolean_t do_footprint;
7917
7918 owner = VM_OBJECT_OWNER(object);
7919 assert(owner);
7920
7921 vm_object_ledger_tag_ledgers(object,
7922 &ledger_idx_volatile,
7923 &ledger_idx_nonvolatile,
7924 &ledger_idx_volatile_compressed,
7925 &ledger_idx_nonvolatile_compressed,
7926 &do_footprint);
7927
7928 /* more non-volatile bytes */
7929 ledger_credit(owner->ledger,
7930 ledger_idx_nonvolatile,
7931 delayed_ledger_update);
7932 if (do_footprint) {
7933 /* more footprint */
7934 ledger_credit(owner->ledger,
7935 task_ledgers.phys_footprint,
7936 delayed_ledger_update);
7937 }
7938 }
7939
7940 assert(page_grab_count);
7941 *page_grab_count = pages_inserted;
7942
7943 return (ret);
7944}
7945
7946
7947
7948kern_return_t
7949vm_object_iopl_request(
7950 vm_object_t object,
7951 vm_object_offset_t offset,
7952 upl_size_t size,
7953 upl_t *upl_ptr,
7954 upl_page_info_array_t user_page_list,
7955 unsigned int *page_list_count,
7956 upl_control_flags_t cntrl_flags,
7957 vm_tag_t tag)
7958{
7959 vm_page_t dst_page;
7960 vm_object_offset_t dst_offset;
7961 upl_size_t xfer_size;
7962 upl_t upl = NULL;
7963 unsigned int entry;
7964 wpl_array_t lite_list = NULL;
7965 int no_zero_fill = FALSE;
7966 unsigned int size_in_pages;
7967 int page_grab_count = 0;
7968 u_int32_t psize;
7969 kern_return_t ret;
7970 vm_prot_t prot;
7971 struct vm_object_fault_info fault_info = {};
7972 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7973 struct vm_page_delayed_work *dwp;
7974 int dw_count;
7975 int dw_limit;
7976 int dw_index;
7977 boolean_t caller_lookup;
7978 int io_tracking_flag = 0;
7979 int interruptible;
7980 ppnum_t phys_page;
7981
7982 boolean_t set_cache_attr_needed = FALSE;
7983 boolean_t free_wired_pages = FALSE;
7984 boolean_t fast_path_empty_req = FALSE;
7985 boolean_t fast_path_full_req = FALSE;
7986
7987 if (cntrl_flags & ~UPL_VALID_FLAGS) {
7988 /*
7989 * For forward compatibility's sake,
7990 * reject any unknown flag.
7991 */
7992 return KERN_INVALID_VALUE;
7993 }
7994 if (vm_lopage_needed == FALSE)
7995 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
7996
7997 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
7998 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
7999 return KERN_INVALID_VALUE;
8000
8001 if (object->phys_contiguous) {
8002 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8003 return KERN_INVALID_ADDRESS;
8004
8005 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8006 return KERN_INVALID_ADDRESS;
8007 }
8008 }
8009 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8010 no_zero_fill = TRUE;
8011
8012 if (cntrl_flags & UPL_COPYOUT_FROM)
8013 prot = VM_PROT_READ;
8014 else
8015 prot = VM_PROT_READ | VM_PROT_WRITE;
8016
8017 if ((!object->internal) && (object->paging_offset != 0))
8018 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8019
8020 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0);
8021
8022#if CONFIG_IOSCHED || UPL_DEBUG
8023 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8024 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8025#endif
8026
8027#if CONFIG_IOSCHED
8028 if (object->io_tracking) {
8029 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8030 if (object != kernel_object)
8031 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8032 }
8033#endif
8034
8035 if (object->phys_contiguous)
8036 psize = PAGE_SIZE;
8037 else
8038 psize = size;
8039
8040 if (cntrl_flags & UPL_SET_INTERNAL) {
8041 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8042
8043 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8044 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8045 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8046 if (size == 0) {
8047 user_page_list = NULL;
8048 lite_list = NULL;
8049 }
8050 } else {
8051 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8052
8053 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8054 if (size == 0) {
8055 lite_list = NULL;
8056 }
8057 }
8058 if (user_page_list)
8059 user_page_list[0].device = FALSE;
8060 *upl_ptr = upl;
8061
8062 if (cntrl_flags & UPL_NOZEROFILLIO) {
8063 DTRACE_VM4(upl_nozerofillio,
8064 vm_object_t, object,
8065 vm_object_offset_t, offset,
8066 upl_size_t, size,
8067 upl_t, upl);
8068 }
8069
8070 upl->map_object = object;
8071 upl->size = size;
8072
8073 size_in_pages = size / PAGE_SIZE;
8074
8075 if (object == kernel_object &&
8076 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8077 upl->flags |= UPL_KERNEL_OBJECT;
8078#if UPL_DEBUG
8079 vm_object_lock(object);
8080#else
8081 vm_object_lock_shared(object);
8082#endif
8083 } else {
8084 vm_object_lock(object);
8085 vm_object_activity_begin(object);
8086 }
8087 /*
8088 * paging in progress also protects the paging_offset
8089 */
8090 upl->offset = offset + object->paging_offset;
8091
8092 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8093 /*
8094 * The user requested that access to the pages in this UPL
8095 * be blocked until the UPL is commited or aborted.
8096 */
8097 upl->flags |= UPL_ACCESS_BLOCKED;
8098 }
8099
8100#if CONFIG_IOSCHED || UPL_DEBUG
8101 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8102 vm_object_activity_begin(object);
8103 queue_enter(&object->uplq, upl, upl_t, uplq);
8104 }
8105#endif
8106
8107 if (object->phys_contiguous) {
8108
8109 if (upl->flags & UPL_ACCESS_BLOCKED) {
8110 assert(!object->blocked_access);
8111 object->blocked_access = TRUE;
8112 }
8113
8114 vm_object_unlock(object);
8115
8116 /*
8117 * don't need any shadow mappings for this one
8118 * since it is already I/O memory
8119 */
8120 upl->flags |= UPL_DEVICE_MEMORY;
8121
8122 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8123
8124 if (user_page_list) {
8125 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8126 user_page_list[0].device = TRUE;
8127 }
8128 if (page_list_count != NULL) {
8129 if (upl->flags & UPL_INTERNAL)
8130 *page_list_count = 0;
8131 else
8132 *page_list_count = 1;
8133 }
8134
8135 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8136 return KERN_SUCCESS;
8137 }
8138 if (object != kernel_object && object != compressor_object) {
8139 /*
8140 * Protect user space from future COW operations
8141 */
8142#if VM_OBJECT_TRACKING_OP_TRUESHARE
8143 if (!object->true_share &&
8144 vm_object_tracking_inited) {
8145 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8146 int num = 0;
8147
8148 num = OSBacktrace(bt,
8149 VM_OBJECT_TRACKING_BTDEPTH);
8150 btlog_add_entry(vm_object_tracking_btlog,
8151 object,
8152 VM_OBJECT_TRACKING_OP_TRUESHARE,
8153 bt,
8154 num);
8155 }
8156#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8157
8158 vm_object_lock_assert_exclusive(object);
8159 object->true_share = TRUE;
8160
8161 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8162 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8163 }
8164
8165 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8166 object->copy != VM_OBJECT_NULL) {
8167 /*
8168 * Honor copy-on-write obligations
8169 *
8170 * The caller is gathering these pages and
8171 * might modify their contents. We need to
8172 * make sure that the copy object has its own
8173 * private copies of these pages before we let
8174 * the caller modify them.
8175 *
8176 * NOTE: someone else could map the original object
8177 * after we've done this copy-on-write here, and they
8178 * could then see an inconsistent picture of the memory
8179 * while it's being modified via the UPL. To prevent this,
8180 * we would have to block access to these pages until the
8181 * UPL is released. We could use the UPL_BLOCK_ACCESS
8182 * code path for that...
8183 */
8184 vm_object_update(object,
8185 offset,
8186 size,
8187 NULL,
8188 NULL,
8189 FALSE, /* should_return */
8190 MEMORY_OBJECT_COPY_SYNC,
8191 VM_PROT_NO_CHANGE);
8192 VM_PAGEOUT_DEBUG(iopl_cow, 1);
8193 VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT));
8194 }
8195 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8196 object->purgable != VM_PURGABLE_VOLATILE &&
8197 object->purgable != VM_PURGABLE_EMPTY &&
8198 object->copy == NULL &&
8199 size == object->vo_size &&
8200 offset == 0 &&
8201 object->shadow == NULL &&
8202 object->pager == NULL)
8203 {
8204 if (object->resident_page_count == size_in_pages)
8205 {
8206 assert(object != compressor_object);
8207 assert(object != kernel_object);
8208 fast_path_full_req = TRUE;
8209 }
8210 else if (object->resident_page_count == 0)
8211 {
8212 assert(object != compressor_object);
8213 assert(object != kernel_object);
8214 fast_path_empty_req = TRUE;
8215 set_cache_attr_needed = TRUE;
8216 }
8217 }
8218
8219 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8220 interruptible = THREAD_ABORTSAFE;
8221 else
8222 interruptible = THREAD_UNINT;
8223
8224 entry = 0;
8225
8226 xfer_size = size;
8227 dst_offset = offset;
8228 dw_count = 0;
8229
8230 if (fast_path_full_req) {
8231
8232 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8233 goto finish;
8234 /*
8235 * we couldn't complete the processing of this request on the fast path
8236 * so fall through to the slow path and finish up
8237 */
8238
8239 } else if (fast_path_empty_req) {
8240
8241 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8242 ret = KERN_MEMORY_ERROR;
8243 goto return_err;
8244 }
8245 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
8246
8247 if (ret) {
8248 free_wired_pages = TRUE;
8249 goto return_err;
8250 }
8251 goto finish;
8252 }
8253
8254 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8255 fault_info.lo_offset = offset;
8256 fault_info.hi_offset = offset + xfer_size;
8257 fault_info.mark_zf_absent = TRUE;
8258 fault_info.interruptible = interruptible;
8259 fault_info.batch_pmap_op = TRUE;
8260
8261 dwp = &dw_array[0];
8262 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8263
8264 while (xfer_size) {
8265 vm_fault_return_t result;
8266
8267 dwp->dw_mask = 0;
8268
8269 if (fast_path_full_req) {
8270 /*
8271 * if we get here, it means that we ran into a page
8272 * state we couldn't handle in the fast path and
8273 * bailed out to the slow path... since the order
8274 * we look at pages is different between the 2 paths,
8275 * the following check is needed to determine whether
8276 * this page was already processed in the fast path
8277 */
8278 if (lite_list[entry>>5] & (1 << (entry & 31)))
8279 goto skip_page;
8280 }
8281 dst_page = vm_page_lookup(object, dst_offset);
8282
8283 if (dst_page == VM_PAGE_NULL ||
8284 dst_page->vmp_busy ||
8285 dst_page->vmp_error ||
8286 dst_page->vmp_restart ||
8287 dst_page->vmp_absent ||
8288 dst_page->vmp_fictitious) {
8289
8290 if (object == kernel_object)
8291 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8292 if (object == compressor_object)
8293 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8294
8295 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8296 ret = KERN_MEMORY_ERROR;
8297 goto return_err;
8298 }
8299 set_cache_attr_needed = TRUE;
8300
8301 /*
8302 * We just looked up the page and the result remains valid
8303 * until the object lock is release, so send it to
8304 * vm_fault_page() (as "dst_page"), to avoid having to
8305 * look it up again there.
8306 */
8307 caller_lookup = TRUE;
8308
8309 do {
8310 vm_page_t top_page;
8311 kern_return_t error_code;
8312
8313 fault_info.cluster_size = xfer_size;
8314
8315 vm_object_paging_begin(object);
8316
8317 result = vm_fault_page(object, dst_offset,
8318 prot | VM_PROT_WRITE, FALSE,
8319 caller_lookup,
8320 &prot, &dst_page, &top_page,
8321 (int *)0,
8322 &error_code, no_zero_fill,
8323 FALSE, &fault_info);
8324
8325 /* our lookup is no longer valid at this point */
8326 caller_lookup = FALSE;
8327
8328 switch (result) {
8329
8330 case VM_FAULT_SUCCESS:
8331 page_grab_count++;
8332
8333 if ( !dst_page->vmp_absent) {
8334 PAGE_WAKEUP_DONE(dst_page);
8335 } else {
8336 /*
8337 * we only get back an absent page if we
8338 * requested that it not be zero-filled
8339 * because we are about to fill it via I/O
8340 *
8341 * absent pages should be left BUSY
8342 * to prevent them from being faulted
8343 * into an address space before we've
8344 * had a chance to complete the I/O on
8345 * them since they may contain info that
8346 * shouldn't be seen by the faulting task
8347 */
8348 }
8349 /*
8350 * Release paging references and
8351 * top-level placeholder page, if any.
8352 */
8353 if (top_page != VM_PAGE_NULL) {
8354 vm_object_t local_object;
8355
8356 local_object = VM_PAGE_OBJECT(top_page);
8357
8358 /*
8359 * comparing 2 packed pointers
8360 */
8361 if (top_page->vmp_object != dst_page->vmp_object) {
8362 vm_object_lock(local_object);
8363 VM_PAGE_FREE(top_page);
8364 vm_object_paging_end(local_object);
8365 vm_object_unlock(local_object);
8366 } else {
8367 VM_PAGE_FREE(top_page);
8368 vm_object_paging_end(local_object);
8369 }
8370 }
8371 vm_object_paging_end(object);
8372 break;
8373
8374 case VM_FAULT_RETRY:
8375 vm_object_lock(object);
8376 break;
8377
8378 case VM_FAULT_MEMORY_SHORTAGE:
8379 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8380
8381 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8382
8383 if (vm_page_wait(interruptible)) {
8384 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8385
8386 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8387 vm_object_lock(object);
8388
8389 break;
8390 }
8391 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8392
8393 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8394
8395 /* fall thru */
8396
8397 case VM_FAULT_INTERRUPTED:
8398 error_code = MACH_SEND_INTERRUPTED;
8399 case VM_FAULT_MEMORY_ERROR:
8400 memory_error:
8401 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8402
8403 vm_object_lock(object);
8404 goto return_err;
8405
8406 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8407 /* success but no page: fail */
8408 vm_object_paging_end(object);
8409 vm_object_unlock(object);
8410 goto memory_error;
8411
8412 default:
8413 panic("vm_object_iopl_request: unexpected error"
8414 " 0x%x from vm_fault_page()\n", result);
8415 }
8416 } while (result != VM_FAULT_SUCCESS);
8417
8418 }
8419 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8420
8421 if (upl->flags & UPL_KERNEL_OBJECT)
8422 goto record_phys_addr;
8423
8424 if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8425 dst_page->vmp_busy = TRUE;
8426 goto record_phys_addr;
8427 }
8428
8429 if (dst_page->vmp_cleaning) {
8430 /*
8431 * Someone else is cleaning this page in place.
8432 * In theory, we should be able to proceed and use this
8433 * page but they'll probably end up clearing the "busy"
8434 * bit on it in upl_commit_range() but they didn't set
8435 * it, so they would clear our "busy" bit and open
8436 * us to race conditions.
8437 * We'd better wait for the cleaning to complete and
8438 * then try again.
8439 */
8440 VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, 1);
8441 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8442 continue;
8443 }
8444 if (dst_page->vmp_laundry)
8445 vm_pageout_steal_laundry(dst_page, FALSE);
8446
8447 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8448 phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8449 vm_page_t low_page;
8450 int refmod;
8451
8452 /*
8453 * support devices that can't DMA above 32 bits
8454 * by substituting pages from a pool of low address
8455 * memory for any pages we find above the 4G mark
8456 * can't substitute if the page is already wired because
8457 * we don't know whether that physical address has been
8458 * handed out to some other 64 bit capable DMA device to use
8459 */
8460 if (VM_PAGE_WIRED(dst_page)) {
8461 ret = KERN_PROTECTION_FAILURE;
8462 goto return_err;
8463 }
8464 low_page = vm_page_grablo();
8465
8466 if (low_page == VM_PAGE_NULL) {
8467 ret = KERN_RESOURCE_SHORTAGE;
8468 goto return_err;
8469 }
8470 /*
8471 * from here until the vm_page_replace completes
8472 * we musn't drop the object lock... we don't
8473 * want anyone refaulting this page in and using
8474 * it after we disconnect it... we want the fault
8475 * to find the new page being substituted.
8476 */
8477 if (dst_page->vmp_pmapped)
8478 refmod = pmap_disconnect(phys_page);
8479 else
8480 refmod = 0;
8481
8482 if (!dst_page->vmp_absent)
8483 vm_page_copy(dst_page, low_page);
8484
8485 low_page->vmp_reference = dst_page->vmp_reference;
8486 low_page->vmp_dirty = dst_page->vmp_dirty;
8487 low_page->vmp_absent = dst_page->vmp_absent;
8488
8489 if (refmod & VM_MEM_REFERENCED)
8490 low_page->vmp_reference = TRUE;
8491 if (refmod & VM_MEM_MODIFIED) {
8492 SET_PAGE_DIRTY(low_page, FALSE);
8493 }
8494
8495 vm_page_replace(low_page, object, dst_offset);
8496
8497 dst_page = low_page;
8498 /*
8499 * vm_page_grablo returned the page marked
8500 * BUSY... we don't need a PAGE_WAKEUP_DONE
8501 * here, because we've never dropped the object lock
8502 */
8503 if ( !dst_page->vmp_absent)
8504 dst_page->vmp_busy = FALSE;
8505
8506 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8507 }
8508 if ( !dst_page->vmp_busy)
8509 dwp->dw_mask |= DW_vm_page_wire;
8510
8511 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8512 /*
8513 * Mark the page "busy" to block any future page fault
8514 * on this page in addition to wiring it.
8515 * We'll also remove the mapping
8516 * of all these pages before leaving this routine.
8517 */
8518 assert(!dst_page->vmp_fictitious);
8519 dst_page->vmp_busy = TRUE;
8520 }
8521 /*
8522 * expect the page to be used
8523 * page queues lock must be held to set 'reference'
8524 */
8525 dwp->dw_mask |= DW_set_reference;
8526
8527 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8528 SET_PAGE_DIRTY(dst_page, TRUE);
8529 }
8530 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
8531 pmap_sync_page_attributes_phys(phys_page);
8532 dst_page->vmp_written_by_kernel = FALSE;
8533 }
8534
8535record_phys_addr:
8536 if (dst_page->vmp_busy)
8537 upl->flags |= UPL_HAS_BUSY;
8538
8539 lite_list[entry>>5] |= 1 << (entry & 31);
8540
8541 if (phys_page > upl->highest_page)
8542 upl->highest_page = phys_page;
8543
8544 if (user_page_list) {
8545 user_page_list[entry].phys_addr = phys_page;
8546 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
8547 user_page_list[entry].absent = dst_page->vmp_absent;
8548 user_page_list[entry].dirty = dst_page->vmp_dirty;
8549 user_page_list[entry].precious = dst_page->vmp_precious;
8550 user_page_list[entry].device = FALSE;
8551 user_page_list[entry].needed = FALSE;
8552 if (dst_page->vmp_clustered == TRUE)
8553 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8554 else
8555 user_page_list[entry].speculative = FALSE;
8556 user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
8557 user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
8558 user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
8559 user_page_list[entry].mark = FALSE;
8560 }
8561 if (object != kernel_object && object != compressor_object) {
8562 /*
8563 * someone is explicitly grabbing this page...
8564 * update clustered and speculative state
8565 *
8566 */
8567 if (dst_page->vmp_clustered)
8568 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8569 }
8570skip_page:
8571 entry++;
8572 dst_offset += PAGE_SIZE_64;
8573 xfer_size -= PAGE_SIZE;
8574
8575 if (dwp->dw_mask) {
8576 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8577
8578 if (dw_count >= dw_limit) {
8579 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8580
8581 dwp = &dw_array[0];
8582 dw_count = 0;
8583 }
8584 }
8585 }
8586 assert(entry == size_in_pages);
8587
8588 if (dw_count)
8589 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8590finish:
8591 if (user_page_list && set_cache_attr_needed == TRUE)
8592 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8593
8594 if (page_list_count != NULL) {
8595 if (upl->flags & UPL_INTERNAL)
8596 *page_list_count = 0;
8597 else if (*page_list_count > size_in_pages)
8598 *page_list_count = size_in_pages;
8599 }
8600 vm_object_unlock(object);
8601
8602 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8603 /*
8604 * We've marked all the pages "busy" so that future
8605 * page faults will block.
8606 * Now remove the mapping for these pages, so that they
8607 * can't be accessed without causing a page fault.
8608 */
8609 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8610 PMAP_NULL, 0, VM_PROT_NONE);
8611 assert(!object->blocked_access);
8612 object->blocked_access = TRUE;
8613 }
8614
8615 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8616 return KERN_SUCCESS;
8617
8618return_err:
8619 dw_index = 0;
8620
8621 for (; offset < dst_offset; offset += PAGE_SIZE) {
8622 boolean_t need_unwire;
8623
8624 dst_page = vm_page_lookup(object, offset);
8625
8626 if (dst_page == VM_PAGE_NULL)
8627 panic("vm_object_iopl_request: Wired page missing. \n");
8628
8629 /*
8630 * if we've already processed this page in an earlier
8631 * dw_do_work, we need to undo the wiring... we will
8632 * leave the dirty and reference bits on if they
8633 * were set, since we don't have a good way of knowing
8634 * what the previous state was and we won't get here
8635 * under any normal circumstances... we will always
8636 * clear BUSY and wakeup any waiters via vm_page_free
8637 * or PAGE_WAKEUP_DONE
8638 */
8639 need_unwire = TRUE;
8640
8641 if (dw_count) {
8642 if (dw_array[dw_index].dw_m == dst_page) {
8643 /*
8644 * still in the deferred work list
8645 * which means we haven't yet called
8646 * vm_page_wire on this page
8647 */
8648 need_unwire = FALSE;
8649
8650 dw_index++;
8651 dw_count--;
8652 }
8653 }
8654 vm_page_lock_queues();
8655
8656 if (dst_page->vmp_absent || free_wired_pages == TRUE) {
8657 vm_page_free(dst_page);
8658
8659 need_unwire = FALSE;
8660 } else {
8661 if (need_unwire == TRUE)
8662 vm_page_unwire(dst_page, TRUE);
8663
8664 PAGE_WAKEUP_DONE(dst_page);
8665 }
8666 vm_page_unlock_queues();
8667
8668 if (need_unwire == TRUE)
8669 VM_STAT_INCR(reactivations);
8670 }
8671#if UPL_DEBUG
8672 upl->upl_state = 2;
8673#endif
8674 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8675 vm_object_activity_end(object);
8676 vm_object_collapse(object, 0, TRUE);
8677 }
8678 vm_object_unlock(object);
8679 upl_destroy(upl);
8680
8681 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, 0, 0);
8682 return ret;
8683}
8684
8685kern_return_t
8686upl_transpose(
8687 upl_t upl1,
8688 upl_t upl2)
8689{
8690 kern_return_t retval;
8691 boolean_t upls_locked;
8692 vm_object_t object1, object2;
8693
8694 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8695 return KERN_INVALID_ARGUMENT;
8696 }
8697
8698 upls_locked = FALSE;
8699
8700 /*
8701 * Since we need to lock both UPLs at the same time,
8702 * avoid deadlocks by always taking locks in the same order.
8703 */
8704 if (upl1 < upl2) {
8705 upl_lock(upl1);
8706 upl_lock(upl2);
8707 } else {
8708 upl_lock(upl2);
8709 upl_lock(upl1);
8710 }
8711 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8712
8713 object1 = upl1->map_object;
8714 object2 = upl2->map_object;
8715
8716 if (upl1->offset != 0 || upl2->offset != 0 ||
8717 upl1->size != upl2->size) {
8718 /*
8719 * We deal only with full objects, not subsets.
8720 * That's because we exchange the entire backing store info
8721 * for the objects: pager, resident pages, etc... We can't do
8722 * only part of it.
8723 */
8724 retval = KERN_INVALID_VALUE;
8725 goto done;
8726 }
8727
8728 /*
8729 * Tranpose the VM objects' backing store.
8730 */
8731 retval = vm_object_transpose(object1, object2,
8732 (vm_object_size_t) upl1->size);
8733
8734 if (retval == KERN_SUCCESS) {
8735 /*
8736 * Make each UPL point to the correct VM object, i.e. the
8737 * object holding the pages that the UPL refers to...
8738 */
8739#if CONFIG_IOSCHED || UPL_DEBUG
8740 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8741 vm_object_lock(object1);
8742 vm_object_lock(object2);
8743 }
8744 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8745 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8746 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8747 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8748#endif
8749 upl1->map_object = object2;
8750 upl2->map_object = object1;
8751
8752#if CONFIG_IOSCHED || UPL_DEBUG
8753 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8754 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8755 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8756 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8757 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8758 vm_object_unlock(object2);
8759 vm_object_unlock(object1);
8760 }
8761#endif
8762 }
8763
8764done:
8765 /*
8766 * Cleanup.
8767 */
8768 if (upls_locked) {
8769 upl_unlock(upl1);
8770 upl_unlock(upl2);
8771 upls_locked = FALSE;
8772 }
8773
8774 return retval;
8775}
8776
8777void
8778upl_range_needed(
8779 upl_t upl,
8780 int index,
8781 int count)
8782{
8783 upl_page_info_t *user_page_list;
8784 int size_in_pages;
8785
8786 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8787 return;
8788
8789 size_in_pages = upl->size / PAGE_SIZE;
8790
8791 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8792
8793 while (count-- && index < size_in_pages)
8794 user_page_list[index++].needed = TRUE;
8795}
8796
8797
8798/*
8799 * Reserve of virtual addresses in the kernel address space.
8800 * We need to map the physical pages in the kernel, so that we
8801 * can call the code-signing or slide routines with a kernel
8802 * virtual address. We keep this pool of pre-allocated kernel
8803 * virtual addresses so that we don't have to scan the kernel's
8804 * virtaul address space each time we need to work with
8805 * a physical page.
8806 */
8807decl_simple_lock_data(,vm_paging_lock)
8808#define VM_PAGING_NUM_PAGES 64
8809vm_map_offset_t vm_paging_base_address = 0;
8810boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8811int vm_paging_max_index = 0;
8812int vm_paging_page_waiter = 0;
8813int vm_paging_page_waiter_total = 0;
8814
8815unsigned long vm_paging_no_kernel_page = 0;
8816unsigned long vm_paging_objects_mapped = 0;
8817unsigned long vm_paging_pages_mapped = 0;
8818unsigned long vm_paging_objects_mapped_slow = 0;
8819unsigned long vm_paging_pages_mapped_slow = 0;
8820
8821void
8822vm_paging_map_init(void)
8823{
8824 kern_return_t kr;
8825 vm_map_offset_t page_map_offset;
8826 vm_map_entry_t map_entry;
8827
8828 assert(vm_paging_base_address == 0);
8829
8830 /*
8831 * Initialize our pool of pre-allocated kernel
8832 * virtual addresses.
8833 */
8834 page_map_offset = 0;
8835 kr = vm_map_find_space(kernel_map,
8836 &page_map_offset,
8837 VM_PAGING_NUM_PAGES * PAGE_SIZE,
8838 0,
8839 0,
8840 VM_MAP_KERNEL_FLAGS_NONE,
8841 VM_KERN_MEMORY_NONE,
8842 &map_entry);
8843 if (kr != KERN_SUCCESS) {
8844 panic("vm_paging_map_init: kernel_map full\n");
8845 }
8846 VME_OBJECT_SET(map_entry, kernel_object);
8847 VME_OFFSET_SET(map_entry, page_map_offset);
8848 map_entry->protection = VM_PROT_NONE;
8849 map_entry->max_protection = VM_PROT_NONE;
8850 map_entry->permanent = TRUE;
8851 vm_object_reference(kernel_object);
8852 vm_map_unlock(kernel_map);
8853
8854 assert(vm_paging_base_address == 0);
8855 vm_paging_base_address = page_map_offset;
8856}
8857
8858/*
8859 * vm_paging_map_object:
8860 * Maps part of a VM object's pages in the kernel
8861 * virtual address space, using the pre-allocated
8862 * kernel virtual addresses, if possible.
8863 * Context:
8864 * The VM object is locked. This lock will get
8865 * dropped and re-acquired though, so the caller
8866 * must make sure the VM object is kept alive
8867 * (by holding a VM map that has a reference
8868 * on it, for example, or taking an extra reference).
8869 * The page should also be kept busy to prevent
8870 * it from being reclaimed.
8871 */
8872kern_return_t
8873vm_paging_map_object(
8874 vm_page_t page,
8875 vm_object_t object,
8876 vm_object_offset_t offset,
8877 vm_prot_t protection,
8878 boolean_t can_unlock_object,
8879 vm_map_size_t *size, /* IN/OUT */
8880 vm_map_offset_t *address, /* OUT */
8881 boolean_t *need_unmap) /* OUT */
8882{
8883 kern_return_t kr;
8884 vm_map_offset_t page_map_offset;
8885 vm_map_size_t map_size;
8886 vm_object_offset_t object_offset;
8887 int i;
8888
8889 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
8890 /* use permanent 1-to-1 kernel mapping of physical memory ? */
8891#if __x86_64__
8892 *address = (vm_map_offset_t)
8893 PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
8894 PAGE_SHIFT);
8895 *need_unmap = FALSE;
8896 return KERN_SUCCESS;
8897#elif __arm__ || __arm64__
8898 *address = (vm_map_offset_t)
8899 phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
8900 *need_unmap = FALSE;
8901 return KERN_SUCCESS;
8902#else
8903#warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
8904#endif
8905
8906 assert(page->vmp_busy);
8907 /*
8908 * Use one of the pre-allocated kernel virtual addresses
8909 * and just enter the VM page in the kernel address space
8910 * at that virtual address.
8911 */
8912 simple_lock(&vm_paging_lock);
8913
8914 /*
8915 * Try and find an available kernel virtual address
8916 * from our pre-allocated pool.
8917 */
8918 page_map_offset = 0;
8919 for (;;) {
8920 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
8921 if (vm_paging_page_inuse[i] == FALSE) {
8922 page_map_offset =
8923 vm_paging_base_address +
8924 (i * PAGE_SIZE);
8925 break;
8926 }
8927 }
8928 if (page_map_offset != 0) {
8929 /* found a space to map our page ! */
8930 break;
8931 }
8932
8933 if (can_unlock_object) {
8934 /*
8935 * If we can afford to unlock the VM object,
8936 * let's take the slow path now...
8937 */
8938 break;
8939 }
8940 /*
8941 * We can't afford to unlock the VM object, so
8942 * let's wait for a space to become available...
8943 */
8944 vm_paging_page_waiter_total++;
8945 vm_paging_page_waiter++;
8946 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
8947 if (kr == THREAD_WAITING) {
8948 simple_unlock(&vm_paging_lock);
8949 kr = thread_block(THREAD_CONTINUE_NULL);
8950 simple_lock(&vm_paging_lock);
8951 }
8952 vm_paging_page_waiter--;
8953 /* ... and try again */
8954 }
8955
8956 if (page_map_offset != 0) {
8957 /*
8958 * We found a kernel virtual address;
8959 * map the physical page to that virtual address.
8960 */
8961 if (i > vm_paging_max_index) {
8962 vm_paging_max_index = i;
8963 }
8964 vm_paging_page_inuse[i] = TRUE;
8965 simple_unlock(&vm_paging_lock);
8966
8967 page->vmp_pmapped = TRUE;
8968
8969 /*
8970 * Keep the VM object locked over the PMAP_ENTER
8971 * and the actual use of the page by the kernel,
8972 * or this pmap mapping might get undone by a
8973 * vm_object_pmap_protect() call...
8974 */
8975 PMAP_ENTER(kernel_pmap,
8976 page_map_offset,
8977 page,
8978 protection,
8979 VM_PROT_NONE,
8980 0,
8981 TRUE,
8982 kr);
8983 assert(kr == KERN_SUCCESS);
8984 vm_paging_objects_mapped++;
8985 vm_paging_pages_mapped++;
8986 *address = page_map_offset;
8987 *need_unmap = TRUE;
8988
8989#if KASAN
8990 kasan_notify_address(page_map_offset, PAGE_SIZE);
8991#endif
8992
8993 /* all done and mapped, ready to use ! */
8994 return KERN_SUCCESS;
8995 }
8996
8997 /*
8998 * We ran out of pre-allocated kernel virtual
8999 * addresses. Just map the page in the kernel
9000 * the slow and regular way.
9001 */
9002 vm_paging_no_kernel_page++;
9003 simple_unlock(&vm_paging_lock);
9004 }
9005
9006 if (! can_unlock_object) {
9007 *address = 0;
9008 *size = 0;
9009 *need_unmap = FALSE;
9010 return KERN_NOT_SUPPORTED;
9011 }
9012
9013 object_offset = vm_object_trunc_page(offset);
9014 map_size = vm_map_round_page(*size,
9015 VM_MAP_PAGE_MASK(kernel_map));
9016
9017 /*
9018 * Try and map the required range of the object
9019 * in the kernel_map
9020 */
9021
9022 vm_object_reference_locked(object); /* for the map entry */
9023 vm_object_unlock(object);
9024
9025 kr = vm_map_enter(kernel_map,
9026 address,
9027 map_size,
9028 0,
9029 VM_FLAGS_ANYWHERE,
9030 VM_MAP_KERNEL_FLAGS_NONE,
9031 VM_KERN_MEMORY_NONE,
9032 object,
9033 object_offset,
9034 FALSE,
9035 protection,
9036 VM_PROT_ALL,
9037 VM_INHERIT_NONE);
9038 if (kr != KERN_SUCCESS) {
9039 *address = 0;
9040 *size = 0;
9041 *need_unmap = FALSE;
9042 vm_object_deallocate(object); /* for the map entry */
9043 vm_object_lock(object);
9044 return kr;
9045 }
9046
9047 *size = map_size;
9048
9049 /*
9050 * Enter the mapped pages in the page table now.
9051 */
9052 vm_object_lock(object);
9053 /*
9054 * VM object must be kept locked from before PMAP_ENTER()
9055 * until after the kernel is done accessing the page(s).
9056 * Otherwise, the pmap mappings in the kernel could be
9057 * undone by a call to vm_object_pmap_protect().
9058 */
9059
9060 for (page_map_offset = 0;
9061 map_size != 0;
9062 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9063
9064 page = vm_page_lookup(object, offset + page_map_offset);
9065 if (page == VM_PAGE_NULL) {
9066 printf("vm_paging_map_object: no page !?");
9067 vm_object_unlock(object);
9068 kr = vm_map_remove(kernel_map, *address, *size,
9069 VM_MAP_REMOVE_NO_FLAGS);
9070 assert(kr == KERN_SUCCESS);
9071 *address = 0;
9072 *size = 0;
9073 *need_unmap = FALSE;
9074 vm_object_lock(object);
9075 return KERN_MEMORY_ERROR;
9076 }
9077 page->vmp_pmapped = TRUE;
9078
9079 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9080 PMAP_ENTER(kernel_pmap,
9081 *address + page_map_offset,
9082 page,
9083 protection,
9084 VM_PROT_NONE,
9085 0,
9086 TRUE,
9087 kr);
9088 assert(kr == KERN_SUCCESS);
9089#if KASAN
9090 kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9091#endif
9092 }
9093
9094 vm_paging_objects_mapped_slow++;
9095 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9096
9097 *need_unmap = TRUE;
9098
9099 return KERN_SUCCESS;
9100}
9101
9102/*
9103 * vm_paging_unmap_object:
9104 * Unmaps part of a VM object's pages from the kernel
9105 * virtual address space.
9106 * Context:
9107 * The VM object is locked. This lock will get
9108 * dropped and re-acquired though.
9109 */
9110void
9111vm_paging_unmap_object(
9112 vm_object_t object,
9113 vm_map_offset_t start,
9114 vm_map_offset_t end)
9115{
9116 kern_return_t kr;
9117 int i;
9118
9119 if ((vm_paging_base_address == 0) ||
9120 (start < vm_paging_base_address) ||
9121 (end > (vm_paging_base_address
9122 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9123 /*
9124 * We didn't use our pre-allocated pool of
9125 * kernel virtual address. Deallocate the
9126 * virtual memory.
9127 */
9128 if (object != VM_OBJECT_NULL) {
9129 vm_object_unlock(object);
9130 }
9131 kr = vm_map_remove(kernel_map, start, end,
9132 VM_MAP_REMOVE_NO_FLAGS);
9133 if (object != VM_OBJECT_NULL) {
9134 vm_object_lock(object);
9135 }
9136 assert(kr == KERN_SUCCESS);
9137 } else {
9138 /*
9139 * We used a kernel virtual address from our
9140 * pre-allocated pool. Put it back in the pool
9141 * for next time.
9142 */
9143 assert(end - start == PAGE_SIZE);
9144 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9145 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9146
9147 /* undo the pmap mapping */
9148 pmap_remove(kernel_pmap, start, end);
9149
9150 simple_lock(&vm_paging_lock);
9151 vm_paging_page_inuse[i] = FALSE;
9152 if (vm_paging_page_waiter) {
9153 thread_wakeup(&vm_paging_page_waiter);
9154 }
9155 simple_unlock(&vm_paging_lock);
9156 }
9157}
9158
9159
9160/*
9161 * page->vmp_object must be locked
9162 */
9163void
9164vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9165{
9166 if (!queues_locked) {
9167 vm_page_lockspin_queues();
9168 }
9169
9170 page->vmp_free_when_done = FALSE;
9171 /*
9172 * need to drop the laundry count...
9173 * we may also need to remove it
9174 * from the I/O paging queue...
9175 * vm_pageout_throttle_up handles both cases
9176 *
9177 * the laundry and pageout_queue flags are cleared...
9178 */
9179 vm_pageout_throttle_up(page);
9180
9181 if (!queues_locked) {
9182 vm_page_unlock_queues();
9183 }
9184}
9185
9186upl_t
9187vector_upl_create(vm_offset_t upl_offset)
9188{
9189 int vector_upl_size = sizeof(struct _vector_upl);
9190 int i=0;
9191 upl_t upl;
9192 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9193
9194 upl = upl_create(0,UPL_VECTOR,0);
9195 upl->vector_upl = vector_upl;
9196 upl->offset = upl_offset;
9197 vector_upl->size = 0;
9198 vector_upl->offset = upl_offset;
9199 vector_upl->invalid_upls=0;
9200 vector_upl->num_upls=0;
9201 vector_upl->pagelist = NULL;
9202
9203 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9204 vector_upl->upl_iostates[i].size = 0;
9205 vector_upl->upl_iostates[i].offset = 0;
9206
9207 }
9208 return upl;
9209}
9210
9211void
9212vector_upl_deallocate(upl_t upl)
9213{
9214 if(upl) {
9215 vector_upl_t vector_upl = upl->vector_upl;
9216 if(vector_upl) {
9217 if(vector_upl->invalid_upls != vector_upl->num_upls)
9218 panic("Deallocating non-empty Vectored UPL\n");
9219 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9220 vector_upl->invalid_upls=0;
9221 vector_upl->num_upls = 0;
9222 vector_upl->pagelist = NULL;
9223 vector_upl->size = 0;
9224 vector_upl->offset = 0;
9225 kfree(vector_upl, sizeof(struct _vector_upl));
9226 vector_upl = (vector_upl_t)0xfeedfeed;
9227 }
9228 else
9229 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9230 }
9231 else
9232 panic("vector_upl_deallocate was passed a NULL upl\n");
9233}
9234
9235boolean_t
9236vector_upl_is_valid(upl_t upl)
9237{
9238 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9239 vector_upl_t vector_upl = upl->vector_upl;
9240 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9241 return FALSE;
9242 else
9243 return TRUE;
9244 }
9245 return FALSE;
9246}
9247
9248boolean_t
9249vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9250{
9251 if(vector_upl_is_valid(upl)) {
9252 vector_upl_t vector_upl = upl->vector_upl;
9253
9254 if(vector_upl) {
9255 if(subupl) {
9256 if(io_size) {
9257 if(io_size < PAGE_SIZE)
9258 io_size = PAGE_SIZE;
9259 subupl->vector_upl = (void*)vector_upl;
9260 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9261 vector_upl->size += io_size;
9262 upl->size += io_size;
9263 }
9264 else {
9265 uint32_t i=0,invalid_upls=0;
9266 for(i = 0; i < vector_upl->num_upls; i++) {
9267 if(vector_upl->upl_elems[i] == subupl)
9268 break;
9269 }
9270 if(i == vector_upl->num_upls)
9271 panic("Trying to remove sub-upl when none exists");
9272
9273 vector_upl->upl_elems[i] = NULL;
9274 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9275 if(invalid_upls == vector_upl->num_upls)
9276 return TRUE;
9277 else
9278 return FALSE;
9279 }
9280 }
9281 else
9282 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9283 }
9284 else
9285 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9286 }
9287 else
9288 panic("vector_upl_set_subupl was passed a NULL upl\n");
9289
9290 return FALSE;
9291}
9292
9293void
9294vector_upl_set_pagelist(upl_t upl)
9295{
9296 if(vector_upl_is_valid(upl)) {
9297 uint32_t i=0;
9298 vector_upl_t vector_upl = upl->vector_upl;
9299
9300 if(vector_upl) {
9301 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9302
9303 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9304
9305 for(i=0; i < vector_upl->num_upls; i++) {
9306 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9307 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9308 pagelist_size += cur_upl_pagelist_size;
9309 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9310 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9311 }
9312 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9313 }
9314 else
9315 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9316 }
9317 else
9318 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9319
9320}
9321
9322upl_t
9323vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9324{
9325 if(vector_upl_is_valid(upl)) {
9326 vector_upl_t vector_upl = upl->vector_upl;
9327 if(vector_upl) {
9328 if(index < vector_upl->num_upls)
9329 return vector_upl->upl_elems[index];
9330 }
9331 else
9332 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9333 }
9334 return NULL;
9335}
9336
9337upl_t
9338vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9339{
9340 if(vector_upl_is_valid(upl)) {
9341 uint32_t i=0;
9342 vector_upl_t vector_upl = upl->vector_upl;
9343
9344 if(vector_upl) {
9345 upl_t subupl = NULL;
9346 vector_upl_iostates_t subupl_state;
9347
9348 for(i=0; i < vector_upl->num_upls; i++) {
9349 subupl = vector_upl->upl_elems[i];
9350 subupl_state = vector_upl->upl_iostates[i];
9351 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9352 /* We could have been passed an offset/size pair that belongs
9353 * to an UPL element that has already been committed/aborted.
9354 * If so, return NULL.
9355 */
9356 if(subupl == NULL)
9357 return NULL;
9358 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9359 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9360 if(*upl_size > subupl_state.size)
9361 *upl_size = subupl_state.size;
9362 }
9363 if(*upl_offset >= subupl_state.offset)
9364 *upl_offset -= subupl_state.offset;
9365 else if(i)
9366 panic("Vector UPL offset miscalculation\n");
9367 return subupl;
9368 }
9369 }
9370 }
9371 else
9372 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9373 }
9374 return NULL;
9375}
9376
9377void
9378vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9379{
9380 *v_upl_submap = NULL;
9381
9382 if(vector_upl_is_valid(upl)) {
9383 vector_upl_t vector_upl = upl->vector_upl;
9384 if(vector_upl) {
9385 *v_upl_submap = vector_upl->submap;
9386 *submap_dst_addr = vector_upl->submap_dst_addr;
9387 }
9388 else
9389 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9390 }
9391 else
9392 panic("vector_upl_get_submap was passed a null UPL\n");
9393}
9394
9395void
9396vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9397{
9398 if(vector_upl_is_valid(upl)) {
9399 vector_upl_t vector_upl = upl->vector_upl;
9400 if(vector_upl) {
9401 vector_upl->submap = submap;
9402 vector_upl->submap_dst_addr = submap_dst_addr;
9403 }
9404 else
9405 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9406 }
9407 else
9408 panic("vector_upl_get_submap was passed a NULL UPL\n");
9409}
9410
9411void
9412vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9413{
9414 if(vector_upl_is_valid(upl)) {
9415 uint32_t i = 0;
9416 vector_upl_t vector_upl = upl->vector_upl;
9417
9418 if(vector_upl) {
9419 for(i = 0; i < vector_upl->num_upls; i++) {
9420 if(vector_upl->upl_elems[i] == subupl)
9421 break;
9422 }
9423
9424 if(i == vector_upl->num_upls)
9425 panic("setting sub-upl iostate when none exists");
9426
9427 vector_upl->upl_iostates[i].offset = offset;
9428 if(size < PAGE_SIZE)
9429 size = PAGE_SIZE;
9430 vector_upl->upl_iostates[i].size = size;
9431 }
9432 else
9433 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9434 }
9435 else
9436 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9437}
9438
9439void
9440vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9441{
9442 if(vector_upl_is_valid(upl)) {
9443 uint32_t i = 0;
9444 vector_upl_t vector_upl = upl->vector_upl;
9445
9446 if(vector_upl) {
9447 for(i = 0; i < vector_upl->num_upls; i++) {
9448 if(vector_upl->upl_elems[i] == subupl)
9449 break;
9450 }
9451
9452 if(i == vector_upl->num_upls)
9453 panic("getting sub-upl iostate when none exists");
9454
9455 *offset = vector_upl->upl_iostates[i].offset;
9456 *size = vector_upl->upl_iostates[i].size;
9457 }
9458 else
9459 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9460 }
9461 else
9462 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9463}
9464
9465void
9466vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9467{
9468 if(vector_upl_is_valid(upl)) {
9469 vector_upl_t vector_upl = upl->vector_upl;
9470 if(vector_upl) {
9471 if(index < vector_upl->num_upls) {
9472 *offset = vector_upl->upl_iostates[index].offset;
9473 *size = vector_upl->upl_iostates[index].size;
9474 }
9475 else
9476 *offset = *size = 0;
9477 }
9478 else
9479 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9480 }
9481 else
9482 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9483}
9484
9485upl_page_info_t *
9486upl_get_internal_vectorupl_pagelist(upl_t upl)
9487{
9488 return ((vector_upl_t)(upl->vector_upl))->pagelist;
9489}
9490
9491void *
9492upl_get_internal_vectorupl(upl_t upl)
9493{
9494 return upl->vector_upl;
9495}
9496
9497vm_size_t
9498upl_get_internal_pagelist_offset(void)
9499{
9500 return sizeof(struct upl);
9501}
9502
9503void
9504upl_clear_dirty(
9505 upl_t upl,
9506 boolean_t value)
9507{
9508 if (value) {
9509 upl->flags |= UPL_CLEAR_DIRTY;
9510 } else {
9511 upl->flags &= ~UPL_CLEAR_DIRTY;
9512 }
9513}
9514
9515void
9516upl_set_referenced(
9517 upl_t upl,
9518 boolean_t value)
9519{
9520 upl_lock(upl);
9521 if (value) {
9522 upl->ext_ref_count++;
9523 } else {
9524 if (!upl->ext_ref_count) {
9525 panic("upl_set_referenced not %p\n", upl);
9526 }
9527 upl->ext_ref_count--;
9528 }
9529 upl_unlock(upl);
9530}
9531
9532#if CONFIG_IOSCHED
9533void
9534upl_set_blkno(
9535 upl_t upl,
9536 vm_offset_t upl_offset,
9537 int io_size,
9538 int64_t blkno)
9539{
9540 int i,j;
9541 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9542 return;
9543
9544 assert(upl->upl_reprio_info != 0);
9545 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9546 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9547 }
9548}
9549#endif
9550
9551void inline memoryshot(unsigned int event, unsigned int control)
9552{
9553 if (vm_debug_events) {
9554 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9555 vm_page_active_count, vm_page_inactive_count,
9556 vm_page_free_count, vm_page_speculative_count,
9557 vm_page_throttled_count);
9558 } else {
9559 (void) event;
9560 (void) control;
9561 }
9562
9563}
9564
9565#ifdef MACH_BSD
9566
9567boolean_t upl_device_page(upl_page_info_t *upl)
9568{
9569 return(UPL_DEVICE_PAGE(upl));
9570}
9571boolean_t upl_page_present(upl_page_info_t *upl, int index)
9572{
9573 return(UPL_PAGE_PRESENT(upl, index));
9574}
9575boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
9576{
9577 return(UPL_SPECULATIVE_PAGE(upl, index));
9578}
9579boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
9580{
9581 return(UPL_DIRTY_PAGE(upl, index));
9582}
9583boolean_t upl_valid_page(upl_page_info_t *upl, int index)
9584{
9585 return(UPL_VALID_PAGE(upl, index));
9586}
9587ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
9588{
9589 return(UPL_PHYS_PAGE(upl, index));
9590}
9591
9592void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9593{
9594 upl[index].mark = v;
9595}
9596
9597boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
9598{
9599 return upl[index].mark;
9600}
9601
9602void
9603vm_countdirtypages(void)
9604{
9605 vm_page_t m;
9606 int dpages;
9607 int pgopages;
9608 int precpages;
9609
9610
9611 dpages=0;
9612 pgopages=0;
9613 precpages=0;
9614
9615 vm_page_lock_queues();
9616 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9617 do {
9618 if (m ==(vm_page_t )0) break;
9619
9620 if(m->vmp_dirty) dpages++;
9621 if(m->vmp_free_when_done) pgopages++;
9622 if(m->vmp_precious) precpages++;
9623
9624 assert(VM_PAGE_OBJECT(m) != kernel_object);
9625 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9626 if (m ==(vm_page_t )0) break;
9627
9628 } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9629 vm_page_unlock_queues();
9630
9631 vm_page_lock_queues();
9632 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9633 do {
9634 if (m ==(vm_page_t )0) break;
9635
9636 dpages++;
9637 assert(m->vmp_dirty);
9638 assert(!m->vmp_free_when_done);
9639 assert(VM_PAGE_OBJECT(m) != kernel_object);
9640 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9641 if (m ==(vm_page_t )0) break;
9642
9643 } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9644 vm_page_unlock_queues();
9645
9646 vm_page_lock_queues();
9647 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9648 do {
9649 if (m ==(vm_page_t )0) break;
9650
9651 if(m->vmp_dirty) dpages++;
9652 if(m->vmp_free_when_done) pgopages++;
9653 if(m->vmp_precious) precpages++;
9654
9655 assert(VM_PAGE_OBJECT(m) != kernel_object);
9656 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9657 if (m ==(vm_page_t )0) break;
9658
9659 } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9660 vm_page_unlock_queues();
9661
9662 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9663
9664 dpages=0;
9665 pgopages=0;
9666 precpages=0;
9667
9668 vm_page_lock_queues();
9669 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9670
9671 do {
9672 if(m == (vm_page_t )0) break;
9673 if(m->vmp_dirty) dpages++;
9674 if(m->vmp_free_when_done) pgopages++;
9675 if(m->vmp_precious) precpages++;
9676
9677 assert(VM_PAGE_OBJECT(m) != kernel_object);
9678 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9679 if(m == (vm_page_t )0) break;
9680
9681 } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9682 vm_page_unlock_queues();
9683
9684 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9685
9686}
9687#endif /* MACH_BSD */
9688
9689
9690#if CONFIG_IOSCHED
9691int upl_get_cached_tier(upl_t upl)
9692{
9693 assert(upl);
9694 if (upl->flags & UPL_TRACKED_BY_OBJECT)
9695 return (upl->upl_priority);
9696 return (-1);
9697}
9698#endif /* CONFIG_IOSCHED */
9699
9700
9701void upl_callout_iodone(upl_t upl)
9702{
9703 struct upl_io_completion *upl_ctx = upl->upl_iodone;
9704
9705 if (upl_ctx) {
9706 void (*iodone_func)(void *, int) = upl_ctx->io_done;
9707
9708 assert(upl_ctx->io_done);
9709
9710 (*iodone_func)(upl_ctx->io_context, upl_ctx->io_error);
9711 }
9712}
9713
9714void upl_set_iodone(upl_t upl, void *upl_iodone)
9715{
9716 upl->upl_iodone = (struct upl_io_completion *)upl_iodone;
9717}
9718
9719void upl_set_iodone_error(upl_t upl, int error)
9720{
9721 struct upl_io_completion *upl_ctx = upl->upl_iodone;
9722
9723 if (upl_ctx)
9724 upl_ctx->io_error = error;
9725}
9726
9727
9728ppnum_t upl_get_highest_page(
9729 upl_t upl)
9730{
9731 return upl->highest_page;
9732}
9733
9734upl_size_t upl_get_size(
9735 upl_t upl)
9736{
9737 return upl->size;
9738}
9739
9740upl_t upl_associated_upl(upl_t upl)
9741{
9742 return upl->associated_upl;
9743}
9744
9745void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
9746{
9747 upl->associated_upl = associated_upl;
9748}
9749
9750struct vnode * upl_lookup_vnode(upl_t upl)
9751{
9752 if (!upl->map_object->internal)
9753 return vnode_pager_lookup_vnode(upl->map_object->pager);
9754 else
9755 return NULL;
9756}
9757
9758#if UPL_DEBUG
9759kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
9760{
9761 upl->ubc_alias1 = alias1;
9762 upl->ubc_alias2 = alias2;
9763 return KERN_SUCCESS;
9764}
9765int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
9766{
9767 if(al)
9768 *al = upl->ubc_alias1;
9769 if(al2)
9770 *al2 = upl->ubc_alias2;
9771 return KERN_SUCCESS;
9772}
9773#endif /* UPL_DEBUG */
9774
9775#if VM_PRESSURE_EVENTS
9776/*
9777 * Upward trajectory.
9778 */
9779extern boolean_t vm_compressor_low_on_space(void);
9780
9781boolean_t
9782VM_PRESSURE_NORMAL_TO_WARNING(void) {
9783
9784 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9785
9786 /* Available pages below our threshold */
9787 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
9788 /* No frozen processes to kill */
9789 if (memorystatus_frozen_count == 0) {
9790 /* Not enough suspended processes available. */
9791 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
9792 return TRUE;
9793 }
9794 }
9795 }
9796 return FALSE;
9797
9798 } else {
9799 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
9800 }
9801}
9802
9803boolean_t
9804VM_PRESSURE_WARNING_TO_CRITICAL(void) {
9805
9806 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9807
9808 /* Available pages below our threshold */
9809 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
9810 return TRUE;
9811 }
9812 return FALSE;
9813 } else {
9814 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9815 }
9816}
9817
9818/*
9819 * Downward trajectory.
9820 */
9821boolean_t
9822VM_PRESSURE_WARNING_TO_NORMAL(void) {
9823
9824 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9825
9826 /* Available pages above our threshold */
9827 unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
9828 if (memorystatus_available_pages > target_threshold) {
9829 return TRUE;
9830 }
9831 return FALSE;
9832 } else {
9833 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
9834 }
9835}
9836
9837boolean_t
9838VM_PRESSURE_CRITICAL_TO_WARNING(void) {
9839
9840 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9841
9842 /* Available pages above our threshold */
9843 unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
9844 if (memorystatus_available_pages > target_threshold) {
9845 return TRUE;
9846 }
9847 return FALSE;
9848 } else {
9849 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9850 }
9851}
9852#endif /* VM_PRESSURE_EVENTS */
9853
9854
9855
9856#define VM_TEST_COLLAPSE_COMPRESSOR 0
9857#define VM_TEST_WIRE_AND_EXTRACT 0
9858#define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
9859#if __arm64__
9860#define VM_TEST_KERNEL_OBJECT_FAULT 0
9861#endif /* __arm64__ */
9862#define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
9863
9864#if VM_TEST_COLLAPSE_COMPRESSOR
9865extern boolean_t vm_object_collapse_compressor_allowed;
9866#include <IOKit/IOLib.h>
9867static void
9868vm_test_collapse_compressor(void)
9869{
9870 vm_object_size_t backing_size, top_size;
9871 vm_object_t backing_object, top_object;
9872 vm_map_offset_t backing_offset, top_offset;
9873 unsigned char *backing_address, *top_address;
9874 kern_return_t kr;
9875
9876 printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
9877
9878 /* create backing object */
9879 backing_size = 15 * PAGE_SIZE;
9880 backing_object = vm_object_allocate(backing_size);
9881 assert(backing_object != VM_OBJECT_NULL);
9882 printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
9883 backing_object);
9884 /* map backing object */
9885 backing_offset = 0;
9886 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
9887 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9888 backing_object, 0, FALSE,
9889 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9890 assert(kr == KERN_SUCCESS);
9891 backing_address = (unsigned char *) backing_offset;
9892 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9893 "mapped backing object %p at 0x%llx\n",
9894 backing_object, (uint64_t) backing_offset);
9895 /* populate with pages to be compressed in backing object */
9896 backing_address[0x1*PAGE_SIZE] = 0xB1;
9897 backing_address[0x4*PAGE_SIZE] = 0xB4;
9898 backing_address[0x7*PAGE_SIZE] = 0xB7;
9899 backing_address[0xa*PAGE_SIZE] = 0xBA;
9900 backing_address[0xd*PAGE_SIZE] = 0xBD;
9901 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9902 "populated pages to be compressed in "
9903 "backing_object %p\n", backing_object);
9904 /* compress backing object */
9905 vm_object_pageout(backing_object);
9906 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
9907 backing_object);
9908 /* wait for all the pages to be gone */
9909 while (*(volatile int *)&backing_object->resident_page_count != 0)
9910 IODelay(10);
9911 printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
9912 backing_object);
9913 /* populate with pages to be resident in backing object */
9914 backing_address[0x0*PAGE_SIZE] = 0xB0;
9915 backing_address[0x3*PAGE_SIZE] = 0xB3;
9916 backing_address[0x6*PAGE_SIZE] = 0xB6;
9917 backing_address[0x9*PAGE_SIZE] = 0xB9;
9918 backing_address[0xc*PAGE_SIZE] = 0xBC;
9919 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9920 "populated pages to be resident in "
9921 "backing_object %p\n", backing_object);
9922 /* leave the other pages absent */
9923 /* mess with the paging_offset of the backing_object */
9924 assert(backing_object->paging_offset == 0);
9925 backing_object->paging_offset = 0x3000;
9926
9927 /* create top object */
9928 top_size = 9 * PAGE_SIZE;
9929 top_object = vm_object_allocate(top_size);
9930 assert(top_object != VM_OBJECT_NULL);
9931 printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
9932 top_object);
9933 /* map top object */
9934 top_offset = 0;
9935 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
9936 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9937 top_object, 0, FALSE,
9938 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9939 assert(kr == KERN_SUCCESS);
9940 top_address = (unsigned char *) top_offset;
9941 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9942 "mapped top object %p at 0x%llx\n",
9943 top_object, (uint64_t) top_offset);
9944 /* populate with pages to be compressed in top object */
9945 top_address[0x3*PAGE_SIZE] = 0xA3;
9946 top_address[0x4*PAGE_SIZE] = 0xA4;
9947 top_address[0x5*PAGE_SIZE] = 0xA5;
9948 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9949 "populated pages to be compressed in "
9950 "top_object %p\n", top_object);
9951 /* compress top object */
9952 vm_object_pageout(top_object);
9953 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
9954 top_object);
9955 /* wait for all the pages to be gone */
9956 while (top_object->resident_page_count != 0)
9957 IODelay(10);
9958 printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
9959 top_object);
9960 /* populate with pages to be resident in top object */
9961 top_address[0x0*PAGE_SIZE] = 0xA0;
9962 top_address[0x1*PAGE_SIZE] = 0xA1;
9963 top_address[0x2*PAGE_SIZE] = 0xA2;
9964 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9965 "populated pages to be resident in "
9966 "top_object %p\n", top_object);
9967 /* leave the other pages absent */
9968
9969 /* link the 2 objects */
9970 vm_object_reference(backing_object);
9971 top_object->shadow = backing_object;
9972 top_object->vo_shadow_offset = 0x3000;
9973 printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
9974 top_object, backing_object);
9975
9976 /* unmap backing object */
9977 vm_map_remove(kernel_map,
9978 backing_offset,
9979 backing_offset + backing_size,
9980 VM_MAP_REMOVE_NO_FLAGS);
9981 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9982 "unmapped backing_object %p [0x%llx:0x%llx]\n",
9983 backing_object,
9984 (uint64_t) backing_offset,
9985 (uint64_t) (backing_offset + backing_size));
9986
9987 /* collapse */
9988 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
9989 vm_object_lock(top_object);
9990 vm_object_collapse(top_object, 0, FALSE);
9991 vm_object_unlock(top_object);
9992 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
9993
9994 /* did it work? */
9995 if (top_object->shadow != VM_OBJECT_NULL) {
9996 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
9997 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
9998 if (vm_object_collapse_compressor_allowed) {
9999 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10000 }
10001 } else {
10002 /* check the contents of the mapping */
10003 unsigned char expect[9] =
10004 { 0xA0, 0xA1, 0xA2, /* resident in top */
10005 0xA3, 0xA4, 0xA5, /* compressed in top */
10006 0xB9, /* resident in backing + shadow_offset */
10007 0xBD, /* compressed in backing + shadow_offset + paging_offset */
10008 0x00 }; /* absent in both */
10009 unsigned char actual[9];
10010 unsigned int i, errors;
10011
10012 errors = 0;
10013 for (i = 0; i < sizeof (actual); i++) {
10014 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
10015 if (actual[i] != expect[i]) {
10016 errors++;
10017 }
10018 }
10019 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10020 "actual [%x %x %x %x %x %x %x %x %x] "
10021 "expect [%x %x %x %x %x %x %x %x %x] "
10022 "%d errors\n",
10023 actual[0], actual[1], actual[2], actual[3],
10024 actual[4], actual[5], actual[6], actual[7],
10025 actual[8],
10026 expect[0], expect[1], expect[2], expect[3],
10027 expect[4], expect[5], expect[6], expect[7],
10028 expect[8],
10029 errors);
10030 if (errors) {
10031 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10032 } else {
10033 printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10034 }
10035 }
10036}
10037#else /* VM_TEST_COLLAPSE_COMPRESSOR */
10038#define vm_test_collapse_compressor()
10039#endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10040
10041#if VM_TEST_WIRE_AND_EXTRACT
10042extern ledger_template_t task_ledger_template;
10043#include <mach/mach_vm.h>
10044extern ppnum_t vm_map_get_phys_page(vm_map_t map,
10045 vm_offset_t offset);
10046static void
10047vm_test_wire_and_extract(void)
10048{
10049 ledger_t ledger;
10050 vm_map_t user_map, wire_map;
10051 mach_vm_address_t user_addr, wire_addr;
10052 mach_vm_size_t user_size, wire_size;
10053 mach_vm_offset_t cur_offset;
10054 vm_prot_t cur_prot, max_prot;
10055 ppnum_t user_ppnum, wire_ppnum;
10056 kern_return_t kr;
10057
10058 ledger = ledger_instantiate(task_ledger_template,
10059 LEDGER_CREATE_ACTIVE_ENTRIES);
10060 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
10061 0x100000000ULL,
10062 0x200000000ULL,
10063 TRUE);
10064 wire_map = vm_map_create(NULL,
10065 0x100000000ULL,
10066 0x200000000ULL,
10067 TRUE);
10068 user_addr = 0;
10069 user_size = 0x10000;
10070 kr = mach_vm_allocate(user_map,
10071 &user_addr,
10072 user_size,
10073 VM_FLAGS_ANYWHERE);
10074 assert(kr == KERN_SUCCESS);
10075 wire_addr = 0;
10076 wire_size = user_size;
10077 kr = mach_vm_remap(wire_map,
10078 &wire_addr,
10079 wire_size,
10080 0,
10081 VM_FLAGS_ANYWHERE,
10082 user_map,
10083 user_addr,
10084 FALSE,
10085 &cur_prot,
10086 &max_prot,
10087 VM_INHERIT_NONE);
10088 assert(kr == KERN_SUCCESS);
10089 for (cur_offset = 0;
10090 cur_offset < wire_size;
10091 cur_offset += PAGE_SIZE) {
10092 kr = vm_map_wire_and_extract(wire_map,
10093 wire_addr + cur_offset,
10094 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
10095 TRUE,
10096 &wire_ppnum);
10097 assert(kr == KERN_SUCCESS);
10098 user_ppnum = vm_map_get_phys_page(user_map,
10099 user_addr + cur_offset);
10100 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10101 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10102 kr,
10103 user_map, user_addr + cur_offset, user_ppnum,
10104 wire_map, wire_addr + cur_offset, wire_ppnum);
10105 if (kr != KERN_SUCCESS ||
10106 wire_ppnum == 0 ||
10107 wire_ppnum != user_ppnum) {
10108 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10109 }
10110 }
10111 cur_offset -= PAGE_SIZE;
10112 kr = vm_map_wire_and_extract(wire_map,
10113 wire_addr + cur_offset,
10114 VM_PROT_DEFAULT,
10115 TRUE,
10116 &wire_ppnum);
10117 assert(kr == KERN_SUCCESS);
10118 printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10119 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10120 kr,
10121 user_map, user_addr + cur_offset, user_ppnum,
10122 wire_map, wire_addr + cur_offset, wire_ppnum);
10123 if (kr != KERN_SUCCESS ||
10124 wire_ppnum == 0 ||
10125 wire_ppnum != user_ppnum) {
10126 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10127 }
10128
10129 printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10130}
10131#else /* VM_TEST_WIRE_AND_EXTRACT */
10132#define vm_test_wire_and_extract()
10133#endif /* VM_TEST_WIRE_AND_EXTRACT */
10134
10135#if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10136static void
10137vm_test_page_wire_overflow_panic(void)
10138{
10139 vm_object_t object;
10140 vm_page_t page;
10141
10142 printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10143
10144 object = vm_object_allocate(PAGE_SIZE);
10145 vm_object_lock(object);
10146 page = vm_page_alloc(object, 0x0);
10147 vm_page_lock_queues();
10148 do {
10149 vm_page_wire(page, 1, FALSE);
10150 } while (page->wire_count != 0);
10151 vm_page_unlock_queues();
10152 vm_object_unlock(object);
10153 panic("FBDP(%p,%p): wire_count overflow not detected\n",
10154 object, page);
10155}
10156#else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10157#define vm_test_page_wire_overflow_panic()
10158#endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10159
10160#if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10161extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
10162static void
10163vm_test_kernel_object_fault(void)
10164{
10165 kern_return_t kr;
10166 vm_offset_t stack;
10167 uintptr_t frameb[2];
10168 int ret;
10169
10170 kr = kernel_memory_allocate(kernel_map, &stack,
10171 kernel_stack_size + (2*PAGE_SIZE),
10172 0,
10173 (KMA_KSTACK | KMA_KOBJECT |
10174 KMA_GUARD_FIRST | KMA_GUARD_LAST),
10175 VM_KERN_MEMORY_STACK);
10176 if (kr != KERN_SUCCESS) {
10177 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
10178 }
10179 ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
10180 if (ret != 0) {
10181 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10182 } else {
10183 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10184 }
10185 vm_map_remove(kernel_map,
10186 stack,
10187 stack + kernel_stack_size + (2*PAGE_SIZE),
10188 VM_MAP_REMOVE_KUNWIRE);
10189 stack = 0;
10190}
10191#else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10192#define vm_test_kernel_object_fault()
10193#endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10194
10195#if VM_TEST_DEVICE_PAGER_TRANSPOSE
10196static void
10197vm_test_device_pager_transpose(void)
10198{
10199 memory_object_t device_pager;
10200 vm_object_t anon_object, device_object;
10201 vm_size_t size;
10202 vm_map_offset_t anon_mapping, device_mapping;
10203 kern_return_t kr;
10204
10205 size = 3 * PAGE_SIZE;
10206 anon_object = vm_object_allocate(size);
10207 assert(anon_object != VM_OBJECT_NULL);
10208 device_pager = device_pager_setup(NULL, 0, size, 0);
10209 assert(device_pager != NULL);
10210 device_object = memory_object_to_vm_object(device_pager);
10211 assert(device_object != VM_OBJECT_NULL);
10212 anon_mapping = 0;
10213 kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
10214 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
10215 anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
10216 VM_INHERIT_DEFAULT);
10217 assert(kr == KERN_SUCCESS);
10218 device_mapping = 0;
10219 kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
10220 VM_FLAGS_ANYWHERE,
10221 VM_MAP_KERNEL_FLAGS_NONE,
10222 VM_KERN_MEMORY_NONE,
10223 (void *)device_pager, 0, FALSE,
10224 VM_PROT_DEFAULT, VM_PROT_ALL,
10225 VM_INHERIT_DEFAULT);
10226 assert(kr == KERN_SUCCESS);
10227 memory_object_deallocate(device_pager);
10228
10229 vm_object_lock(anon_object);
10230 vm_object_activity_begin(anon_object);
10231 anon_object->blocked_access = TRUE;
10232 vm_object_unlock(anon_object);
10233 vm_object_lock(device_object);
10234 vm_object_activity_begin(device_object);
10235 device_object->blocked_access = TRUE;
10236 vm_object_unlock(device_object);
10237
10238 assert(anon_object->ref_count == 1);
10239 assert(!anon_object->named);
10240 assert(device_object->ref_count == 2);
10241 assert(device_object->named);
10242
10243 kr = vm_object_transpose(device_object, anon_object, size);
10244 assert(kr == KERN_SUCCESS);
10245
10246 vm_object_lock(anon_object);
10247 vm_object_activity_end(anon_object);
10248 anon_object->blocked_access = FALSE;
10249 vm_object_unlock(anon_object);
10250 vm_object_lock(device_object);
10251 vm_object_activity_end(device_object);
10252 device_object->blocked_access = FALSE;
10253 vm_object_unlock(device_object);
10254
10255 assert(anon_object->ref_count == 2);
10256 assert(anon_object->named);
10257 kr = vm_deallocate(kernel_map, anon_mapping, size);
10258 assert(kr == KERN_SUCCESS);
10259 assert(device_object->ref_count == 1);
10260 assert(!device_object->named);
10261 kr = vm_deallocate(kernel_map, device_mapping, size);
10262 assert(kr == KERN_SUCCESS);
10263
10264 printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10265}
10266#else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10267#define vm_test_device_pager_transpose()
10268#endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10269
/*
 * Run the VM self-tests defined above, in a fixed order.
 *
 * Each vm_test_*() name is either a real function or, when the matching
 * VM_TEST_* switch (or architecture condition) is off, a macro that
 * expands to nothing -- so disabled tests cost nothing here.
 */
void
vm_tests(void)
{
	vm_test_collapse_compressor();
	vm_test_wire_and_extract();
	vm_test_page_wire_overflow_panic();
	vm_test_kernel_object_fault();
	vm_test_device_pager_transpose();
}
10279