/*
 * Copyright (c) 2014 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>
#include <mach/mach_vm.h>

#include <kern/host.h>
#include <kern/kalloc.h>
#include <kern/page_decrypt.h>
#include <kern/queue.h>
#include <kern/thread.h>
#include <kern/ipc_kobject.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_kern.h>

/*
 * 4K MEMORY PAGER
 *
 * This external memory manager (EMM) handles memory mappings that are
 * 4K-aligned but not page-aligned and can therefore not be mapped directly.
 *
 * It mostly handles page-in requests (from memory_object_data_request()) by
 * getting the data needed to fill in each 4K chunk.  That can require
 * reading one or two pages from its backing VM object (a file or an
 * "apple-protected" pager backed by an encrypted file) and copying the data
 * into another page so that it is aligned as expected by the mapping.
 *
 * Returned pages can never be dirtied and must always be mapped copy-on-write,
 * so the memory manager does not need to handle page-out requests (from
 * memory_object_data_return()).
 */

/* forward declarations */
void fourk_pager_reference(memory_object_t mem_obj);
void fourk_pager_deallocate(memory_object_t mem_obj);
kern_return_t fourk_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
    memory_object_cluster_size_t pg_size);
kern_return_t fourk_pager_terminate(memory_object_t mem_obj);
kern_return_t fourk_pager_data_request(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
    vm_prot_t protection_required,
    memory_object_fault_info_t fault_info);
kern_return_t fourk_pager_data_return(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt,
    memory_object_offset_t *resid_offset,
    int *io_error,
    boolean_t dirty,
    boolean_t kernel_copy,
    int upl_flags);
kern_return_t fourk_pager_data_initialize(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt);
kern_return_t fourk_pager_data_unlock(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_size_t size,
    vm_prot_t desired_access);
kern_return_t fourk_pager_synchronize(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_size_t length,
    vm_sync_t sync_flags);
kern_return_t fourk_pager_map(memory_object_t mem_obj,
    vm_prot_t prot);
kern_return_t fourk_pager_last_unmap(memory_object_t mem_obj);

/*
 * Vector of VM operations for this EMM.
 * These routines are invoked by VM via the memory_object_*() interfaces.
 */
const struct memory_object_pager_ops fourk_pager_ops = {
    fourk_pager_reference,
    fourk_pager_deallocate,
    fourk_pager_init,
    fourk_pager_terminate,
    fourk_pager_data_request,
    fourk_pager_data_return,
    fourk_pager_data_initialize,
    fourk_pager_data_unlock,
    fourk_pager_synchronize,
    fourk_pager_map,
    fourk_pager_last_unmap,
    NULL, /* data_reclaim */
    "fourk_pager"
};

/*
 * The "fourk_pager" describes a memory object backed by
 * the "4K" EMM.
 */
#define FOURK_PAGER_SLOTS 4     /* 16K / 4K */
typedef struct fourk_pager_backing {
    vm_object_t backing_object;
    vm_object_offset_t backing_offset;
} *fourk_pager_backing_t;
typedef struct fourk_pager {
    /* mandatory generic header */
    struct memory_object fourk_pgr_hdr;

    /* pager-specific data */
    queue_chain_t pager_queue;      /* next & prev pagers */
    unsigned int ref_count;         /* reference count */
    int is_ready;                   /* is this pager ready? */
    int is_mapped;                  /* is this mem_obj mapped? */
    struct fourk_pager_backing slots[FOURK_PAGER_SLOTS]; /* backing for each
                                                          * 4K-chunk */
} *fourk_pager_t;
#define FOURK_PAGER_NULL ((fourk_pager_t) NULL)

/*
 * List of memory objects managed by this EMM.
 * The list is protected by the "fourk_pager_lock" lock.
 */
int fourk_pager_count = 0;              /* number of pagers */
int fourk_pager_count_mapped = 0;       /* number of mapped pagers */
queue_head_t fourk_pager_queue;
decl_lck_mtx_data(, fourk_pager_lock)

/*
 * Maximum number of unmapped pagers we're willing to keep around.
 */
int fourk_pager_cache_limit = 0;

/*
 * Statistics & counters.
 */
int fourk_pager_count_max = 0;
int fourk_pager_count_unmapped_max = 0;
int fourk_pager_num_trim_max = 0;
int fourk_pager_num_trim_total = 0;


lck_grp_t fourk_pager_lck_grp;
lck_grp_attr_t fourk_pager_lck_grp_attr;
lck_attr_t fourk_pager_lck_attr;


/* internal prototypes */
fourk_pager_t fourk_pager_lookup(memory_object_t mem_obj);
void fourk_pager_dequeue(fourk_pager_t pager);
void fourk_pager_deallocate_internal(fourk_pager_t pager,
    boolean_t locked);
void fourk_pager_terminate_internal(fourk_pager_t pager);
void fourk_pager_trim(void);


#if DEBUG
int fourk_pagerdebug = 0;
#define PAGER_ALL 0xffffffff
#define PAGER_INIT 0x00000001
#define PAGER_PAGEIN 0x00000002

#define PAGER_DEBUG(LEVEL, A)                           \
        MACRO_BEGIN                                     \
        if ((fourk_pagerdebug & LEVEL) == LEVEL) {      \
                printf A;                               \
        }                                               \
        MACRO_END
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

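/*
 * fourk_pager_bootstrap()
 *
 * One-time initialization for this EMM: set up the lock group, the lock
 * protecting the list of pagers, and the list itself.
 */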
void
fourk_pager_bootstrap(void)
{
    lck_grp_attr_setdefault(&fourk_pager_lck_grp_attr);
    lck_grp_init(&fourk_pager_lck_grp, "4K-pager", &fourk_pager_lck_grp_attr);
    lck_attr_setdefault(&fourk_pager_lck_attr);
    lck_mtx_init(&fourk_pager_lock, &fourk_pager_lck_grp, &fourk_pager_lck_attr);
    queue_init(&fourk_pager_queue);
}

/*
 * fourk_pager_init()
 *
 * Initializes the memory object and makes it ready to be used and mapped.
 */
kern_return_t
fourk_pager_init(
    memory_object_t mem_obj,
    memory_object_control_t control,
#if !DEBUG
    __unused
#endif
    memory_object_cluster_size_t pg_size)
{
    fourk_pager_t pager;
    kern_return_t kr;
    memory_object_attr_info_data_t attributes;

    PAGER_DEBUG(PAGER_ALL,
        ("fourk_pager_init: %p, %p, %x\n",
        mem_obj, control, pg_size));

    if (control == MEMORY_OBJECT_CONTROL_NULL)
        return KERN_INVALID_ARGUMENT;

    pager = fourk_pager_lookup(mem_obj);

    memory_object_control_reference(control);

    pager->fourk_pgr_hdr.mo_control = control;

    attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
    /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
    attributes.cluster_size = (1 << (PAGE_SHIFT));
    attributes.may_cache_object = FALSE;
    attributes.temporary = TRUE;

    kr = memory_object_change_attributes(
        control,
        MEMORY_OBJECT_ATTRIBUTE_INFO,
        (memory_object_info_t) &attributes,
        MEMORY_OBJECT_ATTR_INFO_COUNT);
    if (kr != KERN_SUCCESS)
        panic("fourk_pager_init: "
            "memory_object_change_attributes() failed");

#if CONFIG_SECLUDED_MEMORY
    if (secluded_for_filecache) {
        memory_object_mark_eligible_for_secluded(control, TRUE);
    }
#endif /* CONFIG_SECLUDED_MEMORY */

    return KERN_SUCCESS;
}

/*
 * fourk_pager_data_return()
 *
 * Handles page-out requests from VM.  This should never happen since
 * the pages provided by this EMM are not supposed to be dirty or dirtied
 * and VM should simply discard the contents and reclaim the pages if it
 * needs to.
 */
kern_return_t
fourk_pager_data_return(
    __unused memory_object_t mem_obj,
    __unused memory_object_offset_t offset,
    __unused memory_object_cluster_size_t data_cnt,
    __unused memory_object_offset_t *resid_offset,
    __unused int *io_error,
    __unused boolean_t dirty,
    __unused boolean_t kernel_copy,
    __unused int upl_flags)
{
    panic("fourk_pager_data_return: should never get called");
    return KERN_FAILURE;
}

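/*
 * fourk_pager_data_initialize()
 *
 * Like fourk_pager_data_return(), this should never be called since this
 * EMM never provides dirty pages to VM.
 */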
kern_return_t
fourk_pager_data_initialize(
    __unused memory_object_t mem_obj,
    __unused memory_object_offset_t offset,
    __unused memory_object_cluster_size_t data_cnt)
{
    panic("fourk_pager_data_initialize: should never get called");
    return KERN_FAILURE;
}

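/*
 * fourk_pager_data_unlock()
 *
 * This EMM never grants additional access to its pages, so an unlock
 * request simply fails.
 */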
kern_return_t
fourk_pager_data_unlock(
    __unused memory_object_t mem_obj,
    __unused memory_object_offset_t offset,
    __unused memory_object_size_t size,
    __unused vm_prot_t desired_access)
{
    return KERN_FAILURE;
}

/*
 * fourk_pager_reference()
 *
 * Get a reference on this memory object.
 * For external usage only.  Assumes that the initial reference count is not 0,
 * i.e. one should not "revive" a dead pager this way.
 */
void
fourk_pager_reference(
    memory_object_t mem_obj)
{
    fourk_pager_t pager;

    pager = fourk_pager_lookup(mem_obj);

    lck_mtx_lock(&fourk_pager_lock);
    assert(pager->ref_count > 0);
    pager->ref_count++;
    lck_mtx_unlock(&fourk_pager_lock);
}


/*
 * fourk_pager_dequeue:
 *
 * Removes a pager from the list of pagers.
 *
 * The caller must hold "fourk_pager_lock".
 */
void
fourk_pager_dequeue(
    fourk_pager_t pager)
{
    assert(!pager->is_mapped);

    queue_remove(&fourk_pager_queue,
        pager,
        fourk_pager_t,
        pager_queue);
    pager->pager_queue.next = NULL;
    pager->pager_queue.prev = NULL;

    fourk_pager_count--;
}

/*
 * fourk_pager_terminate_internal:
 *
 * Trigger the asynchronous termination of the memory object associated
 * with this pager.
 * When the memory object is terminated, there will be one more call
 * to memory_object_deallocate() (i.e. fourk_pager_deallocate())
 * to finish the clean up.
 *
 * "fourk_pager_lock" should not be held by the caller.
 * We don't need the lock because the pager has already been removed from
 * the pagers' list and is now ours exclusively.
 */
void
fourk_pager_terminate_internal(
    fourk_pager_t pager)
{
    int i;

    assert(pager->is_ready);
    assert(!pager->is_mapped);

    for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
        if (pager->slots[i].backing_object != VM_OBJECT_NULL &&
            pager->slots[i].backing_object != (vm_object_t) -1) {
            vm_object_deallocate(pager->slots[i].backing_object);
            pager->slots[i].backing_object = (vm_object_t) -1;
            pager->slots[i].backing_offset = (vm_object_offset_t) -1;
        }
    }

    /* trigger the destruction of the memory object */
    memory_object_destroy(pager->fourk_pgr_hdr.mo_control, 0);
}

/*
 * fourk_pager_deallocate_internal()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 * Can be called with fourk_pager_lock held or not but always returns
 * with it unlocked.
 */
void
fourk_pager_deallocate_internal(
    fourk_pager_t pager,
    boolean_t locked)
{
    boolean_t needs_trimming;
    int count_unmapped;

    if (!locked) {
        lck_mtx_lock(&fourk_pager_lock);
    }

    count_unmapped = (fourk_pager_count -
        fourk_pager_count_mapped);
    if (count_unmapped > fourk_pager_cache_limit) {
        /* we have too many unmapped pagers: trim some */
        needs_trimming = TRUE;
    } else {
        needs_trimming = FALSE;
    }

    /* drop a reference on this pager */
    pager->ref_count--;

    if (pager->ref_count == 1) {
        /*
         * Only the "named" reference is left, which means that
         * no one is really holding on to this pager anymore.
         * Terminate it.
         */
        fourk_pager_dequeue(pager);
        /* the pager is all ours: no need for the lock now */
        lck_mtx_unlock(&fourk_pager_lock);
        fourk_pager_terminate_internal(pager);
    } else if (pager->ref_count == 0) {
        /*
         * Dropped the existence reference; the memory object has
         * been terminated.  Do some final cleanup and release the
         * pager structure.
         */
        lck_mtx_unlock(&fourk_pager_lock);
        if (pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL) {
            memory_object_control_deallocate(pager->fourk_pgr_hdr.mo_control);
            pager->fourk_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
        }
        kfree(pager, sizeof (*pager));
        pager = FOURK_PAGER_NULL;
    } else {
        /* there are still plenty of references: keep going... */
        lck_mtx_unlock(&fourk_pager_lock);
    }

    if (needs_trimming) {
        fourk_pager_trim();
    }
    /* caution: lock is not held on return... */
}

/*
 * fourk_pager_deallocate()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 */
void
fourk_pager_deallocate(
    memory_object_t mem_obj)
{
    fourk_pager_t pager;

    PAGER_DEBUG(PAGER_ALL, ("fourk_pager_deallocate: %p\n", mem_obj));
    pager = fourk_pager_lookup(mem_obj);
    fourk_pager_deallocate_internal(pager, FALSE);
}

/*
 * fourk_pager_terminate()
 *
 * Called by VM when the memory object is terminated.  The real cleanup
 * happens later, in fourk_pager_deallocate(), so there is nothing to do here.
 */
kern_return_t
fourk_pager_terminate(
#if !DEBUG
    __unused
#endif
    memory_object_t mem_obj)
{
    PAGER_DEBUG(PAGER_ALL, ("fourk_pager_terminate: %p\n", mem_obj));

    return KERN_SUCCESS;
}

/*
 * fourk_pager_synchronize()
 *
 * memory_object_synchronize() is no longer supported, so this should
 * never be called.
 */
kern_return_t
fourk_pager_synchronize(
    __unused memory_object_t mem_obj,
    __unused memory_object_offset_t offset,
    __unused memory_object_size_t length,
    __unused vm_sync_t sync_flags)
{
    panic("fourk_pager_synchronize: memory_object_synchronize no longer supported\n");
    return (KERN_FAILURE);
}

/*
 * fourk_pager_map()
 *
 * This allows VM to let us, the EMM, know that this memory object
 * is currently mapped one or more times.  This is called by VM each time
 * the memory object gets mapped and we take one extra reference on the
 * memory object to account for all its mappings.
 */
kern_return_t
fourk_pager_map(
    memory_object_t mem_obj,
    __unused vm_prot_t prot)
{
    fourk_pager_t pager;

    PAGER_DEBUG(PAGER_ALL, ("fourk_pager_map: %p\n", mem_obj));

    pager = fourk_pager_lookup(mem_obj);

    lck_mtx_lock(&fourk_pager_lock);
    assert(pager->is_ready);
    assert(pager->ref_count > 0); /* pager is alive */
    if (pager->is_mapped == FALSE) {
        /*
         * First mapping of this pager: take an extra reference
         * that will remain until all the mappings of this pager
         * are removed.
         */
        pager->is_mapped = TRUE;
        pager->ref_count++;
        fourk_pager_count_mapped++;
    }
    lck_mtx_unlock(&fourk_pager_lock);

    return KERN_SUCCESS;
}

/*
 * fourk_pager_last_unmap()
 *
 * This is called by VM when this memory object is no longer mapped anywhere.
 */
kern_return_t
fourk_pager_last_unmap(
    memory_object_t mem_obj)
{
    fourk_pager_t pager;
    int count_unmapped;

    PAGER_DEBUG(PAGER_ALL,
        ("fourk_pager_last_unmap: %p\n", mem_obj));

    pager = fourk_pager_lookup(mem_obj);

    lck_mtx_lock(&fourk_pager_lock);
    if (pager->is_mapped) {
        /*
         * All the mappings are gone, so let go of the one extra
         * reference that represents all the mappings of this pager.
         */
        fourk_pager_count_mapped--;
        count_unmapped = (fourk_pager_count -
            fourk_pager_count_mapped);
        if (count_unmapped > fourk_pager_count_unmapped_max) {
            fourk_pager_count_unmapped_max = count_unmapped;
        }
        pager->is_mapped = FALSE;
        fourk_pager_deallocate_internal(pager, TRUE);
        /* caution: deallocate_internal() released the lock! */
    } else {
        lck_mtx_unlock(&fourk_pager_lock);
    }

    return KERN_SUCCESS;
}


/*
 * fourk_pager_lookup()
 *
 * Convert from a memory object to its "fourk_pager" structure.
 */
fourk_pager_t
fourk_pager_lookup(
    memory_object_t mem_obj)
{
    fourk_pager_t pager;

    assert(mem_obj->mo_pager_ops == &fourk_pager_ops);
    pager = (fourk_pager_t) mem_obj;
    assert(pager->ref_count > 0);
    return pager;
}

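/*
 * fourk_pager_trim()
 *
 * Deallocate the oldest unused pagers until the number of unmapped pagers
 * is back at or below "fourk_pager_cache_limit".
 */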
void
fourk_pager_trim(void)
{
    fourk_pager_t pager, prev_pager;
    queue_head_t trim_queue;
    int num_trim;
    int count_unmapped;

    lck_mtx_lock(&fourk_pager_lock);

    /*
     * We have too many pagers: try to trim some unused ones,
     * starting with the oldest pager at the end of the queue.
     */
    queue_init(&trim_queue);
    num_trim = 0;

    for (pager = (fourk_pager_t)
        queue_last(&fourk_pager_queue);
        !queue_end(&fourk_pager_queue,
        (queue_entry_t) pager);
        pager = prev_pager) {
        /* get prev elt before we dequeue */
        prev_pager = (fourk_pager_t)
            queue_prev(&pager->pager_queue);

        if (pager->ref_count == 2 &&
            pager->is_ready &&
            !pager->is_mapped) {
            /* this pager can be trimmed */
            num_trim++;
            /* remove this pager from the main list ... */
            fourk_pager_dequeue(pager);
            /* ... and add it to our trim queue */
            queue_enter_first(&trim_queue,
                pager,
                fourk_pager_t,
                pager_queue);

            count_unmapped = (fourk_pager_count -
                fourk_pager_count_mapped);
            if (count_unmapped <= fourk_pager_cache_limit) {
                /* we have trimmed enough pagers: stop */
                break;
            }
        }
    }
    if (num_trim > fourk_pager_num_trim_max) {
        fourk_pager_num_trim_max = num_trim;
    }
    fourk_pager_num_trim_total += num_trim;

    lck_mtx_unlock(&fourk_pager_lock);

    /* terminate the trimmed pagers */
    while (!queue_empty(&trim_queue)) {
        queue_remove_first(&trim_queue,
            pager,
            fourk_pager_t,
            pager_queue);
        pager->pager_queue.next = NULL;
        pager->pager_queue.prev = NULL;
        assert(pager->ref_count == 2);
        /*
         * We can't call deallocate_internal() because the pager
         * has already been dequeued, but we still need to remove
         * a reference.
         */
        pager->ref_count--;
        fourk_pager_terminate_internal(pager);
    }


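/*
 * fourk_pager_to_vm_object()
 *
 * Return the VM object that this pager's memory object control refers to,
 * i.e. the object holding the pages provided by this pager.
 */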
vm_object_t
fourk_pager_to_vm_object(
    memory_object_t mem_obj)
{
    fourk_pager_t pager;
    vm_object_t object;

    pager = fourk_pager_lookup(mem_obj);
    if (pager == NULL) {
        return VM_OBJECT_NULL;
    }

    assert(pager->ref_count > 0);
    assert(pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL);
    object = memory_object_control_to_vm_object(pager->fourk_pgr_hdr.mo_control);
    assert(object != VM_OBJECT_NULL);
    return object;
}

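/*
 * fourk_pager_create()
 *
 * Allocate and initialize a new pager with all backing slots empty,
 * add it to the list of pagers, and create its "named" memory object.
 */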
memory_object_t
fourk_pager_create(void)
{
    fourk_pager_t pager;
    memory_object_control_t control;
    kern_return_t kr;
    int i;

#if 00
    if (PAGE_SIZE_64 == FOURK_PAGE_SIZE) {
        panic("fourk_pager_create: page size is 4K !?");
    }
#endif

    pager = (fourk_pager_t) kalloc(sizeof (*pager));
    if (pager == FOURK_PAGER_NULL) {
        return MEMORY_OBJECT_NULL;
    }
    bzero(pager, sizeof (*pager));

    /*
     * The vm_map call takes both named entry ports and raw memory
     * objects in the same parameter.  We need to make sure that
     * vm_map does not see this object as a named entry port.  So,
     * we reserve the first word in the object for a fake ip_kotype
     * setting - that will tell vm_map to use it as a memory object.
     */
    pager->fourk_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
    pager->fourk_pgr_hdr.mo_pager_ops = &fourk_pager_ops;
    pager->fourk_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;

    pager->ref_count = 2;    /* existence + setup reference */
    pager->is_ready = FALSE; /* not ready until it has a "name" */
    pager->is_mapped = FALSE;

    for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
        pager->slots[i].backing_object = (vm_object_t) -1;
        pager->slots[i].backing_offset = (vm_object_offset_t) -1;
    }

    lck_mtx_lock(&fourk_pager_lock);

    /* enter new pager at the head of our list of pagers */
    queue_enter_first(&fourk_pager_queue,
        pager,
        fourk_pager_t,
        pager_queue);
    fourk_pager_count++;
    if (fourk_pager_count > fourk_pager_count_max) {
        fourk_pager_count_max = fourk_pager_count;
    }
    lck_mtx_unlock(&fourk_pager_lock);

    kr = memory_object_create_named((memory_object_t) pager,
        0,
        &control);
    assert(kr == KERN_SUCCESS);

    lck_mtx_lock(&fourk_pager_lock);
    /* the new pager is now ready to be used */
    pager->is_ready = TRUE;
    lck_mtx_unlock(&fourk_pager_lock);

    /* wakeup anyone waiting for this pager to be ready */
    thread_wakeup(&pager->is_ready);

    return (memory_object_t) pager;
}

/*
 * fourk_pager_data_request()
 *
 * Handles page-in requests from VM.
 */
int fourk_pager_data_request_debug = 0;
kern_return_t
fourk_pager_data_request(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
#if !DEBUG
    __unused
#endif
    vm_prot_t protection_required,
    memory_object_fault_info_t mo_fault_info)
{
    fourk_pager_t pager;
    memory_object_control_t mo_control;
    upl_t upl;
    int upl_flags;
    upl_size_t upl_size;
    upl_page_info_t *upl_pl;
    unsigned int pl_count;
    vm_object_t dst_object;
    kern_return_t kr, retval;
    vm_map_offset_t kernel_mapping;
    vm_offset_t src_vaddr, dst_vaddr;
    vm_offset_t cur_offset;
    int sub_page;
    int sub_page_idx, sub_page_cnt;

    pager = fourk_pager_lookup(mem_obj);
    assert(pager->is_ready);
    assert(pager->ref_count > 1); /* pager is alive and mapped */

    PAGER_DEBUG(PAGER_PAGEIN, ("fourk_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));

    retval = KERN_SUCCESS;
    kernel_mapping = 0;

    offset = memory_object_trunc_page(offset);

    /*
     * Gather in a UPL all the VM pages requested by VM.
     */
    mo_control = pager->fourk_pgr_hdr.mo_control;

    upl_size = length;
    upl_flags =
        UPL_RET_ONLY_ABSENT |
        UPL_SET_LITE |
        UPL_NO_SYNC |
        UPL_CLEAN_IN_PLACE |    /* triggers UPL_CLEAR_DIRTY */
        UPL_SET_INTERNAL;
    pl_count = 0;
    kr = memory_object_upl_request(mo_control,
        offset, upl_size,
        &upl, NULL, NULL, upl_flags, VM_KERN_MEMORY_NONE);
    if (kr != KERN_SUCCESS) {
        retval = kr;
        goto done;
    }
    dst_object = mo_control->moc_object;
    assert(dst_object != VM_OBJECT_NULL);

#if __x86_64__ || __arm__ || __arm64__
    /* use the 1-to-1 mapping of physical memory */
#else /* __x86_64__ || __arm__ || __arm64__ */
    /*
     * Reserve 2 virtual pages in the kernel address space to map the
     * source and destination physical pages when it's their turn to
     * be processed.
     */
    vm_map_entry_t map_entry;

    vm_object_reference(kernel_object); /* ref. for mapping */
    kr = vm_map_find_space(kernel_map,
        &kernel_mapping,
        2 * PAGE_SIZE_64,
        0,
        0,
        VM_MAP_KERNEL_FLAGS_NONE,
        &map_entry);
    if (kr != KERN_SUCCESS) {
        vm_object_deallocate(kernel_object);
        retval = kr;
        goto done;
    }
    map_entry->object.vm_object = kernel_object;
    map_entry->offset = kernel_mapping;
    vm_map_unlock(kernel_map);
    src_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping);
    dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping + PAGE_SIZE_64);
#endif /* __x86_64__ || __arm__ || __arm64__ */

    /*
     * Fill in the contents of the pages requested by VM.
     */
    upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
    pl_count = length / PAGE_SIZE;
    for (cur_offset = 0;
        retval == KERN_SUCCESS && cur_offset < length;
        cur_offset += PAGE_SIZE) {
        ppnum_t dst_pnum;
        int num_subpg_signed, num_subpg_validated;
        int num_subpg_tainted, num_subpg_nx;

        if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) {
            /* this page is not in the UPL: skip it */
            continue;
        }

        /*
         * Establish an explicit pmap mapping of the destination
         * physical page.
         * We can't do a regular VM mapping because the VM page
         * is "busy".
         */
        dst_pnum = (ppnum_t)
            upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
        assert(dst_pnum != 0);
#if __x86_64__
        dst_vaddr = (vm_map_offset_t)
            PHYSMAP_PTOV((pmap_paddr_t)dst_pnum << PAGE_SHIFT);
#elif __arm__ || __arm64__
        dst_vaddr = (vm_map_offset_t)
            phystokv((pmap_paddr_t)dst_pnum << PAGE_SHIFT);
#else
        kr = pmap_enter(kernel_pmap,
            dst_vaddr,
            dst_pnum,
            VM_PROT_READ | VM_PROT_WRITE,
            VM_PROT_NONE,
            0,
            TRUE);

        assert(kr == KERN_SUCCESS);
#endif

        /* retrieve appropriate data for each 4K-page in this page */
        if (PAGE_SHIFT == FOURK_PAGE_SHIFT &&
            page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
            /*
             * Find the slot for the requested 4KB page in
             * the 16K page...
             */
            assert(PAGE_SHIFT == FOURK_PAGE_SHIFT);
            assert(page_shift_user32 == SIXTEENK_PAGE_SHIFT);
            sub_page_idx = ((offset & SIXTEENK_PAGE_MASK) /
                PAGE_SIZE);
            /*
             * ... and provide only that one 4KB page.
             */
            sub_page_cnt = 1;
        } else {
            /*
             * Iterate over all slots, i.e. retrieve all four 4KB
             * pages in the requested 16KB page.
             */
            assert(PAGE_SHIFT == SIXTEENK_PAGE_SHIFT);
            sub_page_idx = 0;
            sub_page_cnt = FOURK_PAGER_SLOTS;
        }

        num_subpg_signed = 0;
        num_subpg_validated = 0;
        num_subpg_tainted = 0;
        num_subpg_nx = 0;

        /* retrieve appropriate data for each 4K-page in this page */
        for (sub_page = sub_page_idx;
            sub_page < sub_page_idx + sub_page_cnt;
            sub_page++) {
            vm_object_t src_object;
            memory_object_offset_t src_offset;
            vm_offset_t offset_in_src_page;
            kern_return_t error_code;
            vm_object_t src_page_object;
            vm_page_t src_page;
            vm_page_t top_page;
            vm_prot_t prot;
            int interruptible;
            struct vm_object_fault_info fault_info;
            boolean_t subpg_validated;
            unsigned subpg_tainted;

            if (offset < SIXTEENK_PAGE_SIZE) {
                /*
                 * The 1st 16K-page can cover multiple
                 * sub-mappings, as described in the
                 * pager->slots[] array.
                 */
                src_object =
                    pager->slots[sub_page].backing_object;
                src_offset =
                    pager->slots[sub_page].backing_offset;
            } else {
                fourk_pager_backing_t slot;

                /*
                 * Beyond the 1st 16K-page in the pager is
                 * an extension of the last "sub page" in
                 * the pager->slots[] array.
                 */
                slot = &pager->slots[FOURK_PAGER_SLOTS - 1];
                src_object = slot->backing_object;
                src_offset = slot->backing_offset;
                src_offset += FOURK_PAGE_SIZE;
                src_offset +=
                    (vm_map_trunc_page(offset,
                    SIXTEENK_PAGE_MASK)
                    - SIXTEENK_PAGE_SIZE);
                src_offset += sub_page * FOURK_PAGE_SIZE;
            }
            offset_in_src_page = src_offset & PAGE_MASK_64;
            src_offset = vm_object_trunc_page(src_offset);

            if (src_object == VM_OBJECT_NULL ||
                src_object == (vm_object_t) -1) {
                /* zero-fill */
                bzero((char *)(dst_vaddr +
                    ((sub_page - sub_page_idx)
                    * FOURK_PAGE_SIZE)),
                    FOURK_PAGE_SIZE);
                if (fourk_pager_data_request_debug) {
                    printf("fourk_pager_data_request"
                        "(%p,0x%llx+0x%lx+0x%04x): "
                        "ZERO\n",
                        pager,
                        offset,
                        cur_offset,
                        ((sub_page - sub_page_idx)
                        * FOURK_PAGE_SIZE));
                }
                continue;
            }

            /* fault in the source page from src_object */
retry_src_fault:
            src_page = VM_PAGE_NULL;
            top_page = VM_PAGE_NULL;
            fault_info = *((struct vm_object_fault_info *)
                (uintptr_t)mo_fault_info);
            fault_info.stealth = TRUE;
            fault_info.io_sync = FALSE;
            fault_info.mark_zf_absent = FALSE;
            fault_info.batch_pmap_op = FALSE;
            interruptible = fault_info.interruptible;
            prot = VM_PROT_READ;
            error_code = 0;

            vm_object_lock(src_object);
            vm_object_paging_begin(src_object);
            kr = vm_fault_page(src_object,
                src_offset,
                VM_PROT_READ,
                FALSE,
                FALSE, /* src_page not looked up */
                &prot,
                &src_page,
                &top_page,
                NULL,
                &error_code,
                FALSE,
                FALSE,
                &fault_info);
            switch (kr) {
            case VM_FAULT_SUCCESS:
                break;
            case VM_FAULT_RETRY:
                goto retry_src_fault;
            case VM_FAULT_MEMORY_SHORTAGE:
                if (vm_page_wait(interruptible)) {
                    goto retry_src_fault;
                }
                /* fall thru */
            case VM_FAULT_INTERRUPTED:
                retval = MACH_SEND_INTERRUPTED;
                goto src_fault_done;
            case VM_FAULT_SUCCESS_NO_VM_PAGE:
                /* success but no VM page: fail */
                vm_object_paging_end(src_object);
                vm_object_unlock(src_object);
                /*FALLTHROUGH*/
            case VM_FAULT_MEMORY_ERROR:
                /* the page is not there! */
                if (error_code) {
                    retval = error_code;
                } else {
                    retval = KERN_MEMORY_ERROR;
                }
                goto src_fault_done;
            default:
                panic("fourk_pager_data_request: "
                    "vm_fault_page() unexpected error 0x%x\n",
                    kr);
            }
            assert(src_page != VM_PAGE_NULL);
            assert(src_page->vmp_busy);

            src_page_object = VM_PAGE_OBJECT(src_page);

            if ((!VM_PAGE_PAGEABLE(src_page)) &&
                !VM_PAGE_WIRED(src_page)) {
                vm_page_lockspin_queues();
                if ((!VM_PAGE_PAGEABLE(src_page)) &&
                    !VM_PAGE_WIRED(src_page)) {
                    vm_page_deactivate(src_page);
                }
                vm_page_unlock_queues();
            }

#if __x86_64__
            src_vaddr = (vm_map_offset_t)
                PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(src_page)
                << PAGE_SHIFT);
#elif __arm__ || __arm64__
            src_vaddr = (vm_map_offset_t)
                phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(src_page)
                << PAGE_SHIFT);
#else
            /*
             * Establish an explicit mapping of the source
             * physical page.
             */
            kr = pmap_enter(kernel_pmap,
                src_vaddr,
                VM_PAGE_GET_PHYS_PAGE(src_page),
                VM_PROT_READ,
                VM_PROT_NONE,
                0,
                TRUE);

            assert(kr == KERN_SUCCESS);
#endif

            /*
             * Validate the 4K page we want from
             * this source page...
             */
            subpg_validated = FALSE;
            subpg_tainted = 0;
            if (src_page_object->code_signed) {
                vm_page_validate_cs_mapped_chunk(
                    src_page,
                    (const void *) src_vaddr,
                    offset_in_src_page,
                    FOURK_PAGE_SIZE,
                    &subpg_validated,
                    &subpg_tainted);
                num_subpg_signed++;
                if (subpg_validated) {
                    num_subpg_validated++;
                }
                if (subpg_tainted & CS_VALIDATE_TAINTED) {
                    num_subpg_tainted++;
                }
                if (subpg_tainted & CS_VALIDATE_NX) {
                    /* subpg should not be executable */
                    if (sub_page_cnt > 1) {
                        /*
                         * The destination page has
                         * more than 1 subpage and its
                         * other subpages might need
                         * EXEC, so we do not propagate
                         * CS_VALIDATE_NX to the
                         * destination page...
                         */
                    } else {
                        num_subpg_nx++;
                    }
                }
            }

            /*
             * Copy the relevant portion of the source page
             * into the appropriate part of the destination page.
             */
            bcopy((const char *)(src_vaddr + offset_in_src_page),
                (char *)(dst_vaddr +
                ((sub_page - sub_page_idx) *
                FOURK_PAGE_SIZE)),
                FOURK_PAGE_SIZE);
            if (fourk_pager_data_request_debug) {
                printf("fourk_data_request"
                    "(%p,0x%llx+0x%lx+0x%04x): "
                    "backed by [%p:0x%llx]: "
                    "[0x%016llx 0x%016llx] "
                    "code_signed=%d "
                    "cs_valid=%d cs_tainted=%d cs_nx=%d\n",
                    pager,
                    offset, cur_offset,
                    (sub_page - sub_page_idx) * FOURK_PAGE_SIZE,
                    src_page_object,
                    src_page->vmp_offset + offset_in_src_page,
                    *(uint64_t *)(dst_vaddr +
                    ((sub_page - sub_page_idx) *
                    FOURK_PAGE_SIZE)),
                    *(uint64_t *)(dst_vaddr +
                    ((sub_page - sub_page_idx) *
                    FOURK_PAGE_SIZE) +
                    8),
                    src_page_object->code_signed,
                    subpg_validated,
                    !!(subpg_tainted & CS_VALIDATE_TAINTED),
                    !!(subpg_tainted & CS_VALIDATE_NX));
            }

#if __x86_64__ || __arm__ || __arm64__
            /* we used the 1-to-1 mapping of physical memory */
            src_vaddr = 0;
#else /* __x86_64__ || __arm__ || __arm64__ */
            /*
             * Remove the pmap mapping of the source page
             * in the kernel.
             */
            pmap_remove(kernel_pmap,
                (addr64_t) src_vaddr,
                (addr64_t) src_vaddr + PAGE_SIZE_64);
#endif /* __x86_64__ || __arm__ || __arm64__ */

src_fault_done:
            /*
             * Cleanup the result of vm_fault_page().
             */
            if (src_page) {
                assert(VM_PAGE_OBJECT(src_page) == src_page_object);

                PAGE_WAKEUP_DONE(src_page);
                src_page = VM_PAGE_NULL;
                vm_object_paging_end(src_page_object);
                vm_object_unlock(src_page_object);
                if (top_page) {
                    vm_object_t top_object;

                    top_object = VM_PAGE_OBJECT(top_page);
                    vm_object_lock(top_object);
                    VM_PAGE_FREE(top_page);
                    top_page = VM_PAGE_NULL;
                    vm_object_paging_end(top_object);
                    vm_object_unlock(top_object);
                }
            }
        }
        if (num_subpg_signed > 0) {
            /* some code-signing involved with this 16K page */
            if (num_subpg_tainted > 0) {
                /* a tainted subpage taints entire 16K page */
                UPL_SET_CS_TAINTED(upl_pl,
                    cur_offset / PAGE_SIZE,
                    TRUE);
                /* also mark as "validated" for consistency */
                UPL_SET_CS_VALIDATED(upl_pl,
                    cur_offset / PAGE_SIZE,
                    TRUE);
            } else if (num_subpg_validated == num_subpg_signed) {
                /*
                 * All the code-signed 4K subpages of this
                 * 16K page are validated: our 16K page is
                 * considered validated.
                 */
                UPL_SET_CS_VALIDATED(upl_pl,
                    cur_offset / PAGE_SIZE,
                    TRUE);
            }
            if (num_subpg_nx > 0) {
                UPL_SET_CS_NX(upl_pl,
                    cur_offset / PAGE_SIZE,
                    TRUE);
            }
        }
    }

done:
    if (upl != NULL) {
        /* clean up the UPL */

        /*
         * The pages are currently dirty because we've just been
         * writing on them, but as far as we're concerned, they're
         * clean since they contain their "original" contents as
         * provided by us, the pager.
         * Tell the UPL to mark them "clean".
         */
        upl_clear_dirty(upl, TRUE);

        /* abort or commit the UPL */
        if (retval != KERN_SUCCESS) {
            upl_abort(upl, 0);
            if (retval == KERN_ABORTED) {
                wait_result_t wait_result;

                /*
                 * We aborted the fault and did not provide
                 * any contents for the requested pages but
                 * the pages themselves are not invalid, so
                 * let's return success and let the caller
                 * retry the fault, in case it might succeed
                 * later (when the decryption code is up and
                 * running in the kernel, for example).
                 */
                retval = KERN_SUCCESS;
                /*
                 * Wait a little bit first to avoid using
                 * too much CPU time retrying and failing
                 * the same fault over and over again.
                 */
                wait_result = assert_wait_timeout(
                    (event_t) fourk_pager_data_request,
                    THREAD_UNINT,
                    10000,  /* 10ms */
                    NSEC_PER_USEC);
                assert(wait_result == THREAD_WAITING);
                wait_result = thread_block(THREAD_CONTINUE_NULL);
                assert(wait_result == THREAD_TIMED_OUT);
            }
        } else {
            boolean_t empty;
            upl_commit_range(upl, 0, upl->size,
                UPL_COMMIT_CS_VALIDATED | UPL_COMMIT_WRITTEN_BY_KERNEL,
                upl_pl, pl_count, &empty);
        }

        /* and deallocate the UPL */
        upl_deallocate(upl);
        upl = NULL;
    }
    if (kernel_mapping != 0) {
        /* clean up the mapping of the source and destination pages */
        kr = vm_map_remove(kernel_map,
            kernel_mapping,
            kernel_mapping + (2 * PAGE_SIZE_64),
            VM_MAP_REMOVE_NO_FLAGS);
        assert(kr == KERN_SUCCESS);
        kernel_mapping = 0;
        src_vaddr = 0;
        dst_vaddr = 0;
    }

    return retval;
}


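/*
 * fourk_pager_populate()
 *
 * Set the backing object and offset for one of the pager's 4K slots and
 * return the slot's previous backing object and offset.
 *
 * A caller setting up a 4K mapping would typically create the pager and
 * then fill in each slot, roughly along these lines (sketch only; the
 * actual call sites live in the VM map code):
 *
 *	mem_obj = fourk_pager_create();
 *	kr = fourk_pager_populate(mem_obj, FALSE, slot_index,
 *	                          backing_object, backing_offset,
 *	                          &old_object, &old_offset);
 */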
kern_return_t
fourk_pager_populate(
    memory_object_t mem_obj,
    boolean_t overwrite,
    int index,
    vm_object_t new_backing_object,
    vm_object_offset_t new_backing_offset,
    vm_object_t *old_backing_object,
    vm_object_offset_t *old_backing_offset)
{
    fourk_pager_t pager;

    pager = fourk_pager_lookup(mem_obj);
    if (pager == NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    assert(pager->ref_count > 0);
    assert(pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL);

    if (index < 0 || index >= FOURK_PAGER_SLOTS) {
        return KERN_INVALID_ARGUMENT;
    }

    if (!overwrite &&
        (pager->slots[index].backing_object != (vm_object_t) -1 ||
        pager->slots[index].backing_offset != (vm_object_offset_t) -1)) {
        return KERN_INVALID_ADDRESS;
    }

    *old_backing_object = pager->slots[index].backing_object;
    *old_backing_offset = pager->slots[index].backing_offset;

    pager->slots[index].backing_object = new_backing_object;
    pager->slots[index].backing_offset = new_backing_offset;

    return KERN_SUCCESS;
}