1 | /* |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | /* |
57 | */ |
58 | /* |
59 | * File: vm/vm_kern.c |
60 | * Author: Avadis Tevanian, Jr., Michael Wayne Young |
61 | * Date: 1985 |
62 | * |
63 | * Kernel memory management. |
64 | */ |
65 | |
66 | #include <mach/kern_return.h> |
67 | #include <mach/vm_param.h> |
68 | #include <kern/assert.h> |
69 | #include <kern/thread.h> |
70 | #include <vm/vm_kern.h> |
71 | #include <vm/vm_map_internal.h> |
72 | #include <vm/vm_object.h> |
73 | #include <vm/vm_page.h> |
74 | #include <vm/vm_compressor.h> |
75 | #include <vm/vm_pageout.h> |
76 | #include <vm/vm_init.h> |
77 | #include <vm/vm_fault.h> |
78 | #include <vm/vm_memtag.h> |
79 | #include <kern/misc_protos.h> |
80 | #include <vm/cpm.h> |
81 | #include <kern/ledger.h> |
82 | #include <kern/bits.h> |
83 | #include <kern/startup.h> |
84 | |
85 | #include <string.h> |
86 | |
87 | #include <libkern/OSDebug.h> |
88 | #include <libkern/crypto/sha2.h> |
89 | #include <libkern/section_keywords.h> |
90 | #include <sys/kdebug.h> |
91 | #include <sys/kdebug_triage.h> |
92 | |
93 | #include <san/kasan.h> |
94 | #include <kern/kext_alloc.h> |
95 | #include <kern/backtrace.h> |
96 | #include <os/hash.h> |
97 | #include <kern/zalloc_internal.h> |
98 | #include <libkern/crypto/rand.h> |
99 | |
100 | /* |
101 | * Variables exported by this module. |
102 | */ |
103 | |
104 | SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map; |
105 | SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT]; |
106 | SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT]; |
107 | |
108 | static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges" , |
109 | KMEM_RANGE_ID_NUM_PTR); |
110 | #define KMEM_GOBJ_THRESHOLD (32ULL << 20) |
111 | #if DEBUG || DEVELOPMENT |
112 | #define KMEM_OUTLIER_LOG_SIZE (16ULL << 10) |
113 | #define KMEM_OUTLIER_SIZE 0 |
114 | #define KMEM_OUTLIER_ALIGN 1 |
115 | btlog_t kmem_outlier_log; |
116 | #endif /* DEBUG || DEVELOPMENT */ |
117 | |
118 | __startup_data static vm_map_size_t data_range_size; |
119 | __startup_data static vm_map_size_t ptr_range_size; |
120 | __startup_data static vm_map_size_t sprayqtn_range_size; |
121 | |
122 | #pragma mark helpers |
123 | |
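/*
 * ANYF() converts the typed flag variants (kma_flags_t for alloc,
 * kmr_flags_t for realloc, kmf_flags_t for free) into the common
 * kmem_flags_t representation used by the shared helpers below.
 */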
124 | __attribute__((overloadable)) |
125 | __header_always_inline kmem_flags_t |
126 | ANYF(kma_flags_t flags) |
127 | { |
128 | return (kmem_flags_t)flags; |
129 | } |
130 | |
131 | __attribute__((overloadable)) |
132 | __header_always_inline kmem_flags_t |
133 | ANYF(kmr_flags_t flags) |
134 | { |
135 | return (kmem_flags_t)flags; |
136 | } |
137 | |
138 | __attribute__((overloadable)) |
139 | __header_always_inline kmem_flags_t |
140 | ANYF(kmf_flags_t flags) |
141 | { |
142 | return (kmem_flags_t)flags; |
143 | } |
144 | |
145 | __abortlike |
146 | static void |
147 | __kmem_invalid_size_panic( |
148 | vm_map_t map, |
149 | vm_size_t size, |
150 | uint32_t flags) |
151 | { |
152 | panic("kmem(map=%p, flags=0x%x): invalid size %zd" , |
153 | map, flags, (size_t)size); |
154 | } |
155 | |
156 | __abortlike |
157 | static void |
158 | __kmem_invalid_arguments_panic( |
159 | const char *what, |
160 | vm_map_t map, |
161 | vm_address_t address, |
162 | vm_size_t size, |
163 | uint32_t flags) |
164 | { |
165 | panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): " |
166 | "invalid arguments passed" , |
167 | what, map, (void *)address, (size_t)size, flags); |
168 | } |
169 | |
170 | __abortlike |
171 | static void |
172 | __kmem_failed_panic( |
173 | vm_map_t map, |
174 | vm_size_t size, |
175 | uint32_t flags, |
176 | kern_return_t kr, |
177 | const char *what) |
178 | { |
179 | panic("kmem_%s(%p, %zd, 0x%x): failed with %d" , |
180 | what, map, (size_t)size, flags, kr); |
181 | } |
182 | |
183 | __abortlike |
184 | static void |
185 | __kmem_entry_not_found_panic( |
186 | vm_map_t map, |
187 | vm_offset_t addr) |
188 | { |
189 | panic("kmem(map=%p) no entry found at %p" , map, (void *)addr); |
190 | } |
191 | |
192 | static inline vm_object_t |
193 | __kmem_object(kmem_flags_t flags) |
194 | { |
195 | if (flags & KMEM_COMPRESSOR) { |
196 | if (flags & KMEM_KOBJECT) { |
197 | panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified" ); |
198 | } |
199 | return compressor_object; |
200 | } |
201 | if (!(flags & KMEM_KOBJECT)) { |
202 | panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required" ); |
203 | } |
204 | return kernel_object_default; |
205 | } |
206 | |
207 | static inline pmap_mapping_type_t |
208 | __kmem_mapping_type(kmem_flags_t flags) |
209 | { |
210 | if (flags & (KMEM_DATA | KMEM_COMPRESSOR)) { |
211 | return PMAP_MAPPING_TYPE_DEFAULT; |
212 | } else { |
213 | return PMAP_MAPPING_TYPE_RESTRICTED; |
214 | } |
215 | } |
216 | |
217 | static inline vm_size_t |
218 | __kmem_guard_left(kmem_flags_t flags) |
219 | { |
220 | return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0; |
221 | } |
222 | |
223 | static inline vm_size_t |
224 | __kmem_guard_right(kmem_flags_t flags) |
225 | { |
226 | return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0; |
227 | } |
228 | |
229 | static inline vm_size_t |
230 | __kmem_guard_size(kmem_flags_t flags) |
231 | { |
232 | return __kmem_guard_left(flags) + __kmem_guard_right(flags); |
233 | } |
234 | |
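/*
 * Return the size that was originally requested for this entry, undoing
 * the page rounding recorded either in the entry itself (kernel object)
 * or in the backing VM object's size delta.
 */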
235 | __pure2 |
236 | static inline vm_size_t |
237 | __kmem_entry_orig_size(vm_map_entry_t entry) |
238 | { |
239 | vm_object_t object = VME_OBJECT(entry); |
240 | |
241 | if (entry->vme_kernel_object) { |
242 | return entry->vme_end - entry->vme_start - |
243 | entry->vme_object_or_delta; |
244 | } else { |
245 | return object->vo_size - object->vo_size_delta; |
246 | } |
247 | } |
248 | |
249 | |
250 | #pragma mark kmem range methods |
251 | |
252 | #if __arm64__ |
253 | // <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to |
254 | #define mach_vm_range_load(r, r_min, r_max) \ |
255 | asm("ldp %[rmin], %[rmax], [%[range]]" \ |
256 | : [rmin] "=r"(r_min), [rmax] "=r"(r_max) \ |
257 | : [range] "r"(r), "m"((r)->min_address), "m"((r)->max_address)) |
258 | #else |
259 | #define mach_vm_range_load(r, rmin, rmax) \ |
260 | ({ rmin = (r)->min_address; rmax = (r)->max_address; }) |
261 | #endif |
262 | |
263 | __abortlike |
264 | static void |
265 | __mach_vm_range_overflow( |
266 | mach_vm_offset_t addr, |
267 | mach_vm_offset_t size) |
268 | { |
269 | panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around" , |
270 | addr, addr, size); |
271 | } |
272 | |
273 | __abortlike |
274 | static void |
275 | __mach_vm_range_invalid( |
276 | mach_vm_offset_t min_address, |
277 | mach_vm_offset_t max_address) |
278 | { |
279 | panic("invalid vm range: [0x%llx, 0x%llx) wraps around" , |
280 | min_address, max_address); |
281 | } |
282 | |
283 | __header_always_inline mach_vm_size_t |
284 | mach_vm_range_size(const struct mach_vm_range *r) |
285 | { |
286 | mach_vm_offset_t rmin, rmax; |
287 | |
288 | mach_vm_range_load(r, rmin, rmax); |
289 | return rmax - rmin; |
290 | } |
291 | |
292 | __attribute__((overloadable)) |
293 | __header_always_inline bool |
294 | mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr) |
295 | { |
296 | mach_vm_offset_t rmin, rmax; |
297 | |
298 | #if CONFIG_KERNEL_TAGGING |
299 | if (VM_KERNEL_ADDRESS(addr)) { |
300 | addr = vm_memtag_canonicalize_address(addr); |
301 | } |
302 | #endif /* CONFIG_KERNEL_TAGGING */ |
303 | |
304 | /* |
305 | * The `&` is not a typo: we really expect the check to pass, |
306 | * so encourage the compiler to eagerly load and test without branches |
307 | */ |
308 | mach_vm_range_load(r, rmin, rmax); |
309 | return (addr >= rmin) & (addr < rmax); |
310 | } |
311 | |
312 | __attribute__((overloadable)) |
313 | __header_always_inline bool |
314 | mach_vm_range_contains( |
315 | const struct mach_vm_range *r, |
316 | mach_vm_offset_t addr, |
317 | mach_vm_offset_t size) |
318 | { |
319 | mach_vm_offset_t rmin, rmax; |
320 | |
321 | #if CONFIG_KERNEL_TAGGING |
322 | if (VM_KERNEL_ADDRESS(addr)) { |
323 | addr = vm_memtag_canonicalize_address(addr); |
324 | } |
325 | #endif /* CONFIG_KERNEL_TAGGING */ |
326 | |
327 | /* |
328 | * The `&` is not a typo: we really expect the check to pass, |
329 | * so encourage the compiler to eagerly load and test without branches |
330 | */ |
331 | mach_vm_range_load(r, rmin, rmax); |
332 | return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax); |
333 | } |
334 | |
335 | __attribute__((overloadable)) |
336 | __header_always_inline bool |
337 | mach_vm_range_intersects( |
338 | const struct mach_vm_range *r1, |
339 | const struct mach_vm_range *r2) |
340 | { |
341 | mach_vm_offset_t r1_min, r1_max; |
342 | mach_vm_offset_t r2_min, r2_max; |
343 | |
344 | mach_vm_range_load(r1, r1_min, r1_max); |
345 | r2_min = r2->min_address; |
346 | r2_max = r2->max_address; |
347 | |
348 | if (r1_min > r1_max) { |
__mach_vm_range_invalid(r1_min, r1_max);
350 | } |
351 | |
352 | if (r2_min > r2_max) { |
__mach_vm_range_invalid(r2_min, r2_max);
354 | } |
355 | |
356 | return r1_max > r2_min && r1_min < r2_max; |
357 | } |
358 | |
359 | __attribute__((overloadable)) |
360 | __header_always_inline bool |
361 | mach_vm_range_intersects( |
362 | const struct mach_vm_range *r1, |
363 | mach_vm_offset_t addr, |
364 | mach_vm_offset_t size) |
365 | { |
366 | struct mach_vm_range r2; |
367 | |
368 | addr = VM_KERNEL_STRIP_UPTR(addr); |
369 | r2.min_address = addr; |
370 | if (os_add_overflow(addr, size, &r2.max_address)) { |
371 | __mach_vm_range_overflow(addr, size); |
372 | } |
373 | |
return mach_vm_range_intersects(r1, &r2);
375 | } |
376 | |
377 | bool |
378 | kmem_range_id_contains( |
379 | kmem_range_id_t range_id, |
380 | vm_map_offset_t addr, |
381 | vm_map_size_t size) |
382 | { |
return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
384 | } |
385 | |
386 | __abortlike |
387 | static void |
388 | kmem_range_invalid_panic( |
389 | kmem_range_id_t range_id, |
390 | vm_map_offset_t addr, |
391 | vm_map_size_t size) |
392 | { |
393 | const struct mach_vm_range *r = &kmem_ranges[range_id]; |
394 | mach_vm_offset_t rmin, rmax; |
395 | |
396 | mach_vm_range_load(r, rmin, rmax); |
397 | if (addr + size < rmin) { |
398 | panic("addr %p + size %llu overflows %p" , (void *)addr, size, |
399 | (void *)(addr + size)); |
400 | } |
panic("addr %p + size %llu doesn't fit in one range (id: %u min: %p max: %p)",
402 | (void *)addr, size, range_id, (void *)rmin, (void *)rmax); |
403 | } |
404 | |
405 | /* |
406 | * Return whether the entire allocation is contained in the given range |
407 | */ |
408 | static bool |
409 | kmem_range_contains_fully( |
410 | kmem_range_id_t range_id, |
411 | vm_map_offset_t addr, |
412 | vm_map_size_t size) |
413 | { |
414 | const struct mach_vm_range *r = &kmem_ranges[range_id]; |
415 | mach_vm_offset_t rmin, rmax; |
416 | bool result = false; |
417 | |
418 | if (VM_KERNEL_ADDRESS(addr)) { |
419 | addr = vm_memtag_canonicalize_address(addr); |
420 | } |
421 | |
422 | /* |
423 | * The `&` is not a typo: we really expect the check to pass, |
424 | * so encourage the compiler to eagerly load and test without branches |
425 | */ |
426 | mach_vm_range_load(r, rmin, rmax); |
427 | result = (addr >= rmin) & (addr < rmax); |
428 | if (__improbable(result |
429 | && ((addr + size < rmin) || (addr + size > rmax)))) { |
430 | kmem_range_invalid_panic(range_id, addr, size); |
431 | } |
432 | return result; |
433 | } |
434 | |
435 | vm_map_size_t |
436 | kmem_range_id_size(kmem_range_id_t range_id) |
437 | { |
return mach_vm_range_size(&kmem_ranges[range_id]);
439 | } |
440 | |
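/*
 * Return the ID of the kmem range that fully contains [addr, addr + size),
 * or KMEM_RANGE_ID_NONE if the allocation doesn't belong to any range.
 */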
441 | kmem_range_id_t |
442 | kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size) |
443 | { |
444 | kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST; |
445 | |
446 | for (; range_id < KMEM_RANGE_COUNT; range_id++) { |
447 | if (kmem_range_contains_fully(range_id, addr, size)) { |
448 | return range_id; |
449 | } |
450 | } |
451 | return KMEM_RANGE_ID_NONE; |
452 | } |
453 | |
454 | bool |
455 | kmem_is_ptr_range(vm_map_range_id_t range_id) |
456 | { |
457 | return (range_id >= KMEM_RANGE_ID_FIRST) && |
458 | (range_id <= KMEM_RANGE_ID_NUM_PTR); |
459 | } |
460 | |
461 | __abortlike |
462 | static void |
463 | kmem_range_invalid_for_overwrite(vm_map_offset_t addr) |
464 | { |
465 | panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges" , |
466 | (void *)addr); |
467 | } |
468 | |
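/*
 * Ensure that an overwrite of [addr, addr + size) doesn't target one of
 * the pointer ranges (panicking if it does), and return the range that
 * contains the address.
 */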
469 | mach_vm_range_t |
470 | kmem_validate_range_for_overwrite( |
471 | vm_map_offset_t addr, |
472 | vm_map_size_t size) |
473 | { |
474 | vm_map_range_id_t range_id = kmem_addr_get_range(addr, size); |
475 | |
476 | if (kmem_is_ptr_range(range_id)) { |
477 | kmem_range_invalid_for_overwrite(addr); |
478 | } |
479 | |
480 | return &kmem_ranges[range_id]; |
481 | } |
482 | |
483 | |
484 | #pragma mark entry parameters |
485 | |
486 | |
487 | __abortlike |
488 | static void |
489 | __kmem_entry_validate_panic( |
490 | vm_map_t map, |
491 | vm_map_entry_t entry, |
492 | vm_offset_t addr, |
493 | vm_size_t size, |
494 | uint32_t flags, |
495 | kmem_guard_t guard) |
496 | { |
497 | const char *what = "???" ; |
498 | |
499 | if (entry->vme_atomic != guard.kmg_atomic) { |
500 | what = "atomicity" ; |
501 | } else if (entry->is_sub_map != guard.kmg_submap) { |
502 | what = "objectness" ; |
503 | } else if (addr != entry->vme_start) { |
504 | what = "left bound" ; |
505 | } else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) { |
506 | what = "right bound" ; |
507 | } else if (guard.kmg_context != entry->vme_context) { |
508 | what = "guard" ; |
509 | } |
510 | |
511 | panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): " |
512 | "entry:%p %s mismatch guard(0x%08x)" , |
513 | map, (void *)addr, size, flags, entry, |
514 | what, guard.kmg_context); |
515 | } |
516 | |
517 | static bool |
518 | __kmem_entry_validate_guard( |
519 | vm_map_entry_t entry, |
520 | vm_offset_t addr, |
521 | vm_size_t size, |
522 | kmem_flags_t flags, |
523 | kmem_guard_t guard) |
524 | { |
525 | if (entry->vme_atomic != guard.kmg_atomic) { |
526 | return false; |
527 | } |
528 | |
529 | if (!guard.kmg_atomic) { |
530 | return true; |
531 | } |
532 | |
533 | if (entry->is_sub_map != guard.kmg_submap) { |
534 | return false; |
535 | } |
536 | |
537 | if (addr != entry->vme_start) { |
538 | return false; |
539 | } |
540 | |
541 | if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) { |
542 | return false; |
543 | } |
544 | |
545 | if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) { |
546 | return false; |
547 | } |
548 | |
549 | return true; |
550 | } |
551 | |
552 | void |
553 | kmem_entry_validate_guard( |
554 | vm_map_t map, |
555 | vm_map_entry_t entry, |
556 | vm_offset_t addr, |
557 | vm_size_t size, |
558 | kmem_guard_t guard) |
559 | { |
if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
562 | } |
563 | } |
564 | |
565 | __abortlike |
566 | static void |
567 | __kmem_entry_validate_object_panic( |
568 | vm_map_t map, |
569 | vm_map_entry_t entry, |
570 | kmem_flags_t flags) |
571 | { |
572 | const char *what; |
573 | const char *verb; |
574 | |
575 | if (entry->is_sub_map) { |
576 | panic("kmem(map=%p) entry %p is a submap" , map, entry); |
577 | } |
578 | |
579 | if (flags & KMEM_KOBJECT) { |
580 | what = "kernel" ; |
581 | verb = "isn't" ; |
582 | } else if (flags & KMEM_COMPRESSOR) { |
583 | what = "compressor" ; |
584 | verb = "isn't" ; |
585 | } else if (entry->vme_kernel_object) { |
586 | what = "kernel" ; |
587 | verb = "is unexpectedly" ; |
588 | } else { |
589 | what = "compressor" ; |
590 | verb = "is unexpectedly" ; |
591 | } |
592 | |
593 | panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object" , |
594 | map, flags, entry, verb, what); |
595 | } |
596 | |
597 | static bool |
598 | __kmem_entry_validate_object( |
599 | vm_map_entry_t entry, |
600 | kmem_flags_t flags) |
601 | { |
602 | if (entry->is_sub_map) { |
603 | return false; |
604 | } |
605 | if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) { |
606 | return false; |
607 | } |
608 | |
609 | return (bool)(flags & KMEM_COMPRESSOR) == |
610 | (VME_OBJECT(entry) == compressor_object); |
611 | } |
612 | |
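/*
 * Look up the entry backing `addr` and return its original (unrounded)
 * allocation size, panicking if no entry is found or if it doesn't match
 * the provided guard.
 */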
613 | vm_size_t |
614 | kmem_size_guard( |
615 | vm_map_t map, |
616 | vm_offset_t addr, |
617 | kmem_guard_t guard) |
618 | { |
619 | kmem_flags_t flags = KMEM_GUESS_SIZE; |
620 | vm_map_entry_t entry; |
621 | vm_size_t size; |
622 | |
623 | vm_map_lock_read(map); |
624 | |
625 | #if KASAN_CLASSIC |
626 | addr -= PAGE_SIZE; |
627 | #endif /* KASAN_CLASSIC */ |
628 | addr = vm_memtag_canonicalize_address(addr); |
629 | |
if (!vm_map_lookup_entry(map, addr, &entry)) {
631 | __kmem_entry_not_found_panic(map, addr); |
632 | } |
633 | |
if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
636 | } |
637 | |
638 | size = __kmem_entry_orig_size(entry); |
639 | |
640 | vm_map_unlock_read(map); |
641 | |
642 | return size; |
643 | } |
644 | |
645 | static inline uint16_t |
646 | kmem_hash_backtrace( |
647 | void *fp) |
648 | { |
649 | uint64_t bt_count; |
650 | uintptr_t bt[8] = {}; |
651 | |
652 | struct backtrace_control ctl = { |
653 | .btc_frame_addr = (uintptr_t)fp, |
654 | }; |
655 | |
bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
658 | } |
659 | |
660 | static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK, |
661 | "Insufficient bits to represent ptr ranges" ); |
662 | |
663 | kmem_range_id_t |
664 | kmem_adjust_range_id( |
665 | uint32_t hash) |
666 | { |
667 | return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 + |
668 | (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges); |
669 | } |
670 | |
671 | static bool |
672 | kmem_use_sprayqtn( |
673 | kma_flags_t kma_flags, |
674 | vm_map_size_t map_size, |
675 | vm_offset_t mask) |
676 | { |
677 | /* |
678 | * Pointer allocations that are above the guard objects threshold or have |
679 | * leading guard pages with non standard alignment requests are redirected |
680 | * to the sprayqtn range. |
681 | */ |
682 | #if DEBUG || DEVELOPMENT |
683 | btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ? |
684 | BTREF_GET_NOWAIT : 0; |
685 | |
686 | if ((kma_flags & KMA_SPRAYQTN) == 0) { |
687 | if (map_size > KMEM_GOBJ_THRESHOLD) { |
688 | btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE, |
689 | btref_get(__builtin_frame_address(0), flags)); |
690 | } else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) { |
691 | btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN, |
692 | btref_get(__builtin_frame_address(0), flags)); |
693 | } |
694 | } |
695 | #endif /* DEBUG || DEVELOPMENT */ |
696 | |
697 | return (kma_flags & KMA_SPRAYQTN) || |
698 | (map_size > KMEM_GOBJ_THRESHOLD) || |
699 | ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)); |
700 | } |
701 | |
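/*
 * Pick the kmem range (data, spray quarantine, or one of the pointer
 * ranges) and the allocation direction for this request, and record the
 * decision in the vm_map_kernel_flags.
 */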
702 | static void |
703 | kmem_apply_security_policy( |
704 | vm_map_t map, |
705 | kma_flags_t kma_flags, |
706 | kmem_guard_t guard, |
707 | vm_map_size_t map_size, |
708 | vm_offset_t mask, |
709 | vm_map_kernel_flags_t *vmk_flags, |
710 | bool assert_dir __unused) |
711 | { |
712 | kmem_range_id_t range_id; |
713 | bool from_right; |
714 | uint16_t type_hash = guard.kmg_type_hash; |
715 | |
716 | if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) { |
717 | return; |
718 | } |
719 | |
720 | /* |
721 | * A non-zero type-hash must be passed by krealloc_type |
722 | */ |
723 | #if (DEBUG || DEVELOPMENT) |
724 | if (assert_dir && !(kma_flags & KMA_DATA)) { |
725 | assert(type_hash != 0); |
726 | } |
727 | #endif |
728 | |
729 | if (kma_flags & KMA_DATA) { |
730 | range_id = KMEM_RANGE_ID_DATA; |
731 | /* |
732 | * As an optimization in KMA_DATA to avoid fragmentation, |
733 | * allocate static carveouts at the end of the DATA range. |
734 | */ |
735 | from_right = (bool)(kma_flags & KMA_PERMANENT); |
736 | } else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) { |
737 | range_id = KMEM_RANGE_ID_SPRAYQTN; |
738 | from_right = (bool)(kma_flags & KMA_PERMANENT); |
739 | } else if (type_hash) { |
740 | range_id = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK); |
741 | from_right = type_hash & KMEM_DIRECTION_MASK; |
742 | } else { |
743 | /* |
744 | * Range id needs to correspond to one of the PTR ranges |
745 | */ |
type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
range_id = kmem_adjust_range_id(type_hash);
748 | from_right = type_hash & KMEM_DIRECTION_MASK; |
749 | } |
750 | |
751 | vmk_flags->vmkf_range_id = range_id; |
752 | vmk_flags->vmkf_last_free = from_right; |
753 | } |
754 | |
755 | #pragma mark allocation |
756 | |
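/*
 * Common allocation path for kmem_alloc_guard() and kmem_alloc_contig_guard():
 * validates the request, grabs guard and wired pages as needed, carves out
 * the virtual range, and populates the mapping. The `alloc_pages` block lets
 * callers override how the wired page list is obtained.
 */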
757 | static kmem_return_t |
758 | kmem_alloc_guard_internal( |
759 | vm_map_t map, |
760 | vm_size_t size, |
761 | vm_offset_t mask, |
762 | kma_flags_t flags, |
763 | kmem_guard_t guard, |
764 | kern_return_t (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *)) |
765 | { |
766 | vm_object_t object; |
767 | vm_offset_t delta = 0; |
768 | vm_map_entry_t entry = NULL; |
769 | vm_map_offset_t map_addr, fill_start; |
770 | vm_map_size_t map_size, fill_size; |
771 | vm_page_t guard_left = VM_PAGE_NULL; |
772 | vm_page_t guard_right = VM_PAGE_NULL; |
773 | vm_page_t wired_page_list = VM_PAGE_NULL; |
774 | vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE(); |
775 | bool skip_guards; |
776 | kmem_return_t kmr = { }; |
777 | |
778 | assert(kernel_map && map->pmap == kernel_pmap); |
779 | |
780 | #if DEBUG || DEVELOPMENT |
781 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, |
782 | size, 0, 0, 0); |
783 | #endif |
784 | |
785 | if (size == 0 || |
786 | (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) || |
(size < __kmem_guard_size(ANYF(flags)))) {
788 | __kmem_invalid_size_panic(map, size, flags); |
789 | } |
790 | |
791 | /* |
792 | * limit the size of a single extent of wired memory |
793 | * to try and limit the damage to the system if |
794 | * too many pages get wired down |
795 | * limit raised to 2GB with 128GB max physical limit, |
796 | * but scaled by installed memory above this |
797 | * |
798 | * Note: kmem_alloc_contig_guard() is immune to this check. |
799 | */ |
800 | if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) && |
801 | alloc_pages == NULL && |
802 | size > MAX(1ULL << 31, sane_size / 64))) { |
803 | kmr.kmr_return = KERN_RESOURCE_SHORTAGE; |
804 | goto out_error; |
805 | } |
806 | |
807 | /* |
808 | * Guard pages: |
809 | * |
810 | * Guard pages are implemented as fictitious pages. |
811 | * |
812 | * However, some maps, and some objects are known |
813 | * to manage their memory explicitly, and do not need |
814 | * those to be materialized, which saves memory. |
815 | * |
816 | * By placing guard pages on either end of a stack, |
817 | * they can help detect cases where a thread walks |
818 | * off either end of its stack. |
819 | * |
820 | * They are allocated and set up here and attempts |
821 | * to access those pages are trapped in vm_fault_page(). |
822 | * |
823 | * The map_size we were passed may include extra space for |
824 | * guard pages. fill_size represents the actual size to populate. |
825 | * Similarly, fill_start indicates where the actual pages |
826 | * will begin in the range. |
827 | */ |
828 | |
map_size = round_page(size);
830 | fill_start = 0; |
fill_size = map_size - __kmem_guard_size(ANYF(flags));
832 | |
833 | #if KASAN_CLASSIC |
834 | if (flags & KMA_KASAN_GUARD) { |
835 | assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0); |
flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
837 | delta = ptoa(2); |
838 | map_size += delta; |
839 | } |
840 | #else |
841 | (void)delta; |
842 | #endif /* KASAN_CLASSIC */ |
843 | |
844 | skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) || |
845 | map->never_faults; |
846 | |
847 | if (flags & KMA_GUARD_FIRST) { |
848 | vmk_flags.vmkf_guard_before = true; |
849 | fill_start += PAGE_SIZE; |
850 | } |
851 | if ((flags & KMA_GUARD_FIRST) && !skip_guards) { |
guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
853 | if (__improbable(guard_left == VM_PAGE_NULL)) { |
854 | kmr.kmr_return = KERN_RESOURCE_SHORTAGE; |
855 | goto out_error; |
856 | } |
857 | } |
858 | if ((flags & KMA_GUARD_LAST) && !skip_guards) { |
guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
860 | if (__improbable(guard_right == VM_PAGE_NULL)) { |
861 | kmr.kmr_return = KERN_RESOURCE_SHORTAGE; |
862 | goto out_error; |
863 | } |
864 | } |
865 | |
866 | if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) { |
867 | if (alloc_pages) { |
868 | kmr.kmr_return = alloc_pages(fill_size, flags, |
869 | &wired_page_list); |
870 | } else { |
871 | kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags, |
&wired_page_list);
873 | } |
874 | if (__improbable(kmr.kmr_return != KERN_SUCCESS)) { |
875 | goto out_error; |
876 | } |
877 | } |
878 | |
879 | /* |
880 | * Allocate a new object (if necessary). We must do this before |
881 | * locking the map, or risk deadlock with the default pager. |
882 | */ |
883 | if (flags & KMA_KOBJECT) { |
884 | object = kernel_object_default; |
885 | vm_object_reference(object); |
886 | } else if (flags & KMA_COMPRESSOR) { |
887 | object = compressor_object; |
888 | vm_object_reference(object); |
889 | } else { |
object = vm_object_allocate(map_size);
891 | vm_object_lock(object); |
vm_object_set_size(object, map_size, size);
893 | /* stabilize the object to prevent shadowing */ |
894 | object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; |
895 | VM_OBJECT_SET_TRUE_SHARE(object, TRUE); |
896 | vm_object_unlock(object); |
897 | } |
898 | |
899 | if (flags & KMA_LAST_FREE) { |
900 | vmk_flags.vmkf_last_free = true; |
901 | } |
902 | if (flags & KMA_PERMANENT) { |
903 | vmk_flags.vmf_permanent = true; |
904 | } |
kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
906 | false); |
907 | |
kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
vmk_flags, &entry);
910 | if (__improbable(KERN_SUCCESS != kmr.kmr_return)) { |
911 | vm_object_deallocate(object); |
912 | goto out_error; |
913 | } |
914 | |
915 | map_addr = entry->vme_start; |
VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
VME_ALIAS_SET(entry, guard.kmg_tag);
918 | if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) { |
VME_OFFSET_SET(entry, map_addr);
920 | } |
921 | |
922 | #if KASAN |
923 | if ((flags & KMA_KOBJECT) && guard.kmg_atomic) { |
924 | entry->vme_object_or_delta = (-size & PAGE_MASK) + delta; |
925 | } |
926 | #endif /* KASAN */ |
927 | |
928 | if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) { |
929 | entry->wired_count = 1; |
vme_btref_consider_and_set(entry, __builtin_frame_address(0));
931 | } |
932 | |
933 | if (guard_left || guard_right || wired_page_list) { |
934 | vm_object_offset_t offset = 0ull; |
935 | |
936 | vm_object_lock(object); |
937 | vm_map_unlock(map); |
938 | |
939 | if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) { |
940 | offset = map_addr; |
941 | } |
942 | |
943 | if (guard_left) { |
vm_page_insert(guard_left, object, offset);
945 | guard_left->vmp_busy = FALSE; |
946 | guard_left = VM_PAGE_NULL; |
947 | } |
948 | |
949 | if (guard_right) { |
vm_page_insert(guard_right, object,
offset + fill_start + fill_size);
952 | guard_right->vmp_busy = FALSE; |
953 | guard_right = VM_PAGE_NULL; |
954 | } |
955 | |
956 | if (wired_page_list) { |
957 | kernel_memory_populate_object_and_unlock(object, |
map_addr + fill_start, offset + fill_start, fill_size,
wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
__kmem_mapping_type(ANYF(flags)));
961 | } else { |
962 | vm_object_unlock(object); |
963 | } |
964 | } else { |
965 | vm_map_unlock(map); |
966 | } |
967 | |
968 | /* |
969 | * now that the pages are wired, we no longer have to fear coalesce |
970 | */ |
971 | if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) { |
vm_map_simplify(map, map_addr);
973 | } |
974 | |
975 | #if DEBUG || DEVELOPMENT |
976 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, |
977 | atop(fill_size), 0, 0, 0); |
978 | #endif /* DEBUG || DEVELOPMENT */ |
979 | kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr); |
980 | |
981 | #if KASAN |
982 | if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) { |
983 | /* |
984 | * We need to allow the range for pageable memory, |
985 | * or faulting will not be allowed. |
986 | */ |
987 | kasan_notify_address(map_addr, map_size); |
988 | } |
989 | #endif /* KASAN */ |
990 | #if KASAN_CLASSIC |
991 | if (flags & KMA_KASAN_GUARD) { |
992 | kmr.kmr_address += PAGE_SIZE; |
993 | kasan_alloc_large(kmr.kmr_address, size); |
994 | } |
995 | #endif /* KASAN_CLASSIC */ |
996 | #if CONFIG_KERNEL_TAGGING |
997 | if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) { |
998 | kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, size); |
999 | vm_memtag_set_tag((vm_offset_t)kmr.kmr_address, size); |
1000 | #if KASAN_TBI |
1001 | kasan_tbi_retag_unused_space((vm_offset_t)kmr.kmr_address, map_size, size); |
1002 | #endif /* KASAN_TBI */ |
1003 | } |
1004 | #endif /* CONFIG_KERNEL_TAGGING */ |
1005 | return kmr; |
1006 | |
1007 | out_error: |
1008 | if (flags & KMA_NOFAIL) { |
__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
1010 | } |
1011 | if (guard_left) { |
1012 | guard_left->vmp_snext = wired_page_list; |
1013 | wired_page_list = guard_left; |
1014 | } |
1015 | if (guard_right) { |
1016 | guard_right->vmp_snext = wired_page_list; |
1017 | wired_page_list = guard_right; |
1018 | } |
1019 | if (wired_page_list) { |
vm_page_free_list(wired_page_list, FALSE);
1021 | } |
1022 | |
1023 | #if DEBUG || DEVELOPMENT |
1024 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, |
1025 | 0, 0, 0, 0); |
1026 | #endif /* DEBUG || DEVELOPMENT */ |
1027 | |
1028 | return kmr; |
1029 | } |
1030 | |
1031 | kmem_return_t |
1032 | kmem_alloc_guard( |
1033 | vm_map_t map, |
1034 | vm_size_t size, |
1035 | vm_offset_t mask, |
1036 | kma_flags_t flags, |
1037 | kmem_guard_t guard) |
1038 | { |
1039 | return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL); |
1040 | } |
1041 | |
1042 | kmem_return_t |
1043 | kmem_alloc_contig_guard( |
1044 | vm_map_t map, |
1045 | vm_size_t size, |
1046 | vm_offset_t mask, |
1047 | ppnum_t max_pnum, |
1048 | ppnum_t pnum_mask, |
1049 | kma_flags_t flags, |
1050 | kmem_guard_t guard) |
1051 | { |
1052 | __auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) { |
return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
1054 | }; |
1055 | |
1056 | return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages); |
1057 | } |
1058 | |
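/*
 * Create a kernel submap of `size` bytes backed by the parent's pmap and
 * enter it into `parent` at `*addr` (or wherever the map decides for
 * anywhere requests). On success the new map is returned in kmr_submap.
 */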
1059 | kmem_return_t |
1060 | kmem_suballoc( |
1061 | vm_map_t parent, |
1062 | mach_vm_offset_t *addr, |
1063 | vm_size_t size, |
1064 | vm_map_create_options_t vmc_options, |
1065 | int vm_flags, |
1066 | kms_flags_t flags, |
1067 | vm_tag_t tag) |
1068 | { |
1069 | vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; |
1070 | vm_map_offset_t map_addr = 0; |
1071 | kmem_return_t kmr = { }; |
1072 | vm_map_t map; |
1073 | |
1074 | assert(page_aligned(size)); |
1075 | assert(parent->pmap == kernel_pmap); |
1076 | |
vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);
1078 | |
1079 | if (parent == kernel_map) { |
1080 | assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA)); |
1081 | } |
1082 | |
1083 | if (vmk_flags.vmf_fixed) { |
1084 | map_addr = trunc_page(*addr); |
1085 | } |
1086 | |
1087 | pmap_reference(vm_map_pmap(parent)); |
map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
1089 | |
1090 | /* |
1091 | * 1. vm_map_enter() will consume one ref on success. |
1092 | * |
1093 | * 2. make the entry atomic as kernel submaps should never be split. |
1094 | * |
1095 | * 3. instruct vm_map_enter() that it is a fresh submap |
1096 | * that needs to be taught its bounds as it inserted. |
1097 | */ |
1098 | vm_map_reference(map); |
1099 | |
1100 | vmk_flags.vmkf_submap = true; |
1101 | if ((flags & KMS_DATA) == 0) { |
1102 | /* FIXME: IOKit submaps get fragmented and can't be atomic */ |
1103 | vmk_flags.vmkf_submap_atomic = true; |
1104 | } |
1105 | vmk_flags.vmkf_submap_adjust = true; |
1106 | if (flags & KMS_LAST_FREE) { |
1107 | vmk_flags.vmkf_last_free = true; |
1108 | } |
1109 | if (flags & KMS_PERMANENT) { |
1110 | vmk_flags.vmf_permanent = true; |
1111 | } |
1112 | if (flags & KMS_DATA) { |
1113 | vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA; |
1114 | } |
1115 | |
kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
vmk_flags, (vm_object_t)map, 0, FALSE,
1118 | VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); |
1119 | |
1120 | if (kmr.kmr_return != KERN_SUCCESS) { |
1121 | if (flags & KMS_NOFAIL) { |
1122 | panic("kmem_suballoc(map=%p, size=%zd) failed with %d" , |
1123 | parent, size, kmr.kmr_return); |
1124 | } |
1125 | assert(os_ref_get_count_raw(&map->map_refcnt) == 2); |
1126 | vm_map_deallocate(map); |
1127 | vm_map_deallocate(map); /* also removes ref to pmap */ |
1128 | return kmr; |
1129 | } |
1130 | |
1131 | /* |
1132 | * For kmem_suballocs that register a claim and are assigned a range, ensure |
1133 | * that the exact same range is returned. |
1134 | */ |
1135 | if (*addr != 0 && parent == kernel_map && |
1136 | startup_phase > STARTUP_SUB_KMEM) { |
1137 | assert(CAST_DOWN(vm_offset_t, map_addr) == *addr); |
1138 | } else { |
1139 | *addr = map_addr; |
1140 | } |
1141 | |
1142 | kmr.kmr_submap = map; |
1143 | return kmr; |
1144 | } |
1145 | |
1146 | /* |
1147 | * kmem_alloc: |
1148 | * |
1149 | * Allocate wired-down memory in the kernel's address map |
1150 | * or a submap. The memory is not zero-filled. |
1151 | */ |
1152 | |
1153 | __exported kern_return_t |
1154 | kmem_alloc_external( |
1155 | vm_map_t map, |
1156 | vm_offset_t *addrp, |
1157 | vm_size_t size); |
1158 | kern_return_t |
1159 | kmem_alloc_external( |
1160 | vm_map_t map, |
1161 | vm_offset_t *addrp, |
1162 | vm_size_t size) |
1163 | { |
1164 | if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) { |
1165 | return kmem_alloc(map, addrp, size, flags: KMA_NONE, tag: vm_tag_bt()); |
1166 | } |
1167 | /* Maintain ABI compatibility: invalid sizes used to be allowed */ |
1168 | return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT; |
1169 | } |
1170 | |
1171 | |
1172 | /* |
1173 | * kmem_alloc_kobject: |
1174 | * |
1175 | * Allocate wired-down memory in the kernel's address map |
1176 | * or a submap. The memory is not zero-filled. |
1177 | * |
1178 | * The memory is allocated in the kernel_object. |
1179 | * It may not be copied with vm_map_copy, and |
1180 | * it may not be reallocated with kmem_realloc. |
1181 | */ |
1182 | |
1183 | __exported kern_return_t |
1184 | kmem_alloc_kobject_external( |
1185 | vm_map_t map, |
1186 | vm_offset_t *addrp, |
1187 | vm_size_t size); |
1188 | kern_return_t |
1189 | kmem_alloc_kobject_external( |
1190 | vm_map_t map, |
1191 | vm_offset_t *addrp, |
1192 | vm_size_t size) |
1193 | { |
1194 | if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) { |
1195 | return kmem_alloc(map, addrp, size, flags: KMA_KOBJECT, tag: vm_tag_bt()); |
1196 | } |
1197 | /* Maintain ABI compatibility: invalid sizes used to be allowed */ |
1198 | return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT; |
1199 | } |
1200 | |
1201 | /* |
1202 | * kmem_alloc_pageable: |
1203 | * |
1204 | * Allocate pageable memory in the kernel's address map. |
1205 | */ |
1206 | |
1207 | __exported kern_return_t |
1208 | kmem_alloc_pageable_external( |
1209 | vm_map_t map, |
1210 | vm_offset_t *addrp, |
1211 | vm_size_t size); |
1212 | kern_return_t |
1213 | kmem_alloc_pageable_external( |
1214 | vm_map_t map, |
1215 | vm_offset_t *addrp, |
1216 | vm_size_t size) |
1217 | { |
1218 | if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) { |
1219 | return kmem_alloc(map, addrp, size, flags: KMA_PAGEABLE | KMA_DATA, tag: vm_tag_bt()); |
1220 | } |
1221 | /* Maintain ABI compatibility: invalid sizes used to be allowed */ |
1222 | return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT; |
1223 | } |
1224 | |
1225 | |
1226 | #pragma mark population |
1227 | |
1228 | static void |
1229 | kernel_memory_populate_pmap_enter( |
1230 | vm_object_t object, |
1231 | vm_address_t addr, |
1232 | vm_object_offset_t offset, |
1233 | vm_page_t mem, |
1234 | vm_prot_t prot, |
1235 | int pe_flags, |
1236 | pmap_mapping_type_t mapping_type) |
1237 | { |
1238 | kern_return_t pe_result; |
1239 | int pe_options; |
1240 | |
1241 | if (VMP_ERROR_GET(mem)) { |
1242 | panic("VM page %p should not have an error" , mem); |
1243 | } |
1244 | |
1245 | pe_options = PMAP_OPTIONS_NOWAIT; |
1246 | if (object->internal) { |
1247 | pe_options |= PMAP_OPTIONS_INTERNAL; |
1248 | } |
1249 | if (mem->vmp_reusable || object->all_reusable) { |
1250 | pe_options |= PMAP_OPTIONS_REUSABLE; |
1251 | } |
1252 | |
pe_result = pmap_enter_options(kernel_pmap, addr + offset,
VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1256 | |
1257 | if (pe_result == KERN_RESOURCE_SHORTAGE) { |
1258 | vm_object_unlock(object); |
1259 | |
1260 | pe_options &= ~PMAP_OPTIONS_NOWAIT; |
1261 | |
pe_result = pmap_enter_options(kernel_pmap, addr + offset,
VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1265 | |
1266 | vm_object_lock(object); |
1267 | } |
1268 | |
1269 | assert(pe_result == KERN_SUCCESS); |
1270 | } |
1271 | |
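/*
 * Insert the pages from `page_list` into `object` starting at `offset`,
 * enter them in the kernel pmap at `addr`, update the wired/gobbled page
 * accounting, and drop the object lock (which must be held on entry).
 */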
1272 | void |
1273 | kernel_memory_populate_object_and_unlock( |
1274 | vm_object_t object, /* must be locked */ |
1275 | vm_address_t addr, |
1276 | vm_offset_t offset, |
1277 | vm_size_t size, |
1278 | vm_page_t page_list, |
1279 | kma_flags_t flags, |
1280 | vm_tag_t tag, |
1281 | vm_prot_t prot, |
1282 | pmap_mapping_type_t mapping_type) |
1283 | { |
1284 | vm_page_t mem; |
1285 | int pe_flags; |
1286 | bool gobbled_list = page_list && page_list->vmp_gobbled; |
1287 | |
1288 | assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0)); |
1289 | assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object); |
1290 | if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) { |
1291 | assert3u(offset, ==, addr); |
1292 | } else { |
1293 | /* |
1294 | * kernel_memory_populate_pmap_enter() might drop the object |
1295 | * lock, and the caller might not own a reference anymore |
1296 | * and rely on holding the vm object lock for liveness. |
1297 | */ |
1298 | vm_object_reference_locked(object); |
1299 | } |
1300 | |
1301 | if (flags & KMA_KSTACK) { |
1302 | pe_flags = VM_MEM_STACK; |
1303 | } else { |
1304 | pe_flags = 0; |
1305 | } |
1306 | |
1307 | |
1308 | for (vm_object_offset_t pg_offset = 0; |
1309 | pg_offset < size; |
1310 | pg_offset += PAGE_SIZE_64) { |
1311 | if (page_list == NULL) { |
1312 | panic("%s: page_list too short" , __func__); |
1313 | } |
1314 | |
1315 | mem = page_list; |
1316 | page_list = mem->vmp_snext; |
1317 | mem->vmp_snext = NULL; |
1318 | |
1319 | assert(mem->vmp_wire_count == 0); |
1320 | assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q); |
1321 | assert(!mem->vmp_fictitious && !mem->vmp_private); |
1322 | |
1323 | if (flags & KMA_COMPRESSOR) { |
1324 | mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR; |
1325 | /* |
1326 | * Background processes doing I/O accounting can call |
1327 | * into NVME driver to do some work which results in |
1328 | * an allocation here and so we want to make sure |
1329 | * that the pages used by compressor, regardless of |
1330 | * process context, are never on the special Q. |
1331 | */ |
1332 | mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY; |
1333 | |
vm_page_insert(mem, object, offset + pg_offset);
1335 | } else { |
1336 | mem->vmp_q_state = VM_PAGE_IS_WIRED; |
1337 | mem->vmp_wire_count = 1; |
1338 | |
vm_page_insert_wired(mem, object, offset + pg_offset, tag);
1340 | } |
1341 | |
1342 | mem->vmp_gobbled = false; |
1343 | mem->vmp_busy = false; |
1344 | mem->vmp_pmapped = true; |
1345 | mem->vmp_wpmapped = true; |
1346 | |
1347 | /* |
1348 | * Manual PMAP_ENTER_OPTIONS() with shortcuts |
1349 | * for the kernel and compressor objects. |
1350 | */ |
kernel_memory_populate_pmap_enter(object, addr, pg_offset,
1352 | mem, prot, pe_flags, mapping_type); |
1353 | |
1354 | if (flags & KMA_NOENCRYPT) { |
pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
1356 | } |
1357 | } |
1358 | |
1359 | if (page_list) { |
1360 | panic("%s: page_list too long" , __func__); |
1361 | } |
1362 | |
1363 | vm_object_unlock(object); |
1364 | if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) { |
1365 | vm_object_deallocate(object); |
1366 | } |
1367 | |
1368 | /* |
1369 | * Update the accounting: |
1370 | * - the compressor "wired" pages don't really count as wired |
1371 | * - kmem_alloc_contig_guard() gives gobbled pages, |
1372 | * which already count as wired but need to be ungobbled. |
1373 | */ |
1374 | if (gobbled_list) { |
1375 | vm_page_lockspin_queues(); |
1376 | if (flags & KMA_COMPRESSOR) { |
1377 | vm_page_wire_count -= atop(size); |
1378 | } |
1379 | vm_page_gobble_count -= atop(size); |
1380 | vm_page_unlock_queues(); |
1381 | } else if ((flags & KMA_COMPRESSOR) == 0) { |
1382 | vm_page_lockspin_queues(); |
1383 | vm_page_wire_count += atop(size); |
1384 | vm_page_unlock_queues(); |
1385 | } |
1386 | |
1387 | if (flags & KMA_KOBJECT) { |
1388 | /* vm_page_insert_wired() handles regular objects already */ |
1389 | vm_tag_update_size(tag, size, NULL); |
1390 | } |
1391 | |
1392 | #if KASAN |
1393 | if (flags & KMA_COMPRESSOR) { |
1394 | kasan_notify_address_nopoison(addr, size); |
1395 | } else { |
1396 | kasan_notify_address(addr, size); |
1397 | } |
1398 | #endif /* KASAN */ |
1399 | } |
1400 | |
1401 | |
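/*
 * Populate an already reserved kernel VA range [addr, addr + size) with
 * freshly allocated wired pages, backed by either the kernel object or
 * the compressor object depending on `flags`.
 */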
1402 | kern_return_t |
1403 | kernel_memory_populate( |
1404 | vm_offset_t addr, |
1405 | vm_size_t size, |
1406 | kma_flags_t flags, |
1407 | vm_tag_t tag) |
1408 | { |
1409 | kern_return_t kr = KERN_SUCCESS; |
1410 | vm_page_t page_list = NULL; |
1411 | vm_size_t page_count = atop_64(size); |
vm_object_t object = __kmem_object(ANYF(flags));
1413 | |
1414 | #if DEBUG || DEVELOPMENT |
1415 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, |
1416 | size, 0, 0, 0); |
1417 | #endif /* DEBUG || DEVELOPMENT */ |
1418 | |
kr = vm_page_alloc_list(page_count, flags, &page_list);
1420 | if (kr == KERN_SUCCESS) { |
1421 | vm_object_lock(object); |
1422 | kernel_memory_populate_object_and_unlock(object, addr, |
addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
__kmem_mapping_type(ANYF(flags)));
1425 | } |
1426 | |
1427 | #if DEBUG || DEVELOPMENT |
1428 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, |
1429 | page_count, 0, 0, 0); |
1430 | #endif /* DEBUG || DEVELOPMENT */ |
1431 | return kr; |
1432 | } |
1433 | |
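/*
 * Tear down the pages backing [addr, addr + size): unmap them from the
 * kernel pmap, remove them from their object and free them, adjusting the
 * wired page count for non-compressor memory.
 */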
1434 | void |
1435 | kernel_memory_depopulate( |
1436 | vm_offset_t addr, |
1437 | vm_size_t size, |
1438 | kma_flags_t flags, |
1439 | vm_tag_t tag) |
1440 | { |
vm_object_t object = __kmem_object(ANYF(flags));
1442 | vm_object_offset_t offset = addr; |
1443 | vm_page_t mem; |
1444 | vm_page_t local_freeq = NULL; |
1445 | unsigned int pages_unwired = 0; |
1446 | |
1447 | vm_object_lock(object); |
1448 | |
pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
1450 | |
1451 | for (vm_object_offset_t pg_offset = 0; |
1452 | pg_offset < size; |
1453 | pg_offset += PAGE_SIZE_64) { |
mem = vm_page_lookup(object, offset + pg_offset);
1455 | |
1456 | assert(mem); |
1457 | |
1458 | if (flags & KMA_COMPRESSOR) { |
1459 | assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR); |
1460 | } else { |
1461 | assert(mem->vmp_q_state == VM_PAGE_IS_WIRED); |
pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
1463 | pages_unwired++; |
1464 | } |
1465 | |
1466 | mem->vmp_busy = TRUE; |
1467 | |
1468 | assert(mem->vmp_tabled); |
vm_page_remove(mem, TRUE);
1470 | assert(mem->vmp_busy); |
1471 | |
1472 | assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0); |
1473 | |
1474 | mem->vmp_q_state = VM_PAGE_NOT_ON_Q; |
1475 | mem->vmp_snext = local_freeq; |
1476 | local_freeq = mem; |
1477 | } |
1478 | |
1479 | vm_object_unlock(object); |
1480 | |
vm_page_free_list(local_freeq, TRUE);
1482 | |
1483 | if (!(flags & KMA_COMPRESSOR)) { |
1484 | vm_page_lockspin_queues(); |
1485 | vm_page_wire_count -= pages_unwired; |
1486 | vm_page_unlock_queues(); |
1487 | } |
1488 | |
1489 | if (flags & KMA_KOBJECT) { |
1490 | /* vm_page_remove() handles regular objects already */ |
vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
1492 | } |
1493 | } |
1494 | |
1495 | #pragma mark reallocation |
1496 | |
1497 | __abortlike |
1498 | static void |
1499 | __kmem_realloc_invalid_object_size_panic( |
1500 | vm_map_t map, |
1501 | vm_address_t address, |
1502 | vm_size_t size, |
1503 | vm_map_entry_t entry) |
1504 | { |
1505 | vm_object_t object = VME_OBJECT(entry); |
1506 | vm_size_t objsize = __kmem_entry_orig_size(entry); |
1507 | |
1508 | panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): " |
1509 | "object %p has unexpected size %ld" , |
1510 | map, (void *)address, (size_t)size, entry, object, objsize); |
1511 | } |
1512 | |
1513 | __abortlike |
1514 | static void |
__kmem_realloc_invalid_pager_panic(
1516 | vm_map_t map, |
1517 | vm_address_t address, |
1518 | vm_size_t size, |
1519 | vm_map_entry_t entry) |
1520 | { |
1521 | vm_object_t object = VME_OBJECT(entry); |
memory_object_t pager = object->pager;
bool pager_created = object->pager_created;
bool pager_initialized = object->pager_initialized;
bool pager_ready = object->pager_ready;
1526 | |
1527 | panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): " |
1528 | "object %p has unexpected pager %p (%d,%d,%d)" , |
1529 | map, (void *)address, (size_t)size, entry, object, |
1530 | pager, pager_created, pager_initialized, pager_ready); |
1531 | } |
1532 | |
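/*
 * Shrink path for kmem_realloc_guard(): called with the map locked, clips
 * the (possibly atomic) entry to the new size, removes and unwires the
 * trailing pages, and re-establishes the right-hand guard page when one is
 * requested. The allocation keeps its original address.
 */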
1533 | static kmem_return_t |
1534 | kmem_realloc_shrink_guard( |
1535 | vm_map_t map, |
1536 | vm_offset_t req_oldaddr, |
1537 | vm_size_t req_oldsize, |
1538 | vm_size_t req_newsize, |
1539 | kmr_flags_t flags, |
1540 | kmem_guard_t guard, |
1541 | vm_map_entry_t entry) |
1542 | { |
1543 | vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE; |
1544 | vm_object_t object; |
1545 | vm_offset_t delta = 0; |
1546 | kmem_return_t kmr; |
1547 | bool was_atomic; |
vm_size_t oldsize = round_page(req_oldsize);
vm_size_t newsize = round_page(req_newsize);
1550 | vm_address_t oldaddr = req_oldaddr; |
1551 | |
1552 | #if KASAN_CLASSIC |
1553 | if (flags & KMR_KASAN_GUARD) { |
1554 | assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0); |
1555 | flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST; |
1556 | oldaddr -= PAGE_SIZE; |
1557 | delta = ptoa(2); |
1558 | oldsize += delta; |
1559 | newsize += delta; |
1560 | } |
1561 | #endif /* KASAN_CLASSIC */ |
1562 | |
1563 | if (flags & KMR_TAG) { |
1564 | oldaddr = vm_memtag_canonicalize_address(req_oldaddr); |
1565 | } |
1566 | |
1567 | vm_map_lock_assert_exclusive(map); |
1568 | |
1569 | if ((flags & KMR_KOBJECT) == 0) { |
1570 | object = VME_OBJECT(entry); |
1571 | vm_object_reference(object); |
1572 | } |
1573 | |
1574 | /* |
1575 | * Shrinking an atomic entry starts with splitting it, |
1576 | * and removing the second half. |
1577 | */ |
1578 | was_atomic = entry->vme_atomic; |
1579 | entry->vme_atomic = false; |
vm_map_clip_end(map, entry, entry->vme_start + newsize);
1581 | entry->vme_atomic = was_atomic; |
1582 | |
1583 | #if KASAN |
1584 | if (entry->vme_kernel_object && was_atomic) { |
1585 | entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta; |
1586 | } |
1587 | #if KASAN_CLASSIC |
1588 | if (flags & KMR_KASAN_GUARD) { |
1589 | kasan_poison_range(oldaddr + newsize, oldsize - newsize, |
1590 | ASAN_VALID); |
1591 | } |
1592 | #endif |
1593 | #if KASAN_TBI |
1594 | if (flags & KMR_TAG) { |
1595 | kasan_tbi_mark_free_space(req_oldaddr + newsize, oldsize - newsize); |
1596 | } |
1597 | #endif /* KASAN_TBI */ |
1598 | #endif /* KASAN */ |
1599 | (void)vm_map_remove_and_unlock(map, |
oldaddr + newsize, oldaddr + oldsize,
vmr_flags, KMEM_GUARD_NONE);
1602 | |
1603 | |
1604 | /* |
1605 | * Lastly, if there are guard pages, deal with them. |
1606 | * |
1607 | * The kernel object just needs to depopulate, |
1608 | * regular objects require freeing the last page |
1609 | * and replacing it with a guard. |
1610 | */ |
1611 | if (flags & KMR_KOBJECT) { |
1612 | if (flags & KMR_GUARD_LAST) { |
kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
PAGE_SIZE, KMA_KOBJECT, guard.kmg_tag);
1615 | } |
1616 | } else { |
1617 | vm_page_t guard_right = VM_PAGE_NULL; |
1618 | vm_offset_t remove_start = newsize; |
1619 | |
1620 | if (flags & KMR_GUARD_LAST) { |
1621 | if (!map->never_faults) { |
1622 | guard_right = vm_page_grab_guard(true); |
1623 | } |
1624 | remove_start -= PAGE_SIZE; |
1625 | } |
1626 | |
1627 | vm_object_lock(object); |
1628 | |
1629 | if (object->vo_size != oldsize) { |
1630 | __kmem_realloc_invalid_object_size_panic(map, |
req_oldaddr, req_oldsize + delta, entry);
1632 | } |
vm_object_set_size(object, newsize, req_newsize);
1634 | |
vm_object_page_remove(object, remove_start, oldsize);
1636 | |
1637 | if (guard_right) { |
vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1639 | guard_right->vmp_busy = false; |
1640 | } |
1641 | vm_object_unlock(object); |
1642 | vm_object_deallocate(object); |
1643 | } |
1644 | |
1645 | kmr.kmr_address = req_oldaddr; |
1646 | kmr.kmr_return = 0; |
1647 | #if KASAN_CLASSIC |
1648 | if (flags & KMA_KASAN_GUARD) { |
1649 | kasan_alloc_large(kmr.kmr_address, req_newsize); |
1650 | } |
1651 | #endif /* KASAN_CLASSIC */ |
1652 | #if KASAN_TBI |
1653 | if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) { |
1654 | kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize); |
1655 | vm_memtag_set_tag(kmr.kmr_address, req_newsize); |
1656 | kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize); |
1657 | } |
1658 | #endif /* KASAN_TBI */ |
1659 | |
1660 | return kmr; |
1661 | } |
1662 | |
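/*
 * Reallocate a kernel allocation to `req_newsize` bytes; non-atomic entries
 * are only supported for KMR_DATA allocations. A zero old address degenerates
 * to kmem_alloc_guard(), a zero new size to kmem_free_guard(), and shrinking
 * is handled by kmem_realloc_shrink_guard().
 */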
1663 | kmem_return_t |
1664 | kmem_realloc_guard( |
1665 | vm_map_t map, |
1666 | vm_offset_t req_oldaddr, |
1667 | vm_size_t req_oldsize, |
1668 | vm_size_t req_newsize, |
1669 | kmr_flags_t flags, |
1670 | kmem_guard_t guard) |
1671 | { |
1672 | vm_object_t object; |
1673 | vm_size_t oldsize; |
1674 | vm_size_t newsize; |
1675 | vm_offset_t delta = 0; |
1676 | vm_map_offset_t oldaddr; |
1677 | vm_map_offset_t newaddr; |
1678 | vm_object_offset_t newoffs; |
1679 | vm_map_entry_t oldentry; |
1680 | vm_map_entry_t newentry; |
1681 | vm_page_t page_list = NULL; |
1682 | bool needs_wakeup = false; |
1683 | kmem_return_t kmr = { }; |
1684 | unsigned int last_timestamp; |
1685 | vm_map_kernel_flags_t vmk_flags = { |
1686 | .vmkf_last_free = (bool)(flags & KMR_LAST_FREE), |
1687 | }; |
1688 | |
1689 | assert(KMEM_REALLOC_FLAGS_VALID(flags)); |
1690 | if (!guard.kmg_atomic && (flags & (KMR_DATA | KMR_KOBJECT)) != KMR_DATA) { |
__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
req_oldsize, flags);
1693 | } |
1694 | |
1695 | if (req_oldaddr == 0ul) { |
return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
1697 | } |
1698 | |
1699 | if (req_newsize == 0ul) { |
kmem_free_guard(map, req_oldaddr, req_oldsize,
(kmf_flags_t)flags, guard);
1702 | return kmr; |
1703 | } |
1704 | |
1705 | if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) { |
__kmem_invalid_size_panic(map, req_newsize, flags);
1707 | } |
if (req_newsize < __kmem_guard_size(ANYF(flags))) {
__kmem_invalid_size_panic(map, req_newsize, flags);
1710 | } |
1711 | |
oldsize = round_page(req_oldsize);
newsize = round_page(req_newsize);
1714 | oldaddr = req_oldaddr; |
1715 | #if KASAN_CLASSIC |
1716 | if (flags & KMR_KASAN_GUARD) { |
1717 | flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST; |
1718 | oldaddr -= PAGE_SIZE; |
1719 | delta = ptoa(2); |
1720 | oldsize += delta; |
1721 | newsize += delta; |
1722 | } |
1723 | #endif /* KASAN_CLASSIC */ |
1724 | #if CONFIG_KERNEL_TAGGING |
1725 | if (flags & KMR_TAG) { |
1726 | vm_memtag_verify_tag(req_oldaddr); |
1727 | oldaddr = vm_memtag_canonicalize_address(req_oldaddr); |
1728 | } |
1729 | #endif /* CONFIG_KERNEL_TAGGING */ |
1730 | |
1731 | #if !KASAN |
1732 | /* |
1733 | * If not on a KASAN variant and no difference in requested size, |
1734 | * just return. |
1735 | * |
1736 | * Otherwise we want to validate the size and re-tag for KASAN_TBI. |
1737 | */ |
1738 | if (oldsize == newsize) { |
1739 | kmr.kmr_address = req_oldaddr; |
1740 | return kmr; |
1741 | } |
1742 | #endif /* !KASAN */ |
1743 | |
1744 | /* |
1745 | * If we're growing the allocation, |
1746 | * then reserve the pages we'll need, |
1747 | * and find a spot for its new place. |
1748 | */ |
1749 | if (oldsize < newsize) { |
1750 | #if DEBUG || DEVELOPMENT |
1751 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, |
1752 | VM_KERN_REQUEST, DBG_FUNC_START, |
1753 | newsize - oldsize, 0, 0, 0); |
1754 | #endif /* DEBUG || DEVELOPMENT */ |
1755 | kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize), |
1756 | flags: (kma_flags_t)flags, list: &page_list); |
1757 | if (kmr.kmr_return == KERN_SUCCESS) { |
1758 | kmem_apply_security_policy(map, kma_flags: (kma_flags_t)flags, guard, |
1759 | map_size: newsize, mask: 0, vmk_flags: &vmk_flags, true); |
1760 | kmr.kmr_return = vm_map_find_space(map, hint_addr: 0, size: newsize, mask: 0, |
1761 | vmk_flags, o_entry: &newentry); |
1762 | } |
1763 | if (__improbable(kmr.kmr_return != KERN_SUCCESS)) { |
1764 | if (flags & KMR_REALLOCF) { |
1765 | kmem_free_guard(map, addr: req_oldaddr, size: req_oldsize, |
1766 | flags: KMF_NONE, guard); |
1767 | } |
1768 | if (page_list) { |
1769 | vm_page_free_list(mem: page_list, FALSE); |
1770 | } |
1771 | #if DEBUG || DEVELOPMENT |
1772 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, |
1773 | VM_KERN_REQUEST, DBG_FUNC_END, |
1774 | 0, 0, 0, 0); |
1775 | #endif /* DEBUG || DEVELOPMENT */ |
1776 | return kmr; |
1777 | } |
1778 | |
1779 | /* map is locked */ |
1780 | } else { |
1781 | vm_map_lock(map); |
1782 | } |
1783 | |
1784 | |
1785 | /* |
1786 | * Locate the entry: |
1787 | * - wait for it to quiesce. |
1788 | * - validate its guard, |
1789 | * - learn its correct tag, |
1790 | */ |
1791 | again: |
1792 | if (!vm_map_lookup_entry(map, address: oldaddr, entry: &oldentry)) { |
1793 | __kmem_entry_not_found_panic(map, addr: req_oldaddr); |
1794 | } |
1795 | if ((flags & KMR_KOBJECT) && oldentry->in_transition) { |
1796 | oldentry->needs_wakeup = true; |
1797 | vm_map_entry_wait(map, THREAD_UNINT); |
1798 | goto again; |
1799 | } |
1800 | kmem_entry_validate_guard(map, entry: oldentry, addr: oldaddr, size: oldsize, guard); |
1801 | if (!__kmem_entry_validate_object(entry: oldentry, flags: ANYF(flags))) { |
1802 | __kmem_entry_validate_object_panic(map, entry: oldentry, flags: ANYF(flags)); |
1803 | } |
1804 | /* |
1805 | * TODO: We should validate for non atomic entries that the range |
1806 | * we are acting on is what we expect here. |
1807 | */ |
1808 | #if KASAN |
1809 | if (__kmem_entry_orig_size(oldentry) != req_oldsize) { |
1810 | __kmem_realloc_invalid_object_size_panic(map, |
1811 | req_oldaddr, req_oldsize + delta, oldentry); |
1812 | } |
1813 | |
1814 | if (oldsize == newsize) { |
1815 | kmr.kmr_address = req_oldaddr; |
1816 | if (oldentry->vme_kernel_object) { |
1817 | oldentry->vme_object_or_delta = delta + |
1818 | (-req_newsize & PAGE_MASK); |
1819 | } else { |
1820 | object = VME_OBJECT(oldentry); |
1821 | vm_object_lock(object); |
1822 | vm_object_set_size(object, newsize, req_newsize); |
1823 | vm_object_unlock(object); |
1824 | } |
1825 | vm_map_unlock(map); |
1826 | |
1827 | #if KASAN_CLASSIC |
1828 | if (flags & KMA_KASAN_GUARD) { |
1829 | kasan_alloc_large(kmr.kmr_address, req_newsize); |
1830 | } |
1831 | #endif /* KASAN_CLASSIC */ |
1832 | #if KASAN_TBI |
1833 | if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) { |
1834 | kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize); |
1835 | vm_memtag_set_tag(kmr.kmr_address, req_newsize); |
1836 | kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize); |
1837 | } |
1838 | #endif /* KASAN_TBI */ |
1839 | return kmr; |
1840 | } |
1841 | #endif /* KASAN */ |
1842 | |
1843 | guard.kmg_tag = VME_ALIAS(oldentry); |
1844 | |
1845 | if (newsize < oldsize) { |
1846 | return kmem_realloc_shrink_guard(map, req_oldaddr, |
1847 | req_oldsize, req_newsize, flags, guard, entry: oldentry); |
1848 | } |
1849 | |
1850 | |
1851 | /* |
1852 | * We are growing the entry |
1853 | * |
1854 | * For regular objects we use the object `vo_size` updates |
1855 | * as a guarantee that no 2 kmem_realloc() can happen |
1856 | * concurrently (by doing it before the map is unlocked. |
1857 | * |
1858 | * For the kernel object, prevent the entry from being |
1859 | * reallocated or changed by marking it "in_transition". |
1860 | */ |
1861 | |
1862 | object = VME_OBJECT(oldentry); |
1863 | vm_object_lock(object); |
1864 | vm_object_reference_locked(object); |
1865 | |
1866 | newaddr = newentry->vme_start; |
1867 | newoffs = oldsize; |
1868 | |
1869 | VME_OBJECT_SET(entry: newentry, object, atomic: guard.kmg_atomic, context: guard.kmg_context); |
1870 | VME_ALIAS_SET(entry: newentry, alias: guard.kmg_tag); |
1871 | if (flags & KMR_KOBJECT) { |
1872 | oldentry->in_transition = true; |
1873 | VME_OFFSET_SET(entry: newentry, offset: newaddr); |
1874 | newentry->wired_count = 1; |
1875 | vme_btref_consider_and_set(entry: newentry, fp: __builtin_frame_address(0)); |
1876 | newoffs = newaddr + oldsize; |
1877 | } else { |
1878 | if (object->pager_created || object->pager) { |
1879 | /* |
1880 | * We can't "realloc/grow" the pager, so pageable |
1881 | * allocations should not go through this path. |
1882 | */ |
1883 | __kmem_realloc_invalid_pager_panic(map, |
1884 | address: req_oldaddr, size: req_oldsize + delta, entry: oldentry); |
1885 | } |
1886 | if (object->vo_size != oldsize) { |
1887 | __kmem_realloc_invalid_object_size_panic(map, |
1888 | address: req_oldaddr, size: req_oldsize + delta, entry: oldentry); |
1889 | } |
1890 | vm_object_set_size(object, outer_size: newsize, inner_size: req_newsize); |
1891 | } |
1892 | |
1893 | last_timestamp = map->timestamp; |
1894 | vm_map_unlock(map); |
1895 | |
1896 | |
1897 | /* |
1898 | * Now proceed with the population of pages. |
1899 | * |
1900 | * Kernel objects can use the kmem population helpers. |
1901 | * |
1902 | * Regular objects will insert pages manually, |
1903 | * then wire the memory into the new range. |
1904 | */ |
1905 | |
1906 | vm_size_t guard_right_size = __kmem_guard_right(flags: ANYF(flags)); |
1907 | |
1908 | if (flags & KMR_KOBJECT) { |
1909 | pmap_mapping_type_t mapping_type = __kmem_mapping_type(flags: ANYF(flags)); |
1910 | |
1911 | pmap_protect(map: kernel_pmap, |
1912 | s: oldaddr, e: oldaddr + oldsize - guard_right_size, |
1913 | VM_PROT_NONE); |
1914 | |
1915 | for (vm_object_offset_t offset = 0; |
1916 | offset < oldsize - guard_right_size; |
1917 | offset += PAGE_SIZE_64) { |
1918 | vm_page_t mem; |
1919 | |
1920 | mem = vm_page_lookup(object, offset: oldaddr + offset); |
1921 | if (mem == VM_PAGE_NULL) { |
1922 | continue; |
1923 | } |
1924 | |
1925 | pmap_disconnect(phys: VM_PAGE_GET_PHYS_PAGE(m: mem)); |
1926 | |
1927 | mem->vmp_busy = true; |
1928 | vm_page_remove(page: mem, true); |
1929 | vm_page_insert_wired(page: mem, object, offset: newaddr + offset, |
1930 | tag: guard.kmg_tag); |
1931 | mem->vmp_busy = false; |
1932 | |
1933 | kernel_memory_populate_pmap_enter(object, addr: newaddr, |
1934 | offset, mem, VM_PROT_DEFAULT, pe_flags: 0, mapping_type); |
1935 | } |
1936 | |
1937 | kernel_memory_populate_object_and_unlock(object, |
1938 | addr: newaddr + oldsize - guard_right_size, |
1939 | offset: newoffs - guard_right_size, |
1940 | size: newsize - oldsize, |
1941 | page_list, flags: (kma_flags_t)flags, |
1942 | tag: guard.kmg_tag, VM_PROT_DEFAULT, mapping_type); |
1943 | } else { |
1944 | vm_page_t guard_right = VM_PAGE_NULL; |
1945 | |
1946 | /* |
1947 | * Note: we are borrowing the new entry reference |
1948 | * on the object for the duration of this code, |
1949 | * which works because we keep the object locked |
1950 | * throughout. |
1951 | */ |
1952 | if ((flags & KMR_GUARD_LAST) && !map->never_faults) { |
1953 | guard_right = vm_page_lookup(object, offset: oldsize - PAGE_SIZE); |
1954 | assert(guard_right->vmp_fictitious); |
1955 | guard_right->vmp_busy = true; |
1956 | vm_page_remove(page: guard_right, true); |
1957 | } |
1958 | |
1959 | if (flags & KMR_FREEOLD) { |
1960 | /* |
1961 | * Freeing the old mapping will make |
1962 | * the old pages become pageable until |
1963 | * the new mapping makes them wired again. |
1964 | * Let's take an extra "wire_count" to |
1965 | * prevent any accidental "page out". |
1966 | * We'll have to undo that after wiring |
1967 | * the new mapping. |
1968 | */ |
1969 | vm_object_reference_locked(object); /* keep object alive */ |
1970 | for (vm_object_offset_t offset = 0; |
1971 | offset < oldsize - guard_right_size; |
1972 | offset += PAGE_SIZE_64) { |
1973 | vm_page_t mem; |
1974 | |
1975 | mem = vm_page_lookup(object, offset); |
1976 | assert(mem != VM_PAGE_NULL); |
1977 | assertf(!VM_PAGE_PAGEABLE(mem), |
1978 | "mem %p qstate %d" , |
1979 | mem, mem->vmp_q_state); |
1980 | if (VM_PAGE_GET_PHYS_PAGE(m: mem) == vm_page_guard_addr) { |
1981 | /* guard pages are not wired */ |
1982 | } else { |
1983 | assertf(VM_PAGE_WIRED(mem), |
1984 | "mem %p qstate %d wirecount %d" , |
1985 | mem, |
1986 | mem->vmp_q_state, |
1987 | mem->vmp_wire_count); |
1988 | assertf(mem->vmp_wire_count >= 1, |
1989 | "mem %p wirecount %d" , |
1990 | mem, mem->vmp_wire_count); |
1991 | mem->vmp_wire_count++; |
1992 | } |
1993 | } |
1994 | } |
1995 | |
1996 | for (vm_object_offset_t offset = oldsize - guard_right_size; |
1997 | offset < newsize - guard_right_size; |
1998 | offset += PAGE_SIZE_64) { |
1999 | vm_page_t mem = page_list; |
2000 | |
2001 | page_list = mem->vmp_snext; |
2002 | mem->vmp_snext = VM_PAGE_NULL; |
2003 | assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q); |
2004 | assert(!VM_PAGE_PAGEABLE(mem)); |
2005 | |
2006 | vm_page_insert(page: mem, object, offset); |
2007 | mem->vmp_busy = false; |
2008 | } |
2009 | |
2010 | if (guard_right) { |
2011 | vm_page_insert(page: guard_right, object, offset: newsize - PAGE_SIZE); |
2012 | guard_right->vmp_busy = false; |
2013 | } |
2014 | |
2015 | vm_object_unlock(object); |
2016 | } |
2017 | |
2018 | /* |
2019 | * Mark the entry as idle again, |
2020 | * and honor KMR_FREEOLD if needed. |
2021 | */ |
2022 | |
2023 | vm_map_lock(map); |
2024 | if (last_timestamp + 1 != map->timestamp && |
2025 | !vm_map_lookup_entry(map, address: oldaddr, entry: &oldentry)) { |
2026 | __kmem_entry_not_found_panic(map, addr: req_oldaddr); |
2027 | } |
2028 | |
2029 | if (flags & KMR_KOBJECT) { |
2030 | assert(oldentry->in_transition); |
2031 | oldentry->in_transition = false; |
2032 | if (oldentry->needs_wakeup) { |
2033 | needs_wakeup = true; |
2034 | oldentry->needs_wakeup = false; |
2035 | } |
2036 | } |
2037 | |
2038 | if (flags & KMR_FREEOLD) { |
2039 | vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE; |
2040 | |
2041 | #if KASAN_CLASSIC |
2042 | if (flags & KMR_KASAN_GUARD) { |
2043 | kasan_poison_range(oldaddr, oldsize, ASAN_VALID); |
2044 | } |
2045 | #endif |
2046 | #if KASAN_TBI |
2047 | if (flags & KMR_TAG) { |
2048 | kasan_tbi_mark_free_space(req_oldaddr, oldsize); |
2049 | } |
2050 | #endif /* KASAN_TBI */ |
2051 | if (flags & KMR_GUARD_LAST) { |
2052 | vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST; |
2053 | } |
2054 | (void)vm_map_remove_and_unlock(map, |
2055 | start: oldaddr, end: oldaddr + oldsize, |
2056 | flags: vmr_flags, guard); |
2057 | } else { |
2058 | vm_map_unlock(map); |
2059 | } |
2060 | |
2061 | if ((flags & KMR_KOBJECT) == 0) { |
2062 | kern_return_t kr; |
2063 | /* |
2064 | * This must happen _after_ we do the KMR_FREEOLD, |
2065 | * because wiring the pages will call into the pmap, |
2066 | * and if the pages are typed XNU_KERNEL_RESTRICTED, |
2067 | * this would cause a second mapping of the page and panic. |
2068 | */ |
2069 | kr = vm_map_wire_kernel(map, start: newaddr, end: newaddr + newsize, |
2070 | VM_PROT_DEFAULT, tag: guard.kmg_tag, FALSE); |
2071 | assert(kr == KERN_SUCCESS); |
2072 | |
2073 | if (flags & KMR_FREEOLD) { |
2074 | /* |
2075 | * Undo the extra "wiring" we made above |
2076 | * and release the extra reference we took |
2077 | * on the object. |
2078 | */ |
2079 | vm_object_lock(object); |
2080 | for (vm_object_offset_t offset = 0; |
2081 | offset < oldsize - guard_right_size; |
2082 | offset += PAGE_SIZE_64) { |
2083 | vm_page_t mem; |
2084 | |
2085 | mem = vm_page_lookup(object, offset); |
2086 | assert(mem != VM_PAGE_NULL); |
2087 | assertf(!VM_PAGE_PAGEABLE(mem), |
2088 | "mem %p qstate %d" , |
2089 | mem, mem->vmp_q_state); |
2090 | if (VM_PAGE_GET_PHYS_PAGE(m: mem) == vm_page_guard_addr) { |
2091 | /* guard pages are not wired */ |
2092 | } else { |
2093 | assertf(VM_PAGE_WIRED(mem), |
2094 | "mem %p qstate %d wirecount %d" , |
2095 | mem, |
2096 | mem->vmp_q_state, |
2097 | mem->vmp_wire_count); |
2098 | assertf(mem->vmp_wire_count >= 2, |
2099 | "mem %p wirecount %d" , |
2100 | mem, mem->vmp_wire_count); |
2101 | mem->vmp_wire_count--; |
2102 | assert(VM_PAGE_WIRED(mem)); |
2103 | assert(mem->vmp_wire_count >= 1); |
2104 | } |
2105 | } |
2106 | vm_object_unlock(object); |
2107 | vm_object_deallocate(object); /* release extra ref */ |
2108 | } |
2109 | } |
2110 | |
2111 | if (needs_wakeup) { |
2112 | vm_map_entry_wakeup(map); |
2113 | } |
2114 | |
2115 | #if DEBUG || DEVELOPMENT |
2116 | VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, |
2117 | atop(newsize - oldsize), 0, 0, 0); |
2118 | #endif /* DEBUG || DEVELOPMENT */ |
2119 | kmr.kmr_address = newaddr; |
2120 | |
2121 | #if KASAN |
2122 | kasan_notify_address(kmr.kmr_address, newsize); |
2123 | #endif /* KASAN */ |
2124 | #if KASAN_CLASSIC |
2125 | if (flags & KMR_KASAN_GUARD) { |
2126 | kmr.kmr_address += PAGE_SIZE; |
2127 | kasan_alloc_large(kmr.kmr_address, req_newsize); |
2128 | } |
2129 | #endif /* KASAN_CLASSIC */ |
2130 | #if KASAN_TBI |
2131 | if (flags & KMR_TAG) { |
2132 | kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize); |
2133 | vm_memtag_set_tag(kmr.kmr_address, req_newsize); |
2134 | kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize); |
2135 | } |
2136 | #endif /* KASAN_TBI */ |
2137 | |
2138 | return kmr; |
2139 | } |
2140 | |
2141 | |
2142 | #pragma mark free |
2143 | |
2144 | #if KASAN |
2145 | |
2146 | __abortlike |
2147 | static void |
2148 | __kmem_free_invalid_object_size_panic( |
2149 | vm_map_t map, |
2150 | vm_address_t address, |
2151 | vm_size_t size, |
2152 | vm_map_entry_t entry) |
2153 | { |
2154 | vm_object_t object = VME_OBJECT(entry); |
2155 | vm_size_t objsize = __kmem_entry_orig_size(entry); |
2156 | |
2157 | panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): " |
2158 | "object %p has unexpected size %ld" , |
2159 | map, (void *)address, (size_t)size, entry, object, objsize); |
2160 | } |
2161 | |
2162 | #endif /* KASAN */ |
2163 | |
2164 | vm_size_t |
2165 | kmem_free_guard( |
2166 | vm_map_t map, |
2167 | vm_offset_t req_addr, |
2168 | vm_size_t req_size, |
2169 | kmf_flags_t flags, |
2170 | kmem_guard_t guard) |
2171 | { |
2172 | vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE; |
2173 | vm_address_t addr = req_addr; |
2174 | vm_offset_t delta = 0; |
2175 | vm_size_t size; |
2176 | #if KASAN |
2177 | vm_map_entry_t entry; |
2178 | #endif /* KASAN */ |
2179 | |
2180 | assert(map->pmap == kernel_pmap); |
2181 | |
2182 | #if KASAN_CLASSIC |
2183 | if (flags & KMF_KASAN_GUARD) { |
2184 | addr -= PAGE_SIZE; |
2185 | delta = ptoa(2); |
2186 | } |
2187 | #endif /* KASAN_CLASSIC */ |
2188 | #if CONFIG_KERNEL_TAGGING |
2189 | if (flags & KMF_TAG) { |
2190 | vm_memtag_verify_tag(req_addr); |
2191 | addr = vm_memtag_canonicalize_address(req_addr); |
2192 | } |
2193 | #endif /* CONFIG_KERNEL_TAGGING */ |
2194 | |
2195 | if (flags & KMF_GUESS_SIZE) { |
2196 | vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE; |
2197 | size = PAGE_SIZE; |
2198 | } else if (req_size == 0) { |
2199 | __kmem_invalid_size_panic(map, size: req_size, flags); |
2200 | } else { |
2201 | size = round_page(x: req_size) + delta; |
2202 | } |
2203 | |
2204 | vm_map_lock(map); |
2205 | |
2206 | #if KASAN |
2207 | if (!vm_map_lookup_entry(map, addr, &entry)) { |
2208 | __kmem_entry_not_found_panic(map, req_addr); |
2209 | } |
2210 | if (flags & KMF_GUESS_SIZE) { |
2211 | vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE; |
2212 | req_size = __kmem_entry_orig_size(entry); |
2213 | size = round_page(req_size + delta); |
2214 | } else if (guard.kmg_atomic && entry->vme_kernel_object && |
2215 | __kmem_entry_orig_size(entry) != req_size) { |
2216 | /* |
2217 | * We can't make a strict check for regular |
2218 | * VM objects because it could be: |
2219 | * |
* - the kmem_free_guard() of a kmem_realloc_guard() without
2221 | * KMR_FREEOLD, and in that case the object size won't match. |
2222 | * |
2223 | * - a submap, in which case there is no "orig size". |
2224 | */ |
2225 | __kmem_free_invalid_object_size_panic(map, |
2226 | req_addr, req_size + delta, entry); |
2227 | } |
2228 | #endif /* KASAN */ |
2229 | #if KASAN_CLASSIC |
2230 | if (flags & KMR_KASAN_GUARD) { |
2231 | kasan_poison_range(addr, size, ASAN_VALID); |
2232 | } |
2233 | #endif |
2234 | #if KASAN_TBI |
2235 | if (flags & KMF_TAG) { |
2236 | kasan_tbi_mark_free_space(req_addr, size); |
2237 | } |
2238 | #endif /* KASAN_TBI */ |
2239 | |
2240 | /* |
2241 | * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which |
2242 | * unwires the kernel mapping. The page won't be mapped any longer so |
2243 | * there is no extra step that is required for memory tagging to "clear" |
2244 | * it -- the page will be later laundered when reused. |
2245 | */ |
2246 | return vm_map_remove_and_unlock(map, start: addr, end: addr + size, |
2247 | flags: vmr_flags, guard).kmr_size - delta; |
2248 | } |
2249 | |
2250 | __exported void |
2251 | kmem_free_external( |
2252 | vm_map_t map, |
2253 | vm_offset_t addr, |
2254 | vm_size_t size); |
2255 | void |
2256 | kmem_free_external( |
2257 | vm_map_t map, |
2258 | vm_offset_t addr, |
2259 | vm_size_t size) |
2260 | { |
2261 | if (size) { |
2262 | kmem_free(map, trunc_page(addr), size); |
2263 | #if MACH_ASSERT |
2264 | } else { |
2265 | printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n" , |
2266 | map, (void *)addr, __builtin_return_address(0)); |
2267 | #endif |
2268 | } |
2269 | } |
2270 | |
2271 | #pragma mark kmem metadata |
2272 | |
2273 | /* |
2274 | * Guard objects for kmem pointer allocation: |
2275 | * |
* Guard objects introduce size slabs to kmem pointer allocations, which are
* allocated in chunks of n * sizeclass. When an allocation of a specific
* sizeclass is requested, a random slot from [0, n) is returned.
* Allocations are handed out from that chunk until only m slots are left.
* The remaining m slots are referred to as guard objects. They never get
* allocated, and the chunk is then considered full. When an allocation is
* freed back to the chunk, one of the m + 1 free slots becomes available
* for the next allocation of that sizeclass.
*
* Guard objects are intended to make exploitation of use-after-frees harder,
* as freed allocations can no longer be reliably reallocated.
* They also make exploitation of OOBs harder, as overflowing out of an
* allocation can no longer be done safely even with sufficient spraying.
2289 | */ |
2290 | |
2291 | #define KMEM_META_PRIMARY UINT8_MAX |
2292 | #define KMEM_META_START (UINT8_MAX - 1) |
2293 | #define KMEM_META_FREE (UINT8_MAX - 2) |
2294 | #if __ARM_16K_PG__ |
2295 | #define KMEM_MIN_SIZE PAGE_SIZE |
2296 | #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16) |
2297 | #else /* __ARM_16K_PG__ */ |
2298 | /* |
2299 | * PAGE_SIZE isn't a compile time constant on some arm64 devices. Those |
2300 | * devices use 4k page size when their RAM is <= 1GB and 16k otherwise. |
2301 | * Therefore populate sizeclasses from 4k for those devices. |
2302 | */ |
2303 | #define KMEM_MIN_SIZE (4 * 1024) |
2304 | #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32) |
2305 | #endif /* __ARM_16K_PG__ */ |
2306 | #define KMEM_MAX_SIZE (32ULL << 20) |
2307 | #define KMEM_START_IDX (kmem_log2down(KMEM_MIN_SIZE)) |
2308 | #define KMEM_LAST_IDX (kmem_log2down(KMEM_MAX_SIZE)) |
2309 | #define KMEM_NUM_SIZECLASS (KMEM_LAST_IDX - KMEM_START_IDX + 1) |
2310 | #define KMEM_FRONTS (KMEM_RANGE_ID_NUM_PTR * 2) |
2311 | #define KMEM_NUM_GUARDS 2 |
2312 | |
typedef struct kmem_page_meta {
2314 | union { |
2315 | /* |
2316 | * On primary allocated chunk with KMEM_META_PRIMARY marker |
2317 | */ |
2318 | uint32_t km_bitmap; |
2319 | /* |
2320 | * On start and end of free chunk with KMEM_META_FREE marker |
2321 | */ |
2322 | uint32_t km_free_chunks; |
2323 | }; |
2324 | /* |
2325 | * KMEM_META_PRIMARY: Start meta of allocated chunk |
2326 | * KMEM_META_FREE : Start and end meta of free chunk |
2327 | * KMEM_META_START : Meta region start and end |
2328 | */ |
2329 | uint8_t km_page_marker; |
2330 | uint8_t km_sizeclass; |
2331 | union { |
2332 | /* |
2333 | * On primary allocated chunk with KMEM_META_PRIMARY marker |
2334 | */ |
2335 | uint16_t km_chunk_len; |
2336 | /* |
2337 | * On secondary allocated chunks |
2338 | */ |
2339 | uint16_t km_page_idx; |
2340 | }; |
2341 | LIST_ENTRY(kmem_page_meta) km_link; |
2342 | } kmem_page_meta_t; |
2343 | |
2344 | typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t; |
2345 | struct kmem_sizeclass { |
2346 | vm_map_size_t ks_size; |
2347 | uint32_t ks_num_chunk; |
2348 | uint32_t ks_num_elem; |
2349 | crypto_random_ctx_t __zpercpu ks_rng_ctx; |
2350 | kmem_list_head_t ks_allfree_head[KMEM_FRONTS]; |
2351 | kmem_list_head_t ks_partial_head[KMEM_FRONTS]; |
2352 | kmem_list_head_t ks_full_head[KMEM_FRONTS]; |
2353 | }; |
2354 | |
2355 | static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS]; |
2356 | |
2357 | /* |
2358 | * Locks to synchronize metadata population |
2359 | */ |
2360 | static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks" ); |
2361 | static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp); |
2362 | #define kmem_meta_lock() lck_mtx_lock(&kmem_meta_region_lck) |
2363 | #define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck) |
2364 | |
2365 | static SECURITY_READ_ONLY_LATE(struct mach_vm_range) |
2366 | kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1]; |
2367 | static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *) |
2368 | kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1]; |
2369 | /* |
2370 | * Keeps track of metadata high water mark for each front |
2371 | */ |
2372 | static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS]; |
2373 | static SECURITY_READ_ONLY_LATE(vm_map_t) |
2374 | kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1]; |
2375 | static vm_map_size_t kmem_meta_size; |
2376 | |
2377 | static uint32_t |
2378 | kmem_get_front( |
2379 | kmem_range_id_t range_id, |
2380 | bool from_right) |
2381 | { |
2382 | assert((range_id >= KMEM_RANGE_ID_FIRST) && |
2383 | (range_id <= KMEM_RANGE_ID_NUM_PTR)); |
2384 | return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right; |
2385 | } |
2386 | |
2387 | static inline uint32_t |
2388 | kmem_slot_idx_to_bit( |
2389 | uint32_t slot_idx, |
2390 | uint32_t size_idx __unused) |
2391 | { |
2392 | assert(slot_idx < kmem_size_array[size_idx].ks_num_elem); |
2393 | return 1ull << slot_idx; |
2394 | } |
2395 | |
2396 | static uint32_t |
2397 | kmem_get_idx_from_size(vm_map_size_t size) |
2398 | { |
2399 | assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE); |
2400 | return kmem_log2down(size - 1) - KMEM_START_IDX + 1; |
2401 | } |
2402 | |
2403 | __abortlike |
2404 | static void |
2405 | kmem_invalid_size_idx(uint32_t idx) |
2406 | { |
2407 | panic("Invalid sizeclass idx %u" , idx); |
2408 | } |
2409 | |
2410 | static vm_map_size_t |
2411 | kmem_get_size_from_idx(uint32_t idx) |
2412 | { |
2413 | if (__improbable(idx >= KMEM_NUM_SIZECLASS)) { |
2414 | kmem_invalid_size_idx(idx); |
2415 | } |
2416 | return 1ul << (idx + KMEM_START_IDX); |
2417 | } |
2418 | |
2419 | static inline uint16_t |
2420 | kmem_get_page_idx(struct kmem_page_meta *meta) |
2421 | { |
2422 | uint8_t page_marker = meta->km_page_marker; |
2423 | |
2424 | return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx; |
2425 | } |
2426 | |
2427 | __abortlike |
2428 | static void |
2429 | kmem_invalid_chunk_len(struct kmem_page_meta *meta) |
2430 | { |
2431 | panic("Reading free chunks for meta %p where marker != KMEM_META_PRIMARY" , |
2432 | meta); |
2433 | } |
2434 | |
2435 | static inline uint16_t |
2436 | kmem_get_chunk_len(struct kmem_page_meta *meta) |
2437 | { |
2438 | if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) { |
2439 | kmem_invalid_chunk_len(meta); |
2440 | } |
2441 | |
2442 | return meta->km_chunk_len; |
2443 | } |
2444 | |
2445 | __abortlike |
2446 | static void |
2447 | kmem_invalid_free_chunk_len(struct kmem_page_meta *meta) |
2448 | { |
2449 | panic("Reading free chunks for meta %p where marker != KMEM_META_FREE" , |
2450 | meta); |
2451 | } |
2452 | |
2453 | static inline uint32_t |
2454 | kmem_get_free_chunk_len(struct kmem_page_meta *meta) |
2455 | { |
2456 | if (__improbable(meta->km_page_marker != KMEM_META_FREE)) { |
2457 | kmem_invalid_free_chunk_len(meta); |
2458 | } |
2459 | |
2460 | return meta->km_free_chunks; |
2461 | } |
2462 | |
2463 | /* |
2464 | * Return the metadata corresponding to the specified address |
2465 | */ |
2466 | static struct kmem_page_meta * |
2467 | kmem_addr_to_meta( |
2468 | vm_map_offset_t addr, |
2469 | vm_map_range_id_t range_id, |
2470 | vm_map_offset_t *range_start, |
2471 | uint64_t *meta_idx) |
2472 | { |
2473 | struct kmem_page_meta *meta_base = kmem_meta_base[range_id]; |
2474 | |
2475 | *range_start = kmem_ranges[range_id].min_address; |
2476 | *meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN; |
2477 | return &meta_base[*meta_idx]; |
2478 | } |
2479 | |
2480 | /* |
2481 | * Return the metadata start of the chunk that the address belongs to |
2482 | */ |
2483 | static struct kmem_page_meta * |
2484 | kmem_addr_to_meta_start( |
2485 | vm_address_t addr, |
2486 | vm_map_range_id_t range_id, |
2487 | vm_map_offset_t *chunk_start) |
2488 | { |
2489 | vm_map_offset_t range_start; |
2490 | uint64_t meta_idx; |
2491 | struct kmem_page_meta *meta; |
2492 | |
2493 | meta = kmem_addr_to_meta(addr, range_id, range_start: &range_start, meta_idx: &meta_idx); |
2494 | meta_idx -= kmem_get_page_idx(meta); |
2495 | meta -= kmem_get_page_idx(meta); |
2496 | assert(meta->km_page_marker == KMEM_META_PRIMARY); |
2497 | *chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN); |
2498 | return meta; |
2499 | } |
2500 | |
2501 | __startup_func |
2502 | static void |
2503 | kmem_init_meta_front( |
2504 | struct kmem_page_meta *meta, |
2505 | kmem_range_id_t range_id, |
2506 | bool from_right) |
2507 | { |
2508 | kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE, |
2509 | flags: KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK); |
2510 | meta->km_page_marker = KMEM_META_START; |
2511 | if (!from_right) { |
2512 | meta++; |
2513 | kmem_meta_base[range_id] = meta; |
2514 | } |
2515 | kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta; |
2516 | } |
2517 | |
2518 | __startup_func |
2519 | static void |
2520 | kmem_metadata_init(void) |
2521 | { |
2522 | for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) { |
2523 | vm_map_offset_t addr = kmem_meta_range[i].min_address; |
2524 | struct kmem_page_meta *meta; |
2525 | uint64_t meta_idx; |
2526 | |
2527 | vm_map_will_allocate_early_map(map_owner: &kmem_meta_map[i]); |
2528 | kmem_meta_map[i] = kmem_suballoc(parent: kernel_map, addr: &addr, size: kmem_meta_size, |
2529 | vmc_options: VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST, |
2530 | VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, flags: KMS_PERMANENT | KMS_NOFAIL, |
2531 | VM_KERN_MEMORY_OSFMK).kmr_submap; |
2532 | |
2533 | kmem_meta_range[i].min_address = addr; |
2534 | kmem_meta_range[i].max_address = addr + kmem_meta_size; |
2535 | |
2536 | meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address; |
2537 | kmem_init_meta_front(meta, range_id: i, from_right: 0); |
2538 | |
2539 | meta = kmem_addr_to_meta(addr: kmem_ranges[i].max_address, range_id: i, range_start: &addr, |
2540 | meta_idx: &meta_idx); |
2541 | kmem_init_meta_front(meta, range_id: i, from_right: 1); |
2542 | } |
2543 | } |
2544 | |
2545 | __startup_func |
2546 | static void |
2547 | kmem_init_front_head( |
2548 | struct kmem_sizeclass *ks, |
2549 | uint32_t front) |
2550 | { |
2551 | LIST_INIT(&ks->ks_allfree_head[front]); |
2552 | LIST_INIT(&ks->ks_partial_head[front]); |
2553 | LIST_INIT(&ks->ks_full_head[front]); |
2554 | } |
2555 | |
2556 | __startup_func |
2557 | static void |
2558 | kmem_sizeclass_init(void) |
2559 | { |
2560 | for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) { |
2561 | struct kmem_sizeclass *ks = &kmem_size_array[i]; |
2562 | kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST; |
2563 | |
2564 | ks->ks_size = kmem_get_size_from_idx(idx: i); |
2565 | ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) / |
2566 | KMEM_CHUNK_SIZE_MIN; |
2567 | ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size; |
2568 | assert(ks->ks_num_elem <= |
2569 | (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8)); |
2570 | for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) { |
2571 | kmem_init_front_head(ks, front: kmem_get_front(range_id, from_right: 0)); |
2572 | kmem_init_front_head(ks, front: kmem_get_front(range_id, from_right: 1)); |
2573 | } |
2574 | } |
2575 | } |
2576 | |
2577 | /* |
2578 | * This is done during EARLY_BOOT as it needs the corecrypto module to be |
2579 | * set up. |
2580 | */ |
2581 | __startup_func |
2582 | static void |
2583 | kmem_crypto_init(void) |
2584 | { |
2585 | vm_size_t ctx_size = crypto_random_kmem_ctx_size(); |
2586 | |
2587 | for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) { |
2588 | struct kmem_sizeclass *ks = &kmem_size_array[i]; |
2589 | |
2590 | ks->ks_rng_ctx = zalloc_percpu_permanent(size: ctx_size, ZALIGN_PTR); |
2591 | zpercpu_foreach(ctx, ks->ks_rng_ctx) { |
2592 | crypto_random_kmem_init(ctx); |
2593 | } |
2594 | } |
2595 | } |
2596 | STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init); |
2597 | |
2598 | __abortlike |
2599 | static void |
2600 | kmem_validate_slot_panic( |
2601 | vm_map_offset_t addr, |
2602 | struct kmem_page_meta *meta, |
2603 | uint32_t slot_idx, |
2604 | uint32_t size_idx) |
2605 | { |
2606 | if (meta->km_page_marker != KMEM_META_PRIMARY) { |
2607 | panic("Metadata (%p) for addr (%p) not primary" , meta, (void *)addr); |
2608 | } |
2609 | if (meta->km_sizeclass != size_idx) { |
2610 | panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion" , |
2611 | meta, meta->km_sizeclass, size_idx); |
2612 | } |
2613 | panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free" , |
2614 | slot_idx, meta, (void *)addr); |
2615 | } |
2616 | |
2617 | __abortlike |
2618 | static void |
2619 | kmem_invalid_slot_for_addr( |
2620 | mach_vm_range_t slot, |
2621 | vm_map_offset_t start, |
2622 | vm_map_offset_t end) |
2623 | { |
2624 | panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]" , |
2625 | (void *)slot->min_address, (void *)slot->max_address, |
2626 | (void *)start, (void *)end); |
2627 | } |
2628 | |
2629 | void |
2630 | kmem_validate_slot( |
2631 | vm_map_offset_t addr, |
2632 | struct kmem_page_meta *meta, |
2633 | uint32_t size_idx, |
2634 | uint32_t slot_idx) |
2635 | { |
2636 | if ((meta->km_page_marker != KMEM_META_PRIMARY) || |
2637 | (meta->km_sizeclass != size_idx) || |
2638 | ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) { |
kmem_validate_slot_panic(addr, meta, slot_idx, size_idx);
2640 | } |
2641 | } |
2642 | |
2643 | static void |
2644 | kmem_validate_slot_initial( |
2645 | mach_vm_range_t slot, |
2646 | vm_map_offset_t start, |
2647 | vm_map_offset_t end, |
2648 | struct kmem_page_meta *meta, |
2649 | uint32_t size_idx, |
2650 | uint32_t slot_idx) |
2651 | { |
2652 | if ((slot->min_address == 0) || (slot->max_address == 0) || |
2653 | (start < slot->min_address) || (start >= slot->max_address) || |
2654 | (end > slot->max_address)) { |
2655 | kmem_invalid_slot_for_addr(slot, start, end); |
2656 | } |
2657 | |
2658 | kmem_validate_slot(addr: start, meta, size_idx, slot_idx); |
2659 | } |
2660 | |
2661 | uint32_t |
2662 | kmem_addr_get_slot_idx( |
2663 | vm_map_offset_t start, |
2664 | vm_map_offset_t end, |
2665 | vm_map_range_id_t range_id, |
2666 | struct kmem_page_meta **meta, |
2667 | uint32_t *size_idx, |
2668 | mach_vm_range_t slot) |
2669 | { |
2670 | vm_map_offset_t chunk_start; |
2671 | vm_map_size_t slot_size; |
2672 | uint32_t slot_idx; |
2673 | |
2674 | *meta = kmem_addr_to_meta_start(addr: start, range_id, chunk_start: &chunk_start); |
2675 | *size_idx = (*meta)->km_sizeclass; |
2676 | slot_size = kmem_get_size_from_idx(idx: *size_idx); |
2677 | slot_idx = (start - chunk_start) / slot_size; |
2678 | slot->min_address = chunk_start + slot_idx * slot_size; |
2679 | slot->max_address = slot->min_address + slot_size; |
2680 | |
2681 | kmem_validate_slot_initial(slot, start, end, meta: *meta, size_idx: *size_idx, slot_idx); |
2682 | |
2683 | return slot_idx; |
2684 | } |
2685 | |
2686 | static bool |
2687 | kmem_populate_needed(vm_offset_t from, vm_offset_t to) |
2688 | { |
2689 | #if KASAN |
2690 | #pragma unused(from, to) |
2691 | return true; |
2692 | #else |
2693 | vm_offset_t page_addr = trunc_page(from); |
2694 | |
2695 | for (; page_addr < to; page_addr += PAGE_SIZE) { |
2696 | /* |
2697 | * This can race with another thread doing a populate on the same metadata |
2698 | * page, where we see an updated pmap but unmapped KASan shadow, causing a |
2699 | * fault in the shadow when we first access the metadata page. Avoid this |
2700 | * by always synchronizing on the kmem_meta_lock with KASan. |
2701 | */ |
2702 | if (!pmap_find_phys(map: kernel_pmap, va: page_addr)) { |
2703 | return true; |
2704 | } |
2705 | } |
2706 | |
2707 | return false; |
2708 | #endif /* !KASAN */ |
2709 | } |
2710 | |
2711 | static void |
2712 | kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to) |
2713 | { |
2714 | vm_offset_t page_addr = trunc_page(from); |
2715 | |
2716 | vm_map_unlock(kernel_map); |
2717 | |
2718 | for (; page_addr < to; page_addr += PAGE_SIZE) { |
2719 | for (;;) { |
2720 | kern_return_t ret = KERN_SUCCESS; |
2721 | |
2722 | /* |
2723 | * All updates to kmem metadata are done under the kmem_meta_lock |
2724 | */ |
2725 | kmem_meta_lock(); |
2726 | if (0 == pmap_find_phys(map: kernel_pmap, va: page_addr)) { |
2727 | ret = kernel_memory_populate(addr: page_addr, |
2728 | PAGE_SIZE, flags: KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO, |
2729 | VM_KERN_MEMORY_OSFMK); |
2730 | } |
2731 | kmem_meta_unlock(); |
2732 | |
2733 | if (ret == KERN_SUCCESS) { |
2734 | break; |
2735 | } |
2736 | |
2737 | /* |
2738 | * We can't pass KMA_NOPAGEWAIT under a global lock as it leads |
2739 | * to bad system deadlocks, so if the allocation failed, |
2740 | * we need to do the VM_PAGE_WAIT() outside of the lock. |
2741 | */ |
2742 | VM_PAGE_WAIT(); |
2743 | } |
2744 | } |
2745 | |
2746 | vm_map_lock(kernel_map); |
2747 | } |
2748 | |
2749 | __abortlike |
2750 | static void |
2751 | kmem_invalid_meta_panic( |
2752 | struct kmem_page_meta *meta, |
2753 | uint32_t slot_idx, |
2754 | struct kmem_sizeclass sizeclass) |
2755 | { |
2756 | uint32_t size_idx = kmem_get_idx_from_size(size: sizeclass.ks_size); |
2757 | |
2758 | if (slot_idx >= sizeclass.ks_num_elem) { |
2759 | panic("Invalid slot idx %u [0:%u] for meta %p" , slot_idx, |
2760 | sizeclass.ks_num_elem, meta); |
2761 | } |
2762 | if (meta->km_sizeclass != size_idx) { |
2763 | panic("Invalid size_idx (%u != %u) in meta %p" , size_idx, |
2764 | meta->km_sizeclass, meta); |
2765 | } |
2766 | panic("page_marker %u not primary in meta %p" , meta->km_page_marker, meta); |
2767 | } |
2768 | |
2769 | __abortlike |
2770 | static void |
2771 | kmem_slot_has_entry_panic( |
2772 | vm_map_entry_t entry, |
2773 | vm_map_offset_t addr) |
2774 | { |
2775 | panic("Entry (%p) already exists for addr (%p) being returned" , |
2776 | entry, (void *)addr); |
2777 | } |
2778 | |
2779 | __abortlike |
2780 | static void |
2781 | kmem_slot_not_found( |
2782 | struct kmem_page_meta *meta, |
2783 | uint32_t slot_idx) |
2784 | { |
2785 | panic("%uth free slot not found for meta %p bitmap %u" , slot_idx, meta, |
2786 | meta->km_bitmap); |
2787 | } |
2788 | |
2789 | /* |
2790 | * Returns a 16bit random number between 0 and |
2791 | * upper_limit (inclusive) |
2792 | */ |
2793 | __startup_func |
2794 | uint16_t |
2795 | kmem_get_random16( |
2796 | uint16_t upper_limit) |
2797 | { |
2798 | static uint64_t random_entropy; |
2799 | assert(upper_limit < UINT16_MAX); |
2800 | if (random_entropy == 0) { |
2801 | random_entropy = early_random(); |
2802 | } |
2803 | uint32_t result = random_entropy & UINT32_MAX; |
2804 | random_entropy >>= 32; |
2805 | return (uint16_t)(result % (upper_limit + 1)); |
2806 | } |
2807 | |
2808 | static uint32_t |
2809 | kmem_get_nth_free_slot( |
2810 | struct kmem_page_meta *meta, |
2811 | uint32_t n, |
2812 | uint32_t bitmap) |
2813 | { |
2814 | uint32_t zeros_seen = 0, ones_seen = 0; |
2815 | |
2816 | while (bitmap) { |
2817 | uint32_t count = __builtin_ctz(bitmap); |
2818 | |
2819 | zeros_seen += count; |
2820 | bitmap >>= count; |
2821 | if (__probable(~bitmap)) { |
2822 | count = __builtin_ctz(~bitmap); |
2823 | } else { |
2824 | count = 32; |
2825 | } |
2826 | if (count + ones_seen > n) { |
2827 | return zeros_seen + n; |
2828 | } |
2829 | ones_seen += count; |
2830 | bitmap >>= count; |
2831 | } |
2832 | |
2833 | kmem_slot_not_found(meta, slot_idx: n); |
2834 | } |
2835 | |
2836 | |
2837 | static uint32_t |
2838 | kmem_get_next_slot( |
2839 | struct kmem_page_meta *meta, |
2840 | struct kmem_sizeclass sizeclass, |
2841 | uint32_t bitmap) |
2842 | { |
2843 | uint32_t num_slots = __builtin_popcount(bitmap); |
2844 | uint64_t slot_idx = 0; |
2845 | |
2846 | assert(num_slots > 0); |
2847 | if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) { |
2848 | /* |
2849 | * Use early random prior to early boot as the ks_rng_ctx requires |
2850 | * the corecrypto module to be setup before it is initialized and |
2851 | * used. |
2852 | * |
2853 | * num_slots can't be 0 as we take this path when we have more than |
2854 | * one slot left. |
2855 | */ |
2856 | slot_idx = kmem_get_random16(upper_limit: (uint16_t)num_slots - 1); |
2857 | } else { |
2858 | crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), bound: num_slots, |
2859 | random: &slot_idx); |
2860 | } |
2861 | |
2862 | return kmem_get_nth_free_slot(meta, n: slot_idx, bitmap); |
2863 | } |
2864 | |
2865 | /* |
2866 | * Returns an unallocated slot from the given metadata |
2867 | */ |
2868 | static vm_map_offset_t |
2869 | kmem_get_addr_from_meta( |
2870 | struct kmem_page_meta *meta, |
2871 | vm_map_range_id_t range_id, |
2872 | struct kmem_sizeclass sizeclass, |
2873 | vm_map_entry_t *entry) |
2874 | { |
2875 | vm_map_offset_t addr; |
2876 | vm_map_size_t size = sizeclass.ks_size; |
2877 | uint32_t size_idx = kmem_get_idx_from_size(size); |
2878 | uint64_t meta_idx = meta - kmem_meta_base[range_id]; |
2879 | mach_vm_offset_t range_start = kmem_ranges[range_id].min_address; |
2880 | uint32_t slot_bit; |
2881 | uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, bitmap: meta->km_bitmap); |
2882 | |
2883 | if ((slot_idx >= sizeclass.ks_num_elem) || |
2884 | (meta->km_sizeclass != size_idx) || |
2885 | (meta->km_page_marker != KMEM_META_PRIMARY)) { |
2886 | kmem_invalid_meta_panic(meta, slot_idx, sizeclass); |
2887 | } |
2888 | |
2889 | slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx); |
2890 | meta->km_bitmap &= ~slot_bit; |
2891 | |
2892 | addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size); |
2893 | assert(kmem_range_contains_fully(range_id, addr, size)); |
2894 | if (vm_map_lookup_entry(map: kernel_map, address: addr, entry)) { |
2895 | kmem_slot_has_entry_panic(entry: *entry, addr); |
2896 | } |
2897 | if ((*entry != vm_map_to_entry(kernel_map)) && |
2898 | ((*entry)->vme_next != vm_map_to_entry(kernel_map)) && |
2899 | ((*entry)->vme_next->vme_start < (addr + size))) { |
2900 | kmem_slot_has_entry_panic(entry: *entry, addr); |
2901 | } |
2902 | return addr; |
2903 | } |
2904 | |
2905 | __abortlike |
2906 | static void |
2907 | kmem_range_out_of_va( |
2908 | kmem_range_id_t range_id, |
2909 | uint32_t num_chunks) |
2910 | { |
2911 | panic("No more VA to allocate %u chunks in range %u" , num_chunks, range_id); |
2912 | } |
2913 | |
2914 | static void |
2915 | kmem_init_allocated_chunk( |
2916 | struct kmem_page_meta *meta, |
2917 | struct kmem_sizeclass sizeclass, |
2918 | uint32_t size_idx) |
2919 | { |
2920 | uint32_t meta_num = sizeclass.ks_num_chunk; |
2921 | uint32_t num_elem = sizeclass.ks_num_elem; |
2922 | |
2923 | meta->km_bitmap = (1ull << num_elem) - 1; |
2924 | meta->km_chunk_len = (uint16_t)meta_num; |
2925 | assert(LIST_NEXT(meta, km_link) == NULL); |
2926 | assert(meta->km_link.le_prev == NULL); |
2927 | meta->km_sizeclass = (uint8_t)size_idx; |
2928 | meta->km_page_marker = KMEM_META_PRIMARY; |
2929 | meta++; |
2930 | for (uint32_t i = 1; i < meta_num; i++) { |
2931 | meta->km_page_idx = (uint16_t)i; |
2932 | meta->km_sizeclass = (uint8_t)size_idx; |
2933 | meta->km_page_marker = 0; |
2934 | meta->km_bitmap = 0; |
2935 | meta++; |
2936 | } |
2937 | } |
2938 | |
2939 | static uint32_t |
2940 | kmem_get_additional_meta( |
2941 | struct kmem_page_meta *meta, |
2942 | uint32_t meta_req, |
2943 | bool from_right, |
2944 | struct kmem_page_meta **adj_free_meta) |
2945 | { |
2946 | struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1); |
2947 | |
2948 | if (meta_prev->km_page_marker == KMEM_META_FREE) { |
2949 | uint32_t chunk_len = kmem_get_free_chunk_len(meta: meta_prev); |
2950 | |
2951 | *adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1); |
2952 | meta_req -= chunk_len; |
2953 | } else { |
2954 | *adj_free_meta = NULL; |
2955 | } |
2956 | |
2957 | return meta_req; |
2958 | } |
2959 | |
2960 | |
2961 | static struct kmem_page_meta * |
2962 | kmem_get_new_chunk( |
2963 | vm_map_range_id_t range_id, |
2964 | bool from_right, |
2965 | uint32_t size_idx) |
2966 | { |
2967 | struct kmem_sizeclass sizeclass = kmem_size_array[size_idx]; |
2968 | struct kmem_page_meta *start, *end, *meta_update; |
2969 | struct kmem_page_meta *adj_free_meta = NULL; |
2970 | uint32_t meta_req = sizeclass.ks_num_chunk; |
2971 | |
2972 | for (;;) { |
2973 | struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)]; |
2974 | struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, from_right: 1)]; |
2975 | struct kmem_page_meta *meta; |
2976 | vm_offset_t start_addr, end_addr; |
2977 | uint32_t meta_num; |
2978 | |
2979 | meta = from_right ? metab : metaf; |
2980 | meta_num = kmem_get_additional_meta(meta, meta_req, from_right, |
2981 | adj_free_meta: &adj_free_meta); |
2982 | |
2983 | if (metaf + meta_num >= metab) { |
2984 | kmem_range_out_of_va(range_id, num_chunks: meta_num); |
2985 | } |
2986 | |
2987 | start = from_right ? (metab - meta_num) : metaf; |
2988 | end = from_right ? metab : (metaf + meta_num); |
2989 | |
2990 | start_addr = (vm_offset_t)start; |
2991 | end_addr = (vm_offset_t)end; |
2992 | |
2993 | /* |
2994 | * If the new high watermark stays on the same page, |
2995 | * no need to populate and drop the lock. |
2996 | */ |
2997 | if (!page_aligned(from_right ? end_addr : start_addr) && |
2998 | trunc_page(start_addr) == trunc_page(end_addr - 1)) { |
2999 | break; |
3000 | } |
3001 | if (!kmem_populate_needed(from: start_addr, to: end_addr)) { |
3002 | break; |
3003 | } |
3004 | |
3005 | kmem_populate_meta_locked(from: start_addr, to: end_addr); |
3006 | |
3007 | /* |
3008 | * Since we dropped the lock, reassess conditions still hold: |
3009 | * - the HWM we are changing must not have moved |
3010 | * - the other HWM must not intersect with ours |
3011 | * - in case of coalescing, the adjacent free meta must still |
3012 | * be free and of the same size. |
3013 | * |
3014 | * If we failed to grow, reevaluate whether freelists have |
3015 | * entries now by returning NULL. |
3016 | */ |
3017 | metaf = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)]; |
3018 | metab = kmem_meta_hwm[kmem_get_front(range_id, from_right: 1)]; |
3019 | if (meta != (from_right ? metab : metaf)) { |
3020 | return NULL; |
3021 | } |
3022 | if (metaf + meta_num >= metab) { |
3023 | kmem_range_out_of_va(range_id, num_chunks: meta_num); |
3024 | } |
3025 | if (adj_free_meta) { |
3026 | if (adj_free_meta->km_page_marker != KMEM_META_FREE || |
3027 | kmem_get_free_chunk_len(meta: adj_free_meta) != |
3028 | meta_req - meta_num) { |
3029 | return NULL; |
3030 | } |
3031 | } |
3032 | |
3033 | break; |
3034 | } |
3035 | |
3036 | /* |
3037 | * If there is an adjacent free chunk remove it from free list |
3038 | */ |
3039 | if (adj_free_meta) { |
3040 | LIST_REMOVE(adj_free_meta, km_link); |
3041 | LIST_NEXT(adj_free_meta, km_link) = NULL; |
3042 | adj_free_meta->km_link.le_prev = NULL; |
3043 | } |
3044 | |
3045 | /* |
3046 | * Update hwm |
3047 | */ |
3048 | meta_update = from_right ? start : end; |
3049 | kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update; |
3050 | |
3051 | /* |
3052 | * Initialize metadata |
3053 | */ |
3054 | start = from_right ? start : (end - meta_req); |
3055 | kmem_init_allocated_chunk(meta: start, sizeclass, size_idx); |
3056 | |
3057 | return start; |
3058 | } |
3059 | |
3060 | static void |
3061 | kmem_requeue_meta( |
3062 | struct kmem_page_meta *meta, |
3063 | struct kmem_list_head *head) |
3064 | { |
3065 | LIST_REMOVE(meta, km_link); |
3066 | LIST_INSERT_HEAD(head, meta, km_link); |
3067 | } |
3068 | |
3069 | /* |
* Return the sizeclass index under which to stash free chunks
3071 | */ |
3072 | __abortlike |
3073 | static void |
3074 | kmem_invalid_chunk_num(uint32_t chunks) |
3075 | { |
3076 | panic("Invalid number of chunks %u\n" , chunks); |
3077 | } |
3078 | |
3079 | static uint32_t |
3080 | kmem_get_size_idx_for_chunks(uint32_t chunks) |
3081 | { |
3082 | for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) { |
3083 | if (chunks >= kmem_size_array[i].ks_num_chunk) { |
3084 | return i; |
3085 | } |
3086 | } |
3087 | kmem_invalid_chunk_num(chunks); |
3088 | } |
3089 | |
3090 | static void |
3091 | kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count) |
3092 | { |
3093 | bzero(s: meta, n: count * sizeof(struct kmem_page_meta)); |
3094 | } |
3095 | |
3096 | static void |
3097 | kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count) |
3098 | { |
3099 | #if MACH_ASSERT |
3100 | size_t size = count * sizeof(struct kmem_page_meta); |
3101 | |
3102 | assert(memcmp_zero_ptr_aligned(meta, size) == 0); |
3103 | #else |
3104 | #pragma unused(meta, count) |
3105 | #endif |
3106 | } |
3107 | |
3108 | /*! |
3109 | * @function kmem_init_free_chunk() |
3110 | * |
3111 | * @discussion |
3112 | * This function prepares a range of chunks to be put on a free list. |
3113 | * The first and last metadata might be dirty, but the "inner" ones |
3114 | * must be zero filled by the caller prior to calling this function. |
3115 | */ |
3116 | static void |
3117 | kmem_init_free_chunk( |
3118 | struct kmem_page_meta *meta, |
3119 | uint32_t num_chunks, |
3120 | uint32_t front) |
3121 | { |
3122 | struct kmem_sizeclass *sizeclass; |
3123 | uint32_t size_idx = kmem_get_size_idx_for_chunks(chunks: num_chunks); |
3124 | |
3125 | if (num_chunks > 2) { |
3126 | kmem_check_meta_range_is_clear(meta: meta + 1, count: num_chunks - 2); |
3127 | } |
3128 | |
3129 | meta[0] = (struct kmem_page_meta){ |
3130 | .km_free_chunks = num_chunks, |
3131 | .km_page_marker = KMEM_META_FREE, |
3132 | .km_sizeclass = (uint8_t)size_idx, |
3133 | }; |
3134 | if (num_chunks > 1) { |
3135 | meta[num_chunks - 1] = (struct kmem_page_meta){ |
3136 | .km_free_chunks = num_chunks, |
3137 | .km_page_marker = KMEM_META_FREE, |
3138 | .km_sizeclass = (uint8_t)size_idx, |
3139 | }; |
3140 | } |
3141 | |
3142 | sizeclass = &kmem_size_array[size_idx]; |
3143 | LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link); |
3144 | } |
3145 | |
3146 | static struct kmem_page_meta * |
3147 | kmem_get_free_chunk_from_list( |
3148 | struct kmem_sizeclass *org_sizeclass, |
3149 | uint32_t size_idx, |
3150 | uint32_t front) |
3151 | { |
3152 | struct kmem_sizeclass *sizeclass; |
3153 | uint32_t num_chunks = org_sizeclass->ks_num_chunk; |
3154 | struct kmem_page_meta *meta; |
3155 | uint32_t idx = size_idx; |
3156 | |
3157 | while (idx < KMEM_NUM_SIZECLASS) { |
3158 | sizeclass = &kmem_size_array[idx]; |
3159 | meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]); |
3160 | if (meta) { |
3161 | break; |
3162 | } |
3163 | idx++; |
3164 | } |
3165 | |
3166 | /* |
3167 | * Trim if larger in size |
3168 | */ |
3169 | if (meta) { |
3170 | uint32_t num_chunks_free = kmem_get_free_chunk_len(meta); |
3171 | |
3172 | assert(meta->km_page_marker == KMEM_META_FREE); |
3173 | LIST_REMOVE(meta, km_link); |
3174 | LIST_NEXT(meta, km_link) = NULL; |
3175 | meta->km_link.le_prev = NULL; |
3176 | if (num_chunks_free > num_chunks) { |
3177 | num_chunks_free -= num_chunks; |
3178 | kmem_init_free_chunk(meta: meta + num_chunks, num_chunks: num_chunks_free, front); |
3179 | } |
3180 | |
3181 | kmem_init_allocated_chunk(meta, sizeclass: *org_sizeclass, size_idx); |
3182 | } |
3183 | |
3184 | return meta; |
3185 | } |
3186 | |
3187 | kern_return_t |
3188 | kmem_locate_space( |
3189 | vm_map_size_t size, |
3190 | vm_map_range_id_t range_id, |
3191 | bool from_right, |
3192 | vm_map_offset_t *start_inout, |
3193 | vm_map_entry_t *entry_out) |
3194 | { |
3195 | vm_map_entry_t entry; |
3196 | uint32_t size_idx = kmem_get_idx_from_size(size); |
3197 | uint32_t front = kmem_get_front(range_id, from_right); |
3198 | struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx]; |
3199 | struct kmem_page_meta *meta; |
3200 | |
3201 | assert(size <= sizeclass->ks_size); |
3202 | again: |
3203 | if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) { |
3204 | *start_inout = kmem_get_addr_from_meta(meta, range_id, sizeclass: *sizeclass, entry: &entry); |
3205 | /* |
3206 | * Requeue to full if necessary |
3207 | */ |
3208 | assert(meta->km_page_marker == KMEM_META_PRIMARY); |
3209 | if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) { |
3210 | kmem_requeue_meta(meta, head: &sizeclass->ks_full_head[front]); |
3211 | } |
3212 | } else if ((meta = kmem_get_free_chunk_from_list(org_sizeclass: sizeclass, size_idx, |
3213 | front)) != NULL) { |
3214 | *start_inout = kmem_get_addr_from_meta(meta, range_id, sizeclass: *sizeclass, entry: &entry); |
3215 | /* |
3216 | * Queue to partial |
3217 | */ |
3218 | assert(meta->km_page_marker == KMEM_META_PRIMARY); |
3219 | assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS); |
3220 | LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link); |
3221 | } else { |
3222 | meta = kmem_get_new_chunk(range_id, from_right, size_idx); |
3223 | if (meta == NULL) { |
3224 | goto again; |
3225 | } |
3226 | *start_inout = kmem_get_addr_from_meta(meta, range_id, sizeclass: *sizeclass, entry: &entry); |
3227 | assert(meta->km_page_marker == KMEM_META_PRIMARY); |
3228 | LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link); |
3229 | } |
3230 | |
3231 | if (entry_out) { |
3232 | *entry_out = entry; |
3233 | } |
3234 | |
3235 | return KERN_SUCCESS; |
3236 | } |
3237 | |
3238 | /* |
3239 | * Determine whether the given metadata was allocated from the right |
3240 | */ |
3241 | static bool |
3242 | kmem_meta_is_from_right( |
3243 | kmem_range_id_t range_id, |
3244 | struct kmem_page_meta *meta) |
3245 | { |
3246 | struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)]; |
3247 | #if DEBUG || DEVELOPMENT |
3248 | struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)]; |
3249 | #endif |
3250 | struct kmem_page_meta *meta_base = kmem_meta_base[range_id]; |
3251 | struct kmem_page_meta *meta_end; |
3252 | |
3253 | meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address; |
3254 | |
3255 | if ((meta >= meta_base) && (meta < metaf)) { |
3256 | return false; |
3257 | } |
3258 | |
3259 | assert(meta >= metab && meta < meta_end); |
3260 | return true; |
3261 | } |
3262 | |
3263 | static void |
3264 | kmem_free_chunk( |
3265 | kmem_range_id_t range_id, |
3266 | struct kmem_page_meta *meta, |
3267 | bool from_right) |
3268 | { |
3269 | struct kmem_page_meta *meta_coalesce = meta - 1; |
3270 | struct kmem_page_meta *meta_start = meta; |
3271 | uint32_t num_chunks = kmem_get_chunk_len(meta); |
3272 | uint32_t add_chunks; |
3273 | struct kmem_page_meta *meta_end = meta + num_chunks; |
3274 | struct kmem_page_meta *meta_hwm_l, *meta_hwm_r; |
3275 | uint32_t front = kmem_get_front(range_id, from_right); |
3276 | |
3277 | meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)]; |
3278 | meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, from_right: 1)]; |
3279 | |
3280 | LIST_REMOVE(meta, km_link); |
3281 | kmem_clear_meta_range(meta, count: num_chunks); |
3282 | |
3283 | /* |
3284 | * Coalesce left |
3285 | */ |
3286 | if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) && |
3287 | (meta_coalesce->km_page_marker == KMEM_META_FREE)) { |
3288 | meta_start = meta_coalesce - kmem_get_free_chunk_len(meta: meta_coalesce) + 1; |
3289 | add_chunks = kmem_get_free_chunk_len(meta: meta_start); |
3290 | num_chunks += add_chunks; |
3291 | LIST_REMOVE(meta_start, km_link); |
3292 | kmem_clear_meta_range(meta: meta_start + add_chunks - 1, count: 1); |
3293 | } |
3294 | |
3295 | /* |
3296 | * Coalesce right |
3297 | */ |
3298 | if (((!from_right && (meta_end < meta_hwm_l)) || from_right) && |
3299 | (meta_end->km_page_marker == KMEM_META_FREE)) { |
3300 | add_chunks = kmem_get_free_chunk_len(meta: meta_end); |
3301 | LIST_REMOVE(meta_end, km_link); |
3302 | kmem_clear_meta_range(meta: meta_end, count: 1); |
3303 | meta_end = meta_end + add_chunks; |
3304 | num_chunks += add_chunks; |
3305 | } |
3306 | |
3307 | kmem_init_free_chunk(meta: meta_start, num_chunks, front); |
3308 | } |
3309 | |
3310 | static void |
3311 | kmem_free_slot( |
3312 | kmem_range_id_t range_id, |
3313 | mach_vm_range_t slot) |
3314 | { |
3315 | struct kmem_page_meta *meta; |
3316 | vm_map_offset_t chunk_start; |
3317 | uint32_t size_idx, chunk_elem, slot_idx, num_elem; |
3318 | struct kmem_sizeclass *sizeclass; |
3319 | vm_map_size_t slot_size; |
3320 | |
3321 | meta = kmem_addr_to_meta_start(addr: slot->min_address, range_id, chunk_start: &chunk_start); |
3322 | size_idx = meta->km_sizeclass; |
3323 | slot_size = kmem_get_size_from_idx(idx: size_idx); |
3324 | slot_idx = (slot->min_address - chunk_start) / slot_size; |
3325 | assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0); |
3326 | meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx); |
3327 | |
3328 | sizeclass = &kmem_size_array[size_idx]; |
3329 | chunk_elem = sizeclass->ks_num_elem; |
3330 | num_elem = __builtin_popcount(meta->km_bitmap); |
3331 | |
3332 | if (num_elem == chunk_elem) { |
3333 | /* |
3334 | * If entire chunk empty add to emtpy list |
3335 | */ |
3336 | bool from_right = kmem_meta_is_from_right(range_id, meta); |
3337 | |
3338 | kmem_free_chunk(range_id, meta, from_right); |
3339 | } else if (num_elem == KMEM_NUM_GUARDS + 1) { |
3340 | /* |
3341 | * If we freed to full chunk move it to partial |
3342 | */ |
3343 | uint32_t front = kmem_get_front(range_id, |
3344 | from_right: kmem_meta_is_from_right(range_id, meta)); |
3345 | |
3346 | kmem_requeue_meta(meta, head: &sizeclass->ks_partial_head[front]); |
3347 | } |
3348 | } |
3349 | |
3350 | void |
3351 | kmem_free_space( |
3352 | vm_map_offset_t start, |
3353 | vm_map_offset_t end, |
3354 | vm_map_range_id_t range_id, |
3355 | mach_vm_range_t slot) |
3356 | { |
3357 | bool entry_present = false; |
3358 | vm_map_entry_t prev_entry; |
3359 | vm_map_entry_t next_entry; |
3360 | |
3361 | if ((slot->min_address == start) && (slot->max_address == end)) { |
3362 | /* |
3363 | * Entire slot is being freed at once |
3364 | */ |
3365 | return kmem_free_slot(range_id, slot); |
3366 | } |
3367 | |
3368 | entry_present = vm_map_lookup_entry(map: kernel_map, address: start, entry: &prev_entry); |
3369 | assert(!entry_present); |
3370 | next_entry = prev_entry->vme_next; |
3371 | |
3372 | if (((prev_entry == vm_map_to_entry(kernel_map) || |
3373 | prev_entry->vme_end <= slot->min_address)) && |
3374 | (next_entry == vm_map_to_entry(kernel_map) || |
3375 | (next_entry->vme_start >= slot->max_address))) { |
3376 | /* |
3377 | * Free entire slot |
3378 | */ |
3379 | kmem_free_slot(range_id, slot); |
3380 | } |
3381 | } |
3382 | |
3383 | #pragma mark kmem init |
3384 | |
3385 | /* |
3386 | * The default percentage of memory that can be mlocked is scaled based on the total |
* amount of memory in the system. These percentages are calculated
3388 | * offline and stored in this table. We index this table by |
3389 | * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range |
3390 | * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t)) |
3391 | * |
3392 | * Note that these values were picked for mac. |
3393 | * If we ever have very large memory config arm devices, we may want to revisit |
3394 | * since the kernel overhead is smaller there due to the larger page size. |
3395 | */ |
3396 | |
3397 | /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */ |
3398 | #define VM_USER_WIREABLE_MIN_CONFIG 32 |
3399 | #if CONFIG_JETSAM |
3400 | /* Systems with jetsam can wire a bit more b/c the system can relieve wired |
3401 | * pressure. |
3402 | */ |
3403 | static vm_map_size_t wire_limit_percents[] = |
3404 | { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97}; |
3405 | #else |
3406 | static vm_map_size_t wire_limit_percents[] = |
3407 | { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97}; |
3408 | #endif /* CONFIG_JETSAM */ |
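/*
 * Worked example of the table indexing (a sketch, assuming bit_floor()
 * below yields floor(log2(config_memsize)) as the comment above implies,
 * and ignoring the VM_NOT_USER_WIREABLE_MAX cap applied later):
 *   16 GB  -> log2 = 34, index = 34 - 32 = 2 -> 80% with CONFIG_JETSAM, 76% without
 *   256 GB -> log2 = 38, index = 38 - 32 = 6 -> 88% in either table
 *   >= 2 TB (log2 >= 41) clamps to the last entry -> 97%
 */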
3409 | |
3410 | /* |
3411 | * Sets the default global user wire limit which limits the amount of |
* memory that can be locked via mlock(), based on the table above.
3413 | * This can be overridden via a sysctl. |
3414 | */ |
3415 | static void |
3416 | kmem_set_user_wire_limits(void) |
3417 | { |
3418 | uint64_t available_mem_log; |
3419 | uint64_t max_wire_percent; |
3420 | size_t wire_limit_percents_length = sizeof(wire_limit_percents) / |
3421 | sizeof(vm_map_size_t); |
3422 | vm_map_size_t limit; |
3423 | uint64_t config_memsize = max_mem; |
3424 | #if defined(XNU_TARGET_OS_OSX) |
3425 | config_memsize = max_mem_actual; |
3426 | #endif /* defined(XNU_TARGET_OS_OSX) */ |
3427 | |
available_mem_log = bit_floor(config_memsize);
3429 | |
3430 | if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) { |
3431 | available_mem_log = 0; |
3432 | } else { |
3433 | available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG; |
3434 | } |
3435 | if (available_mem_log >= wire_limit_percents_length) { |
3436 | available_mem_log = wire_limit_percents_length - 1; |
3437 | } |
3438 | max_wire_percent = wire_limit_percents[available_mem_log]; |
3439 | |
3440 | limit = config_memsize * max_wire_percent / 100; |
3441 | /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */ |
3442 | if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) { |
3443 | limit = config_memsize - VM_NOT_USER_WIREABLE_MAX; |
3444 | } |
3445 | |
3446 | vm_global_user_wire_limit = limit; |
3447 | /* the default per task limit is the same as the global limit */ |
3448 | vm_per_task_user_wire_limit = limit; |
3449 | vm_add_wire_count_over_global_limit = 0; |
3450 | vm_add_wire_count_over_user_limit = 0; |
3451 | } |
3452 | |
3453 | #define KMEM_MAX_CLAIMS 50 |
3454 | __startup_data |
3455 | struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {}; |
3456 | __startup_data |
3457 | uint32_t kmem_claim_count = 0; |
3458 | |
3459 | __startup_func |
3460 | void |
3461 | kmem_range_startup_init( |
3462 | struct kmem_range_startup_spec *sp) |
3463 | { |
3464 | assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT); |
3465 | if (sp->kc_calculate_sz) { |
3466 | sp->kc_size = (sp->kc_calculate_sz)(); |
3467 | } |
3468 | if (sp->kc_size) { |
3469 | kmem_claims[kmem_claim_count] = *sp; |
3470 | kmem_claim_count++; |
3471 | } |
3472 | } |
3473 | |
3474 | static vm_offset_t |
3475 | kmem_fuzz_start(void) |
3476 | { |
3477 | vm_offset_t kmapoff_kaddr = 0; |
3478 | uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */ |
3479 | vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt); |
3480 | |
kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
VM_KERN_MEMORY_OSFMK);
3484 | return kmapoff_kaddr + kmapoff_size; |
3485 | } |
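/*
 * Note on the arithmetic above: (early_random() & 0x1ff) + 1 yields
 * 1..512 pages, so with 4K pages the slide consumes between 4KiB and
 * 2MiB of VA, consistent with the "about 2M" note in
 * kmem_scramble_ranges() below.
 */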
3486 | |
3487 | /* |
3488 | * Generate a randomly shuffled array of indices from 0 to count - 1 |
3489 | */ |
3490 | __startup_func |
3491 | void |
3492 | kmem_shuffle( |
3493 | uint16_t *shuffle_buf, |
3494 | uint16_t count) |
3495 | { |
3496 | for (uint16_t i = 0; i < count; i++) { |
uint16_t j = kmem_get_random16(i);
3498 | if (j != i) { |
3499 | shuffle_buf[i] = shuffle_buf[j]; |
3500 | } |
3501 | shuffle_buf[j] = i; |
3502 | } |
3503 | } |
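/*
 * Implementation note: this is the "inside-out" variant of the
 * Fisher-Yates shuffle. For each i we pick j uniformly in [0, i]
 * (kmem_get_random16() appears to treat its argument as an inclusive
 * upper bound; see the assert in kmem_readjust_ranges()), move the
 * element currently at j up to slot i, and place i at j. After the loop
 * shuffle_buf holds a uniform random permutation of 0..count-1.
 */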
3504 | |
3505 | __startup_func |
3506 | static void |
3507 | kmem_shuffle_claims(void) |
3508 | { |
3509 | uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {}; |
3510 | uint16_t limit = (uint16_t)kmem_claim_count; |
3511 | |
kmem_shuffle(&shuffle_buf[0], limit);
3513 | for (uint16_t i = 0; i < limit; i++) { |
3514 | struct kmem_range_startup_spec tmp = kmem_claims[i]; |
3515 | kmem_claims[i] = kmem_claims[shuffle_buf[i]]; |
3516 | kmem_claims[shuffle_buf[i]] = tmp; |
3517 | } |
3518 | } |
3519 | |
3520 | __startup_func |
3521 | static void |
3522 | kmem_readjust_ranges( |
3523 | uint32_t cur_idx) |
3524 | { |
3525 | assert(cur_idx != 0); |
3526 | uint32_t j = cur_idx - 1, random; |
3527 | struct kmem_range_startup_spec sp = kmem_claims[cur_idx]; |
3528 | struct mach_vm_range *sp_range = sp.kc_range; |
3529 | |
3530 | /* |
3531 | * Find max index where restriction is met |
3532 | */ |
3533 | for (; j > 0; j--) { |
3534 | struct kmem_range_startup_spec spj = kmem_claims[j]; |
3535 | vm_map_offset_t max_start = spj.kc_range->min_address; |
3536 | if (spj.kc_flags & KC_NO_MOVE) { |
panic("kmem_range_init: Can't scramble with multiple constraints");
3538 | } |
3539 | if (max_start <= sp_range->min_address) { |
3540 | break; |
3541 | } |
3542 | } |
3543 | |
3544 | /* |
3545 | * Pick a random index from 0 to max index and shift claims to the right |
3546 | * to make room for restricted claim |
3547 | */ |
random = kmem_get_random16((uint16_t)j);
3549 | assert(random <= j); |
3550 | |
3551 | sp_range->min_address = kmem_claims[random].kc_range->min_address; |
3552 | sp_range->max_address = sp_range->min_address + sp.kc_size; |
3553 | |
3554 | for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) { |
3555 | struct kmem_range_startup_spec spj = kmem_claims[j]; |
3556 | struct mach_vm_range *range = spj.kc_range; |
3557 | range->min_address += sp.kc_size; |
3558 | range->max_address += sp.kc_size; |
3559 | kmem_claims[j + 1] = spj; |
3560 | } |
3561 | |
3562 | sp.kc_flags = KC_NO_MOVE; |
3563 | kmem_claims[random] = sp; |
3564 | } |
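/*
 * Illustrative walk-through (hypothetical layout): suppose claims were
 * placed left to right as [C0, C1, C2, C3] and C3's minimum-address
 * restriction was not met. The search above finds the largest index j
 * whose assigned start still satisfies C3's restriction, a random slot r
 * in [0, j] is chosen, C3 takes over C(r)'s old start, and C(r)..C2 are
 * shifted right (both in the array and in VA) by C3's size. C3 is then
 * marked KC_NO_MOVE so it is not displaced a second time.
 */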
3565 | |
3566 | __startup_func |
3567 | static vm_map_size_t |
3568 | kmem_add_ptr_claims(void) |
3569 | { |
3570 | uint64_t kmem_meta_num, kmem_ptr_chunks; |
3571 | vm_map_size_t org_ptr_range_size = ptr_range_size; |
3572 | |
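/*
 * Split the reserved VA between chunk space and its metadata: every
 * KMEM_CHUNK_SIZE_MIN bytes of pointer VA needs one struct kmem_page_meta,
 * so (after a page of slack) the chunk portion works out to roughly
 * size * CHUNK / (CHUNK + sizeof(meta)), rounded down to whole chunks
 * in the computation below.
 */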
3573 | ptr_range_size -= PAGE_SIZE; |
3574 | ptr_range_size *= KMEM_CHUNK_SIZE_MIN; |
3575 | ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta)); |
3576 | |
3577 | kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN; |
3578 | ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN; |
3579 | |
3580 | kmem_meta_num = kmem_ptr_chunks + 2; |
kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));
3582 | |
3583 | assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size); |
3584 | /* |
3585 | * Add claims for kmem's ranges |
3586 | */ |
3587 | for (uint32_t i = 0; i < kmem_ptr_ranges; i++) { |
3588 | struct kmem_range_startup_spec kmem_spec = { |
.kc_name = "kmem_ptr_range",
3590 | .kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i], |
3591 | .kc_size = ptr_range_size, |
3592 | .kc_flags = KC_NO_ENTRY, |
3593 | }; |
3594 | kmem_claims[kmem_claim_count++] = kmem_spec; |
3595 | |
3596 | struct kmem_range_startup_spec kmem_meta_spec = { |
.kc_name = "kmem_ptr_range_meta",
3598 | .kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i], |
3599 | .kc_size = kmem_meta_size, |
3600 | .kc_flags = KC_NONE, |
3601 | }; |
3602 | kmem_claims[kmem_claim_count++] = kmem_meta_spec; |
3603 | } |
3604 | return (org_ptr_range_size - ptr_range_size - kmem_meta_size) * |
3605 | kmem_ptr_ranges; |
3606 | } |
3607 | |
3608 | __startup_func |
3609 | static void |
kmem_add_extra_claims(void)
3611 | { |
3612 | vm_map_size_t largest_free_size = 0, total_claims = 0; |
3613 | |
vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
3615 | largest_free_size = trunc_page(largest_free_size); |
3616 | |
3617 | /* |
3618 | * kasan and configs w/o *TRR need to have just one ptr range due to |
3619 | * resource constraints. |
3620 | */ |
3621 | #if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT) |
3622 | kmem_ptr_ranges = 1; |
3623 | #endif |
3624 | /* |
3625 | * Determine size of data and pointer kmem_ranges |
3626 | */ |
3627 | for (uint32_t i = 0; i < kmem_claim_count; i++) { |
3628 | total_claims += kmem_claims[i].kc_size; |
3629 | } |
3630 | assert((total_claims & PAGE_MASK) == 0); |
3631 | largest_free_size -= total_claims; |
3632 | |
3633 | /* |
3634 | * Use half the total available VA for all pointer allocations (this |
3635 | * includes the kmem_sprayqtn range). Given that we have 4 total |
3636 | * ranges divide the available VA by 8. |
3637 | */ |
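/*
 * Worked example: with the default 3 pointer ranges (the kasan note
 * above drops this to 1) the divisor is (3 + 1) * 2 = 8, so each
 * pointer range and the spray quarantine range gets 1/8 of the
 * remaining VA (half in total), and the data range gets roughly the
 * other half below.
 */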
3638 | ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2); |
3639 | sprayqtn_range_size = ptr_range_size; |
3640 | |
3641 | if (sprayqtn_range_size > (sane_size / 2)) { |
3642 | sprayqtn_range_size = sane_size / 2; |
3643 | } |
3644 | |
ptr_range_size = round_page(ptr_range_size);
sprayqtn_range_size = round_page(sprayqtn_range_size);
3647 | |
3648 | |
3649 | data_range_size = largest_free_size |
3650 | - (ptr_range_size * kmem_ptr_ranges) |
3651 | - sprayqtn_range_size; |
3652 | |
3653 | /* |
3654 | * Add claims for kmem's ranges |
3655 | */ |
3656 | data_range_size += kmem_add_ptr_claims(); |
3657 | assert(data_range_size + sprayqtn_range_size + |
3658 | ((ptr_range_size + kmem_meta_size) * kmem_ptr_ranges) <= |
3659 | largest_free_size); |
3660 | |
3661 | struct kmem_range_startup_spec kmem_spec_sprayqtn = { |
.kc_name = "kmem_sprayqtn_range",
3663 | .kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN], |
3664 | .kc_size = sprayqtn_range_size, |
3665 | .kc_flags = KC_NO_ENTRY, |
3666 | }; |
3667 | kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn; |
3668 | |
3669 | struct kmem_range_startup_spec kmem_spec_data = { |
.kc_name = "kmem_data_range",
3671 | .kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA], |
3672 | .kc_size = data_range_size, |
3673 | .kc_flags = KC_NO_ENTRY, |
3674 | }; |
3675 | kmem_claims[kmem_claim_count++] = kmem_spec_data; |
3676 | } |
3677 | |
3678 | __startup_func |
3679 | static void |
3680 | kmem_scramble_ranges(void) |
3681 | { |
3682 | vm_map_offset_t start = 0; |
3683 | |
3684 | /* |
* Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
3686 | * the vm can find the requested ranges. |
3687 | */ |
3688 | kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset, |
3689 | VM_MAP_PAGE_SIZE(kernel_map)); |
3690 | kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset; |
3691 | |
3692 | /* |
* Allocate the g_kext_map prior to randomizing the remaining submaps, as
* this map is 2G in size and starts at the end of kernel_text on x86; it
* could otherwise overflow into the heap.
3696 | */ |
3697 | kext_alloc_init(); |
3698 | |
3699 | /* |
3700 | * Eat a random amount of kernel_map to fuzz subsequent heap, zone and |
3701 | * stack addresses. (With a 4K page and 9 bits of randomness, this |
3702 | * eats about 2M of VA from the map) |
3703 | * |
3704 | * Note that we always need to slide by at least one page because the VM |
3705 | * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base |
3706 | * do not admit this address to be part of any zone submap. |
3707 | */ |
3708 | start = kmem_fuzz_start(); |
3709 | |
3710 | /* |
3711 | * Add claims for ptr and data kmem_ranges |
3712 | */ |
3713 | kmem_add_extra_claims(); |
3714 | |
3715 | /* |
3716 | * Shuffle registered claims |
3717 | */ |
3718 | assert(kmem_claim_count < UINT16_MAX); |
3719 | kmem_shuffle_claims(); |
3720 | |
3721 | /* |
3722 | * Apply restrictions and determine range for each claim |
3723 | */ |
3724 | for (uint32_t i = 0; i < kmem_claim_count; i++) { |
3725 | vm_map_offset_t end = 0; |
3726 | struct kmem_range_startup_spec sp = kmem_claims[i]; |
3727 | struct mach_vm_range *sp_range = sp.kc_range; |
if (vm_map_locate_space(kernel_map, sp.kc_size, 0,
VM_MAP_KERNEL_FLAGS_ANYWHERE(), &start, NULL) != KERN_SUCCESS) {
panic("kmem_range_init: vm_map_locate_space failing for claim %s",
sp.kc_name);
3732 | } |
3733 | |
3734 | end = start + sp.kc_size; |
3735 | /* |
3736 | * Re-adjust ranges if restriction not met |
3737 | */ |
3738 | if (sp_range->min_address && start > sp_range->min_address) { |
kmem_readjust_ranges(i);
3740 | } else { |
3741 | sp_range->min_address = start; |
3742 | sp_range->max_address = end; |
3743 | } |
3744 | start = end; |
3745 | } |
3746 | |
3747 | /* |
3748 | * We have settled on the ranges, now create temporary entries for the |
3749 | * claims |
3750 | */ |
3751 | for (uint32_t i = 0; i < kmem_claim_count; i++) { |
3752 | struct kmem_range_startup_spec sp = kmem_claims[i]; |
3753 | vm_map_entry_t entry = NULL; |
3754 | if (sp.kc_flags & KC_NO_ENTRY) { |
3755 | continue; |
3756 | } |
if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
VM_MAP_KERNEL_FLAGS_ANYWHERE(), &entry) != KERN_SUCCESS) {
panic("kmem_range_init: vm_map_find_space failing for claim %s",
sp.kc_name);
3761 | } |
3762 | vm_object_reference(kernel_object_default); |
VME_OBJECT_SET(entry, kernel_object_default, false, 0);
VME_OFFSET_SET(entry, entry->vme_start);
3765 | vm_map_unlock(kernel_map); |
3766 | } |
3767 | /* |
3768 | * Now that we are done assigning all the ranges, reset |
3769 | * kmem_ranges[KMEM_RANGE_ID_NONE] |
3770 | */ |
3771 | kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {}; |
3772 | |
3773 | #if DEBUG || DEVELOPMENT |
3774 | for (uint32_t i = 0; i < kmem_claim_count; i++) { |
3775 | struct kmem_range_startup_spec sp = kmem_claims[i]; |
3776 | |
printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
3778 | (void *)sp.kc_range->min_address, |
3779 | (void *)sp.kc_range->max_address, |
3780 | mach_vm_size_pretty(sp.kc_size), |
3781 | mach_vm_size_unit(sp.kc_size)); |
3782 | } |
3783 | #endif /* DEBUG || DEVELOPMENT */ |
3784 | } |
3785 | |
3786 | __startup_func |
3787 | static void |
3788 | kmem_range_init(void) |
3789 | { |
3790 | vm_size_t range_adjustment; |
3791 | |
3792 | kmem_scramble_ranges(); |
3793 | |
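/*
 * Descriptive note: the "large" view of the sprayqtn and data ranges set
 * up below skips the first eighth (size >> 3) of each range, so large
 * allocations land in the upper 7/8, presumably keeping the front of each
 * range for smaller allocations.
 */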
3794 | range_adjustment = sprayqtn_range_size >> 3; |
3795 | kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address = |
3796 | kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment; |
3797 | kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address = |
3798 | kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address; |
3799 | |
3800 | range_adjustment = data_range_size >> 3; |
3801 | kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address = |
3802 | kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment; |
3803 | kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address = |
3804 | kmem_ranges[KMEM_RANGE_ID_DATA].max_address; |
3805 | |
3806 | pmap_init(); |
3807 | kmem_metadata_init(); |
3808 | kmem_sizeclass_init(); |
3809 | |
3810 | #if DEBUG || DEVELOPMENT |
3811 | for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) { |
3812 | vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]); |
printf("kmem_large_ranges[%d] : %p - %p (%u%c)\n", i,
3814 | (void *)kmem_large_ranges[i].min_address, |
3815 | (void *)kmem_large_ranges[i].max_address, |
3816 | mach_vm_size_pretty(range_size), |
3817 | mach_vm_size_unit(range_size)); |
3818 | } |
3819 | #endif |
3820 | } |
3821 | STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init); |
3822 | |
3823 | #if DEBUG || DEVELOPMENT |
3824 | __startup_func |
3825 | static void |
3826 | kmem_log_init(void) |
3827 | { |
3828 | /* |
* The log can only be created after the kmem subsystem is initialized, as
3830 | * btlog creation uses kmem |
3831 | */ |
3832 | kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0); |
3833 | } |
3834 | STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init); |
3835 | |
3836 | kmem_gobj_stats |
3837 | kmem_get_gobj_stats(void) |
3838 | { |
3839 | kmem_gobj_stats stats = {}; |
3840 | |
3841 | vm_map_lock(kernel_map); |
3842 | for (uint8_t i = 0; i < kmem_ptr_ranges; i++) { |
3843 | kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i; |
3844 | struct mach_vm_range range = kmem_ranges[range_id]; |
3845 | struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)]; |
3846 | struct kmem_page_meta *meta_end; |
3847 | uint64_t meta_idx = meta - kmem_meta_base[range_id]; |
3848 | vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0; |
3849 | vm_map_offset_t addr; |
3850 | vm_map_entry_t entry; |
3851 | |
3852 | /* |
3853 | * Left front |
3854 | */ |
3855 | va = (meta_idx * KMEM_CHUNK_SIZE_MIN); |
3856 | meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta)); |
3857 | |
3858 | /* |
3859 | * Right front |
3860 | */ |
3861 | meta = kmem_meta_hwm[kmem_get_front(range_id, 1)]; |
3862 | meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr, |
3863 | &meta_idx); |
3864 | meta_idx = meta_end - meta; |
3865 | meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta)); |
3866 | va += (meta_idx * KMEM_CHUNK_SIZE_MIN); |
3867 | |
3868 | /* |
3869 | * Compute VA allocated in entire range |
3870 | */ |
3871 | if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) { |
3872 | entry = entry->vme_next; |
3873 | } |
3874 | while (entry != vm_map_to_entry(kernel_map) && |
3875 | entry->vme_start < range.max_address) { |
3876 | used += (entry->vme_end - entry->vme_start); |
3877 | entry = entry->vme_next; |
3878 | } |
3879 | |
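/*
 * Rough page-table cost of the VA that is reserved but not allocated,
 * assuming one 8-byte leaf PTE per unmapped page (an estimate, not an
 * exact accounting).
 */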
3880 | pte_sz = round_page(atop(va - used) * 8); |
3881 | |
3882 | stats.total_used += used; |
3883 | stats.total_va += va; |
3884 | stats.pte_sz += pte_sz; |
3885 | stats.meta_sz += meta_sz; |
3886 | } |
3887 | vm_map_unlock(kernel_map); |
3888 | |
3889 | return stats; |
3890 | } |
3891 | |
3892 | #endif /* DEBUG || DEVELOPMENT */ |
3893 | |
3894 | /* |
3895 | * kmem_init: |
3896 | * |
3897 | * Initialize the kernel's virtual memory map, taking |
3898 | * into account all memory allocated up to this time. |
3899 | */ |
3900 | __startup_func |
3901 | void |
3902 | kmem_init( |
3903 | vm_offset_t start, |
3904 | vm_offset_t end) |
3905 | { |
3906 | vm_map_offset_t map_start; |
3907 | vm_map_offset_t map_end; |
3908 | |
3909 | map_start = vm_map_trunc_page(start, |
3910 | VM_MAP_PAGE_MASK(kernel_map)); |
3911 | map_end = vm_map_round_page(end, |
3912 | VM_MAP_PAGE_MASK(kernel_map)); |
3913 | |
vm_map_will_allocate_early_map(&kernel_map);
3915 | #if defined(__arm64__) |
3916 | kernel_map = vm_map_create_options(pmap_kernel(), |
3917 | VM_MIN_KERNEL_AND_KEXT_ADDRESS, |
3918 | VM_MAX_KERNEL_ADDRESS, |
VM_MAP_CREATE_DEFAULT);
3920 | /* |
3921 | * Reserve virtual memory allocated up to this time. |
3922 | */ |
3923 | { |
3924 | unsigned int region_select = 0; |
3925 | vm_map_offset_t region_start; |
3926 | vm_map_size_t region_size; |
3927 | vm_map_offset_t map_addr; |
3928 | kern_return_t kr; |
3929 | |
while (pmap_virtual_region(region_select, &region_start, &region_size)) {
3931 | map_addr = region_start; |
kr = vm_map_enter(kernel_map, &map_addr,
vm_map_round_page(region_size,
VM_MAP_PAGE_MASK(kernel_map)),
(vm_map_offset_t) 0,
VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(.vmkf_no_pmap_check = true),
VM_OBJECT_NULL,
(vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
3939 | VM_INHERIT_DEFAULT); |
3940 | |
3941 | if (kr != KERN_SUCCESS) { |
panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
3943 | (uint64_t) start, (uint64_t) end, (uint64_t) region_start, |
3944 | (uint64_t) region_size, kr); |
3945 | } |
3946 | |
3947 | region_select++; |
3948 | } |
3949 | } |
3950 | #else |
3951 | kernel_map = vm_map_create_options(pmap_kernel(), |
3952 | VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end, |
3953 | VM_MAP_CREATE_DEFAULT); |
3954 | /* |
3955 | * Reserve virtual memory allocated up to this time. |
3956 | */ |
3957 | if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) { |
3958 | vm_map_offset_t map_addr; |
3959 | kern_return_t kr; |
3960 | |
3961 | map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS; |
3962 | kr = vm_map_enter(kernel_map, |
3963 | &map_addr, |
3964 | (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS), |
3965 | (vm_map_offset_t) 0, |
3966 | VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true), |
3967 | VM_OBJECT_NULL, |
3968 | (vm_object_offset_t) 0, FALSE, |
3969 | VM_PROT_NONE, VM_PROT_NONE, |
3970 | VM_INHERIT_DEFAULT); |
3971 | |
3972 | if (kr != KERN_SUCCESS) { |
panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
3974 | (uint64_t) start, (uint64_t) end, |
3975 | (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS, |
3976 | (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS), |
3977 | kr); |
3978 | } |
3979 | } |
3980 | #endif |
3981 | |
3982 | kmem_set_user_wire_limits(); |
3983 | } |
3984 | |
3985 | |
3986 | #pragma mark map copyio |
3987 | |
3988 | /* |
3989 | * Routine: copyinmap |
3990 | * Purpose: |
3991 | * Like copyin, except that fromaddr is an address |
3992 | * in the specified VM map. This implementation |
3993 | * is incomplete; it handles the current user map |
3994 | * and the kernel map/submaps. |
3995 | */ |
3996 | kern_return_t |
3997 | copyinmap( |
3998 | vm_map_t map, |
3999 | vm_map_offset_t fromaddr, |
4000 | void *todata, |
4001 | vm_size_t length) |
4002 | { |
4003 | kern_return_t kr = KERN_SUCCESS; |
4004 | vm_map_t oldmap; |
4005 | |
4006 | if (vm_map_pmap(map) == pmap_kernel()) { |
4007 | /* assume a correct copy */ |
memcpy(todata, CAST_DOWN(void *, fromaddr), length);
4009 | } else if (current_map() == map) { |
4010 | if (copyin(fromaddr, todata, length) != 0) { |
4011 | kr = KERN_INVALID_ADDRESS; |
4012 | } |
4013 | } else { |
4014 | vm_map_reference(map); |
4015 | oldmap = vm_map_switch(map); |
4016 | if (copyin(fromaddr, todata, length) != 0) { |
4017 | kr = KERN_INVALID_ADDRESS; |
4018 | } |
vm_map_switch(oldmap);
4020 | vm_map_deallocate(map); |
4021 | } |
4022 | return kr; |
4023 | } |
4024 | |
4025 | /* |
4026 | * Routine: copyoutmap |
4027 | * Purpose: |
4028 | * Like copyout, except that toaddr is an address |
4029 | * in the specified VM map. |
4030 | */ |
4031 | kern_return_t |
4032 | copyoutmap( |
4033 | vm_map_t map, |
4034 | void *fromdata, |
4035 | vm_map_address_t toaddr, |
4036 | vm_size_t length) |
4037 | { |
4038 | kern_return_t kr = KERN_SUCCESS; |
4039 | vm_map_t oldmap; |
4040 | |
4041 | if (vm_map_pmap(map) == pmap_kernel()) { |
4042 | /* assume a correct copy */ |
memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
4044 | } else if (current_map() == map) { |
4045 | if (copyout(fromdata, toaddr, length) != 0) { |
ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR), KERN_INVALID_ADDRESS /* arg */);
4047 | kr = KERN_INVALID_ADDRESS; |
4048 | } |
4049 | } else { |
4050 | vm_map_reference(map); |
4051 | oldmap = vm_map_switch(map); |
4052 | if (copyout(fromdata, toaddr, length) != 0) { |
ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR), KERN_INVALID_ADDRESS /* arg */);
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
4057 | vm_map_deallocate(map); |
4058 | } |
4059 | return kr; |
4060 | } |
4061 | |
4062 | /* |
4063 | * Routine: copyoutmap_atomic{32, 64} |
4064 | * Purpose: |
4065 | * Like copyoutmap, except that the operation is atomic. |
4066 | * Takes in value rather than *fromdata pointer. |
4067 | */ |
4068 | kern_return_t |
4069 | copyoutmap_atomic32( |
4070 | vm_map_t map, |
4071 | uint32_t value, |
4072 | vm_map_address_t toaddr) |
4073 | { |
4074 | kern_return_t kr = KERN_SUCCESS; |
4075 | vm_map_t oldmap; |
4076 | |
4077 | if (vm_map_pmap(map) == pmap_kernel()) { |
4078 | /* assume a correct toaddr */ |
4079 | *(uint32_t *)toaddr = value; |
4080 | } else if (current_map() == map) { |
if (copyout_atomic32(value, toaddr) != 0) {
4082 | kr = KERN_INVALID_ADDRESS; |
4083 | } |
4084 | } else { |
4085 | vm_map_reference(map); |
4086 | oldmap = vm_map_switch(map); |
if (copyout_atomic32(value, toaddr) != 0) {
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
4091 | vm_map_deallocate(map); |
4092 | } |
4093 | return kr; |
4094 | } |
4095 | |
4096 | kern_return_t |
4097 | copyoutmap_atomic64( |
4098 | vm_map_t map, |
4099 | uint64_t value, |
4100 | vm_map_address_t toaddr) |
4101 | { |
4102 | kern_return_t kr = KERN_SUCCESS; |
4103 | vm_map_t oldmap; |
4104 | |
4105 | if (vm_map_pmap(map) == pmap_kernel()) { |
4106 | /* assume a correct toaddr */ |
4107 | *(uint64_t *)toaddr = value; |
4108 | } else if (current_map() == map) { |
if (copyout_atomic64(value, toaddr) != 0) {
4110 | kr = KERN_INVALID_ADDRESS; |
4111 | } |
4112 | } else { |
4113 | vm_map_reference(map); |
4114 | oldmap = vm_map_switch(map); |
if (copyout_atomic64(value, toaddr) != 0) {
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
4119 | vm_map_deallocate(map); |
4120 | } |
4121 | return kr; |
4122 | } |
4123 | |
4124 | |
4125 | #pragma mark pointer obfuscation / packing |
4126 | |
4127 | /* |
4128 | * |
4129 | * The following two functions are to be used when exposing kernel |
4130 | * addresses to userspace via any of the various debug or info |
4131 | * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM() |
4132 | * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and |
4133 | * are exported to KEXTs. |
4134 | * |
4135 | * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL |
4136 | */ |
4137 | |
4138 | vm_offset_t |
4139 | vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt) |
4140 | { |
4141 | assert(salt != 0); |
4142 | |
4143 | if (addr == 0) { |
4144 | return 0ul; |
4145 | } |
4146 | |
4147 | if (VM_KERNEL_IS_SLID(addr)) { |
4148 | return VM_KERNEL_UNSLIDE(addr); |
4149 | } |
4150 | |
4151 | vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)]; |
4152 | SHA256_CTX sha_ctx; |
4153 | |
SHA256_Init(&sha_ctx);
SHA256_Update(&sha_ctx, &salt, sizeof(salt));
SHA256_Update(&sha_ctx, &addr, sizeof(addr));
SHA256_Final(sha_digest, &sha_ctx);
4158 | |
4159 | return sha_digest[0]; |
4160 | } |
4161 | |
4162 | __exported vm_offset_t |
4163 | vm_kernel_addrhash_external(vm_offset_t addr); |
4164 | vm_offset_t |
4165 | vm_kernel_addrhash_external(vm_offset_t addr) |
4166 | { |
return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
4168 | } |
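/*
 * Usage sketch (illustrative only; `obj` is a hypothetical pointer): a
 * debug interface exposing a kernel address to userspace would hash it
 * rather than leak the raw value, e.g.
 *
 *     vm_offset_t hashed = vm_kernel_addrhash_external((vm_offset_t)obj);
 *
 * In-kernel callers should prefer the macro versions from vm_param.h
 * (e.g. VM_KERNEL_ADDRHASH()) per the note above, which select the
 * appropriate salt.
 */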
4169 | |
4170 | void |
4171 | vm_kernel_addrhide( |
4172 | vm_offset_t addr, |
4173 | vm_offset_t *hide_addr) |
4174 | { |
4175 | *hide_addr = VM_KERNEL_ADDRHIDE(addr); |
4176 | } |
4177 | |
4178 | /* |
4179 | * vm_kernel_addrperm_external: |
4180 | * vm_kernel_unslide_or_perm_external: |
4181 | * |
4182 | * Use these macros when exposing an address to userspace that could come from |
4183 | * either kernel text/data *or* the heap. |
4184 | */ |
4185 | void |
4186 | vm_kernel_addrperm_external( |
4187 | vm_offset_t addr, |
4188 | vm_offset_t *perm_addr) |
4189 | { |
4190 | if (VM_KERNEL_IS_SLID(addr)) { |
4191 | *perm_addr = VM_KERNEL_UNSLIDE(addr); |
4192 | } else if (VM_KERNEL_ADDRESS(addr)) { |
4193 | *perm_addr = addr + vm_kernel_addrperm_ext; |
4194 | } else { |
4195 | *perm_addr = addr; |
4196 | } |
4197 | } |
4198 | |
4199 | void |
4200 | vm_kernel_unslide_or_perm_external( |
4201 | vm_offset_t addr, |
4202 | vm_offset_t *up_addr) |
4203 | { |
vm_kernel_addrperm_external(addr, up_addr);
4205 | } |
4206 | |
4207 | void |
4208 | vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params) |
4209 | { |
4210 | if (ptr & ((1ul << params.vmpp_shift) - 1)) { |
panic("pointer %p can't be packed: low %d bits aren't 0",
(void *)ptr, params.vmpp_shift);
} else if (ptr <= params.vmpp_base) {
panic("pointer %p can't be packed: below base %p",
(void *)ptr, (void *)params.vmpp_base);
} else {
panic("pointer %p can't be packed: maximum encodable pointer is %p",
4218 | (void *)ptr, (void *)vm_packing_max_packable(params)); |
4219 | } |
4220 | } |
4221 | |
4222 | void |
4223 | vm_packing_verify_range( |
4224 | const char *subsystem, |
4225 | vm_offset_t min_address, |
4226 | vm_offset_t max_address, |
4227 | vm_packing_params_t params) |
4228 | { |
4229 | if (min_address > max_address) { |
panic("%s: %s range invalid min:%p > max:%p",
4231 | __func__, subsystem, (void *)min_address, (void *)max_address); |
4232 | } |
4233 | |
4234 | if (!params.vmpp_base_relative) { |
4235 | return; |
4236 | } |
4237 | |
4238 | if (min_address <= params.vmpp_base) { |
panic("%s: %s range invalid min:%p <= base:%p",
4240 | __func__, subsystem, (void *)min_address, (void *)params.vmpp_base); |
4241 | } |
4242 | |
4243 | if (max_address > vm_packing_max_packable(params)) { |
panic("%s: %s range invalid max:%p >= max packable:%p",
4245 | __func__, subsystem, (void *)max_address, |
4246 | (void *)vm_packing_max_packable(params)); |
4247 | } |
4248 | } |
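/*
 * Sketch of the packing scheme these checks guard (inferred from the
 * panic conditions above, not an exact restatement of the packing
 * macros): a base-relative packed pointer is roughly
 * (ptr - vmpp_base) >> vmpp_shift truncated to vmpp_bits bits, so a
 * pointer is only packable when its low vmpp_shift bits are zero, it
 * lies above vmpp_base, and it does not exceed
 * vm_packing_max_packable(params).
 */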
4249 | |
4250 | #pragma mark tests |
4251 | #if DEBUG || DEVELOPMENT |
4252 | #include <sys/errno.h> |
4253 | |
4254 | static void |
4255 | kmem_test_for_entry( |
4256 | vm_map_t map, |
4257 | vm_offset_t addr, |
4258 | void (^block)(vm_map_entry_t)) |
4259 | { |
4260 | vm_map_entry_t entry; |
4261 | |
4262 | vm_map_lock(map); |
4263 | block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL); |
4264 | vm_map_unlock(map); |
4265 | } |
4266 | |
4267 | #define kmem_test_assert_map(map, pg, entries) ({ \ |
4268 | assert3u((map)->size, ==, ptoa(pg)); \ |
4269 | assert3u((map)->hdr.nentries, ==, entries); \ |
4270 | }) |
4271 | |
4272 | static bool |
4273 | can_write_at(vm_offset_t offs, uint32_t page) |
4274 | { |
4275 | static const int zero; |
4276 | |
4277 | return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0; |
4278 | } |
4279 | #define assert_writeable(offs, page) \ |
4280 | assertf(can_write_at(offs, page), \ |
4281 | "can write at %p + ptoa(%d)", (void *)offs, page) |
4282 | |
4283 | #define assert_faults(offs, page) \ |
4284 | assertf(!can_write_at(offs, page), \ |
4285 | "can write at %p + ptoa(%d)", (void *)offs, page) |
4286 | |
4287 | #define peek(offs, page) \ |
4288 | (*(uint32_t *)((offs) + ptoa(page))) |
4289 | |
4290 | #define poke(offs, page, v) \ |
4291 | (*(uint32_t *)((offs) + ptoa(page)) = (v)) |
4292 | |
4293 | __attribute__((noinline)) |
4294 | static void |
4295 | kmem_alloc_basic_test(vm_map_t map) |
4296 | { |
4297 | kmem_guard_t guard = { |
4298 | .kmg_tag = VM_KERN_MEMORY_DIAG, |
4299 | }; |
4300 | vm_offset_t addr; |
4301 | |
4302 | /* |
4303 | * Test wired basics: |
4304 | * - KMA_KOBJECT |
4305 | * - KMA_GUARD_FIRST, KMA_GUARD_LAST |
4306 | * - allocation alignment |
4307 | */ |
4308 | addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1, |
4309 | KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address; |
4310 | assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)" , map); |
4311 | assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0); |
4312 | kmem_test_assert_map(map, 10, 1); |
4313 | |
4314 | kmem_test_for_entry(map, addr, ^(vm_map_entry_t e){ |
4315 | assertf(e, "unable to find address %p in map %p" , (void *)addr, map); |
4316 | assert(e->vme_kernel_object); |
4317 | assert(!e->vme_atomic); |
4318 | assert3u(e->vme_start, <=, addr); |
4319 | assert3u(addr + ptoa(10), <=, e->vme_end); |
4320 | }); |
4321 | |
4322 | assert_faults(addr, 0); |
4323 | for (int i = 1; i < 9; i++) { |
4324 | assert_writeable(addr, i); |
4325 | } |
4326 | assert_faults(addr, 9); |
4327 | |
4328 | kmem_free(map, addr, ptoa(10)); |
4329 | kmem_test_assert_map(map, 0, 0); |
4330 | |
4331 | /* |
4332 | * Test pageable basics. |
4333 | */ |
4334 | addr = kmem_alloc_guard(map, ptoa(10), 0, |
4335 | KMA_PAGEABLE, guard).kmr_address; |
4336 | assertf(addr != 0ull, "kma(%p, 10p, 0, KO | PG)" , map); |
4337 | kmem_test_assert_map(map, 10, 1); |
4338 | |
4339 | for (int i = 0; i < 9; i++) { |
4340 | assert_faults(addr, i); |
4341 | poke(addr, i, 42); |
4342 | assert_writeable(addr, i); |
4343 | } |
4344 | |
4345 | kmem_free(map, addr, ptoa(10)); |
4346 | kmem_test_assert_map(map, 0, 0); |
4347 | } |
4348 | |
4349 | __attribute__((noinline)) |
4350 | static void |
4351 | kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind) |
4352 | { |
4353 | kmem_guard_t guard = { |
4354 | .kmg_atomic = !(kind & KMR_DATA), |
4355 | .kmg_tag = VM_KERN_MEMORY_DIAG, |
4356 | .kmg_context = 0xefface, |
4357 | }; |
4358 | vm_offset_t addr, newaddr; |
4359 | const int N = 10; |
4360 | |
4361 | /* |
4362 | * This isn't something kmem_realloc_guard() _needs_ to do, |
4363 | * we could conceive an implementation where it grows in place |
4364 | * if there's space after it. |
4365 | * |
4366 | * However, this is what the implementation does today. |
4367 | */ |
4368 | bool realloc_growth_changes_address = true; |
4369 | bool GL = (kind & KMR_GUARD_LAST); |
4370 | |
4371 | /* |
4372 | * Initial N page allocation |
4373 | */ |
4374 | addr = kmem_alloc_guard(map, ptoa(N), 0, |
4375 | (kind & (KMA_KOBJECT | KMA_GUARD_LAST | KMA_DATA)) | KMA_ZERO, |
4376 | guard).kmr_address; |
4377 | assert3u(addr, !=, 0); |
4378 | kmem_test_assert_map(map, N, 1); |
4379 | for (int pg = 0; pg < N - GL; pg++) { |
4380 | poke(addr, pg, 42 + pg); |
4381 | } |
4382 | for (int pg = N - GL; pg < N; pg++) { |
4383 | assert_faults(addr, pg); |
4384 | } |
4385 | |
4386 | |
4387 | /* |
4388 | * Grow to N + 3 pages |
4389 | */ |
4390 | newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3), |
4391 | kind | KMR_ZERO, guard).kmr_address; |
4392 | assert3u(newaddr, !=, 0); |
4393 | if (realloc_growth_changes_address) { |
4394 | assert3u(addr, !=, newaddr); |
4395 | } |
4396 | if ((kind & KMR_FREEOLD) || (addr == newaddr)) { |
4397 | kmem_test_assert_map(map, N + 3, 1); |
4398 | } else { |
4399 | kmem_test_assert_map(map, 2 * N + 3, 2); |
4400 | } |
4401 | for (int pg = 0; pg < N - GL; pg++) { |
4402 | assert3u(peek(newaddr, pg), ==, 42 + pg); |
4403 | } |
4404 | if ((kind & KMR_FREEOLD) == 0) { |
4405 | for (int pg = 0; pg < N - GL; pg++) { |
4406 | assert3u(peek(addr, pg), ==, 42 + pg); |
4407 | } |
/* check that the old and new mappings truly share memory */
4409 | poke(addr + 16, 0, 1234); |
4410 | assert3u(peek(newaddr + 16, 0), ==, 1234); |
4411 | kmem_free_guard(map, addr, ptoa(N), KMF_NONE, guard); |
4412 | kmem_test_assert_map(map, N + 3, 1); |
4413 | } |
4414 | if (addr != newaddr) { |
4415 | for (int pg = 0; pg < N - GL; pg++) { |
4416 | assert_faults(addr, pg); |
4417 | } |
4418 | } |
4419 | for (int pg = N - GL; pg < N + 3 - GL; pg++) { |
4420 | assert3u(peek(newaddr, pg), ==, 0); |
4421 | } |
4422 | for (int pg = N + 3 - GL; pg < N + 3; pg++) { |
4423 | assert_faults(newaddr, pg); |
4424 | } |
4425 | addr = newaddr; |
4426 | |
4427 | |
4428 | /* |
4429 | * Shrink to N - 2 pages |
4430 | */ |
4431 | newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2), |
4432 | kind | KMR_ZERO, guard).kmr_address; |
4433 | assert3u(map->size, ==, ptoa(N - 2)); |
4434 | assert3u(newaddr, ==, addr); |
4435 | kmem_test_assert_map(map, N - 2, 1); |
4436 | |
4437 | for (int pg = 0; pg < N - 2 - GL; pg++) { |
4438 | assert3u(peek(addr, pg), ==, 42 + pg); |
4439 | } |
4440 | for (int pg = N - 2 - GL; pg < N + 3; pg++) { |
4441 | assert_faults(addr, pg); |
4442 | } |
4443 | |
4444 | kmem_free_guard(map, addr, ptoa(N - 2), KMF_NONE, guard); |
4445 | kmem_test_assert_map(map, 0, 0); |
4446 | } |
4447 | |
4448 | static int |
4449 | kmem_basic_test(__unused int64_t in, int64_t *out) |
4450 | { |
4451 | mach_vm_offset_t addr; |
4452 | vm_map_t map; |
4453 | |
4454 | printf("%s: test running\n" , __func__); |
4455 | |
4456 | map = kmem_suballoc(kernel_map, &addr, 64U << 20, |
4457 | VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE, |
4458 | KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap; |
4459 | |
4460 | printf("%s: kmem_alloc ...\n" , __func__); |
4461 | kmem_alloc_basic_test(map); |
4462 | printf("%s: PASS\n" , __func__); |
4463 | |
4464 | printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n" , __func__); |
4465 | kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD); |
4466 | printf("%s: PASS\n" , __func__); |
4467 | |
4468 | printf("%s: kmem_realloc (KMR_FREEOLD) ...\n" , __func__); |
4469 | kmem_realloc_basic_test(map, KMR_FREEOLD); |
4470 | printf("%s: PASS\n" , __func__); |
4471 | |
4472 | printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n" , __func__); |
4473 | kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST); |
4474 | printf("%s: PASS\n" , __func__); |
4475 | |
4476 | printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n" , __func__); |
4477 | kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST); |
4478 | printf("%s: PASS\n" , __func__); |
4479 | |
4480 | printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n" , __func__); |
4481 | kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST); |
4482 | printf("%s: PASS\n" , __func__); |
4483 | |
4484 | printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n" , __func__); |
4485 | kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST); |
4486 | printf("%s: PASS\n" , __func__); |
4487 | |
4488 | printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n" , __func__); |
4489 | kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST); |
4490 | printf("%s: PASS\n" , __func__); |
4491 | |
4492 | printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n" , __func__); |
4493 | kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST); |
4494 | printf("%s: PASS\n" , __func__); |
4495 | |
4496 | /* using KMR_DATA signals to test the non atomic realloc path */ |
4497 | printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n" , __func__); |
4498 | kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD); |
4499 | printf("%s: PASS\n" , __func__); |
4500 | |
4501 | printf("%s: kmem_realloc (KMR_DATA) ...\n" , __func__); |
4502 | kmem_realloc_basic_test(map, KMR_DATA); |
4503 | printf("%s: PASS\n" , __func__); |
4504 | |
4505 | kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP); |
4506 | vm_map_deallocate(map); |
4507 | |
4508 | printf("%s: test passed\n" , __func__); |
4509 | *out = 1; |
4510 | return 0; |
4511 | } |
4512 | SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test); |
4513 | |
4514 | static void |
4515 | kmem_test_get_size_idx_for_chunks(uint32_t chunks) |
4516 | { |
4517 | uint32_t idx = kmem_get_size_idx_for_chunks(chunks); |
4518 | |
4519 | assert(chunks >= kmem_size_array[idx].ks_num_chunk); |
4520 | } |
4521 | |
4522 | __attribute__((noinline)) |
4523 | static void |
4524 | kmem_test_get_size_idx_for_all_chunks() |
4525 | { |
4526 | for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) { |
4527 | uint32_t chunks = kmem_size_array[i].ks_num_chunk; |
4528 | |
4529 | if (chunks != 1) { |
4530 | kmem_test_get_size_idx_for_chunks(chunks - 1); |
4531 | } |
4532 | kmem_test_get_size_idx_for_chunks(chunks); |
4533 | kmem_test_get_size_idx_for_chunks(chunks + 1); |
4534 | } |
4535 | } |
4536 | |
4537 | static int |
4538 | kmem_guard_obj_test(__unused int64_t in, int64_t *out) |
4539 | { |
4540 | printf("%s: test running\n" , __func__); |
4541 | |
4542 | printf("%s: kmem_get_size_idx_for_chunks\n" , __func__); |
4543 | kmem_test_get_size_idx_for_all_chunks(); |
4544 | printf("%s: PASS\n" , __func__); |
4545 | |
4546 | printf("%s: test passed\n" , __func__); |
4547 | *out = 1; |
4548 | return 0; |
4549 | } |
4550 | SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test); |
4551 | #endif /* DEBUG || DEVELOPMENT */ |
4552 | |