1 | /* |
2 | * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | /* |
57 | */ |
58 | /* |
59 | * File: kern/kalloc.c |
60 | * Author: Avadis Tevanian, Jr. |
61 | * Date: 1985 |
62 | * |
63 | * General kernel memory allocator. This allocator is designed |
64 | * to be used by the kernel to manage dynamic memory fast. |
65 | */ |
66 | |
67 | #include "mach/vm_types.h" |
68 | #include <mach/boolean.h> |
69 | #include <mach/sdt.h> |
70 | #include <mach/machine/vm_types.h> |
71 | #include <mach/vm_param.h> |
72 | #include <kern/misc_protos.h> |
73 | #include <kern/counter.h> |
74 | #include <kern/zalloc_internal.h> |
75 | #include <kern/kalloc.h> |
76 | #include <kern/ledger.h> |
77 | #include <kern/backtrace.h> |
78 | #include <vm/vm_kern.h> |
79 | #include <vm/vm_object.h> |
80 | #include <vm/vm_map.h> |
81 | #include <vm/vm_memtag.h> |
82 | #include <sys/kdebug.h> |
83 | |
84 | #include <os/hash.h> |
85 | #include <san/kasan.h> |
86 | #include <libkern/section_keywords.h> |
87 | #include <libkern/prelink.h> |
88 | |
89 | SCALABLE_COUNTER_DEFINE(kalloc_large_count); |
90 | SCALABLE_COUNTER_DEFINE(kalloc_large_total); |
91 | |
92 | #pragma mark initialization |
93 | |
94 | /* |
95 | * All allocations of size less than KHEAP_MAX_SIZE are rounded to the next nearest |
96 | * sized zone. This allocator is built on top of the zone allocator. A zone |
97 | * is created for each potential size that we are willing to get in small |
98 | * blocks. |
99 | * |
 * Allocations of size greater than KHEAP_MAX_SIZE are allocated from the VM.
101 | */ |
102 | |
103 | /* |
104 | * The kt_zone_cfg table defines the configuration of zones on various |
105 | * platforms for kalloc_type fixed size allocations. |
106 | */ |
107 | |
108 | #if KASAN_CLASSIC |
109 | #define K_SIZE_CLASS(size) \ |
110 | (((size) & PAGE_MASK) == 0 ? (size) : \ |
111 | ((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE)) |
112 | #else |
113 | #define K_SIZE_CLASS(size) (size) |
114 | #endif |
115 | static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE); |
116 | |
117 | static const uint16_t kt_zone_cfg[] = { |
118 | K_SIZE_CLASS(16), |
119 | K_SIZE_CLASS(32), |
120 | K_SIZE_CLASS(48), |
121 | K_SIZE_CLASS(64), |
122 | K_SIZE_CLASS(80), |
123 | K_SIZE_CLASS(96), |
124 | K_SIZE_CLASS(128), |
125 | K_SIZE_CLASS(160), |
126 | K_SIZE_CLASS(192), |
127 | K_SIZE_CLASS(224), |
128 | K_SIZE_CLASS(256), |
129 | K_SIZE_CLASS(288), |
130 | K_SIZE_CLASS(368), |
131 | K_SIZE_CLASS(400), |
132 | K_SIZE_CLASS(512), |
133 | K_SIZE_CLASS(576), |
134 | K_SIZE_CLASS(768), |
135 | K_SIZE_CLASS(1024), |
136 | K_SIZE_CLASS(1152), |
137 | K_SIZE_CLASS(1280), |
138 | K_SIZE_CLASS(1664), |
139 | K_SIZE_CLASS(2048), |
140 | K_SIZE_CLASS(4096), |
141 | K_SIZE_CLASS(6144), |
142 | K_SIZE_CLASS(8192), |
143 | K_SIZE_CLASS(12288), |
144 | K_SIZE_CLASS(16384), |
145 | #if __arm64__ |
146 | K_SIZE_CLASS(24576), |
147 | K_SIZE_CLASS(32768), |
148 | #endif /* __arm64__ */ |
149 | }; |
150 | |
151 | #define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0])) |
152 | |
153 | /* |
154 | * kalloc_type callsites are assigned a zone during early boot. They |
155 | * use the dlut[] (direct lookup table), indexed by size normalized |
156 | * to the minimum alignment to find the right zone index quickly. |
157 | */ |
158 | #define INDEX_ZDLUT(size) (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN) |
159 | #define KALLOC_DLUT_SIZE (KHEAP_MAX_SIZE / KALLOC_MINALIGN) |
160 | #define MAX_SIZE_ZDLUT ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN) |
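/*
 * A worked example of the lookup (hypothetical numbers, assuming the
 * usual 16-byte KALLOC_MINALIGN): a 100-byte type gives
 * INDEX_ZDLUT(100) == (100 + 15) / 16 == 7; slot 7 of the dlut is
 * built for sizes up to 7 * 16 == 112 bytes and therefore holds the
 * index of the first kt_zone_cfg entry >= 112, i.e. the 128-byte
 * size class.
 */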
161 | static __startup_data uint8_t kalloc_type_dlut[KALLOC_DLUT_SIZE]; |
162 | static __startup_data uint32_t kheap_zsize[KHEAP_NUM_ZONES]; |
163 | |
164 | #if VM_TAG_SIZECLASSES |
165 | static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg)); |
166 | #endif |
167 | |
168 | const char * const kalloc_heap_names[] = { |
	[KHEAP_ID_NONE]          = "",
	[KHEAP_ID_SHARED]        = "shared.",
	[KHEAP_ID_DATA_BUFFERS]  = "data.",
	[KHEAP_ID_KT_VAR]        = "",
173 | }; |
174 | |
175 | /* |
176 | * Shared heap configuration |
177 | */ |
178 | SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_SHARED[1] = { |
179 | { |
180 | .kh_name = "shared.kalloc" , |
181 | .kh_heap_id = KHEAP_ID_SHARED, |
182 | .kh_tag = VM_KERN_MEMORY_KALLOC_TYPE, |
183 | } |
184 | }; |
185 | |
186 | /* |
187 | * Bag of bytes heap configuration |
188 | */ |
189 | SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = { |
190 | { |
191 | .kh_name = "data.kalloc" , |
192 | .kh_heap_id = KHEAP_ID_DATA_BUFFERS, |
193 | .kh_tag = VM_KERN_MEMORY_KALLOC_DATA, |
194 | } |
195 | }; |
196 | |
197 | /* |
198 | * Configuration of variable kalloc type heaps |
199 | */ |
200 | SECURITY_READ_ONLY_LATE(struct kheap_info) |
201 | kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {}; |
202 | SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = { |
203 | { |
204 | .kh_name = "kalloc.type.var" , |
205 | .kh_heap_id = KHEAP_ID_KT_VAR, |
206 | .kh_tag = VM_KERN_MEMORY_KALLOC_TYPE |
207 | } |
208 | }; |
209 | |
KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);
211 | |
212 | __startup_func |
213 | static void |
214 | kalloc_zsize_compute(void) |
215 | { |
216 | uint32_t step = KHEAP_STEP_START; |
217 | uint32_t size = KHEAP_START_SIZE; |
218 | |
219 | /* |
220 | * Manually initialize extra initial zones |
221 | */ |
222 | kheap_zsize[0] = size / 2; |
223 | kheap_zsize[1] = size; |
224 | static_assert(KHEAP_EXTRA_ZONES == 2); |
225 | |
226 | /* |
227 | * Compute sizes for remaining zones |
228 | */ |
229 | for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) { |
230 | uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES; |
231 | |
232 | kheap_zsize[step_idx] = K_SIZE_CLASS(size + step); |
233 | kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step); |
234 | |
235 | step *= 2; |
236 | size += step; |
237 | } |
238 | } |
239 | |
240 | static zone_t |
241 | kalloc_zone_for_size_with_flags( |
242 | zone_id_t zid, |
243 | vm_size_t size, |
244 | zalloc_flags_t flags) |
245 | { |
246 | vm_size_t max_size = KHEAP_MAX_SIZE; |
247 | bool forcopyin = flags & Z_MAY_COPYINMAP; |
248 | zone_t zone; |
249 | |
250 | if (flags & Z_KALLOC_ARRAY) { |
251 | size = roundup(size, KALLOC_ARRAY_GRANULE); |
252 | } |
253 | |
254 | if (forcopyin) { |
255 | #if __x86_64__ |
256 | /* |
257 | * On Intel, the OSData() ABI used to allocate |
258 | * from the kernel map starting at PAGE_SIZE. |
259 | * |
260 | * If only vm_map_copyin() or a wrapper is used, |
261 | * then everything will work fine because vm_map_copy_t |
262 | * will perform an actual copy if the data is smaller |
263 | * than msg_ool_size_small (== KHEAP_MAX_SIZE). |
264 | * |
265 | * However, if anyone is trying to call mach_vm_remap(), |
266 | * then bad things (TM) happen. |
267 | * |
268 | * Avoid this by preserving the ABI and moving |
269 | * to kalloc_large() earlier. |
270 | * |
271 | * Any recent code really ought to use IOMemoryDescriptor |
272 | * for this purpose however. |
273 | */ |
274 | max_size = PAGE_SIZE - 1; |
275 | #endif |
276 | } |
277 | |
278 | if (size <= max_size) { |
279 | uint32_t idx; |
280 | |
281 | if (size <= KHEAP_START_SIZE) { |
282 | zid += (size > 16); |
283 | } else { |
284 | /* |
285 | * . log2down(size - 1) is log2up(size) - 1 |
286 | * . (size - 1) >> (log2down(size - 1) - 1) |
287 | * is either 0x2 or 0x3 |
288 | */ |
289 | idx = kalloc_log2down((uint32_t)(size - 1)); |
290 | zid += KHEAP_EXTRA_ZONES + |
291 | 2 * (idx - KHEAP_START_IDX) + |
292 | ((uint32_t)(size - 1) >> (idx - 1)) - 2; |
293 | } |
294 | |
295 | zone = zone_by_id(zid); |
296 | #if KASAN_CLASSIC |
297 | /* |
298 | * Under kasan classic, certain size classes are a redzone |
299 | * away from the mathematical formula above, and we need |
300 | * to "go to the next zone". |
301 | * |
302 | * Because the KHEAP_MAX_SIZE bucket _does_ exist however, |
303 | * this will never go to an "invalid" zone that doesn't |
304 | * belong to the kheap. |
305 | */ |
306 | if (size > zone_elem_inner_size(zone)) { |
307 | zone++; |
308 | } |
309 | #endif |
310 | return zone; |
311 | } |
312 | |
313 | return ZONE_NULL; |
314 | } |
315 | |
316 | zone_t |
317 | kalloc_zone_for_size(zone_id_t zid, size_t size) |
318 | { |
	return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
320 | } |
321 | |
322 | static inline bool |
323 | kheap_size_from_zone( |
324 | void *addr, |
325 | vm_size_t size, |
326 | zalloc_flags_t flags) |
327 | { |
328 | vm_size_t max_size = KHEAP_MAX_SIZE; |
329 | bool forcopyin = flags & Z_MAY_COPYINMAP; |
330 | |
331 | #if __x86_64__ |
332 | /* |
333 | * If Z_FULLSIZE is used, then due to kalloc_zone_for_size_with_flags() |
334 | * behavior, then the element could have a PAGE_SIZE reported size, |
335 | * yet still be from a zone for Z_MAY_COPYINMAP. |
336 | */ |
337 | if (forcopyin) { |
338 | if (size == PAGE_SIZE && |
339 | zone_id_for_element(addr, size) != ZONE_ID_INVALID) { |
340 | return true; |
341 | } |
342 | |
343 | max_size = PAGE_SIZE - 1; |
344 | } |
345 | #else |
346 | #pragma unused(addr, forcopyin) |
347 | #endif |
348 | |
349 | return size <= max_size; |
350 | } |
351 | |
352 | /* |
 * Data zones should not use the shared zone. Therefore set the no-share
354 | * bit right after creation. |
355 | */ |
356 | __startup_func |
357 | static void |
358 | kalloc_set_no_share_for_data( |
359 | zone_kheap_id_t kheap_id, |
360 | zone_stats_t zstats) |
361 | { |
362 | if (kheap_id == KHEAP_ID_DATA_BUFFERS) { |
363 | zpercpu_foreach(zs, zstats) { |
364 | os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed); |
365 | } |
366 | } |
367 | } |
368 | |
369 | __startup_func |
370 | static void |
371 | kalloc_zone_init( |
372 | const char *kheap_name, |
373 | zone_kheap_id_t kheap_id, |
374 | zone_id_t *kheap_zstart, |
375 | zone_create_flags_t zc_flags) |
376 | { |
377 | zc_flags |= ZC_PGZ_USE_GUARDS; |
378 | |
379 | for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) { |
380 | uint32_t size = kheap_zsize[i]; |
381 | char buf[MAX_ZONE_NAME], *z_name; |
382 | int len; |
383 | |
		len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
		z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
		strlcpy(z_name, buf, len + 1);
387 | |
		(void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
389 | #if __arm64e__ || CONFIG_KERNEL_TAGGING |
390 | uint32_t scale = kalloc_log2down(size / 32); |
391 | |
392 | if (size == 32 << scale) { |
393 | z->z_array_size_class = scale; |
394 | } else { |
395 | z->z_array_size_class = scale | 0x10; |
396 | } |
397 | #endif |
398 | zone_security_array[zone_index(z)].z_kheap_id = kheap_id; |
399 | if (i == 0) { |
400 | *kheap_zstart = zone_index(z); |
401 | } |
			kalloc_set_no_share_for_data(kheap_id, z->z_stats);
403 | }); |
404 | } |
405 | } |
406 | |
407 | __startup_func |
408 | static void |
409 | kalloc_heap_init(struct kalloc_heap *kheap) |
410 | { |
	kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
	    ZC_NONE);
413 | /* |
414 | * Count all the "raw" views for zones in the heap. |
415 | */ |
416 | zone_view_count += KHEAP_NUM_ZONES; |
417 | } |
418 | |
419 | #define KEXT_ALIGN_SHIFT 6 |
420 | #define KEXT_ALIGN_BYTES (1<< KEXT_ALIGN_SHIFT) |
421 | #define KEXT_ALIGN_MASK (KEXT_ALIGN_BYTES-1) |
422 | #define kt_scratch_size (256ul << 10) |
423 | #define KALLOC_TYPE_SECTION(type) \ |
424 | (type == KTV_FIXED? "__kalloc_type": "__kalloc_var") |
425 | |
426 | /* |
427 | * Enum to specify the kalloc_type variant being used. |
428 | */ |
429 | __options_decl(kalloc_type_variant_t, uint16_t, { |
430 | KTV_FIXED = 0x0001, |
431 | KTV_VAR = 0x0002, |
432 | }); |
433 | |
434 | /* |
435 | * Macros that generate the appropriate kalloc_type variant (i.e fixed or |
436 | * variable) of the desired variable/function. |
437 | */ |
438 | #define kalloc_type_var(type, var) \ |
439 | ((type) == KTV_FIXED? \ |
440 | (vm_offset_t) kalloc_type_##var##_fixed: \ |
441 | (vm_offset_t) kalloc_type_##var##_var) |
442 | #define kalloc_type_func(type, func, ...) \ |
443 | ((type) == KTV_FIXED? \ |
444 | kalloc_type_##func##_fixed(__VA_ARGS__): \ |
445 | kalloc_type_##func##_var(__VA_ARGS__)) |
446 | |
TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
    ZSECURITY_CONFIG_KT_VAR_BUDGET);
TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
    ZSECURITY_CONFIG_KT_BUDGET);
TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);
454 | |
455 | /* |
456 | * Section start/end for fixed kalloc_type views |
457 | */ |
458 | extern struct kalloc_type_view kalloc_type_sec_start_fixed[] |
459 | __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type" ); |
460 | |
461 | extern struct kalloc_type_view kalloc_type_sec_end_fixed[] |
462 | __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type" ); |
463 | |
464 | /* |
465 | * Section start/end for variable kalloc_type views |
466 | */ |
467 | extern struct kalloc_type_var_view kalloc_type_sec_start_var[] |
468 | __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var" ); |
469 | |
470 | extern struct kalloc_type_var_view kalloc_type_sec_end_var[] |
471 | __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var" ); |
472 | |
473 | __startup_data |
474 | static kalloc_type_views_t *kt_buffer = NULL; |
475 | __startup_data |
476 | static uint64_t kt_count; |
477 | __startup_data |
478 | uint32_t kalloc_type_hash_seed; |
479 | |
480 | __startup_data |
481 | static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)]; |
482 | __startup_data |
483 | static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)]; |
484 | |
485 | struct nzones_with_idx { |
486 | uint16_t nzones; |
487 | uint16_t idx; |
488 | }; |
489 | int16_t zone_carry = 0; |
490 | |
491 | _Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1), |
492 | "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX" ); |
493 | |
494 | /* |
495 | * For use by lldb to iterate over kalloc types |
496 | */ |
497 | SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg); |
498 | SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)]; |
499 | SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)]; |
500 | |
501 | #define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16) |
502 | static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET | |
503 | KMEM_DIRECTION_MASK), |
504 | "Insufficient bits to represent range and dir for VM allocations" ); |
505 | static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK, |
506 | "validate idx mask" ); |
507 | /* qsort routines */ |
508 | typedef int (*cmpfunc_t)(const void *a, const void *b); |
509 | extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp); |
510 | |
511 | static inline uint16_t |
512 | kalloc_type_get_idx(uint32_t kt_size) |
513 | { |
514 | return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT); |
515 | } |
516 | |
517 | static inline uint32_t |
518 | kalloc_type_set_idx(uint32_t kt_size, uint16_t idx) |
519 | { |
520 | return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT); |
521 | } |
522 | |
523 | static void |
524 | kalloc_type_build_dlut(void) |
525 | { |
526 | vm_size_t size = 0; |
527 | for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) { |
528 | uint8_t zindex = 0; |
529 | while (kt_zone_cfg[zindex] < size) { |
530 | zindex++; |
531 | } |
532 | kalloc_type_dlut[i] = zindex; |
533 | } |
534 | } |
535 | |
536 | static uint32_t |
537 | kalloc_type_idx_for_size(uint32_t size) |
538 | { |
539 | assert(size <= KHEAP_MAX_SIZE); |
540 | uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)]; |
	return kalloc_type_set_idx(size, idx);
542 | } |
543 | |
544 | static void |
545 | kalloc_type_assign_zone_fixed( |
546 | kalloc_type_view_t *cur, |
547 | kalloc_type_view_t *end, |
548 | zone_t z, |
549 | zone_t sig_zone, |
550 | zone_t shared_zone) |
551 | { |
552 | /* |
553 | * Assign the zone created for every kalloc_type_view |
554 | * of the same unique signature |
555 | */ |
556 | bool need_raw_view = false; |
557 | |
558 | while (cur < end) { |
559 | kalloc_type_view_t kt = *cur; |
560 | struct zone_view *zv = &kt->kt_zv; |
561 | zv->zv_zone = z; |
562 | kalloc_type_flags_t kt_flags = kt->kt_flags; |
563 | zone_security_flags_t zsflags = zone_security_config(z); |
564 | |
565 | assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size); |
566 | if (!shared_zone) { |
567 | assert(zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS); |
568 | } |
569 | |
570 | if (kt_flags & KT_SLID) { |
571 | kt->kt_signature -= vm_kernel_slide; |
572 | kt->kt_zv.zv_name -= vm_kernel_slide; |
573 | } |
574 | |
575 | if ((kt_flags & KT_PRIV_ACCT) || |
576 | ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) { |
577 | zv->zv_stats = zalloc_percpu_permanent_type( |
578 | struct zone_stats); |
579 | need_raw_view = true; |
580 | zone_view_count += 1; |
581 | } else { |
582 | zv->zv_stats = z->z_stats; |
583 | } |
584 | |
585 | if ((kt_flags & KT_NOSHARED) || !shared_zone) { |
586 | if ((kt_flags & KT_NOSHARED) && !(kt_flags & KT_PRIV_ACCT)) { |
587 | panic("KT_NOSHARED used w/o private accounting for view %s" , |
588 | zv->zv_name); |
589 | } |
590 | |
591 | zpercpu_foreach(zs, zv->zv_stats) { |
592 | os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed); |
593 | } |
594 | } |
595 | |
596 | if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) { |
597 | kt->kt_zshared = shared_zone; |
598 | kt->kt_zsig = sig_zone; |
599 | /* |
			 * If we haven't yet set the signature equivalence then set it,
			 * otherwise validate that the zone has the same signature
			 * equivalence as the sig_zone provided.
603 | */ |
			if (!zone_get_sig_eq(z)) {
				zone_set_sig_eq(z, zone_index(sig_zone));
606 | } else { |
607 | assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone)); |
608 | } |
609 | } |
610 | zv->zv_next = (zone_view_t) z->z_views; |
611 | zv->zv_zone->z_views = (zone_view_t) kt; |
612 | cur++; |
613 | } |
614 | if (need_raw_view) { |
615 | zone_view_count += 1; |
616 | } |
617 | } |
618 | |
619 | __startup_func |
620 | static void |
621 | kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur, |
622 | kalloc_type_var_view_t *end, uint32_t heap_idx) |
623 | { |
624 | struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx]; |
625 | while (cur < end) { |
626 | kalloc_type_var_view_t kt = *cur; |
627 | kt->kt_heap_start = cfg->kh_zstart; |
628 | kalloc_type_flags_t kt_flags = kt->kt_flags; |
629 | |
630 | if (kt_flags & KT_SLID) { |
631 | if (kt->kt_sig_hdr) { |
632 | kt->kt_sig_hdr -= vm_kernel_slide; |
633 | } |
634 | kt->kt_sig_type -= vm_kernel_slide; |
635 | kt->kt_name -= vm_kernel_slide; |
636 | } |
637 | |
638 | if ((kt_flags & KT_PRIV_ACCT) || |
639 | ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) { |
640 | kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats); |
641 | zone_view_count += 1; |
642 | } |
643 | |
644 | kt->kt_next = (zone_view_t) cfg->kt_views; |
645 | cfg->kt_views = kt; |
646 | cur++; |
647 | } |
648 | } |
649 | |
650 | __startup_func |
651 | static inline void |
652 | kalloc_type_slide_fixed(vm_offset_t addr) |
653 | { |
654 | kalloc_type_view_t ktv = (struct kalloc_type_view *) addr; |
655 | ktv->kt_signature += vm_kernel_slide; |
656 | ktv->kt_zv.zv_name += vm_kernel_slide; |
657 | ktv->kt_flags |= KT_SLID; |
658 | } |
659 | |
660 | __startup_func |
661 | static inline void |
662 | kalloc_type_slide_var(vm_offset_t addr) |
663 | { |
664 | kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr; |
665 | if (ktv->kt_sig_hdr) { |
666 | ktv->kt_sig_hdr += vm_kernel_slide; |
667 | } |
668 | ktv->kt_sig_type += vm_kernel_slide; |
669 | ktv->kt_name += vm_kernel_slide; |
670 | ktv->kt_flags |= KT_SLID; |
671 | } |
672 | |
673 | __startup_func |
674 | static void |
675 | kalloc_type_validate_flags( |
676 | kalloc_type_flags_t kt_flags, |
677 | const char *kt_name, |
678 | uuid_string_t kext_uuid) |
679 | { |
680 | if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) { |
681 | panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with " |
682 | "required xnu headers" , kt_name, kext_uuid); |
683 | } |
684 | } |
685 | |
686 | static kalloc_type_flags_t |
687 | kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid) |
688 | { |
689 | kalloc_type_view_t ktv = (kalloc_type_view_t) addr; |
	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
691 | return ktv->kt_flags; |
692 | } |
693 | |
694 | static kalloc_type_flags_t |
695 | kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid) |
696 | { |
697 | kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr; |
	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
699 | return ktv->kt_flags; |
700 | } |
701 | |
702 | /* |
703 | * Check if signature of type is made up of only data and padding |
704 | */ |
705 | static bool |
706 | kalloc_type_is_data(kalloc_type_flags_t kt_flags) |
707 | { |
708 | assert(kt_flags & KT_CHANGED); |
709 | return kt_flags & KT_DATA_ONLY; |
710 | } |
711 | |
712 | /* |
713 | * Check if signature of type is made up of only pointers |
714 | */ |
715 | static bool |
716 | kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags) |
717 | { |
718 | assert(kt_flags & KT_CHANGED2); |
719 | return kt_flags & KT_PTR_ARRAY; |
720 | } |
721 | |
722 | static bool |
723 | kalloc_type_from_vm(kalloc_type_flags_t kt_flags) |
724 | { |
725 | assert(kt_flags & KT_CHANGED); |
726 | return kt_flags & KT_VM; |
727 | } |
728 | |
729 | __startup_func |
730 | static inline vm_size_t |
731 | kalloc_type_view_sz_fixed(void) |
732 | { |
733 | return sizeof(struct kalloc_type_view); |
734 | } |
735 | |
736 | __startup_func |
737 | static inline vm_size_t |
738 | kalloc_type_view_sz_var(void) |
739 | { |
740 | return sizeof(struct kalloc_type_var_view); |
741 | } |
742 | |
743 | __startup_func |
744 | static inline uint64_t |
745 | kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start, |
746 | vm_offset_t end) |
747 | { |
748 | return (end - start) / kalloc_type_func(type, view_sz); |
749 | } |
750 | |
751 | __startup_func |
752 | static inline void |
753 | kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv) |
754 | { |
755 | buffer->ktv_fixed = (kalloc_type_view_t) ktv; |
756 | } |
757 | |
758 | __startup_func |
759 | static inline void |
760 | kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv) |
761 | { |
762 | buffer->ktv_var = (kalloc_type_var_view_t) ktv; |
763 | } |
764 | |
765 | __startup_func |
766 | static void |
767 | kalloc_type_handle_data_view_fixed(vm_offset_t addr) |
768 | { |
769 | kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr; |
	zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
	    cur_data_view->kt_size);
	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
	    NULL);
774 | } |
775 | |
776 | __startup_func |
777 | static void |
778 | kalloc_type_handle_data_view_var(vm_offset_t addr) |
779 | { |
780 | kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr; |
781 | kalloc_type_assign_zone_var(cur: &ktv, end: &ktv + 1, heap_idx: KT_VAR_DATA_HEAP); |
782 | } |
783 | |
784 | __startup_func |
785 | static uint32_t |
786 | kalloc_type_handle_parray_var(void) |
787 | { |
788 | uint32_t i = 0; |
789 | kalloc_type_var_view_t kt = kt_buffer[0].ktv_var; |
790 | const char *p_name = kt->kt_name; |
791 | |
792 | /* |
793 | * The sorted list of variable kalloc_type_view has pointer arrays at the |
794 | * beginning. Walk through them and assign a random pointer heap to each |
795 | * type detected by typename. |
796 | */ |
	while (kalloc_type_is_ptr_array(kt->kt_flags)) {
		uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
799 | const char *c_name = kt->kt_name; |
800 | uint32_t p_i = i; |
801 | |
		while (strcmp(c_name, p_name) == 0) {
803 | i++; |
804 | kt = kt_buffer[i].ktv_var; |
805 | c_name = kt->kt_name; |
806 | } |
807 | p_name = c_name; |
808 | kalloc_type_assign_zone_var(cur: &kt_buffer[p_i].ktv_var, |
809 | end: &kt_buffer[i].ktv_var, heap_idx: heap_id); |
810 | } |
811 | |
812 | /* |
	 * Returns the index of the first view that isn't a pointer array
814 | */ |
815 | return i; |
816 | } |
817 | |
818 | __startup_func |
819 | static uint32_t |
820 | kalloc_hash_adjust(uint32_t hash, uint32_t shift) |
821 | { |
822 | /* |
823 | * Limit range_id to ptr ranges |
824 | */ |
825 | uint32_t range_id = kmem_adjust_range_id(hash); |
826 | uint32_t direction = hash & 0x8000; |
827 | return (range_id | KMEM_HASH_SET | direction) << shift; |
828 | } |
829 | |
830 | __startup_func |
831 | static void |
832 | kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr, |
833 | kalloc_type_flags_t *kt_flags) |
834 | { |
835 | uint32_t hash = 0; |
836 | |
837 | assert(sig_ty != NULL); |
	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
	    kalloc_type_hash_seed);
840 | if (sig_hdr) { |
		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
842 | } |
843 | os_hash_jenkins_finish(hash); |
844 | hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK); |
845 | |
	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
847 | } |
848 | |
849 | __startup_func |
850 | static void |
851 | kalloc_type_set_type_hash_fixed(vm_offset_t addr) |
852 | { |
853 | /* |
854 | * Use backtraces on fixed as we don't have signatures for types that go |
855 | * to the VM due to rdar://85182551. |
856 | */ |
857 | (void) addr; |
858 | } |
859 | |
860 | __startup_func |
861 | static void |
862 | kalloc_type_set_type_hash_var(vm_offset_t addr) |
863 | { |
864 | kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr; |
	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
	    &ktv->kt_flags);
867 | } |
868 | |
869 | __startup_func |
870 | static void |
871 | kalloc_type_mark_processed_fixed(vm_offset_t addr) |
872 | { |
873 | kalloc_type_view_t ktv = (kalloc_type_view_t) addr; |
874 | ktv->kt_flags |= KT_PROCESSED; |
875 | } |
876 | |
877 | __startup_func |
878 | static void |
879 | kalloc_type_mark_processed_var(vm_offset_t addr) |
880 | { |
881 | kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr; |
882 | ktv->kt_flags |= KT_PROCESSED; |
883 | } |
884 | |
885 | __startup_func |
886 | static void |
887 | kalloc_type_update_view_fixed(vm_offset_t addr) |
888 | { |
889 | kalloc_type_view_t ktv = (kalloc_type_view_t) addr; |
	ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
891 | } |
892 | |
893 | __startup_func |
894 | static void |
895 | kalloc_type_update_view_var(vm_offset_t addr) |
896 | { |
897 | (void) addr; |
898 | } |
899 | |
900 | __startup_func |
901 | static void |
902 | kalloc_type_view_copy( |
903 | const kalloc_type_variant_t type, |
904 | vm_offset_t start, |
905 | vm_offset_t end, |
906 | uint64_t *cur_count, |
907 | bool slide, |
908 | uuid_string_t kext_uuid) |
909 | { |
910 | uint64_t count = kalloc_type_view_count(type, start, end); |
911 | if (count + *cur_count >= kt_count) { |
912 | panic("kalloc_type_view_copy: Insufficient space in scratch buffer" ); |
913 | } |
914 | vm_offset_t cur = start; |
915 | while (cur < end) { |
916 | if (slide) { |
917 | kalloc_type_func(type, slide, cur); |
918 | } |
919 | kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur, |
920 | kext_uuid); |
921 | kalloc_type_func(type, mark_processed, cur); |
922 | /* |
923 | * Skip views that go to the VM |
924 | */ |
925 | if (kalloc_type_from_vm(kt_flags)) { |
926 | cur += kalloc_type_func(type, view_sz); |
927 | continue; |
928 | } |
929 | |
930 | /* |
		 * If the signature indicates that the entire allocation is data,
		 * move it to KHEAP_DATA_BUFFERS. Note that KT_VAR_DATA_HEAP is a
		 * fake "data" heap; variable kalloc_type handles the actual
		 * redirection in the entry points kalloc/kfree_type_var_impl.
935 | */ |
936 | if (kalloc_type_is_data(kt_flags)) { |
937 | kalloc_type_func(type, handle_data_view, cur); |
938 | cur += kalloc_type_func(type, view_sz); |
939 | continue; |
940 | } |
941 | |
942 | /* |
943 | * Set type hash that is used by kmem_*_guard |
944 | */ |
945 | kalloc_type_func(type, set_type_hash, cur); |
946 | kalloc_type_func(type, update_view, cur); |
947 | kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur); |
948 | cur += kalloc_type_func(type, view_sz); |
949 | *cur_count = *cur_count + 1; |
950 | } |
951 | } |
952 | |
953 | __startup_func |
954 | static uint64_t |
955 | kalloc_type_view_parse(const kalloc_type_variant_t type) |
956 | { |
957 | kc_format_t kc_format; |
958 | uint64_t cur_count = 0; |
959 | |
	if (!PE_get_primary_kc_format(&kc_format)) {
		panic("kalloc_type_view_parse: wasn't able to determine kc format");
962 | } |
963 | |
964 | if (kc_format == KCFormatStatic) { |
965 | /* |
		 * If the kc is static, __kalloc_type sections from kexts and
		 * xnu are coalesced.
968 | */ |
969 | kalloc_type_view_copy(type, |
970 | kalloc_type_var(type, sec_start), |
971 | kalloc_type_var(type, sec_end), |
972 | cur_count: &cur_count, false, NULL); |
973 | } else if (kc_format == KCFormatFileset) { |
974 | /* |
975 | * If kc uses filesets, traverse __kalloc_type section for each |
976 | * macho in the BootKC. |
977 | */ |
978 | kernel_mach_header_t *kc_mh = NULL; |
979 | kernel_mach_header_t *kext_mh = NULL; |
980 | |
		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
982 | struct load_command *lc = |
983 | (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh)); |
984 | for (uint32_t i = 0; i < kc_mh->ncmds; |
985 | i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) { |
986 | if (lc->cmd != LC_FILESET_ENTRY) { |
987 | continue; |
988 | } |
989 | struct fileset_entry_command *fse = |
990 | (struct fileset_entry_command *)(vm_offset_t)lc; |
991 | kext_mh = (kernel_mach_header_t *)fse->vmaddr; |
992 | kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader( |
			    kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
994 | if (sect != NULL) { |
995 | unsigned long uuidlen = 0; |
996 | void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen); |
997 | uuid_string_t kext_uuid_str; |
998 | if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) { |
					uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
1000 | } |
				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
				    &cur_count, false, kext_uuid_str);
1003 | } |
1004 | } |
1005 | } else if (kc_format == KCFormatKCGEN) { |
1006 | /* |
1007 | * Parse __kalloc_type section from xnu |
1008 | */ |
1009 | kalloc_type_view_copy(type, |
1010 | kalloc_type_var(type, sec_start), |
		    kalloc_type_var(type, sec_end), &cur_count, false, NULL);
1012 | |
1013 | /* |
1014 | * Parse __kalloc_type section for kexts |
1015 | * |
1016 | * Note: We don't process the kalloc_type_views for kexts on armv7 |
1017 | * as this platform has insufficient memory for type based |
1018 | * segregation. kalloc_type_impl_external will direct callsites |
1019 | * based on their size. |
1020 | */ |
1021 | kernel_mach_header_t *xnu_mh = &_mh_execute_header; |
1022 | vm_offset_t cur = 0; |
1023 | vm_offset_t end = 0; |
1024 | |
1025 | /* |
1026 | * Kext machos are in the __PRELINK_TEXT segment. Extract the segment |
1027 | * and traverse it. |
1028 | */ |
1029 | kernel_section_t *prelink_sect = getsectbynamefromheader( |
			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
1031 | assert(prelink_sect); |
1032 | cur = prelink_sect->addr; |
1033 | end = prelink_sect->addr + prelink_sect->size; |
1034 | |
1035 | while (cur < end) { |
1036 | uint64_t kext_text_sz = 0; |
1037 | kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur; |
1038 | |
1039 | if (kext_mh->magic == 0) { |
1040 | /* |
1041 | * Assert that we have processed all kexts and all that is left |
1042 | * is padding |
1043 | */ |
1044 | assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0); |
1045 | break; |
1046 | } else if (kext_mh->magic != MH_MAGIC_64 && |
1047 | kext_mh->magic != MH_CIGAM_64) { |
1048 | panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx" , |
1049 | cur); |
1050 | } |
1051 | |
1052 | /* |
1053 | * Kext macho found, iterate through its segments |
1054 | */ |
1055 | struct load_command *lc = |
1056 | (struct load_command *)(cur + sizeof(kernel_mach_header_t)); |
1057 | bool isSplitKext = false; |
1058 | |
1059 | for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end; |
1060 | i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) { |
1061 | if (lc->cmd == LC_SEGMENT_SPLIT_INFO) { |
1062 | isSplitKext = true; |
1063 | continue; |
1064 | } else if (lc->cmd != LC_SEGMENT_64) { |
1065 | continue; |
1066 | } |
1067 | |
1068 | kernel_segment_command_t *seg_cmd = |
1069 | (struct segment_command_64 *)(vm_offset_t)lc; |
1070 | /* |
1071 | * Parse kalloc_type section |
1072 | */ |
				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1076 | if (kt_sect) { |
						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
						    true, NULL);
1080 | } |
1081 | } |
1082 | /* |
1083 | * If the kext has a __TEXT segment, that is the only thing that |
1084 | * will be in the special __PRELINK_TEXT KC segment, so the next |
1085 | * macho is right after. |
1086 | */ |
				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
1088 | kext_text_sz = seg_cmd->filesize; |
1089 | } |
1090 | } |
1091 | /* |
1092 | * If the kext did not have a __TEXT segment (special xnu kexts with |
1093 | * only a __LINKEDIT segment) then the next macho will be after all the |
1094 | * header commands. |
1095 | */ |
1096 | if (!kext_text_sz) { |
1097 | kext_text_sz = kext_mh->sizeofcmds; |
1098 | } else if (!isSplitKext) { |
1099 | panic("kalloc_type_view_parse: No support for non-split seg KCs" ); |
1100 | break; |
1101 | } |
1102 | |
1103 | cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK)); |
1104 | } |
1105 | } else { |
1106 | /* |
1107 | * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle |
1108 | * parsing kalloc_type_view structs during startup. |
1109 | */ |
1110 | panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs" |
1111 | " for kc_format = %d\n" , kc_format); |
1112 | } |
1113 | return cur_count; |
1114 | } |
1115 | |
1116 | __startup_func |
1117 | static int |
1118 | kalloc_type_cmp_fixed(const void *a, const void *b) |
1119 | { |
1120 | const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a; |
1121 | const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b; |
1122 | |
	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1125 | /* |
	 * If the kalloc_type_views are in the same kalloc bucket, sort by
	 * signature, otherwise sort by size
1128 | */ |
1129 | if (idxA == idxB) { |
		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1131 | /* |
1132 | * If the kalloc_type_views have the same signature sort by site |
1133 | * name |
1134 | */ |
1135 | if (result == 0) { |
			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1137 | } |
1138 | return result; |
1139 | } |
	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1142 | return (int)(sizeA - sizeB); |
1143 | } |
1144 | |
1145 | __startup_func |
1146 | static int |
1147 | kalloc_type_cmp_var(const void *a, const void *b) |
1148 | { |
1149 | const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a; |
1150 | const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b; |
1151 | const char *ktA_hdr = ktA->kt_sig_hdr ?: "" ; |
1152 | const char *ktB_hdr = ktB->kt_sig_hdr ?: "" ; |
1153 | bool ktA_ptrArray = kalloc_type_is_ptr_array(kt_flags: ktA->kt_flags); |
1154 | bool ktB_ptrArray = kalloc_type_is_ptr_array(kt_flags: ktA->kt_flags); |
1155 | int result = 0; |
1156 | |
1157 | /* |
1158 | * Switched around (B - A) because we want the pointer arrays to be at the |
1159 | * top |
1160 | */ |
1161 | result = ktB_ptrArray - ktA_ptrArray; |
1162 | if (result == 0) { |
		result = strcmp(ktA_hdr, ktB_hdr);
		if (result == 0) {
			result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
			if (result == 0) {
				result = strcmp(ktA->kt_name, ktB->kt_name);
1168 | } |
1169 | } |
1170 | } |
1171 | return result; |
1172 | } |
1173 | |
1174 | __startup_func |
1175 | static uint16_t * |
1176 | kalloc_type_create_iterators_fixed( |
1177 | uint16_t *kt_skip_list_start, |
1178 | uint64_t count) |
1179 | { |
1180 | uint16_t *kt_skip_list = kt_skip_list_start; |
1181 | uint16_t p_idx = UINT16_MAX; /* previous size idx */ |
1182 | uint16_t c_idx = 0; /* current size idx */ |
1183 | uint16_t unique_sig = 0; |
1184 | uint16_t total_sig = 0; |
1185 | const char *p_sig = NULL; |
1186 | const char *p_name = "" ; |
1187 | const char *c_sig = NULL; |
1188 | const char *c_name = NULL; |
1189 | |
1190 | /* |
1191 | * Walk over each kalloc_type_view |
1192 | */ |
1193 | for (uint16_t i = 0; i < count; i++) { |
1194 | kalloc_type_view_t kt = kt_buffer[i].ktv_fixed; |
1195 | |
		c_idx = kalloc_type_get_idx(kt->kt_size);
1197 | c_sig = kt->kt_signature; |
1198 | c_name = kt->kt_zv.zv_name; |
1199 | /* |
1200 | * When current kalloc_type_view is in a different kalloc size |
1201 | * bucket than the previous, it means we have processed all in |
1202 | * the previous size bucket, so store the accumulated values |
1203 | * and advance the indices. |
1204 | */ |
1205 | if (p_idx == UINT16_MAX || c_idx != p_idx) { |
1206 | /* |
1207 | * Updates for frequency lists |
1208 | */ |
1209 | if (p_idx != UINT16_MAX) { |
1210 | kt_freq_list[p_idx] = unique_sig; |
1211 | kt_freq_list_total[p_idx] = total_sig - unique_sig; |
1212 | } |
1213 | unique_sig = 1; |
1214 | total_sig = 1; |
1215 | |
1216 | p_idx = c_idx; |
1217 | p_sig = c_sig; |
1218 | p_name = c_name; |
1219 | |
1220 | /* |
1221 | * Updates to signature skip list |
1222 | */ |
1223 | *kt_skip_list = i; |
1224 | kt_skip_list++; |
1225 | |
1226 | continue; |
1227 | } |
1228 | |
1229 | /* |
		 * When the current kalloc_type_view is in the same kalloc size
		 * bucket as the previous one, analyze the signature to see if it
		 * is unique.
1232 | * |
1233 | * Signatures are collapsible if one is a substring of the next. |
1234 | */ |
		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
1236 | /* |
1237 | * Unique signature detected. Update counts and advance index |
1238 | */ |
1239 | unique_sig++; |
1240 | total_sig++; |
1241 | |
1242 | *kt_skip_list = i; |
1243 | kt_skip_list++; |
1244 | p_sig = c_sig; |
1245 | p_name = c_name; |
1246 | continue; |
1247 | } |
1248 | /* |
		 * Needed because we do substring matching on signatures: track the
		 * longer signature seen rather than the substring.
1251 | */ |
1252 | p_sig = c_sig; |
1253 | |
1254 | /* |
1255 | * Check if current kalloc_type_view corresponds to a new type |
1256 | */ |
		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
1258 | total_sig++; |
1259 | p_name = c_name; |
1260 | } |
1261 | } |
1262 | /* |
1263 | * Final update |
1264 | */ |
1265 | assert(c_idx == p_idx); |
1266 | assert(kt_freq_list[c_idx] == 0); |
1267 | kt_freq_list[c_idx] = unique_sig; |
1268 | kt_freq_list_total[c_idx] = total_sig - unique_sig; |
1269 | *kt_skip_list = (uint16_t) count; |
1270 | |
1271 | return ++kt_skip_list; |
1272 | } |
1273 | |
1274 | __startup_func |
1275 | static uint32_t |
1276 | kalloc_type_create_iterators_var( |
1277 | uint32_t *kt_skip_list_start, |
1278 | uint32_t buf_start) |
1279 | { |
1280 | uint32_t *kt_skip_list = kt_skip_list_start; |
1281 | uint32_t n = 0; |
1282 | |
1283 | kt_skip_list[n] = buf_start; |
1284 | assert(kt_count > buf_start + 1); |
1285 | for (uint32_t i = buf_start + 1; i < kt_count; i++) { |
1286 | kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var; |
1287 | kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var; |
1288 | const char *ktA_hdr = ktA->kt_sig_hdr ?: "" ; |
1289 | const char *ktB_hdr = ktB->kt_sig_hdr ?: "" ; |
1290 | assert(ktA->kt_sig_type != NULL); |
1291 | assert(ktB->kt_sig_type != NULL); |
		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
1294 | n++; |
1295 | kt_skip_list[n] = i; |
1296 | } |
1297 | } |
1298 | /* |
1299 | * Final update |
1300 | */ |
1301 | n++; |
1302 | kt_skip_list[n] = (uint32_t) kt_count; |
1303 | return n; |
1304 | } |
1305 | |
1306 | __startup_func |
1307 | static uint16_t |
1308 | kalloc_type_distribute_budget( |
1309 | uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)], |
1310 | uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)], |
1311 | uint16_t zone_budget, |
1312 | uint16_t min_zones_per_size) |
1313 | { |
1314 | uint16_t total_sig = 0; |
1315 | uint16_t min_sig = 0; |
1316 | uint16_t assigned_zones = 0; |
1317 | uint16_t remaining_zones = zone_budget; |
1318 | uint16_t modulo = 0; |
1319 | |
1320 | for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) { |
1321 | uint16_t sig_freq = freq_list[i]; |
1322 | uint16_t min_zones = min_zones_per_size; |
1323 | |
1324 | if (sig_freq < min_zones_per_size) { |
1325 | min_zones = sig_freq; |
1326 | } |
1327 | total_sig += sig_freq; |
1328 | kt_zones[i] = min_zones; |
1329 | min_sig += min_zones; |
1330 | } |
1331 | if (remaining_zones > total_sig) { |
1332 | remaining_zones = total_sig; |
1333 | } |
1334 | assert(remaining_zones >= min_sig); |
1335 | remaining_zones -= min_sig; |
1336 | total_sig -= min_sig; |
1337 | assigned_zones += min_sig; |
1338 | |
1339 | for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) { |
1340 | uint16_t freq = freq_list[i]; |
1341 | |
1342 | if (freq < min_zones_per_size) { |
1343 | continue; |
1344 | } |
1345 | uint32_t numer = (freq - min_zones_per_size) * remaining_zones; |
		uint16_t n_zones = (uint16_t)(numer / total_sig);
1347 | |
1348 | /* |
1349 | * Accumulate remainder and increment n_zones when it goes above |
1350 | * denominator |
1351 | */ |
1352 | modulo += numer % total_sig; |
1353 | if (modulo >= total_sig) { |
1354 | n_zones++; |
1355 | modulo -= total_sig; |
1356 | } |
1357 | |
1358 | /* |
1359 | * Cap the total number of zones to the unique signatures |
1360 | */ |
1361 | if ((n_zones + min_zones_per_size) > freq) { |
			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1363 | modulo += (extra_zones * total_sig); |
1364 | n_zones -= extra_zones; |
1365 | } |
1366 | kt_zones[i] += n_zones; |
1367 | assigned_zones += n_zones; |
1368 | } |
1369 | |
1370 | if (kt_options & KT_OPTIONS_DEBUG) { |
1371 | printf(format: "kalloc_type_apply_policy: assigned %u zones wasted %u zones\n" , |
1372 | assigned_zones, remaining_zones + min_sig - assigned_zones); |
1373 | } |
1374 | return remaining_zones + min_sig - assigned_zones; |
1375 | } |
1376 | |
1377 | __startup_func |
1378 | static int |
1379 | kalloc_type_cmp_type_zones(const void *a, const void *b) |
1380 | { |
1381 | const struct nzones_with_idx A = *(const struct nzones_with_idx *)a; |
1382 | const struct nzones_with_idx B = *(const struct nzones_with_idx *)b; |
1383 | |
1384 | return (int)(B.nzones - A.nzones); |
1385 | } |
1386 | |
1387 | __startup_func |
1388 | static void |
1389 | kalloc_type_redistribute_budget( |
1390 | uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)], |
1391 | uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)]) |
1392 | { |
1393 | uint16_t count = 0, cur_count = 0; |
1394 | struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {}; |
1395 | uint16_t top_zone_total = 0; |
1396 | |
1397 | for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) { |
1398 | uint16_t zones = kt_zones[i]; |
1399 | |
1400 | /* |
		 * If a sizeclass got no zones but has types to divide, make a
		 * note of it
1403 | */ |
1404 | if (zones == 0 && (freq_total_list[i] != 0)) { |
1405 | count++; |
1406 | } |
1407 | |
1408 | sorted_zones[i].nzones = kt_zones[i]; |
1409 | sorted_zones[i].idx = i; |
1410 | } |
1411 | |
1412 | qsort(a: &sorted_zones[0], n: (size_t) MAX_K_ZONE(kt_zone_cfg), |
1413 | es: sizeof(struct nzones_with_idx), cmp: kalloc_type_cmp_type_zones); |
1414 | |
1415 | for (uint16_t i = 0; i < 3; i++) { |
1416 | top_zone_total += sorted_zones[i].nzones; |
1417 | } |
1418 | |
1419 | /* |
1420 | * Borrow zones from the top 3 sizeclasses and redistribute to those |
	 * that didn't get a zone but have types to divide
1422 | */ |
1423 | cur_count = count; |
1424 | for (uint16_t i = 0; i < 3; i++) { |
1425 | uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total; |
1426 | uint16_t zone_available = kt_zones[sorted_zones[i].idx]; |
1427 | |
1428 | if (zone_borrow > (zone_available / 2)) { |
1429 | zone_borrow = zone_available / 2; |
1430 | } |
1431 | kt_zones[sorted_zones[i].idx] -= zone_borrow; |
1432 | cur_count -= zone_borrow; |
1433 | } |
1434 | |
1435 | for (uint16_t i = 0; i < 3; i++) { |
1436 | if (cur_count == 0) { |
1437 | break; |
1438 | } |
1439 | kt_zones[sorted_zones[i].idx]--; |
1440 | cur_count--; |
1441 | } |
1442 | |
1443 | for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) { |
1444 | if (kt_zones[i] == 0 && (freq_total_list[i] != 0) && |
1445 | (count > cur_count)) { |
1446 | kt_zones[i]++; |
1447 | count--; |
1448 | } |
1449 | } |
1450 | } |
1451 | |
__startup_func
static uint16_t
1453 | kalloc_type_apply_policy( |
1454 | uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)], |
1455 | uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)], |
1456 | uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)], |
1457 | uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)], |
1458 | uint16_t zone_budget) |
1459 | { |
1460 | uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10); |
1461 | uint16_t zbudget_type = zone_budget - zbudget_sig; |
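	/*
	 * The budget is split roughly 70/30: for instance, a hypothetical
	 * budget of 100 zones reserves 70 for signature-based zones and
	 * the remaining 30 for per-type zones.
	 */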
1462 | uint16_t wasted_zones = 0; |
1463 | |
1464 | #if DEBUG || DEVELOPMENT |
1465 | if (startup_phase < STARTUP_SUB_LOCKDOWN) { |
1466 | uint16_t current_zones = os_atomic_load(&num_zones, relaxed); |
1467 | |
1468 | assert(zone_budget + current_zones <= MAX_ZONES); |
1469 | } |
1470 | #endif |
1471 | |
	wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
	    zbudget_sig, 2);
	wasted_zones += kalloc_type_distribute_budget(freq_total_list,
	    kt_zones_type, zbudget_type, 0);
	kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);
1477 | |
1478 | /* |
1479 | * Print stats when KT_OPTIONS_DEBUG boot-arg present |
1480 | */ |
1481 | if (kt_options & KT_OPTIONS_DEBUG) { |
1482 | printf(format: "Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t" |
1483 | "zones_type\n" ); |
1484 | for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) { |
1485 | printf(format: "%u\t%u\t%u\t%u\t%u\t%u\n" , kt_zone_cfg[i], |
1486 | freq_total_list[i] + freq_list[i], freq_list[i], |
1487 | kt_zones_sig[i] + kt_zones_type[i], |
1488 | kt_zones_sig[i], kt_zones_type[i]); |
1489 | } |
1490 | } |
1491 | |
1492 | return wasted_zones; |
1493 | } |
1494 | |
1495 | |
1496 | __startup_func |
1497 | static void |
1498 | kalloc_type_create_zone_for_size( |
1499 | zone_t *kt_zones_for_size, |
1500 | uint16_t kt_zones, |
1501 | vm_size_t z_size) |
1502 | { |
1503 | zone_t p_zone = NULL; |
1504 | char *z_name = NULL; |
1505 | zone_t shared_z = NULL; |
1506 | |
1507 | for (uint16_t i = 0; i < kt_zones; i++) { |
1508 | z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE); |
1509 | snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu" , i, |
1510 | (size_t) z_size); |
		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1512 | if (i != 0) { |
1513 | p_zone->z_kt_next = z; |
1514 | } |
1515 | p_zone = z; |
1516 | kt_zones_for_size[i] = z; |
1517 | } |
1518 | /* |
1519 | * Create shared zone for sizeclass if it doesn't already exist |
1520 | */ |
1521 | if (kt_shared_fixed) { |
		shared_z = kalloc_zone_for_size(KHEAP_SHARED->kh_zstart, z_size);
		if (zone_elem_inner_size(shared_z) != z_size) {
1524 | z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE); |
1525 | snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu" , |
1526 | (size_t) z_size); |
			shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
			    ^(zone_t zone){
				zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_SHARED;
1530 | }); |
1531 | } |
1532 | } |
1533 | kt_zones_for_size[kt_zones] = shared_z; |
1534 | } |
1535 | |
1536 | __startup_func |
1537 | static uint16_t |
1538 | kalloc_type_zones_for_type( |
1539 | uint16_t zones_total_type, |
1540 | uint16_t unique_types, |
1541 | uint16_t total_types, |
1542 | bool last_sig) |
1543 | { |
1544 | uint16_t zones_for_type = 0, n_mod = 0; |
1545 | |
1546 | if (zones_total_type == 0) { |
1547 | return 0; |
1548 | } |
1549 | |
1550 | zones_for_type = (zones_total_type * unique_types) / total_types; |
1551 | n_mod = (zones_total_type * unique_types) % total_types; |
1552 | zone_carry += n_mod; |
1553 | |
1554 | /* |
1555 | * Drain carry opportunistically |
1556 | */ |
1557 | if (((unique_types > 3) && (zone_carry > 0)) || |
1558 | (zone_carry >= (int) total_types) || |
1559 | (last_sig && (zone_carry > 0))) { |
1560 | zone_carry -= total_types; |
1561 | zones_for_type++; |
1562 | } |
1563 | |
1564 | if (last_sig) { |
1565 | assert(zone_carry == 0); |
1566 | } |
1567 | |
1568 | return zones_for_type; |
1569 | } |
1570 | |
1571 | __startup_func |
1572 | static uint16_t |
1573 | kalloc_type_build_skip_list( |
1574 | kalloc_type_view_t *start, |
1575 | kalloc_type_view_t *end, |
1576 | uint16_t *kt_skip_list) |
1577 | { |
1578 | kalloc_type_view_t *cur = start; |
1579 | kalloc_type_view_t prev = *start; |
1580 | uint16_t i = 0, idx = 0; |
1581 | |
1582 | kt_skip_list[idx] = i; |
1583 | idx++; |
1584 | |
1585 | while (cur < end) { |
1586 | kalloc_type_view_t kt_cur = *cur; |
1587 | |
		if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
1589 | kt_skip_list[idx] = i; |
1590 | |
1591 | prev = kt_cur; |
1592 | idx++; |
1593 | } |
1594 | i++; |
1595 | cur++; |
1596 | } |
1597 | |
1598 | /* |
1599 | * Final update |
1600 | */ |
1601 | kt_skip_list[idx] = i; |
1602 | return idx; |
1603 | } |
1604 | |
1605 | __startup_func |
1606 | static void |
1607 | kalloc_type_init_sig_eq( |
1608 | zone_t *zones, |
1609 | uint16_t n_zones, |
1610 | zone_t sig_zone) |
1611 | { |
1612 | for (uint16_t i = 0; i < n_zones; i++) { |
1613 | zone_t z = zones[i]; |
1614 | |
1615 | assert(!zone_get_sig_eq(z)); |
		zone_set_sig_eq(z, zone_index(sig_zone));
1617 | } |
1618 | } |
1619 | |
1620 | __startup_func |
1621 | static uint16_t |
1622 | kalloc_type_distribute_zone_for_type( |
1623 | kalloc_type_view_t *start, |
1624 | kalloc_type_view_t *end, |
1625 | bool last_sig, |
1626 | uint16_t zones_total_type, |
1627 | uint16_t total_types, |
1628 | uint16_t *kt_skip_list, |
1629 | zone_t kt_zones_for_size[32], |
1630 | uint16_t type_zones_start, |
1631 | zone_t sig_zone, |
1632 | zone_t shared_zone) |
1633 | { |
1634 | uint16_t count = 0, n_zones = 0; |
1635 | uint16_t *shuffle_buf = NULL; |
1636 | zone_t *type_zones = &kt_zones_for_size[type_zones_start]; |
1637 | |
1638 | /* |
1639 | * Assert there is space in buffer |
1640 | */ |
1641 | count = kalloc_type_build_skip_list(start, end, kt_skip_list); |
	n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
	    last_sig);
1644 | shuffle_buf = &kt_skip_list[count + 1]; |
1645 | |
1646 | /* |
	 * Initialize signature equivalence zone for type zones
1648 | */ |
	kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1650 | |
1651 | if (n_zones == 0) { |
		kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
		    shared_zone);
1654 | return n_zones; |
1655 | } |
1656 | |
1657 | /* |
1658 | * Don't shuffle in the sig_zone if there is only 1 type in the zone |
1659 | */ |
1660 | if (count == 1) { |
		kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
		    shared_zone);
1663 | return n_zones; |
1664 | } |
1665 | |
1666 | /* |
1667 | * Add the signature based zone to n_zones |
1668 | */ |
1669 | n_zones++; |
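
	/*
	 * What follows is a round-robin of the types across the sig_zone
	 * plus the type zones: zidx cycles through the n_zones slots and
	 * the visit order is reshuffled once per full pass, so which zone
	 * a given type lands in is randomized at boot.
	 */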
1670 | |
1671 | for (uint16_t i = 0; i < count; i++) { |
1672 | uint16_t zidx = i % n_zones, shuffled_zidx = 0; |
1673 | uint16_t type_start = kt_skip_list[i]; |
1674 | kalloc_type_view_t *kt_type_start = &start[type_start]; |
1675 | uint16_t type_end = kt_skip_list[i + 1]; |
1676 | kalloc_type_view_t *kt_type_end = &start[type_end]; |
1677 | zone_t zone; |
1678 | |
1679 | if (zidx == 0) { |
			kmem_shuffle(shuffle_buf, n_zones);
1681 | } |
1682 | |
1683 | shuffled_zidx = shuffle_buf[zidx]; |
1684 | zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1]; |
		kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
		    shared_zone);
1687 | } |
1688 | |
1689 | return n_zones - 1; |
1690 | } |
1691 | |
1692 | __startup_func |
1693 | static void |
1694 | kalloc_type_create_zones_fixed( |
1695 | uint16_t *kt_skip_list_start, |
1696 | uint16_t *kt_shuffle_buf) |
1697 | { |
1698 | uint16_t *kt_skip_list = kt_skip_list_start; |
1699 | uint16_t p_j = 0; |
1700 | uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {}; |
1701 | uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {}; |
1702 | #if DEBUG || DEVELOPMENT |
1703 | uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf - |
1704 | (vm_address_t) kt_buffer) / sizeof(uint16_t); |
1705 | #endif |
1706 | /* |
1707 | * Apply policy to determine how many zones to create for each size |
1708 | * class. |
1709 | */ |
	kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
	    kt_zones_sig, kt_zones_type, kt_fixed_zones);
1712 | |
1713 | for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) { |
1714 | uint16_t n_unique_sig = kt_freq_list[i]; |
1715 | vm_size_t z_size = kt_zone_cfg[i]; |
1716 | uint16_t n_zones_sig = kt_zones_sig[i]; |
1717 | uint16_t n_zones_type = kt_zones_type[i]; |
1718 | uint16_t total_types = kt_freq_list_total[i]; |
1719 | uint16_t type_zones_used = 0; |
1720 | |
1721 | if (n_unique_sig == 0) { |
1722 | continue; |
1723 | } |
1724 | |
1725 | zone_carry = 0; |
1726 | assert(n_zones_sig + n_zones_type + 1 <= 32); |
1727 | zone_t kt_zones_for_size[32] = {}; |
		kalloc_type_create_zone_for_size(kt_zones_for_size,
		    n_zones_sig + n_zones_type, z_size);
1730 | |
1731 | kalloc_type_zarray[i] = kt_zones_for_size[0]; |
1732 | /* |
1733 | * Ensure that there is enough space to shuffle n_unique_sig |
1734 | * indices |
1735 | */ |
1736 | assert(n_unique_sig < kt_shuffle_count); |
1737 | |
1738 | /* |
1739 | * Get a shuffled set of signature indices |
1740 | */ |
1741 | *kt_shuffle_buf = 0; |
1742 | if (n_unique_sig > 1) { |
			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1744 | } |
1745 | |
1746 | for (uint16_t j = 0; j < n_zones_sig; j++) { |
1747 | zone_t *z_ptr = &kt_zones_for_size[j]; |
1748 | |
			kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1750 | } |
1751 | |
1752 | for (uint16_t j = 0; j < n_unique_sig; j++) { |
1753 | /* |
1754 | * For every size that has unique types |
1755 | */ |
1756 | uint16_t shuffle_idx = kt_shuffle_buf[j]; |
1757 | uint16_t cur = kt_skip_list[shuffle_idx + p_j]; |
1758 | uint16_t end = kt_skip_list[shuffle_idx + p_j + 1]; |
1759 | zone_t zone = kt_zones_for_size[j % n_zones_sig]; |
1760 | zone_t shared_zone = kt_zones_for_size[n_zones_sig + n_zones_type]; |
1761 | bool last_sig; |
1762 | |
1763 | last_sig = (j == (n_unique_sig - 1)) ? true : false; |
			type_zones_used += kalloc_type_distribute_zone_for_type(
				&kt_buffer[cur].ktv_fixed,
				&kt_buffer[end].ktv_fixed, last_sig,
				n_zones_type, total_types + n_unique_sig,
				&kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
				n_zones_sig + type_zones_used, zone, shared_zone);
1770 | } |
1771 | assert(type_zones_used <= n_zones_type); |
1772 | p_j += n_unique_sig; |
1773 | } |
1774 | } |
1775 | |
1776 | __startup_func |
1777 | static void |
1778 | kalloc_type_view_init_fixed(void) |
1779 | { |
1780 | kalloc_type_hash_seed = (uint32_t) early_random(); |
1781 | kalloc_type_build_dlut(); |
1782 | /* |
1783 | * Parse __kalloc_type sections and build array of pointers to |
1784 | * all kalloc type views in kt_buffer. |
1785 | */ |
	kt_count = kalloc_type_view_parse(KTV_FIXED);
1787 | assert(kt_count < KALLOC_TYPE_SIZE_MASK); |
1788 | |
1789 | #if DEBUG || DEVELOPMENT |
1790 | vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t); |
1791 | vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t); |
1792 | assert(kt_scratch_size >= kt_buffer_size + sig_slist_size); |
1793 | #endif |
1794 | |
1795 | /* |
1796 | * Sort based on size class and signature |
1797 | */ |
	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
	    kalloc_type_cmp_fixed);
1800 | |
1801 | /* |
1802 | * Build a skip list that holds starts of unique signatures and a |
1803 | * frequency list of number of unique and total signatures per kalloc |
1804 | * size class |
1805 | */ |
1806 | uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count); |
	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
		kt_skip_list_start, kt_count);
1809 | |
1810 | /* |
1811 | * Create zones based on signatures |
1812 | */ |
1813 | kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf); |
1814 | } |
1815 | |
1816 | __startup_func |
1817 | static void |
1818 | kalloc_type_heap_init(void) |
1819 | { |
1820 | assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS); |
1821 | char kh_name[MAX_ZONE_NAME]; |
1822 | uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps; |
1823 | |
1824 | for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) { |
		snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
		kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
		    &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1828 | } |
1829 | /* |
1830 | * All variable kalloc type allocations are collapsed into a single |
1831 | * stat. Individual accounting can be requested via KT_PRIV_ACCT |
1832 | */ |
1833 | KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats); |
1834 | zone_view_count += 1; |
1835 | } |
1836 | |
1837 | __startup_func |
1838 | static void |
1839 | kalloc_type_assign_heap( |
1840 | uint32_t start, |
1841 | uint32_t end, |
1842 | uint32_t heap_id) |
1843 | { |
	bool use_split = kmem_get_random16(1);
1845 | |
1846 | if (use_split) { |
1847 | heap_id = kt_var_heaps; |
1848 | } |
	kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
	    &kt_buffer[end].ktv_var, heap_id);
1851 | } |
1852 | |
1853 | __startup_func |
1854 | static void |
1855 | kalloc_type_split_heap( |
1856 | uint32_t start, |
1857 | uint32_t end, |
1858 | uint32_t heap_id) |
1859 | { |
1860 | uint32_t count = start; |
1861 | const char *p_name = NULL; |
1862 | |
1863 | while (count < end) { |
1864 | kalloc_type_var_view_t cur = kt_buffer[count].ktv_var; |
1865 | const char *c_name = cur->kt_name; |
1866 | |
1867 | if (!p_name) { |
1868 | assert(count == start); |
1869 | p_name = c_name; |
1870 | } |
		if (strcmp(c_name, p_name) != 0) {
			kalloc_type_assign_heap(start, count, heap_id);
1873 | start = count; |
1874 | p_name = c_name; |
1875 | } |
1876 | count++; |
1877 | } |
1878 | kalloc_type_assign_heap(start, end, heap_id); |
1879 | } |
1880 | |
1881 | __startup_func |
1882 | static void |
1883 | kalloc_type_view_init_var(void) |
1884 | { |
1885 | uint32_t buf_start = 0, unique_sig = 0; |
1886 | uint32_t *kt_skip_list_start; |
1887 | uint16_t *shuffle_buf; |
1888 | uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1; |
1889 | uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1; |
1890 | /* |
1891 | * Pick a random heap to split |
1892 | */ |
	uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
1894 | |
1895 | /* |
1896 | * Zones are created prior to parsing the views as zone budget is fixed |
1897 | * per sizeclass and special types identified while parsing are redirected |
1898 | * as they are discovered. |
1899 | */ |
1900 | kalloc_type_heap_init(); |
1901 | |
1902 | /* |
1903 | * Parse __kalloc_var sections and build array of pointers to views that |
1904 | * aren't rediected in kt_buffer. |
1905 | */ |
1906 | kt_count = kalloc_type_view_parse(type: KTV_VAR); |
1907 | assert(kt_count < UINT32_MAX); |
1908 | |
1909 | #if DEBUG || DEVELOPMENT |
1910 | vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t); |
1911 | vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t); |
1912 | assert(kt_scratch_size >= kt_buffer_size + sig_slist_size); |
1913 | #endif |
1914 | |
1915 | /* |
1916 | * Sort based on size class and signature |
1917 | */ |
	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
	    kalloc_type_cmp_var);
1920 | |
1921 | buf_start = kalloc_type_handle_parray_var(); |
1922 | |
1923 | /* |
1924 | * Build a skip list that holds starts of unique signatures |
1925 | */ |
1926 | kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count); |
1927 | unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start, |
1928 | buf_start); |
1929 | shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1); |
1930 | /* |
1931 | * If we have only one heap then other elements share heap with pointer |
1932 | * arrays |
1933 | */ |
1934 | if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) { |
1935 | panic("kt_var_heaps is too small" ); |
1936 | } |
1937 | |
	kmem_shuffle(shuffle_buf, flex_heap_count);
1939 | /* |
1940 | * The index of the heap we decide to split is placed twice in the shuffle |
1941 | * buffer so that it gets twice the number of signatures that we split |
1942 | * evenly |
1943 | */ |
1944 | shuffle_buf[flex_heap_count] = split_heap; |
1945 | split_heap += (fixed_heaps + 1); |
1946 | |
1947 | for (uint32_t i = 1; i <= unique_sig; i++) { |
1948 | uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] + |
1949 | fixed_heaps + 1; |
1950 | uint32_t start = kt_skip_list_start[i - 1]; |
1951 | uint32_t end = kt_skip_list_start[i]; |
1952 | |
1953 | assert(heap_id <= kt_var_heaps); |
1954 | if (heap_id == split_heap) { |
1955 | kalloc_type_split_heap(start, end, heap_id); |
1956 | continue; |
1957 | } |
		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
		    &kt_buffer[end].ktv_var, heap_id);
1960 | } |
1961 | } |
1962 | |
1963 | __startup_func |
1964 | static void |
1965 | kalloc_init(void) |
1966 | { |
1967 | /* |
1968 | * Allocate scratch space to parse kalloc_type_views and create |
1969 | * other structures necessary to process them. |
1970 | */ |
1971 | uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t); |
1972 | |
1973 | static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE); |
1974 | kalloc_zsize_compute(); |
1975 | |
1976 | /* Initialize kalloc data buffers heap */ |
	kalloc_heap_init(KHEAP_DATA_BUFFERS);
1978 | |
1979 | /* Initialize kalloc shared buffers heap */ |
	kalloc_heap_init(KHEAP_SHARED);
1981 | |
	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_KALLOC);
1984 | |
1985 | /* |
1986 | * Handle fixed size views |
1987 | */ |
1988 | kalloc_type_view_init_fixed(); |
1989 | |
1990 | /* |
1991 | * Reset |
1992 | */ |
	bzero(kt_buffer, kt_scratch_size);
1994 | kt_count = max_count; |
1995 | |
1996 | /* |
1997 | * Handle variable size views |
1998 | */ |
1999 | kalloc_type_view_init_var(); |
2000 | |
2001 | /* |
2002 | * Free resources used |
2003 | */ |
	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
2005 | } |
2006 | STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init); |
2007 | |
2008 | #pragma mark accessors |
2009 | |
2010 | #define KFREE_ABSURD_SIZE \ |
2011 | ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2) |
2012 | |
2013 | static void |
2014 | KALLOC_ZINFO_SALLOC(vm_size_t bytes) |
2015 | { |
2016 | thread_t thr = current_thread(); |
	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2018 | } |
2019 | |
2020 | static void |
2021 | KALLOC_ZINFO_SFREE(vm_size_t bytes) |
2022 | { |
2023 | thread_t thr = current_thread(); |
	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2025 | } |
2026 | |
2027 | static kmem_guard_t |
2028 | kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner) |
2029 | { |
2030 | kmem_guard_t guard = { |
2031 | .kmg_atomic = true, |
2032 | .kmg_tag = tag, |
2033 | .kmg_type_hash = type_hash, |
		.kmg_context = os_hash_kernel_pointer(owner),
2035 | }; |
2036 | |
2037 | /* |
2038 | * TODO: this use is really not sufficiently smart. |
2039 | */ |
2040 | |
2041 | return guard; |
2042 | } |
2043 | |
2044 | #if __arm64e__ || CONFIG_KERNEL_TAGGING |
2045 | |
2046 | #if __arm64e__ |
2047 | #define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1) |
2048 | |
2049 | /* |
2050 | * Zone encoding is: |
2051 | * |
2052 | * <PAC SIG><1><1><PTR value><5 bits of size class> |
2053 | * |
2054 | * VM encoding is: |
2055 | * |
2056 | * <PAC SIG><1><0><PTR value><14 bits of page count> |
2057 | * |
2058 | * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits, |
2059 | * so that PAC authentication extends the proper sign bit. |
2060 | */ |
2061 | |
2062 | static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64); |
2063 | #else |
2064 | #define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1) |
2065 | |
2066 | /* |
2067 | * Zone encoding is: |
2068 | * |
2069 | * <TBI><1><PTR value><5 bits of size class> |
2070 | * |
2071 | * VM encoding is: |
2072 | * |
2073 | * <TBI><0><PTR value><14 bits of page count> |
2074 | */ |
2075 | |
2076 | static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64); |
2077 | #endif |
2078 | |
2079 | SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT; |
2080 | |
2081 | __attribute__((always_inline)) |
2082 | struct kalloc_result |
2083 | __kalloc_array_decode(vm_address_t ptr) |
2084 | { |
2085 | struct kalloc_result kr; |
2086 | vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT; |
2087 | |
2088 | if (ptr & zone_mask) { |
2089 | kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf); |
2090 | ptr &= ~0x1full; |
2091 | } else if (__probable(ptr)) { |
2092 | kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT; |
2093 | ptr &= ~PAGE_MASK; |
2094 | ptr |= zone_mask; |
2095 | } else { |
2096 | kr.size = 0; |
2097 | } |
2098 | |
2099 | kr.addr = (void *)ptr; |
2100 | return kr; |
2101 | } |
2102 | |
2103 | static inline void * |
2104 | __kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused) |
2105 | { |
2106 | return (void *)((vm_address_t)ptr | z->z_array_size_class); |
2107 | } |
2108 | |
2109 | static inline vm_address_t |
2110 | __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size) |
2111 | { |
2112 | addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT); |
2113 | |
2114 | return addr | atop(size); |
2115 | } |
2116 | |
2117 | #else |
2118 | |
2119 | SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0; |
2120 | |
2121 | /* |
2122 | * Encoding is: |
2123 | * bits 0..46: pointer value |
2124 | * bits 47..47: 0: zones, 1: VM |
2125 | * bits 48..63: zones: elem size, VM: number of pages |
2126 | */ |
2127 | |
2128 | #define KALLOC_ARRAY_TYPE_BIT 47 |
2129 | static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1); |
2130 | static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT); |
2131 | |
2132 | __attribute__((always_inline)) |
2133 | struct kalloc_result |
2134 | __kalloc_array_decode(vm_address_t ptr) |
2135 | { |
2136 | struct kalloc_result kr; |
2137 | uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT; |
2138 | |
2139 | kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1); |
2140 | if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) { |
2141 | kr.size <<= PAGE_SHIFT; |
2142 | } |
2143 | /* sign extend, so that it also works with NULL */ |
2144 | kr.addr = (void *)((long)(ptr << shift) >> shift); |
2145 | |
2146 | return kr; |
2147 | } |
2148 | |
2149 | static inline void * |
2150 | __kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size) |
2151 | { |
2152 | vm_address_t addr = (vm_address_t)ptr; |
2153 | |
2154 | addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */ |
2155 | addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1); |
2156 | |
2157 | return (void *)addr; |
2158 | } |
2159 | |
2160 | static inline vm_address_t |
2161 | __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size) |
2162 | { |
2163 | addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */ |
2164 | addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT); |
2165 | |
2166 | return addr; |
2167 | } |
2168 | |
2169 | #endif |
2170 | |
2171 | vm_size_t |
2172 | kalloc_next_good_size(vm_size_t size, uint32_t period) |
2173 | { |
2174 | uint32_t scale = kalloc_log2down((uint32_t)size); |
2175 | vm_size_t step, size_class; |
2176 | |
2177 | if (size < KHEAP_STEP_START) { |
2178 | return KHEAP_STEP_START; |
2179 | } |
2180 | if (size < 2 * KHEAP_STEP_START) { |
2181 | return 2 * KHEAP_STEP_START; |
2182 | } |
2183 | |
2184 | if (size < KHEAP_MAX_SIZE) { |
2185 | step = 1ul << (scale - 1); |
2186 | } else { |
		step = round_page(1ul << (scale - kalloc_log2down(period)));
2188 | } |
2189 | |
2190 | size_class = (size + step) & -step; |
2191 | #if KASAN_CLASSIC |
2192 | if (size > K_SIZE_CLASS(size_class)) { |
2193 | return kalloc_next_good_size(size_class, period); |
2194 | } |
2195 | size_class = K_SIZE_CLASS(size_class); |
2196 | #endif |
2197 | return size_class; |
2198 | } |
2199 | |
2200 | |
2201 | #pragma mark kalloc |
2202 | |
2203 | static inline kalloc_heap_t |
2204 | kalloc_type_get_heap(kalloc_type_var_view_t kt_view, bool kt_free __unused) |
2205 | { |
2206 | /* |
2207 | * Redirect data-only views |
2208 | */ |
	if (kalloc_type_is_data(kt_view->kt_flags)) {
2210 | return KHEAP_DATA_BUFFERS; |
2211 | } |
2212 | |
2213 | if (kt_view->kt_flags & KT_PROCESSED) { |
2214 | return KHEAP_KT_VAR; |
2215 | } |
2216 | |
2217 | return KHEAP_DEFAULT; |
2218 | } |
2219 | |
2220 | __attribute__((noinline)) |
2221 | static struct kalloc_result |
2222 | kalloc_large( |
2223 | kalloc_heap_t kheap, |
2224 | vm_size_t req_size, |
2225 | zalloc_flags_t flags, |
2226 | uint16_t kt_hash, |
2227 | void *owner __unused) |
2228 | { |
2229 | kma_flags_t kma_flags = KMA_KASAN_GUARD | KMA_TAG; |
2230 | vm_tag_t tag; |
2231 | vm_offset_t addr, size; |
2232 | |
2233 | if (flags & Z_NOFAIL) { |
2234 | panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)" , |
2235 | (size_t)req_size); |
2236 | } |
2237 | |
2238 | /* |
2239 | * kmem_alloc could block so we return if noblock |
2240 | * |
2241 | * also, reject sizes larger than our address space is quickly, |
2242 | * as kt_size or IOMallocArraySize() expect this. |
2243 | */ |
2244 | if ((flags & Z_NOWAIT) || |
2245 | (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) { |
2246 | return (struct kalloc_result){ }; |
2247 | } |
2248 | |
2249 | if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) { |
2250 | return (struct kalloc_result){ }; |
2251 | } |
2252 | |
2253 | /* |
2254 | * (73465472) on Intel we didn't use to pass this flag, |
	 * which in turn allowed kalloc_large() memory to be shared
2256 | * with user directly. |
2257 | * |
2258 | * We're bound by this unfortunate ABI. |
2259 | */ |
2260 | if ((flags & Z_MAY_COPYINMAP) == 0) { |
2261 | #ifndef __x86_64__ |
2262 | kma_flags |= KMA_KOBJECT; |
2263 | #endif |
2264 | } else { |
2265 | assert(kheap == KHEAP_DATA_BUFFERS); |
2266 | kma_flags &= ~KMA_TAG; |
2267 | } |
2268 | if (flags & Z_NOPAGEWAIT) { |
2269 | kma_flags |= KMA_NOPAGEWAIT; |
2270 | } |
2271 | if (flags & Z_ZERO) { |
2272 | kma_flags |= KMA_ZERO; |
2273 | } |
2274 | if (kheap == KHEAP_DATA_BUFFERS) { |
2275 | kma_flags |= KMA_DATA; |
2276 | } else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) { |
2277 | kma_flags |= KMA_SPRAYQTN; |
2278 | } |
2279 | |
2280 | |
2281 | tag = zalloc_flags_get_tag(flags); |
2282 | if (flags & Z_VM_TAG_BT_BIT) { |
2283 | tag = vm_tag_bt() ?: tag; |
2284 | } |
2285 | if (tag == VM_KERN_MEMORY_NONE) { |
2286 | tag = kheap->kh_tag; |
2287 | } |
2288 | |
	size = round_page(req_size);
	if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
		req_size = size;
	}
2293 | |
	addr = kmem_alloc_guard(kernel_map, req_size, 0,
	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
2296 | |
2297 | if (addr != 0) { |
2298 | counter_inc(&kalloc_large_count); |
		counter_add(&kalloc_large_total, size);
		KALLOC_ZINFO_SALLOC(size);
2301 | if (flags & Z_KALLOC_ARRAY) { |
			addr = __kalloc_array_encode_vm(addr, req_size);
2303 | } |
2304 | } else { |
2305 | addr = 0; |
2306 | } |
2307 | |
2308 | DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr); |
2309 | return (struct kalloc_result){ .addr = (void *)addr, .size = req_size }; |
2310 | } |
2311 | |
2312 | #if KASAN |
2313 | |
2314 | static inline void |
2315 | kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used) |
2316 | { |
2317 | #if KASAN_CLASSIC |
2318 | /* |
2319 | * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer |
2320 | * tagging of the memory region is performed here. |
2321 | */ |
2322 | kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false, |
2323 | __builtin_frame_address(0)); |
2324 | #endif /* KASAN_CLASSIC */ |
2325 | |
2326 | #if KASAN_TBI |
	kasan_tbi_retag_unused_space((vm_offset_t)addr, size, used ?: 1);
2328 | #endif /* KASAN_TBI */ |
2329 | } |
2330 | #endif /* KASAN */ |
2331 | |
2332 | static inline struct kalloc_result |
2333 | kalloc_zone( |
2334 | zone_t z, |
2335 | zone_stats_t zstats, |
2336 | zalloc_flags_t flags, |
2337 | vm_size_t req_size) |
2338 | { |
2339 | struct kalloc_result kr; |
2340 | vm_size_t esize; |
2341 | |
	kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
2343 | esize = kr.size; |
2344 | |
2345 | if (__probable(kr.addr)) { |
2346 | if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) { |
2347 | req_size = esize; |
2348 | } else { |
2349 | kr.size = req_size; |
2350 | } |
2351 | #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST) |
		kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2354 | |
2355 | #if KASAN |
2356 | kalloc_mark_unused_space(kr.addr, esize, kr.size); |
2357 | #endif /* KASAN */ |
2358 | |
2359 | if (flags & Z_KALLOC_ARRAY) { |
			kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
2361 | } |
2362 | } |
2363 | |
2364 | DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr); |
2365 | return kr; |
2366 | } |
2367 | |
2368 | static zone_id_t |
2369 | kalloc_use_shared_heap( |
2370 | kalloc_heap_t kheap, |
2371 | zone_stats_t zstats, |
2372 | zone_id_t zstart, |
2373 | zalloc_flags_t *flags) |
2374 | { |
2375 | if (kheap->kh_heap_id != KHEAP_ID_DATA_BUFFERS) { |
2376 | zone_stats_t zstats_cpu = zpercpu_get(zstats); |
2377 | |
2378 | if (os_atomic_load(&zstats_cpu->zs_alloc_not_shared, relaxed) == 0) { |
2379 | *flags |= Z_SET_NOTSHARED; |
2380 | return KHEAP_SHARED->kh_zstart; |
2381 | } |
2382 | } |
2383 | |
2384 | return zstart; |
2385 | } |
2386 | |
2387 | #undef kalloc_ext |
2388 | |
2389 | struct kalloc_result |
2390 | kalloc_ext( |
2391 | void *kheap_or_kt_view, |
2392 | vm_size_t size, |
2393 | zalloc_flags_t flags, |
2394 | void *owner) |
2395 | { |
2396 | kalloc_type_var_view_t kt_view; |
2397 | kalloc_heap_t kheap; |
2398 | zone_stats_t zstats = NULL; |
2399 | zone_t z; |
2400 | uint16_t kt_hash; |
2401 | zone_id_t zstart; |
2402 | |
2403 | if (kt_is_var_view(kheap_or_kt_view)) { |
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2405 | kheap = kalloc_type_get_heap(kt_view, false); |
2406 | /* |
2407 | * Use stats from view if present, else use stats from kheap. |
2408 | * KHEAP_KT_VAR accumulates stats for all allocations going to |
2409 | * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS |
2410 | * use stats from the respective zones. |
2411 | */ |
2412 | zstats = kt_view->kt_stats; |
2413 | kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags); |
2414 | zstart = kt_view->kt_heap_start ?: kheap->kh_zstart; |
2415 | } else { |
2416 | kt_view = NULL; |
2417 | kheap = kheap_or_kt_view; |
2418 | kt_hash = kheap->kh_type_hash; |
2419 | zstart = kheap->kh_zstart; |
2420 | } |
2421 | |
2422 | if (!zstats) { |
2423 | zstats = kheap->kh_stats; |
2424 | } |
2425 | |
	zstart = kalloc_use_shared_heap(kheap, zstats, zstart, &flags);
	z = kalloc_zone_for_size_with_flags(zstart, size, flags);
	if (z) {
		return kalloc_zone(z, zstats, flags, size);
	} else {
		return kalloc_large(kheap, size, flags, kt_hash, owner);
	}
2433 | } |
2434 | |
2435 | #if XNU_PLATFORM_MacOSX |
2436 | void * |
2437 | kalloc_external(vm_size_t size); |
2438 | void * |
2439 | kalloc_external(vm_size_t size) |
2440 | { |
2441 | zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC); |
2442 | return kheap_alloc(KHEAP_DEFAULT, size, flags); |
2443 | } |
2444 | #endif /* XNU_PLATFORM_MacOSX */ |
2445 | |
2446 | void * |
2447 | kalloc_data_external(vm_size_t size, zalloc_flags_t flags); |
2448 | void * |
2449 | kalloc_data_external(vm_size_t size, zalloc_flags_t flags) |
2450 | { |
2451 | flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA); |
2452 | return kheap_alloc(KHEAP_DATA_BUFFERS, size, flags); |
2453 | } |
2454 | |
2455 | __abortlike |
2456 | static void |
2457 | kalloc_data_require_panic(void *addr, vm_size_t size) |
2458 | { |
	zone_id_t zid = zone_id_for_element(addr, size);
2460 | |
2461 | if (zid != ZONE_ID_INVALID) { |
2462 | zone_t z = &zone_array[zid]; |
2463 | zone_security_flags_t zsflags = zone_security_array[zid]; |
2464 | |
2465 | if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) { |
2466 | panic("kalloc_data_require failed: address %p in [%s%s]" , |
2467 | addr, zone_heap_name(z), zone_name(z)); |
2468 | } |
2469 | |
2470 | panic("kalloc_data_require failed: address %p in [%s%s], " |
2471 | "size too large %zd > %zd" , addr, |
2472 | zone_heap_name(z), zone_name(z), |
2473 | (size_t)size, (size_t)zone_elem_inner_size(z)); |
2474 | } else { |
2475 | panic("kalloc_data_require failed: address %p not in zone native map" , |
2476 | addr); |
2477 | } |
2478 | } |
2479 | |
2480 | __abortlike |
2481 | static void |
2482 | kalloc_non_data_require_panic(void *addr, vm_size_t size) |
2483 | { |
	zone_id_t zid = zone_id_for_element(addr, size);
2485 | |
2486 | if (zid != ZONE_ID_INVALID) { |
2487 | zone_t z = &zone_array[zid]; |
2488 | zone_security_flags_t zsflags = zone_security_array[zid]; |
2489 | |
2490 | switch (zsflags.z_kheap_id) { |
2491 | case KHEAP_ID_NONE: |
2492 | case KHEAP_ID_DATA_BUFFERS: |
2493 | case KHEAP_ID_KT_VAR: |
2494 | panic("kalloc_non_data_require failed: address %p in [%s%s]" , |
2495 | addr, zone_heap_name(z), zone_name(z)); |
2496 | default: |
2497 | break; |
2498 | } |
2499 | |
2500 | panic("kalloc_non_data_require failed: address %p in [%s%s], " |
2501 | "size too large %zd > %zd" , addr, |
2502 | zone_heap_name(z), zone_name(z), |
2503 | (size_t)size, (size_t)zone_elem_inner_size(z)); |
2504 | } else { |
2505 | panic("kalloc_non_data_require failed: address %p not in zone native map" , |
2506 | addr); |
2507 | } |
2508 | } |
2509 | |
2510 | void |
2511 | kalloc_data_require(void *addr, vm_size_t size) |
2512 | { |
	zone_id_t zid = zone_id_for_element(addr, size);
2514 | |
2515 | if (zid != ZONE_ID_INVALID) { |
2516 | zone_t z = &zone_array[zid]; |
2517 | zone_security_flags_t zsflags = zone_security_array[zid]; |
2518 | if (zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS && |
		    size <= zone_elem_inner_size(z)) {
2520 | return; |
2521 | } |
	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
	    (vm_address_t)pgz_decode(addr, size), size)) {
2524 | return; |
2525 | } |
2526 | |
2527 | kalloc_data_require_panic(addr, size); |
2528 | } |
2529 | |
2530 | void |
2531 | kalloc_non_data_require(void *addr, vm_size_t size) |
2532 | { |
	zone_id_t zid = zone_id_for_element(addr, size);
2534 | |
2535 | if (zid != ZONE_ID_INVALID) { |
2536 | zone_t z = &zone_array[zid]; |
2537 | zone_security_flags_t zsflags = zone_security_array[zid]; |
2538 | switch (zsflags.z_kheap_id) { |
2539 | case KHEAP_ID_NONE: |
2540 | if (!zsflags.z_kalloc_type) { |
2541 | break; |
2542 | } |
2543 | OS_FALLTHROUGH; |
2544 | case KHEAP_ID_KT_VAR: |
			if (size < zone_elem_inner_size(z)) {
2546 | return; |
2547 | } |
2548 | break; |
2549 | default: |
2550 | break; |
2551 | } |
	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
	    (vm_address_t)pgz_decode(addr, size), size)) {
2554 | return; |
2555 | } |
2556 | |
2557 | kalloc_non_data_require_panic(addr, size); |
2558 | } |
2559 | |
2560 | void * |
2561 | kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags) |
2562 | { |
2563 | /* |
2564 | * Callsites from a kext that aren't in the BootKC on macOS or |
2565 | * any callsites on armv7 are not processed during startup, |
2566 | * default to using kheap_alloc |
2567 | * |
2568 | * Additionally when size is greater KHEAP_MAX_SIZE zone is left |
2569 | * NULL as we need to use the vm for the allocation |
2570 | * |
2571 | */ |
2572 | if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) { |
		vm_size_t size = kalloc_type_get_size(kt_view->kt_size);
		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
		return kalloc_ext(KHEAP_DEFAULT, size, flags, NULL).addr;
2576 | } |
2577 | |
2578 | flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC); |
2579 | return kalloc_type_impl(kt_view, flags); |
2580 | } |
2581 | |
2582 | void * |
2583 | kalloc_type_var_impl_external( |
2584 | kalloc_type_var_view_t kt_view, |
2585 | vm_size_t size, |
2586 | zalloc_flags_t flags, |
2587 | void *owner); |
2588 | void * |
2589 | kalloc_type_var_impl_external( |
2590 | kalloc_type_var_view_t kt_view, |
2591 | vm_size_t size, |
2592 | zalloc_flags_t flags, |
2593 | void *owner) |
2594 | { |
2595 | flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC); |
2596 | return kalloc_type_var_impl(kt_view, size, flags, owner); |
2597 | } |
2598 | |
2599 | #pragma mark kfree |
2600 | |
2601 | __abortlike |
2602 | static void |
2603 | kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z) |
2604 | { |
2605 | zone_security_flags_t zsflags = zone_security_config(z); |
2606 | const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id]; |
2607 | |
2608 | if (zsflags.z_kalloc_type) { |
2609 | panic_include_kalloc_types = true; |
2610 | kalloc_type_src_zone = z; |
2611 | panic("kfree: addr %p found in kalloc type zone '%s'" |
2612 | "but being freed to %s heap" , data, z->z_name, kheap_name); |
2613 | } |
2614 | |
2615 | if (zsflags.z_kheap_id == KHEAP_ID_NONE) { |
2616 | panic("kfree: addr %p, size %zd found in regular zone '%s%s'" , |
2617 | data, size, zone_heap_name(z), z->z_name); |
2618 | } else { |
2619 | panic("kfree: addr %p, size %zd found in heap %s* instead of %s*" , |
2620 | data, size, zone_heap_name(z), kheap_name); |
2621 | } |
2622 | } |
2623 | |
2624 | __abortlike |
2625 | static void |
2626 | kfree_size_confusion_panic(zone_t z, void *data, |
2627 | size_t oob_offs, size_t size, size_t zsize) |
2628 | { |
2629 | if (z) { |
2630 | panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' " |
2631 | "with elem_size %zd" , |
2632 | data, size, oob_offs, zone_heap_name(z), z->z_name, zsize); |
2633 | } else { |
2634 | panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone" , |
2635 | data, size, oob_offs); |
2636 | } |
2637 | } |
2638 | |
2639 | __abortlike |
2640 | static void |
2641 | kfree_size_invalid_panic(void *data, size_t size) |
2642 | { |
2643 | panic("kfree: addr %p trying to free with nonsensical size %zd" , |
2644 | data, size); |
2645 | } |
2646 | |
2647 | __abortlike |
2648 | static void |
2649 | kfree_size_require_panic(void *data, size_t size, size_t min_size, |
2650 | size_t max_size) |
2651 | { |
2652 | panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]" , |
2653 | data, size, min_size, max_size); |
2654 | } |
2655 | |
2656 | static void |
2657 | kfree_size_require( |
2658 | kalloc_heap_t kheap, |
2659 | void *addr, |
2660 | vm_size_t min_size, |
2661 | vm_size_t max_size) |
2662 | { |
2663 | assert3u(min_size, <=, max_size); |
	zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
	vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
	if (elem_size > max_zone_size || elem_size < min_size) {
		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2669 | } |
2670 | } |
2671 | |
2672 | static void |
2673 | kfree_large( |
2674 | vm_offset_t addr, |
2675 | vm_size_t size, |
2676 | kmf_flags_t flags, |
2677 | void *owner) |
2678 | { |
	size = kmem_free_guard(kernel_map, addr, size,
	    flags | KMF_TAG | KMF_KASAN_GUARD,
	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2682 | |
2683 | counter_dec(&kalloc_large_count); |
	counter_add(&kalloc_large_total, -(uint64_t)size);
	KALLOC_ZINFO_SFREE(size);
2686 | DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr); |
2687 | } |
2688 | |
2689 | static void |
2690 | kfree_zone( |
2691 | void *kheap_or_kt_view __unsafe_indexable, |
2692 | void *data, |
2693 | vm_size_t size, |
2694 | zone_t z, |
2695 | vm_size_t zsize) |
2696 | { |
2697 | zone_security_flags_t zsflags = zone_security_config(z); |
2698 | kalloc_type_var_view_t kt_view; |
2699 | kalloc_heap_t kheap; |
2700 | zone_stats_t zstats = NULL; |
2701 | |
2702 | if (kt_is_var_view(kheap_or_kt_view)) { |
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2704 | kheap = kalloc_type_get_heap(kt_view, true); |
2705 | /* |
2706 | * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT |
2707 | * we will end up having incorrect stats. Cross frees may happen on |
2708 | * macOS due to allocation from an unprocessed view and free from |
2709 | * a processed view or vice versa. |
2710 | */ |
2711 | zstats = kt_view->kt_stats; |
2712 | } else { |
2713 | kt_view = NULL; |
2714 | kheap = kheap_or_kt_view; |
2715 | } |
2716 | |
2717 | if (!zstats) { |
2718 | zstats = kheap->kh_stats; |
2719 | } |
2720 | |
2722 | if (kheap == KHEAP_DATA_BUFFERS) { |
2723 | if (kheap->kh_heap_id != zsflags.z_kheap_id) { |
2724 | kfree_heap_confusion_panic(kheap, data, size, z); |
2725 | } |
2726 | } else { |
2727 | if ((kheap->kh_heap_id != zsflags.z_kheap_id) && |
2728 | (zsflags.z_kheap_id != KHEAP_ID_SHARED)) { |
2729 | kfree_heap_confusion_panic(kheap, data, size, z); |
2730 | } |
2731 | } |
2732 | |
2733 | DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data); |
2734 | |
2735 | /* needs to be __nosan because the user size might be partial */ |
	__nosan_bzero(data, zsize);
	zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
2738 | } |
2739 | |
2740 | void |
2741 | kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size) |
2742 | { |
2743 | vm_size_t bucket_size; |
2744 | zone_t z; |
2745 | |
2746 | if (data == NULL) { |
2747 | return; |
2748 | } |
2749 | |
2750 | if (size > KFREE_ABSURD_SIZE) { |
2751 | kfree_size_invalid_panic(data, size); |
2752 | } |
2753 | |
2754 | if (size <= KHEAP_MAX_SIZE) { |
2755 | vm_size_t oob_offs; |
2756 | |
		bucket_size = zone_element_size(data, &z, true, &oob_offs);
2758 | if (size + oob_offs > bucket_size || bucket_size == 0) { |
			kfree_size_confusion_panic(z, data,
			    oob_offs, size, bucket_size);
2761 | } |
2762 | |
2763 | data = (char *)data - oob_offs; |
		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2765 | } else { |
		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2767 | } |
2768 | } |
2769 | |
2770 | void |
2771 | kfree_addr_ext(kalloc_heap_t kheap, void *data) |
2772 | { |
2773 | vm_offset_t oob_offs; |
2774 | vm_size_t size, usize = 0; |
2775 | zone_t z; |
2776 | |
2777 | if (data == NULL) { |
2778 | return; |
2779 | } |
2780 | |
	size = zone_element_size(data, &z, true, &oob_offs);
2782 | if (size) { |
2783 | #if KASAN_CLASSIC |
2784 | usize = kasan_user_size((vm_offset_t)data); |
2785 | #endif |
2786 | data = (char *)data - oob_offs; |
		kfree_zone(kheap, data, usize, z, size);
2788 | } else { |
		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2790 | } |
2791 | } |
2792 | |
2793 | #if XNU_PLATFORM_MacOSX |
2794 | void |
2795 | kfree_external(void *addr, vm_size_t size); |
2796 | void |
2797 | kfree_external(void *addr, vm_size_t size) |
2798 | { |
2799 | kalloc_heap_t kheap = KHEAP_DEFAULT; |
2800 | |
	kfree_ext(kheap, addr, size);
2802 | } |
2803 | #endif /* XNU_PLATFORM_MacOSX */ |
2804 | |
2805 | void |
2806 | (kheap_free_bounded)(kalloc_heap_t kheap, void *addr, |
2807 | vm_size_t min_sz, vm_size_t max_sz) |
2808 | { |
2809 | if (__improbable(addr == NULL)) { |
2810 | return; |
2811 | } |
	kfree_size_require(kheap, addr, min_sz, max_sz);
	kfree_addr_ext(kheap, addr);
2814 | } |
2815 | |
2816 | void * |
2817 | kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags) |
2818 | { |
2819 | zone_stats_t zs = kt_view->kt_zv.zv_stats; |
2820 | zone_t z = kt_view->kt_zv.zv_zone; |
2821 | zone_stats_t zs_cpu = zpercpu_get(zs); |
2822 | |
2823 | if ((flags & Z_SET_NOTSHARED) || |
2824 | os_atomic_load(&zs_cpu->zs_alloc_not_shared, relaxed)) { |
		return zalloc_ext(z, zs, flags).addr;
2826 | } |
2827 | |
2828 | assert(zone_security_config(z).z_kheap_id != KHEAP_ID_DATA_BUFFERS); |
	return zalloc_ext(kt_view->kt_zshared, zs, flags | Z_SET_NOTSHARED).addr;
2830 | } |
2831 | |
2832 | void |
2833 | kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr) |
2834 | { |
2835 | /* |
2836 | * If callsite is from a kext that isn't in the BootKC, it wasn't |
2837 | * processed during startup so default to using kheap_alloc |
2838 | * |
2839 | * Additionally when size is greater KHEAP_MAX_SIZE zone is left |
2840 | * NULL as we need to use the vm for the allocation/free |
2841 | */ |
2842 | if (kt_view->kt_zv.zv_zone == ZONE_NULL) { |
2843 | return kheap_free(KHEAP_DEFAULT, ptr, |
2844 | kalloc_type_get_size(kt_view->kt_size)); |
2845 | } |
2846 | return kfree_type_impl(kt_view, ptr); |
2847 | } |
2848 | |
2849 | void |
2850 | kfree_type_var_impl_external( |
2851 | kalloc_type_var_view_t kt_view, |
2852 | void *ptr, |
2853 | vm_size_t size); |
2854 | void |
2855 | kfree_type_var_impl_external( |
2856 | kalloc_type_var_view_t kt_view, |
2857 | void *ptr, |
2858 | vm_size_t size) |
2859 | { |
2860 | return kfree_type_var_impl(kt_view, ptr, size); |
2861 | } |
2862 | |
2863 | void |
2864 | kfree_data_external(void *ptr, vm_size_t size); |
2865 | void |
2866 | kfree_data_external(void *ptr, vm_size_t size) |
2867 | { |
2868 | return kheap_free(KHEAP_DATA_BUFFERS, ptr, size); |
2869 | } |
2870 | |
2871 | void |
2872 | kfree_data_addr_external(void *ptr); |
2873 | void |
2874 | kfree_data_addr_external(void *ptr) |
2875 | { |
2876 | return kheap_free_addr(KHEAP_DATA_BUFFERS, ptr); |
2877 | } |
2878 | |
2879 | #pragma mark krealloc |
2880 | |
2881 | __abortlike |
2882 | static void |
2883 | krealloc_size_invalid_panic(void *data, size_t size) |
2884 | { |
2885 | panic("krealloc: addr %p trying to free with nonsensical size %zd" , |
2886 | data, size); |
2887 | } |
2888 | |
2889 | __attribute__((noinline)) |
2890 | static struct kalloc_result |
2891 | krealloc_large( |
2892 | kalloc_heap_t kheap, |
2893 | vm_offset_t addr, |
2894 | vm_size_t old_size, |
2895 | vm_size_t new_size, |
2896 | zalloc_flags_t flags, |
2897 | uint16_t kt_hash, |
2898 | void *owner __unused) |
2899 | { |
2900 | kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_TAG | KMR_KASAN_GUARD; |
2901 | vm_size_t new_req_size = new_size; |
2902 | vm_size_t old_req_size = old_size; |
2903 | uint64_t delta; |
2904 | kmem_return_t kmr; |
2905 | vm_tag_t tag; |
2906 | |
2907 | if (flags & Z_NOFAIL) { |
2908 | panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)" , |
2909 | (size_t)new_req_size); |
2910 | } |
2911 | |
2912 | /* |
2913 | * kmem_alloc could block so we return if noblock |
2914 | * |
2915 | * also, reject sizes larger than our address space is quickly, |
2916 | * as kt_size or IOMallocArraySize() expect this. |
2917 | */ |
2918 | if ((flags & Z_NOWAIT) || |
2919 | (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) { |
2920 | return (struct kalloc_result){ }; |
2921 | } |
2922 | |
2923 | /* |
2924 | * (73465472) on Intel we didn't use to pass this flag, |
	 * which in turn allowed kalloc_large() memory to be shared
2926 | * with user directly. |
2927 | * |
2928 | * We're bound by this unfortunate ABI. |
2929 | */ |
2930 | if ((flags & Z_MAY_COPYINMAP) == 0) { |
2931 | #ifndef __x86_64__ |
2932 | kmr_flags |= KMR_KOBJECT; |
2933 | #endif |
2934 | } else { |
2935 | assert(kheap == KHEAP_DATA_BUFFERS); |
2936 | kmr_flags &= ~KMR_TAG; |
2937 | } |
2938 | if (flags & Z_NOPAGEWAIT) { |
2939 | kmr_flags |= KMR_NOPAGEWAIT; |
2940 | } |
2941 | if (flags & Z_ZERO) { |
2942 | kmr_flags |= KMR_ZERO; |
2943 | } |
2944 | if (kheap == KHEAP_DATA_BUFFERS) { |
2945 | kmr_flags |= KMR_DATA; |
2946 | } else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) { |
2947 | kmr_flags |= KMR_SPRAYQTN; |
2948 | } |
2949 | if (flags & Z_REALLOCF) { |
2950 | kmr_flags |= KMR_REALLOCF; |
2951 | } |
2952 | |
2953 | |
2954 | tag = zalloc_flags_get_tag(flags); |
2955 | if (flags & Z_VM_TAG_BT_BIT) { |
2956 | tag = vm_tag_bt() ?: tag; |
2957 | } |
2958 | if (tag == VM_KERN_MEMORY_NONE) { |
2959 | tag = kheap->kh_tag; |
2960 | } |
2961 | |
	kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
	    kmr_flags, kalloc_guard(tag, kt_hash, owner));
2964 | |
	new_size = round_page(new_req_size);
	old_size = round_page(old_req_size);
2967 | |
2968 | if (kmr.kmr_address != 0) { |
2969 | delta = (uint64_t)(new_size - old_size); |
2970 | } else if (flags & Z_REALLOCF) { |
2971 | counter_dec(&kalloc_large_count); |
2972 | delta = (uint64_t)(-old_size); |
2973 | } else { |
2974 | delta = 0; |
2975 | } |
2976 | |
2977 | counter_add(&kalloc_large_total, amount: delta); |
2978 | KALLOC_ZINFO_SALLOC(bytes: delta); |
2979 | |
2980 | if (addr != 0 || (flags & Z_REALLOCF)) { |
2981 | DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size, |
2982 | void*, addr); |
2983 | } |
2984 | if (__improbable(kmr.kmr_address == 0)) { |
2985 | return (struct kalloc_result){ }; |
2986 | } |
2987 | |
2988 | DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size, |
2989 | void*, kmr.kmr_address); |
2990 | |
2991 | if (flags & Z_KALLOC_ARRAY) { |
		kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
		    new_req_size);
2994 | } |
2995 | return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size }; |
2996 | } |
2997 | |
2998 | #undef krealloc_ext |
2999 | |
3000 | struct kalloc_result |
3001 | krealloc_ext( |
3002 | void *kheap_or_kt_view __unsafe_indexable, |
3003 | void *addr, |
3004 | vm_size_t old_size, |
3005 | vm_size_t new_size, |
3006 | zalloc_flags_t flags, |
3007 | void *owner) |
3008 | { |
3009 | vm_size_t old_bucket_size, new_bucket_size, min_size; |
3010 | kalloc_type_var_view_t kt_view; |
3011 | kalloc_heap_t kheap; |
3012 | zone_stats_t zstats = NULL; |
3013 | struct kalloc_result kr; |
3014 | vm_offset_t oob_offs = 0; |
3015 | zone_t old_z, new_z; |
3016 | uint16_t kt_hash = 0; |
3017 | zone_id_t zstart; |
3018 | |
3019 | if (old_size > KFREE_ABSURD_SIZE) { |
		krealloc_size_invalid_panic(addr, old_size);
3021 | } |
3022 | |
3023 | if (addr == NULL && new_size == 0) { |
3024 | return (struct kalloc_result){ }; |
3025 | } |
3026 | |
3027 | if (kt_is_var_view(kheap_or_kt_view)) { |
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
3029 | kheap = kalloc_type_get_heap(kt_view, false); |
3030 | /* |
3031 | * Similar to kalloc_ext: Use stats from view if present, |
3032 | * else use stats from kheap. |
3033 | * |
3034 | * krealloc_type isn't exposed to kexts, so we don't need to |
3035 | * handle cross frees and can rely on stats from view or kheap. |
3036 | */ |
3037 | zstats = kt_view->kt_stats; |
3038 | kt_hash = KT_GET_HASH(kt_view->kt_flags); |
3039 | zstart = kt_view->kt_heap_start ?: kheap->kh_zstart; |
3040 | } else { |
3041 | kt_view = NULL; |
3042 | kheap = kheap_or_kt_view; |
3043 | kt_hash = kheap->kh_type_hash; |
3044 | zstart = kheap->kh_zstart; |
3045 | } |
3046 | |
3047 | if (!zstats) { |
3048 | zstats = kheap->kh_stats; |
3049 | } |
3050 | /* |
3051 | * Find out the size of the bucket in which the new sized allocation |
3052 | * would land. If it matches the bucket of the original allocation, |
3053 | * simply return the same address. |
3054 | */ |
3055 | if (new_size == 0) { |
3056 | new_z = ZONE_NULL; |
3057 | new_bucket_size = new_size = 0; |
3058 | } else { |
		zstart = kalloc_use_shared_heap(kheap, zstats, zstart, &flags);
		new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
		new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
3062 | } |
3063 | #if !KASAN_CLASSIC |
3064 | if (flags & Z_FULLSIZE) { |
3065 | new_size = new_bucket_size; |
3066 | } |
3067 | #endif /* !KASAN_CLASSIC */ |
3068 | |
3069 | if (addr == NULL) { |
3070 | old_z = ZONE_NULL; |
3071 | old_size = old_bucket_size = 0; |
	} else if (kheap_size_from_zone(addr, old_size, flags)) {
		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
			kfree_size_confusion_panic(old_z, addr,
			    oob_offs, old_size, old_bucket_size);
3077 | } |
3078 | __builtin_assume(old_z != ZONE_NULL); |
3079 | } else { |
3080 | old_z = ZONE_NULL; |
		old_bucket_size = round_page(old_size);
3082 | } |
3083 | min_size = MIN(old_size, new_size); |
3084 | |
3085 | if (old_bucket_size == new_bucket_size && old_z) { |
3086 | kr.addr = (char *)addr - oob_offs; |
3087 | kr.size = new_size; |
3088 | #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST) |
		kr.addr = zone_element_pgz_oob_adjust(kr.addr,
		    new_size, new_bucket_size);
		if (kr.addr != addr) {
			memmove(kr.addr, addr, min_size);
			bzero((char *)kr.addr + min_size,
			    kr.size - min_size);
		}
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
3097 | #if KASAN |
3098 | /* |
3099 | * On KASAN kernels, treat a reallocation effectively as a new |
3100 | * allocation and add a sanity check around the existing one |
3101 | * w.r.t. the old requested size. On KASAN_CLASSIC this doesn't account |
3102 | * to much extra work, on KASAN_TBI, assign a new tag both to the |
3103 | * buffer and to the potential free space. |
3104 | */ |
3105 | #if KASAN_CLASSIC |
3106 | kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size); |
3107 | kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size, |
3108 | KASAN_GUARD_SIZE, false, __builtin_frame_address(0)); |
3109 | #endif /* KASAN_CLASSIC */ |
3110 | #if KASAN_TBI |
3111 | /* |
3112 | * Validate the current buffer, then generate a new tag, |
3113 | * even if the address is stable, it's a "new" allocation. |
3114 | */ |
3115 | __asan_loadN((vm_offset_t)addr, old_size); |
3116 | kr.addr = (void *)vm_memtag_assign_tag((vm_offset_t)kr.addr, kr.size); |
3117 | vm_memtag_set_tag((vm_offset_t)kr.addr, kr.size); |
3118 | kasan_tbi_retag_unused_space((vm_offset_t)kr.addr, new_bucket_size, kr.size); |
3119 | #endif /* KASAN_TBI */ |
3120 | #endif /* KASAN */ |
3121 | goto out_success; |
3122 | } |
3123 | |
3124 | #if !KASAN |
3125 | /* |
3126 | * Fallthrough to krealloc_large() for KASAN, |
3127 | * because we can't use kasan_check_alloc() |
3128 | * on kalloc_large() memory. |
3129 | * |
3130 | * kmem_realloc_guard() will perform all the validations, |
3131 | * and re-tagging. |
3132 | */ |
3133 | if (old_bucket_size == new_bucket_size) { |
3134 | kr.addr = (char *)addr - oob_offs; |
3135 | kr.size = new_size; |
3136 | goto out_success; |
3137 | } |
3138 | #endif |
3139 | |
3140 | if (addr && !old_z && new_size && !new_z) { |
		return krealloc_large(kheap, (vm_offset_t)addr,
		    old_size, new_size, flags, kt_hash, owner);
3143 | } |
3144 | |
3145 | if (!new_size) { |
3146 | kr.addr = NULL; |
3147 | kr.size = 0; |
3148 | } else if (new_z) { |
		kr = kalloc_zone(new_z, zstats,
		    flags & ~Z_KALLOC_ARRAY, new_size);
3151 | } else if (old_z || addr == NULL) { |
		kr = kalloc_large(kheap, new_size,
		    flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
3154 | } |
3155 | |
3156 | if (addr && kr.addr) { |
		__nosan_memcpy(kr.addr, addr, min_size);
3158 | } |
3159 | |
3160 | if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) { |
3161 | if (old_z) { |
			kfree_zone(kheap_or_kt_view,
			    (char *)addr - oob_offs, old_size,
			    old_z, old_bucket_size);
3165 | } else { |
			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
3167 | } |
3168 | } |
3169 | |
3170 | if (__improbable(kr.addr == NULL)) { |
3171 | return kr; |
3172 | } |
3173 | |
3174 | out_success: |
3175 | if ((flags & Z_KALLOC_ARRAY) == 0) { |
3176 | return kr; |
3177 | } |
3178 | |
3179 | if (new_z) { |
		kr.addr = __kalloc_array_encode_zone(new_z,
		    kr.addr, kr.size);
	} else {
		kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
		    kr.size);
3185 | } |
3186 | return kr; |
3187 | } |
3188 | |
3189 | void * |
3190 | krealloc_data_external( |
3191 | void *ptr, |
3192 | vm_size_t old_size, |
3193 | vm_size_t new_size, |
3194 | zalloc_flags_t flags); |
3195 | void * |
3196 | krealloc_data_external( |
3197 | void *ptr, |
3198 | vm_size_t old_size, |
3199 | vm_size_t new_size, |
3200 | zalloc_flags_t flags) |
3201 | { |
3202 | flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA); |
	return krealloc_ext(KHEAP_DATA_BUFFERS, ptr, old_size, new_size, flags, NULL).addr;
3204 | } |
3205 | |
3206 | __startup_func |
3207 | static void |
3208 | kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap) |
3209 | { |
3210 | kheap->kh_zstart = parent_heap->kh_zstart; |
3211 | kheap->kh_heap_id = parent_heap->kh_heap_id; |
3212 | kheap->kh_tag = parent_heap->kh_tag; |
3213 | kheap->kh_stats = zalloc_percpu_permanent_type(struct zone_stats); |
3214 | zone_view_count += 1; |
3215 | } |
3216 | |
3217 | __startup_func |
3218 | static void |
3219 | kheap_init_data(kalloc_heap_t kheap) |
3220 | { |
	kheap_init(KHEAP_DATA_BUFFERS, kheap);
3222 | kheap->kh_views = KHEAP_DATA_BUFFERS->kh_views; |
3223 | KHEAP_DATA_BUFFERS->kh_views = kheap; |
3224 | } |
3225 | |
3226 | __startup_func |
3227 | static void |
3228 | kheap_init_var(kalloc_heap_t kheap) |
3229 | { |
3230 | uint16_t idx; |
3231 | struct kheap_info *parent_heap; |
3232 | |
	kheap_init(KHEAP_KT_VAR, kheap);
	idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
	    KT_VAR__FIRST_FLEXIBLE_HEAP;
	parent_heap = &kalloc_type_heap_array[idx];
	kheap->kh_zstart = parent_heap->kh_zstart;
	kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
		(uint32_t) early_random(), 0);
3240 | kheap->kh_views = parent_heap->kh_views; |
3241 | parent_heap->kh_views = kheap; |
3242 | } |
3243 | |
3244 | __startup_func |
3245 | void |
3246 | kheap_startup_init(kalloc_heap_t kheap) |
3247 | { |
3248 | switch (kheap->kh_heap_id) { |
3249 | case KHEAP_ID_DATA_BUFFERS: |
3250 | kheap_init_data(kheap); |
3251 | break; |
3252 | case KHEAP_ID_KT_VAR: |
3253 | kheap_init_var(kheap); |
3254 | break; |
3255 | default: |
3256 | panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d" , |
3257 | kheap->kh_heap_id); |
3258 | } |
3259 | } |
3260 | |
3261 | #pragma mark IOKit/libkern helpers |
3262 | |
3263 | #if XNU_PLATFORM_MacOSX |
3264 | |
3265 | void * |
3266 | kern_os_malloc_external(size_t size); |
3267 | void * |
3268 | kern_os_malloc_external(size_t size) |
3269 | { |
3270 | if (size == 0) { |
3271 | return NULL; |
3272 | } |
3273 | |
3274 | return kheap_alloc(KERN_OS_MALLOC, size, |
3275 | Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN)); |
3276 | } |
3277 | |
3278 | void |
3279 | kern_os_free_external(void *addr); |
3280 | void |
3281 | kern_os_free_external(void *addr) |
3282 | { |
3283 | kheap_free_addr(KERN_OS_MALLOC, addr); |
3284 | } |
3285 | |
3286 | void * |
3287 | kern_os_realloc_external(void *addr, size_t nsize); |
3288 | void * |
3289 | kern_os_realloc_external(void *addr, size_t nsize) |
3290 | { |
3291 | zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN); |
3292 | vm_size_t osize, oob_offs = 0; |
3293 | |
3294 | if (addr == NULL) { |
		return kern_os_malloc_external(nsize);
3296 | } |
3297 | |
	osize = zone_element_size(addr, NULL, false, &oob_offs);
3299 | if (osize == 0) { |
		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
3302 | #if KASAN_CLASSIC |
3303 | } else { |
3304 | osize = kasan_user_size((vm_offset_t)addr); |
3305 | #endif |
3306 | } |
	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
3308 | } |
3309 | |
3310 | #endif /* XNU_PLATFORM_MacOSX */ |
3311 | |
3312 | void |
3313 | kern_os_zfree(zone_t zone, void *addr, vm_size_t size) |
3314 | { |
3315 | #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE) |
3316 | #pragma unused(size) |
3317 | zfree(zone, addr); |
3318 | #else |
3319 | if (zone_owns(zone, addr)) { |
3320 | zfree(zone, addr); |
3321 | } else { |
3322 | /* |
3323 | * Third party kexts might not know about the operator new |
3324 | * and be allocated from the default heap |
3325 | */ |
3326 | printf("kern_os_zfree: kheap_free called for object from zone %s\n" , |
3327 | zone->z_name); |
3328 | kheap_free(KHEAP_DEFAULT, addr, size); |
3329 | } |
3330 | #endif |
3331 | } |
3332 | |
3333 | bool |
3334 | IOMallocType_from_vm(kalloc_type_view_t ktv) |
3335 | { |
	return kalloc_type_from_vm(ktv->kt_flags);
3337 | } |
3338 | |
3339 | void |
3340 | kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize) |
3341 | { |
3342 | #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE) |
3343 | #pragma unused(esize) |
3344 | #else |
3345 | /* |
3346 | * For third party kexts that have been compiled with sdk pre macOS 11, |
3347 | * an allocation of an OSObject that is defined in xnu or first pary |
3348 | * kexts, by directly calling new will lead to using the default heap |
3349 | * as it will call OSObject_operator_new_external. If this object |
3350 | * is freed by xnu, it panics as xnu uses the typed free which |
3351 | * requires the object to have been allocated in a kalloc.type zone. |
3352 | * To workaround this issue, detect if the allocation being freed is |
3353 | * from the default heap and allow freeing to it. |
3354 | */ |
3355 | zone_id_t zid = zone_id_for_element(addr, esize); |
3356 | if (__probable(zid < MAX_ZONES)) { |
3357 | zone_security_flags_t zsflags = zone_security_array[zid]; |
3358 | if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) { |
3359 | return kheap_free(KHEAP_DEFAULT, addr, esize); |
3360 | } |
3361 | } |
3362 | #endif |
	kfree_type_impl_external(ktv, addr);
3364 | } |

#pragma mark tests
#if DEBUG || DEVELOPMENT

#include <sys/random.h>

/*
 * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
 *
 * Note: the presence of zones named kalloc.type* is used to
 * determine whether the feature is on.
 */
static int
kalloc_type_feature_on(void)
{
	boolean_t zone_found = false;
	const char kalloc_type_str[] = "kalloc.type";
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			zone_found = true;
			if (strncmp(z->z_name, kalloc_type_str,
			    strlen(kalloc_type_str)) != 0) {
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!zone_found) {
		return 0;
	}

	return 1;
}
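
/*
 * For illustration: every zone chained off kalloc_type_zarray is
 * expected to carry the "kalloc.type" prefix in its z_name (e.g. a
 * hypothetical "kalloc.type.48" bucket); any chained zone without
 * that prefix makes the check above report the feature as off.
 */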

/*
 * Ensure that the policy consumes the zone budget completely.
 */
static int
kalloc_type_test_policy(int64_t in)
{
	uint16_t zone_budget = (uint16_t)in;
	uint16_t max_bucket_freq = 25;
	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
	uint16_t wasted_zone_budget = 0, total_types = 0;
	uint16_t n_zones = 0, n_zones_cal = 0;
	int ret = 0;

	/*
	 * Need a minimum of 2 zones per size class
	 */
	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
		return ret;
	}
	read_random((void *)&random[0], sizeof(random));
	/*
	 * Draw a random frequency pair per size class: the smaller value
	 * plays the per-signature frequency and the larger the total type
	 * frequency, so freq_list[i] <= freq_total_list[i] always holds.
	 */
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;

		freq_list[i] = r1 > r2 ? r2 : r1;
		freq_total_list[i] = r1 > r2 ? r1 : r2;
	}
	wasted_zone_budget = kalloc_type_apply_policy(
		freq_list, freq_total_list,
		zones_per_sig, zones_per_type, zone_budget);

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		total_types += freq_total_list[i];
	}

	n_zones = kmem_get_random16(total_types);
	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
		    freq_total_list[i], total_types,
		    i == MAX_K_ZONE(kt_zone_cfg) - 1);

		n_zones_cal += n_zones_for_type;

		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
	}
	printf("-----------------------\n%u\t%u\n", total_types,
	    n_zones_cal);

	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
		ret = 1;
	}
	return ret;
}
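
/*
 * Worked example (illustrative numbers): with total_types == 100 and
 * n_zones == 10, a bucket contributing freq_total_list[i] == 25 should
 * receive roughly a quarter of the 10 zones, with the final bucket
 * absorbing any rounding remainder; that is why the exact
 * n_zones == n_zones_cal check above must hold.
 */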

/*
 * Ensure that the sizes of kalloc_type adopters fit in the zones
 * they have been assigned.
 */
static int
kalloc_type_check_size(zone_t z)
{
	kalloc_type_view_t kt_cur = (kalloc_type_view_t)z->z_views;

	while (kt_cur != NULL) {
		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
			return 0;
		}
		kt_cur = (kalloc_type_view_t)kt_cur->kt_zv.zv_next;
	}

	return 1;
}
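
/*
 * For example, a 40-byte adopter assigned to a 48-byte element zone
 * passes this check; had the policy placed it in a 32-byte zone, the
 * kalloc_type_get_size() comparison above would fail. (Sizes are
 * illustrative.)
 */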

struct test_kt_data {
	int a;
};

static int
kalloc_type_test_data_redirect(void)
{
	/* test_kt_data holds only plain data, so its signature is all-data */
	struct kalloc_type_view ktv_data = {
		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
	};
	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
		printf("%s: data redirect failed\n", __func__);
		return 0;
	}
	return 1;
}

static int
run_kalloc_type_test(int64_t in, int64_t *out)
{
	*out = 0;
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			if (!kalloc_type_check_size(z)) {
				printf("%s: size check failed\n", __func__);
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!kalloc_type_test_policy(in)) {
		printf("%s: policy check failed\n", __func__);
		return 0;
	}

	if (!kalloc_type_feature_on()) {
		printf("%s: boot-arg is on but feature isn't\n", __func__);
		return 0;
	}

	if (!kalloc_type_test_data_redirect()) {
		printf("%s: kalloc_type redirect for all-data signature failed\n",
		    __func__);
		return 0;
	}

	printf("%s: test passed\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);

/*
 * Report the size class that would actually back a request: the inner
 * element size of the matching kalloc zone, or the page-rounded size
 * once the request is large enough to fall through to the VM.
 */
static vm_size_t
test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
{
	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);

	return z ? zone_elem_inner_size(z) : round_page(size);
}

static int
run_kalloc_test(int64_t in __unused, int64_t *out)
{
	*out = 0;
	uint64_t *data_ptr;
	void *strippedp_old, *strippedp_new;
	size_t alloc_size = 0, old_alloc_size = 0;
	struct kalloc_result kr = {};

	printf("%s: test running\n", __func__);

	/*
	 * Test size 0: alloc, free, realloc
	 */
	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	if (!data_ptr) {
		printf("%s: kalloc 0 returned null\n", __func__);
		return 0;
	}
	kheap_free(KHEAP_DATA_BUFFERS, data_ptr, alloc_size);

	/* capture the result: kr.addr seeds the realloc-from-size-0 below */
	kr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL);
	alloc_size = sizeof(uint64_t) + 1;
	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, kr.addr, old_alloc_size,
	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> old size 0 failed\n", __func__);
		return 0;
	}
	*data_ptr = 0;

	/*
	 * Test krealloc: same size class, different size classes, 2 pages,
	 * VM (with owner)
	 */
	old_alloc_size = alloc_size;
	alloc_size++;
	kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_address((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_address((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old != strippedp_new) ||
	    (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) !=
	    test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
		printf("%s: krealloc -> same size class failed\n", __func__);
		return 0;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	old_alloc_size = alloc_size;
	alloc_size *= 2;
	kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_address((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_address((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old == strippedp_new) ||
	    (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) ==
	    test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
		printf("%s: krealloc -> different size class failed\n", __func__);
		return 0;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	kheap_free(KHEAP_DATA_BUFFERS, kr.addr, alloc_size);
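
	/*
	 * Together, the two krealloc cases above pin down the movement
	 * contract this test expects: staying within a size class keeps
	 * the element in place, while crossing into a larger class must
	 * relocate it to a different bucket.
	 */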

	alloc_size = 3544;
	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size,
	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned null\n",
		    __func__);
		return 0;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, alloc_size,
	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> 2pgs returned null\n", __func__);
		return 0;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, PAGE_SIZE * 2,
	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM1 returned null\n", __func__);
		return 0;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 2,
	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM2 returned null\n", __func__);
		return 0;
	}
	*data_ptr = 0;
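
	/*
	 * Sizes past KHEAP_MAX_SIZE leave the zone allocator and are
	 * served by kmem, which is what the VM1/VM2 steps above exercise;
	 * &data_ptr drives the owner-tracked path noted in the comment
	 * opening this krealloc sequence. The final realloc to size 0
	 * below frees the allocation.
	 */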
	krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 4,
	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);

	printf("%s: test passed\n", __func__);
	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);

#endif /* DEBUG || DEVELOPMENT */