1 | /* |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | /* |
57 | */ |
58 | |
59 | #ifndef _KERN_ZALLOC_INTERNAL_H_ |
60 | #define _KERN_ZALLOC_INTERNAL_H_ |
61 | |
62 | #include <kern/zalloc.h> |
63 | #include <kern/locks.h> |
64 | #include <kern/simple_lock.h> |
65 | |
66 | #include <os/atomic_private.h> |
67 | #include <sys/queue.h> |
68 | #include <vm/vm_map_internal.h> |
69 | |
70 | #if KASAN |
71 | #include <san/kasan.h> |
72 | #include <kern/spl.h> |
73 | #endif /* KASAN */ |
74 | |
/*
 * zalloc's zero validation is double-duty with what KASAN already does,
 * so it is disabled on KASAN kernels.
 */
#if !KASAN
#define ZALLOC_ENABLE_ZERO_CHECK        1
#else
#define ZALLOC_ENABLE_ZERO_CHECK        0
#endif

/*
 * Zone logging is compiled in only for non-KASAN DEBUG or DEVELOPMENT kernels.
 */
#if !KASAN && (DEBUG || DEVELOPMENT)
#define ZALLOC_ENABLE_LOGGING           1
#else
#define ZALLOC_ENABLE_LOGGING           0
#endif
92 | |
93 | /*! |
94 | * @file <kern/zalloc_internal.h> |
95 | * |
96 | * @abstract |
97 | * Exposes some guts of zalloc to interact with the VM, debugging, copyio and |
98 | * kalloc subsystems. |
99 | */ |
100 | |
101 | __BEGIN_DECLS |
102 | |
103 | #pragma GCC visibility push(hidden) |
104 | |
105 | /* |
106 | * A zone is a collection of fixed size blocks for which there |
107 | * is fast allocation/deallocation access. Kernel routines can |
108 | * use zones to manage data structures dynamically, creating a zone |
109 | * for each type of data structure to be managed. |
110 | * |
111 | */ |
112 | |
/*!
 * @typedef zone_pva_t
 *
 * @brief
 * Packed (32-bit) reference to a page virtual address used by the
 * zone allocator.
 *
 * @description
 * Encoding of @c packed_address:
 * - top bit set: a valid page,
 * - 0: the "NULL" page,
 * - non-zero with the top bit cleared: a queue head, indexed from the
 *   beginning of the __DATA section of the kernel (see zone_pageq_base).
 */
typedef struct zone_packed_virtual_address {
	uint32_t packed_address;
} zone_pva_t;
129 | |
/*!
 * @struct zone_stats
 *
 * @abstract
 * Basic zone statistics, kept per-cpu.
 *
 * @discussion
 * For per-cpu zones the values are not scaled.
 */
struct zone_stats {
	uint64_t            zs_mem_allocated;
	uint64_t            zs_mem_freed;
	uint64_t            zs_alloc_fail;
	uint32_t            zs_alloc_rr;     /* allocation rr bias */
	uint32_t _Atomic    zs_alloc_not_shared;
};
146 | |
typedef struct zone_magazine *zone_magazine_t;

/*!
 * @struct zone_depot
 *
 * @abstract
 * A list of full and empty magazines.
 *
 * @discussion
 * The structure combines a "STAILQ" and an "SLIST", plus counters that
 * give the length of each in O(1). For example:
 *
 *           zd_full = 3
 *           zd_empty = 1
 * ╭─── zd_head
 * │ ╭─ zd_tail
 * │ ╰────────────────────────────────────╮
 * │    ╭───────╮   ╭───────╮   ╭───────╮ v ╭───────╮
 * ╰───>│███████┼──>│███████┼──>│███████┼──>│       ┼─> X
 *      ╰───────╯   ╰───────╯   ╰───────╯   ╰───────╯
 */
struct zone_depot {
	uint32_t            zd_full;
	uint32_t            zd_empty;
	zone_magazine_t     zd_head;
	zone_magazine_t    *zd_tail;
};
174 | |
/*
 * Magic constants for the fast division helpers, see
 * https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/
 */
#define Z_MAGIC_QUO(s)      (0xffffffffull / (uint64_t)(s) + 1)
#define Z_MAGIC_ALIGNED(s)  (0xffffffffu / (uint32_t)(s) + 1)
178 | |
/*
 * Returns (offs / size) if offs is small enough
 * and magic = Z_MAGIC_QUO(size)
 *
 * The quotient is the high 32 bits of the 64-bit product; the narrowing
 * to uint32_t is intentional and therefore spelled out.
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	return (uint32_t)((offs * magic) >> 32);
}
188 | |
/*
 * Returns (offs % size) if offs is small enough
 * and magic = Z_MAGIC_QUO(size)
 *
 * Only the low 32 bits of (offs * magic) are kept; multiplying them by
 * size and taking the high 32 bits of that 64-bit product yields the
 * remainder. The final narrowing to uint32_t is intentional.
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	uint32_t lowbits = (uint32_t)(offs * magic);

	return (uint32_t)((lowbits * size) >> 32);
}
200 | |
/*
 * Tells whether (offs % size) == 0, provided offs is small enough
 * and magic = Z_MAGIC_ALIGNED(size)
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	/* only the low 32 bits of the product take part in the comparison */
	uint32_t wrapped = (uint32_t)(offs * magic);

	return wrapped < magic;
}
210 | |
/* Per-zone size parameters: element size and its alignment magic. */
struct zone_size_params {
	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED() */
	uint32_t            z_elem_size;    /* size of an element */
};
215 | |
216 | struct zone_expand { |
217 | struct zone_expand *ze_next; |
218 | thread_t ze_thread; |
219 | bool ze_pg_wait; |
220 | bool ze_vm_priv; |
221 | bool ze_clear_priv; |
222 | }; |
223 | |
/*
 * Weighted moving averages are kept in fixed point, Z_WMA_UNIT being 1.0.
 * Z_WMA_MIX() blends 3/4 of the previous average with 1/4 of the new sample.
 */
#define Z_WMA_UNIT              256u
#define Z_WMA_MIX(base, e)      (((e) * Z_WMA_UNIT + 3 * (base)) / 4)
226 | |
227 | struct zone { |
228 | /* |
229 | * Readonly / rarely written fields |
230 | */ |
231 | |
232 | /* |
233 | * The first 4 fields match a zone_view. |
234 | * |
235 | * z_self points back to the zone when the zone is initialized, |
236 | * or is NULL else. |
237 | */ |
238 | struct zone *z_self; |
239 | zone_stats_t z_stats; |
240 | const char *z_name; |
241 | struct zone_view *z_views; |
242 | struct zone_expand *z_expander; |
243 | |
244 | uint64_t z_quo_magic; |
245 | uint32_t z_align_magic; |
246 | uint16_t z_elem_size; |
247 | uint16_t z_elem_offs; |
248 | uint16_t z_chunk_pages; |
249 | uint16_t z_chunk_elems; |
250 | |
251 | uint32_t /* 32 bits */ |
252 | /* |
253 | * Lifecycle state (Mutable after creation) |
254 | */ |
255 | z_destroyed :1, /* zone is (being) destroyed */ |
256 | z_async_refilling :1, /* asynchronous allocation pending? */ |
257 | z_depot_cleanup :1, /* per cpu depots need cleaning */ |
258 | z_expanding_wait :1, /* is thread waiting for expansion? */ |
259 | z_exhausted_wait :1, /* are threads waiting for exhaustion end */ |
260 | z_exhausts :1, /* whether the zone exhausts by design */ |
261 | |
262 | /* |
263 | * Behavior configuration bits |
264 | */ |
265 | z_percpu :1, /* the zone is percpu */ |
266 | z_smr :1, /* the zone uses SMR */ |
267 | z_permanent :1, /* the zone allocations are permanent */ |
268 | z_nocaching :1, /* disallow zone caching for this zone */ |
269 | collectable :1, /* garbage collect empty pages */ |
270 | no_callout :1, |
271 | z_destructible :1, /* zone can be zdestroy()ed */ |
272 | |
273 | _reserved :6, |
274 | |
275 | /* |
276 | * Debugging features |
277 | */ |
278 | z_pgz_tracked :1, /* this zone is tracked by pgzalloc */ |
279 | z_pgz_use_guards :1, /* this zone uses guards with PGZ */ |
280 | z_kasan_fakestacks :1, |
281 | z_kasan_quarantine :1, /* whether to use the kasan quarantine */ |
282 | z_tags_sizeclass :6, /* idx into zone_tags_sizeclasses to associate |
283 | * sizeclass for a particualr kalloc tag */ |
284 | z_uses_tags :1, |
285 | z_log_on :1, /* zone logging was enabled by boot-arg */ |
286 | z_tbi_tag :1; /* Zone supports tbi tagging */ |
287 | |
288 | uint8_t z_cacheline1[0] __attribute__((aligned(64))); |
289 | |
290 | /* |
291 | * Zone caching / recirculation cacheline |
292 | * |
293 | * z_recirc* fields are protected by the recirculation lock. |
294 | * |
295 | * z_recirc_cont_wma: |
296 | * weighted moving average of the number of contentions per second, |
297 | * in Z_WMA_UNIT units (fixed point decimal). |
298 | * |
299 | * z_recirc_cont_cur: |
300 | * count of recorded contentions that will be fused |
301 | * in z_recirc_cont_wma at the next period. |
302 | * |
303 | * Note: if caching is disabled, |
304 | * this field is used under the zone lock. |
305 | * |
306 | * z_elems_free_{min,wma} (overloaded on z_recirc_empty*): |
307 | * tracks the history of the minimum values of z_elems_free over time |
308 | * with "min" being the minimum it hit for the current period, |
309 | * and "wma" the weighted moving average of those value. |
310 | * |
311 | * This field is used if z_pcpu_cache is NULL, |
312 | * otherwise it aliases with z_recirc_empty_{min,wma} |
313 | * |
314 | * z_recirc_{full,empty}_{min,wma}: |
315 | * tracks the history of the the minimum number of full/empty |
316 | * magazines in the depot over time, with "min" being the minimum |
317 | * it hit for the current period, and "wma" the weighted moving |
318 | * average of those value. |
319 | */ |
320 | struct zone_cache *__zpercpu z_pcpu_cache; |
321 | struct zone_depot z_recirc; |
322 | |
323 | hw_lck_ticket_t z_recirc_lock; |
324 | uint32_t z_recirc_full_min; |
325 | uint32_t z_recirc_full_wma; |
326 | union { |
327 | uint32_t z_recirc_empty_min; |
328 | uint32_t z_elems_free_min; |
329 | }; |
330 | union { |
331 | uint32_t z_recirc_empty_wma; |
332 | uint32_t z_elems_free_wma; |
333 | }; |
334 | uint32_t z_recirc_cont_cur; |
335 | uint32_t z_recirc_cont_wma; |
336 | |
337 | uint16_t z_depot_size; |
338 | uint16_t z_depot_limit; |
339 | |
340 | uint8_t z_cacheline2[0] __attribute__((aligned(64))); |
341 | |
342 | /* |
343 | * often mutated fields |
344 | */ |
345 | |
346 | hw_lck_ticket_t z_lock; |
347 | |
348 | /* |
349 | * Page accounting (wired / VA) |
350 | * |
351 | * Those numbers are unscaled for z_percpu zones |
352 | * (zone_scale_for_percpu() needs to be used to find the true value). |
353 | */ |
354 | uint32_t z_wired_max; /* how large can this zone grow */ |
355 | uint32_t z_wired_hwm; /* z_wired_cur high watermark */ |
356 | uint32_t z_wired_cur; /* number of pages used by this zone */ |
357 | uint32_t z_wired_empty; /* pages collectable by GC */ |
358 | uint32_t z_va_cur; /* amount of VA used by this zone */ |
359 | |
360 | /* |
361 | * list of metadata structs, which maintain per-page free element lists |
362 | */ |
363 | zone_pva_t z_pageq_empty; /* populated, completely empty pages */ |
364 | zone_pva_t z_pageq_partial;/* populated, partially filled pages */ |
365 | zone_pva_t z_pageq_full; /* populated, completely full pages */ |
366 | zone_pva_t z_pageq_va; /* non-populated VA pages */ |
367 | |
368 | /* |
369 | * Zone statistics |
370 | * |
371 | * z_elems_avail: |
372 | * number of elements in the zone (at all). |
373 | */ |
374 | uint32_t z_elems_free; /* Number of free elements */ |
375 | uint32_t z_elems_avail; /* Number of elements available */ |
376 | uint32_t z_elems_rsv; |
377 | uint32_t z_array_size_class; |
378 | |
379 | struct zone *z_kt_next; |
380 | |
381 | uint8_t z_cacheline3[0] __attribute__((aligned(64))); |
382 | |
383 | #if KASAN_CLASSIC |
384 | uint16_t z_kasan_redzone; |
385 | spl_t z_kasan_spl; |
386 | #endif |
387 | |
388 | #if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS || KASAN_TBI |
389 | /* |
390 | * the allocation logs are used when: |
391 | * |
392 | * - zlog<n>= boot-args are used (and then z_log_on is set) |
393 | * |
394 | * - the leak detection was triggered for the zone. |
395 | * In that case, the log can't ever be freed, |
396 | * but it can be enabled/disabled dynamically. |
397 | */ |
398 | struct btlog *z_btlog; |
399 | struct btlog *z_btlog_disabled; |
400 | #endif |
401 | } __attribute__((aligned((64)))); |
402 | |
403 | /*! |
404 | * @typedef zone_security_flags_t |
405 | * |
406 | * @brief |
407 | * Type used to store the immutable security properties of a zone. |
408 | * |
409 | * @description |
410 | * These properties influence the security nature of a zone and can't be |
411 | * modified after lockdown. |
412 | */ |
413 | typedef struct zone_security_flags { |
414 | uint16_t |
415 | /* |
416 | * Security sensitive configuration bits |
417 | */ |
418 | z_submap_idx :8, /* a Z_SUBMAP_IDX_* value */ |
419 | z_kheap_id :2, /* zone_kheap_id_t when part of a kalloc heap */ |
420 | z_kalloc_type :1, /* zones that does types based seggregation */ |
421 | z_lifo :1, /* depot and recirculation layer are LIFO */ |
422 | z_pgz_use_guards :1, /* this zone uses guards with PGZ */ |
423 | z_submap_from_end :1, /* allocate from the left or the right ? */ |
424 | z_noencrypt :1, /* do not encrypt pages when hibernating */ |
425 | z_unused :1; |
426 | /* |
427 | * Signature equivalance zone |
428 | */ |
429 | zone_id_t z_sig_eq; |
430 | } zone_security_flags_t; |
431 | |
432 | |
/*
 * Zsecurity config to enable strict free of iokit objects to the zone
 * or heap they were allocated from.
 *
 * ZSECURITY_CONFIG_STRICT_IOKIT_FREE is turned off on x86 macOS so as
 * not to break third party kexts that haven't yet been recompiled
 * to use the new iokit macros.
 */
#if !(XNU_PLATFORM_MacOSX && __x86_64__)
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           ON
#else
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           OFF
#endif
446 | |
/*
 * Zsecurity config to enable the read-only allocator
 * (off on KASAN_CLASSIC kernels).
 */
#if !KASAN_CLASSIC
#   define ZSECURITY_CONFIG_READ_ONLY                   ON
#else
#   define ZSECURITY_CONFIG_READ_ONLY                   OFF
#endif
455 | |
/*
 * Zsecurity config to make heap feng-shui less reliable,
 * and the matching number of general submaps.
 */
#if !KASAN_CLASSIC
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI               ON
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             4
#else
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI               OFF
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             1
#endif
467 | |
/*
 * Zsecurity config to enable adjusting of elements
 * with PGZ-OOB to right-align them in their space.
 *
 * Off for KASAN, x86_64 and CONFIG_KERNEL_TAGGING kernels.
 */
#if !(KASAN || defined(__x86_64__) || CONFIG_KERNEL_TAGGING)
#   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              ON
#else
#   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              OFF
#endif
477 | |
/*
 * Zsecurity config to enable kalloc type segregation.
 *
 * The fixed budget is smaller on watchOS and KASAN_CLASSIC kernels;
 * the variable budget is the same everywhere.
 */
#if XNU_TARGET_OS_WATCH || KASAN_CLASSIC
#   define ZSECURITY_CONFIG_KT_BUDGET                   120
#else
#   define ZSECURITY_CONFIG_KT_BUDGET                   260
#endif
#define ZSECURITY_CONFIG_KT_VAR_BUDGET                  6
488 | |
489 | |
490 | __options_decl(kalloc_type_options_t, uint64_t, { |
491 | /* |
492 | * kalloc type option to switch default accounting to private. |
493 | */ |
494 | KT_OPTIONS_ACCT = 0x00000001, |
495 | /* |
496 | * kalloc type option to print additional stats regarding zone |
497 | * budget distribution and signatures. |
498 | */ |
499 | KT_OPTIONS_DEBUG = 0x00000002, |
500 | /* |
501 | * kalloc type option to allow loose freeing between heaps |
502 | */ |
503 | KT_OPTIONS_LOOSE_FREE = 0x00000004, |
504 | }); |
505 | |
506 | __enum_decl(kt_var_heap_id_t, uint32_t, { |
507 | /* |
508 | * Fake "data" heap used to link views of data-only allocation that |
509 | * have been redirected to KHEAP_DATA_BUFFERS |
510 | */ |
511 | KT_VAR_DATA_HEAP, |
512 | /* |
513 | * Heaps for pointer arrays |
514 | */ |
515 | KT_VAR_PTR_HEAP0, |
516 | KT_VAR_PTR_HEAP1, |
517 | /* |
518 | * Indicating first additional heap added |
519 | */ |
520 | KT_VAR__FIRST_FLEXIBLE_HEAP, |
521 | }); |
522 | |
523 | /* |
524 | * Zone submap indices |
525 | * |
526 | * Z_SUBMAP_IDX_VM |
527 | * this map has the special property that its allocations |
528 | * can be done without ever locking the submap, and doesn't use |
529 | * VM entries in the map (which limits certain VM map operations on it). |
530 | * |
531 | * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone). |
532 | * |
533 | * On LP64 it is also used to restrict VM allocations on LP64 lower |
534 | * in the kernel VA space, for pointer packing purposes. |
535 | * |
536 | * Z_SUBMAP_IDX_GENERAL_{0,1,2,3} |
537 | * used for unrestricted allocations |
538 | * |
539 | * Z_SUBMAP_IDX_DATA |
540 | * used to sequester bags of bytes from all other allocations and allow VA reuse |
541 | * within the map |
542 | * |
543 | * Z_SUBMAP_IDX_READ_ONLY |
544 | * used for the read-only allocator |
545 | */ |
546 | __enum_decl(zone_submap_idx_t, uint32_t, { |
547 | Z_SUBMAP_IDX_VM, |
548 | Z_SUBMAP_IDX_READ_ONLY, |
549 | Z_SUBMAP_IDX_GENERAL_0, |
550 | #if ZSECURITY_CONFIG(SAD_FENG_SHUI) |
551 | Z_SUBMAP_IDX_GENERAL_1, |
552 | Z_SUBMAP_IDX_GENERAL_2, |
553 | Z_SUBMAP_IDX_GENERAL_3, |
554 | #endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */ |
555 | Z_SUBMAP_IDX_DATA, |
556 | |
557 | Z_SUBMAP_IDX_COUNT, |
558 | }); |
559 | |
560 | #define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN) |
561 | |
562 | /* |
563 | * Variable kalloc_type heap config |
564 | */ |
565 | struct kheap_info { |
566 | zone_id_t kh_zstart; |
567 | kalloc_heap_t kh_views; |
568 | kalloc_type_var_view_t kt_views; |
569 | }; |
570 | typedef union kalloc_type_views { |
571 | struct kalloc_type_view *ktv_fixed; |
572 | struct kalloc_type_var_view *ktv_var; |
573 | } kalloc_type_views_t; |
574 | |
575 | #define KT_VAR_MAX_HEAPS 8 |
576 | #define MAX_ZONES 690 |
577 | extern struct kheap_info kalloc_type_heap_array[KT_VAR_MAX_HEAPS]; |
578 | extern zone_id_t _Atomic num_zones; |
579 | extern uint32_t zone_view_count; |
580 | extern struct zone zone_array[MAX_ZONES]; |
581 | extern struct zone_size_params zone_ro_size_params[ZONE_ID__LAST_RO + 1]; |
582 | extern zone_security_flags_t zone_security_array[]; |
583 | extern const char * const kalloc_heap_names[KHEAP_ID_COUNT]; |
584 | extern mach_memory_info_t *panic_kext_memory_info; |
585 | extern vm_size_t panic_kext_memory_size; |
586 | extern vm_offset_t panic_fault_address; |
587 | extern uint16_t _zc_mag_size; |
588 | |
/*
 * Iterate over zone indices, from 1 up to the value of num_zones
 * sampled (acquire) when the loop starts.
 */
#define zone_index_foreach(i)                                               \
	for (zone_id_t i = 1,                                               \
	    num_zones_##i = os_atomic_load(&num_zones, acquire);            \
	    i < num_zones_##i;                                              \
	    i++)

/*
 * Iterate over zones, from zone_array[1] up to the num_zones-th entry,
 * num_zones being sampled (acquire) when the loop starts.
 */
#define zone_foreach(z)                                                     \
	for (zone_t z = &zone_array[1],                                     \
	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
	    z < last_zone_##z;                                              \
	    z++)
597 | |
598 | __abortlike |
599 | extern void zone_invalid_panic(zone_t zone); |
600 | |
601 | __pure2 |
602 | static inline zone_id_t |
603 | zone_index(zone_t z) |
604 | { |
605 | unsigned long delta; |
606 | uint64_t quo; |
607 | |
608 | delta = (unsigned long)z - (unsigned long)zone_array; |
609 | if (delta >= MAX_ZONES * sizeof(*z)) { |
610 | zone_invalid_panic(zone: z); |
611 | } |
612 | quo = Z_FAST_QUO(offs: delta, Z_MAGIC_QUO(sizeof(*z))); |
613 | __builtin_assume(quo < MAX_ZONES); |
614 | return (zone_id_t)quo; |
615 | } |
616 | |
617 | __pure2 |
618 | static inline bool |
619 | zone_is_ro(zone_t zone) |
620 | { |
621 | return zone >= &zone_array[ZONE_ID__FIRST_RO] && |
622 | zone <= &zone_array[ZONE_ID__LAST_RO]; |
623 | } |
624 | |
625 | static inline bool |
626 | zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size) |
627 | { |
628 | return atop(addr ^ (addr + size - 1)) != 0; |
629 | } |
630 | |
631 | __pure2 |
632 | static inline uint16_t |
633 | zone_elem_redzone(zone_t zone) |
634 | { |
635 | #if KASAN_CLASSIC |
636 | return zone->z_kasan_redzone; |
637 | #else |
638 | (void)zone; |
639 | return 0; |
640 | #endif |
641 | } |
642 | |
643 | __pure2 |
644 | static inline uint16_t |
645 | zone_elem_inner_offs(zone_t zone) |
646 | { |
647 | return zone->z_elem_offs; |
648 | } |
649 | |
650 | __pure2 |
651 | static inline uint16_t |
652 | zone_elem_outer_offs(zone_t zone) |
653 | { |
654 | return zone_elem_inner_offs(zone) - zone_elem_redzone(zone); |
655 | } |
656 | |
657 | __pure2 |
658 | static inline vm_offset_t |
659 | zone_elem_inner_size(zone_t zone) |
660 | { |
661 | return zone->z_elem_size; |
662 | } |
663 | |
664 | __pure2 |
665 | static inline vm_offset_t |
666 | zone_elem_outer_size(zone_t zone) |
667 | { |
668 | return zone_elem_inner_size(zone) + zone_elem_redzone(zone); |
669 | } |
670 | |
671 | __pure2 |
672 | static inline zone_security_flags_t |
673 | zone_security_config(zone_t z) |
674 | { |
675 | zone_id_t zid = zone_index(z); |
676 | return zone_security_array[zid]; |
677 | } |
678 | |
679 | static inline uint32_t |
680 | zone_count_free(zone_t zone) |
681 | { |
682 | return zone->z_elems_free + zone->z_recirc.zd_full * _zc_mag_size; |
683 | } |
684 | |
685 | static inline uint32_t |
686 | zone_count_allocated(zone_t zone) |
687 | { |
688 | return zone->z_elems_avail - zone_count_free(zone); |
689 | } |
690 | |
691 | static inline vm_size_t |
692 | zone_scale_for_percpu(zone_t zone, vm_size_t size) |
693 | { |
694 | if (zone->z_percpu) { |
695 | size *= zpercpu_count(); |
696 | } |
697 | return size; |
698 | } |
699 | |
700 | static inline vm_size_t |
701 | zone_size_wired(zone_t zone) |
702 | { |
703 | /* |
704 | * this either require the zone lock, |
705 | * or to be used for statistics purposes only. |
706 | */ |
707 | vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed)); |
708 | return zone_scale_for_percpu(zone, size); |
709 | } |
710 | |
711 | static inline vm_size_t |
712 | zone_size_free(zone_t zone) |
713 | { |
714 | return zone_scale_for_percpu(zone, |
715 | size: zone_elem_inner_size(zone) * zone_count_free(zone)); |
716 | } |
717 | |
718 | /* Under KASAN builds, this also accounts for quarantined elements. */ |
719 | static inline vm_size_t |
720 | zone_size_allocated(zone_t zone) |
721 | { |
722 | return zone_scale_for_percpu(zone, |
723 | size: zone_elem_inner_size(zone) * zone_count_allocated(zone)); |
724 | } |
725 | |
726 | static inline vm_size_t |
727 | zone_size_wasted(zone_t zone) |
728 | { |
729 | return zone_size_wired(zone) - zone_scale_for_percpu(zone, |
730 | size: zone_elem_outer_size(zone) * zone->z_elems_avail); |
731 | } |
732 | |
733 | __pure2 |
734 | static inline bool |
735 | zone_exhaustible(zone_t zone) |
736 | { |
737 | return zone->z_wired_max != ~0u; |
738 | } |
739 | |
740 | __pure2 |
741 | static inline bool |
742 | zone_exhausted(zone_t zone) |
743 | { |
744 | return zone->z_wired_cur >= zone->z_wired_max; |
745 | } |
746 | |
747 | /* |
748 | * Set and get the signature equivalance for the given zone |
749 | */ |
750 | extern void zone_set_sig_eq(zone_t zone, zone_id_t sig_eq); |
751 | extern zone_id_t zone_get_sig_eq(zone_t zone); |
752 | /* |
753 | * Return the accumulated allocated memory on the given zone stats |
754 | */ |
755 | static inline vm_size_t |
756 | zone_stats_get_mem_allocated(zone_stats_t stats) |
757 | { |
758 | return stats->zs_mem_allocated; |
759 | } |
760 | |
/*
 * Backs the kern.zones_collectable_bytes sysctl, which memory_maintenance
 * uses to decide whether a userspace reboot is needed. The only other way
 * to query this information is mach_memory_info(), which is unavailable
 * on release kernels.
 */
extern uint64_t get_zones_collectable_bytes(void);
767 | |
768 | /*! |
769 | * @enum zone_gc_level_t |
770 | * |
771 | * @const ZONE_GC_TRIM |
772 | * Request a trimming GC: it will trim allocations in excess |
773 | * of the working set size estimate only. |
774 | * |
775 | * @const ZONE_GC_DRAIN |
776 | * Request a draining GC: this is an aggressive mode that will |
777 | * cause all caches to be drained and all free pages returned to the system. |
778 | * |
779 | * @const ZONE_GC_JETSAM |
780 | * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or |
781 | * @c ZONE_GC_DRAIN depending on the state of the zone map. |
782 | * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever |
783 | * request a @c ZONE_GC_JETSAM level. |
784 | */ |
785 | __enum_closed_decl(zone_gc_level_t, uint32_t, { |
786 | ZONE_GC_TRIM, |
787 | ZONE_GC_DRAIN, |
788 | ZONE_GC_JETSAM, |
789 | }); |
790 | |
791 | /*! |
792 | * @function zone_gc |
793 | * |
794 | * @brief |
795 | * Reduces memory used by zones by trimming caches and freelists. |
796 | * |
797 | * @discussion |
798 | * @c zone_gc() is called: |
799 | * - by the pageout daemon when the system needs more free pages. |
800 | * - by the VM when contiguous page allocation requests get stuck |
801 | * (see vm_page_find_contiguous()). |
802 | * |
803 | * @param level The zone GC level requested. |
804 | */ |
805 | extern void zone_gc(zone_gc_level_t level); |
806 | |
807 | extern void zone_gc_trim(void); |
808 | extern void zone_gc_drain(void); |
809 | |
#define ZONE_WSS_UPDATE_PERIOD  15
/*!
 * @function compute_zone_working_set_size
 *
 * @brief
 * Recomputes the working set size for every zone
 *
 * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (15),
 * computing an exponential moving average with a weight of 75%,
 * so that the history of the last minute is the dominating factor.
 */
extern void     compute_zone_working_set_size(void *);
823 | |
824 | /* Debug logging for zone-map-exhaustion jetsams. */ |
825 | extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity); |
826 | extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size); |
827 | |
828 | /* Bootstrap zone module (create zone zone) */ |
829 | extern void zone_bootstrap(void); |
830 | |
831 | /* Force-enable caching on a zone, generally unsafe to call directly */ |
832 | extern void zone_enable_caching(zone_t zone); |
833 | |
834 | /*! |
835 | * @function zone_early_mem_init |
836 | * |
837 | * @brief |
838 | * Steal memory from pmap (prior to initialization of zalloc) |
839 | * for the special vm zones that allow bootstrap memory and store |
840 | * the range so as to facilitate range checking in zfree. |
841 | * |
842 | * @param size the size to steal (must be a page multiple) |
843 | */ |
844 | __startup_func |
845 | extern vm_offset_t zone_early_mem_init( |
846 | vm_size_t size); |
847 | |
848 | /*! |
849 | * @function zone_get_early_alloc_size |
850 | * |
851 | * @brief |
852 | * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple |
853 | * of the allocation granule for the zone with the given creation flags and |
854 | * element size. |
855 | */ |
856 | __startup_func |
857 | extern vm_size_t zone_get_early_alloc_size( |
858 | const char *name __unused, |
859 | vm_size_t elem_size, |
860 | zone_create_flags_t flags, |
861 | vm_size_t min_elems); |
862 | |
863 | /*! |
864 | * @function zone_cram_early |
865 | * |
866 | * @brief |
867 | * Cram memory allocated with @c zone_early_mem_init() into a zone. |
868 | * |
869 | * @param zone The zone to cram memory into. |
870 | * @param newmem The base address for the memory to cram. |
871 | * @param size The size of the memory to cram into the zone. |
872 | */ |
873 | __startup_func |
874 | extern void zone_cram_early( |
875 | zone_t zone, |
876 | vm_offset_t newmem, |
877 | vm_size_t size); |
878 | |
879 | extern bool zone_maps_owned( |
880 | vm_address_t addr, |
881 | vm_size_t size); |
882 | |
883 | #if KASAN_LIGHT |
884 | extern bool kasan_zone_maps_owned( |
885 | vm_address_t addr, |
886 | vm_size_t size); |
887 | #endif /* KASAN_LIGHT */ |
888 | |
889 | extern void zone_map_sizes( |
890 | vm_map_size_t *psize, |
891 | vm_map_size_t *pfree, |
892 | vm_map_size_t *plargest_free); |
893 | |
894 | extern bool |
895 | zone_map_nearing_exhaustion(void); |
896 | |
897 | static inline vm_tag_t |
898 | zalloc_flags_get_tag(zalloc_flags_t flags) |
899 | { |
900 | return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT); |
901 | } |
902 | |
903 | extern struct kalloc_result zalloc_ext( |
904 | zone_t zone, |
905 | zone_stats_t zstats, |
906 | zalloc_flags_t flags); |
907 | |
/*
 * Helpers for the combined size argument of zfree_ext().
 *
 * On KASAN kernels the user size is packed in the upper 32 bits above
 * the element size; otherwise only the element size is carried
 * (and ZFREE_USER_SIZE() does not exist).
 */
#if !KASAN
#define ZFREE_PACK_SIZE(esize, usize)   (esize)
#define ZFREE_ELEM_SIZE(combined)       (combined)
#else
#define ZFREE_PACK_SIZE(esize, usize)   (((uint64_t)(usize) << 32) | (esize))
#define ZFREE_ELEM_SIZE(combined)       ((uint32_t)(combined))
#define ZFREE_USER_SIZE(combined)       ((combined) >> 32)
#endif
916 | |
917 | extern void zfree_ext( |
918 | zone_t zone, |
919 | zone_stats_t zstats, |
920 | void *addr, |
921 | uint64_t combined_size); |
922 | |
923 | extern zone_id_t zone_id_for_element( |
924 | void *addr, |
925 | vm_size_t esize); |
926 | |
927 | #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST) |
928 | extern void *zone_element_pgz_oob_adjust( |
929 | void *addr, |
930 | vm_size_t req_size, |
931 | vm_size_t elem_size); |
932 | #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */ |
933 | |
934 | extern void zone_element_bounds_check( |
935 | vm_address_t addr, |
936 | vm_size_t len); |
937 | |
938 | extern vm_size_t zone_element_size( |
939 | void *addr, |
940 | zone_t *z, |
941 | bool clear_oob, |
942 | vm_offset_t *oob_offs); |
943 | |
944 | /*! |
945 | * @function zone_spans_ro_va |
946 | * |
947 | * @abstract |
948 | * This function is used to check whether the specified address range |
949 | * spans through the read-only zone range. |
950 | * |
951 | * @discussion |
952 | * This only checks for the range specified within ZONE_ADDR_READONLY. |
953 | * The parameters addr_start and addr_end are stripped off of PAC bits |
954 | * before the check is made. |
955 | */ |
956 | extern bool zone_spans_ro_va( |
957 | vm_offset_t addr_start, |
958 | vm_offset_t addr_end); |
959 | |
960 | /*! |
961 | * @function __zalloc_ro_mut_atomic |
962 | * |
963 | * @abstract |
964 | * This function is called from the pmap to perform the specified atomic |
965 | * operation on memory from the read-only allocator. |
966 | * |
967 | * @discussion |
968 | * This function is for internal use only and should not be called directly. |
969 | */ |
970 | static inline uint64_t |
971 | __zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value) |
972 | { |
973 | #define __ZALLOC_RO_MUT_OP(op, op2) \ |
974 | case ZRO_ATOMIC_##op##_8: \ |
975 | return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \ |
976 | case ZRO_ATOMIC_##op##_16: \ |
977 | return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \ |
978 | case ZRO_ATOMIC_##op##_32: \ |
979 | return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \ |
980 | case ZRO_ATOMIC_##op##_64: \ |
981 | return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst) |
982 | |
983 | switch (op) { |
984 | __ZALLOC_RO_MUT_OP(OR, or_orig); |
985 | __ZALLOC_RO_MUT_OP(XOR, xor_orig); |
986 | __ZALLOC_RO_MUT_OP(AND, and_orig); |
987 | __ZALLOC_RO_MUT_OP(ADD, add_orig); |
988 | __ZALLOC_RO_MUT_OP(XCHG, xchg); |
989 | default: |
990 | panic("%s: Invalid atomic operation: %d" , __func__, op); |
991 | } |
992 | |
993 | #undef __ZALLOC_RO_MUT_OP |
994 | } |
995 | |
/*!
 * @function zone_owns
 *
 * @abstract
 * This function is a soft version of zone_require that checks if a given
 * pointer belongs to the specified zone and should not be used outside
 * allocator code.
 *
 * @discussion
 * Note that zone_owns() can only work with:
 * - zones not allowing foreign memory
 * - zones in the general submap.
 *
 * @param zone the zone the address needs to belong to.
 * @param addr the element address to check.
 * @returns    whether @c addr belongs to @c zone.
 */
extern bool zone_owns(
	zone_t zone,
	void *addr);
1015 | |
/*!
 * @function zone_submap
 *
 * @param zsflags the security flags of a specified zone.
 * @returns the zone (sub)map this zone allocates from.
 */
__pure2
extern vm_map_t zone_submap(
	zone_security_flags_t zsflags);
1025 | |
1026 | #ifndef VM_TAG_SIZECLASSES |
1027 | #error MAX_TAG_ZONES |
1028 | #endif |
#if VM_TAG_SIZECLASSES

/*
 * Only declared when VM_TAG_SIZECLASSES is non-zero; the definition lives
 * outside of this header.
 *
 * NOTE(review): presumably translates an index among the tagged zones
 * into a global zone index -- confirm against the implementation.
 */
extern uint16_t zone_index_from_tag_index(
	uint32_t tag_zone_index);

#endif /* VM_TAG_SIZECLASSES */
1035 | |
/* Lock group handed to hw_lck_ticket_lock() when zone_lock() takes a zone's z_lock. */
extern lck_grp_t zone_locks_grp;
1037 | |
1038 | static inline void |
1039 | zone_lock(zone_t zone) |
1040 | { |
1041 | #if KASAN_FAKESTACK |
1042 | spl_t s = 0; |
1043 | if (zone->z_kasan_fakestacks) { |
1044 | s = splsched(); |
1045 | } |
1046 | #endif /* KASAN_FAKESTACK */ |
1047 | hw_lck_ticket_lock(&zone->z_lock, &zone_locks_grp); |
1048 | #if KASAN_FAKESTACK |
1049 | zone->z_kasan_spl = s; |
1050 | #endif /* KASAN_FAKESTACK */ |
1051 | } |
1052 | |
1053 | static inline void |
1054 | zone_unlock(zone_t zone) |
1055 | { |
1056 | #if KASAN_FAKESTACK |
1057 | spl_t s = zone->z_kasan_spl; |
1058 | zone->z_kasan_spl = 0; |
1059 | #endif /* KASAN_FAKESTACK */ |
1060 | hw_lck_ticket_unlock(tlock: &zone->z_lock); |
1061 | #if KASAN_FAKESTACK |
1062 | if (zone->z_kasan_fakestacks) { |
1063 | splx(s); |
1064 | } |
1065 | #endif /* KASAN_FAKESTACK */ |
1066 | } |
1067 | |
#define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */

/*
 * Defined outside of this header; takes a zone name and a log name and
 * returns an int.
 *
 * NOTE(review): presumably reports whether zonename matches the zone
 * selected for logging by logname -- confirm against the implementation.
 */
int track_this_zone(const char *zonename, const char *logname);
/*
 * Globals defined outside of this header.
 *
 * NOTE(review): their meaning is not visible from here; presumably they
 * select kalloc_type information to include in panic output -- confirm.
 */
extern bool panic_include_kalloc_types;
extern zone_t kalloc_type_src_zone;
extern zone_t kalloc_type_dst_zone;
1074 | |
#if DEBUG || DEVELOPMENT
/*
 * Only declared on DEBUG or DEVELOPMENT builds; the definition lives
 * outside of this header.
 *
 * NOTE(review): presumably returns the size of the element at addr and
 * stores its VM tag through ptag -- confirm against the implementation.
 */
extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
#endif /* DEBUG || DEVELOPMENT */
1078 | |
1079 | #pragma GCC visibility pop |
1080 | |
1081 | __END_DECLS |
1082 | |
1083 | #endif /* _KERN_ZALLOC_INTERNAL_H_ */ |
1084 | |