/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	kern/zalloc.c
 *	Author:	Avadis Tevanian, Jr.
 *
 *	Zone-based memory allocator.  A zone is a collection of fixed size
 *	data blocks for which quick allocation/deallocation is possible.
 */
#include <zone_debug.h>

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach/task_server.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_map.h>
#include <mach/sdt.h>

#include <kern/bits.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/backtrace.h>
#include <kern/host.h>
#include <kern/macro_help.h>
#include <kern/sched.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/thread_call.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>

#include <prng/random.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <pexpert/pexpert.h>

#include <machine/machparam.h>
#include <machine/machine_routines.h> /* ml_cpu_get_info */

#include <libkern/OSDebug.h>
#include <libkern/OSAtomic.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>

#include <san/kasan.h>

/*
 * ZONE_ALIAS_ADDR (deprecated)
 */

#define from_zone_map(addr, size) \
    ((vm_offset_t)(addr) >= zone_map_min_address && \
    ((vm_offset_t)(addr) + size - 1) < zone_map_max_address)

/*
 * Zone Corruption Debugging
 *
 * We use three techniques to detect modification of a zone element
 * after it's been freed.
 *
 * (1) Check the freelist next pointer for sanity.
 * (2) Store a backup of the next pointer at the end of the element,
 *     and compare it to the primary next pointer when the element is allocated
 *     to detect corruption of the freelist due to use-after-free bugs.
 *     The backup pointer is also XORed with a per-boot random cookie.
 * (3) Poison the freed element by overwriting it with 0xdeadbeef,
 *     and check for that value when the element is being reused to make sure
 *     no part of the element has been modified while it was on the freelist.
 *     This will also help catch read-after-frees, as code will now dereference
 *     0xdeadbeef instead of a valid but freed pointer.
 *
 * (1) and (2) occur for every allocation and free to a zone.
 * This is done to make it slightly more difficult for an attacker to
 * manipulate the freelist to behave in a specific way.
 *
 * Poisoning (3) occurs periodically for every N frees (counted per-zone)
 * and on every free for zones smaller than a cacheline.  If -zp
 * is passed as a boot arg, poisoning occurs for every free.
 *
 * Performance slowdown is inversely proportional to the frequency of poisoning,
 * with a 4-5% hit around N=1, down to ~0.3% at N=16 and just "noise" at N=32
 * and higher.  You can expect to find a 100% reproducible bug in an average of
 * N tries, with a standard deviation of about N, but you will want to set
 * "-zp" to always poison every free if you are attempting to reproduce
 * a known bug.
 *
 * For a more heavyweight, but finer-grained method of detecting misuse
 * of zone memory, look up the "Guard mode" zone allocator in gzalloc.c.
 *
 * Zone Corruption Logging
 *
 * You can also track where corruptions come from by using the boot-arguments
 * "zlog=<zone name to log> -zc".  Search for "Zone corruption logging" later
 * in this document for more implementation and usage information.
 *
 * Zone Leak Detection
 *
 * To debug leaks of zone memory, use the zone leak detection tool 'zleaks'
 * found later in this file via the showtopztrace and showz* macros in kgmacros,
 * or use zlog without the -zc argument.
 *
 */
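
/*
 * Illustrative sketch (not compiled): how the primary and backup freelist
 * pointers described above are encoded and cross-checked.  This mirrors the
 * logic in free_to_zone()/try_alloc_from_zone() below; the "example_"
 * functions are hypothetical and exist only to show the scheme.
 */
#if 0
static void
example_encode_free_element(vm_offset_t element, vm_offset_t old_head,
    vm_size_t elem_size, boolean_t poison)
{
    vm_offset_t *primary = (vm_offset_t *) element;
    vm_offset_t *backup  = get_backup_ptr(elem_size, primary);

    /* Never store the raw next pointer in a freed element */
    *primary = old_head ^ zp_nopoison_cookie;
    /* The backup's cookie choice also records whether the element was poisoned */
    *backup  = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie);
}

static vm_offset_t
example_decode_free_element(vm_offset_t element, vm_size_t elem_size)
{
    vm_offset_t *primary = (vm_offset_t *) element;
    vm_offset_t *backup  = get_backup_ptr(elem_size, primary);
    vm_offset_t next     = *primary ^ zp_nopoison_cookie;

    /* The two copies must agree under one of the two per-boot cookies */
    if (next != (*backup ^ zp_nopoison_cookie) &&
        next != (*backup ^ zp_poisoned_cookie))
        panic("freelist corruption at %p", (void *) element);
    return next;
}
#endif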

/* Returns TRUE if we rolled over the counter at factor */
static inline boolean_t
sample_counter(volatile uint32_t * count_p, uint32_t factor)
{
    uint32_t old_count, new_count;
    boolean_t rolled_over;

    do {
        new_count = old_count = *count_p;

        if (++new_count >= factor) {
            rolled_over = TRUE;
            new_count = 0;
        } else {
            rolled_over = FALSE;
        }

    } while (!OSCompareAndSwap(old_count, new_count, count_p));

    return rolled_over;
}
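
/*
 * Typical use (hedged sketch): poison roughly one free in every zp_factor
 * frees by keeping a per-zone counter, along the lines of:
 *
 *	if (zp_factor != 0 && sample_counter(&zone->zp_count, zp_factor))
 *		poison = TRUE;	// this free gets the 0xdeadbeef treatment
 *
 * The zone->zp_count field is assumed here for illustration; the real call
 * sites live in the zfree path elsewhere in this file.
 */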

#if defined(__LP64__)
#define ZP_POISON       0xdeadbeefdeadbeef
#else
#define ZP_POISON       0xdeadbeef
#endif

boolean_t zfree_poison_element(zone_t zone, vm_offset_t elem);
void zalloc_poison_element(boolean_t check_poison, zone_t zone, vm_offset_t addr);

#define ZP_DEFAULT_SAMPLING_FACTOR 16
#define ZP_DEFAULT_SCALE_FACTOR 4

/*
 * A zp_factor of 0 indicates zone poisoning is disabled;
 * however, we still poison zones smaller than zp_tiny_zone_limit (a cacheline).
 * Passing the -no-zp boot-arg disables even this behavior.
 * In all cases, we record and check the integrity of a backup pointer.
 */

/* set by zp-factor=N boot arg, zero indicates non-tiny poisoning disabled */
#if DEBUG
#define DEFAULT_ZP_FACTOR (1)
#else
#define DEFAULT_ZP_FACTOR (0)
#endif
uint32_t zp_factor = DEFAULT_ZP_FACTOR;

/* set by zp-scale=N boot arg, scales zp_factor by zone size */
uint32_t zp_scale = 0;

/* set in zp_init, zero indicates -no-zp boot-arg */
vm_size_t zp_tiny_zone_limit = 0;

/* initialized to a per-boot random value in zp_init */
uintptr_t zp_poisoned_cookie = 0;
uintptr_t zp_nopoison_cookie = 0;

#if VM_MAX_TAG_ZONES
boolean_t zone_tagging_on;
#endif /* VM_MAX_TAG_ZONES */

SECURITY_READ_ONLY_LATE(boolean_t) copyio_zalloc_check = TRUE;
static struct bool_gen zone_bool_gen;

/*
 * initialize zone poisoning
 * called from zone_bootstrap before any allocations are made from zalloc
 */
static inline void
zp_init(void)
{
    char temp_buf[16];

    /*
     * Initialize backup pointer random cookie for poisoned elements
     * Try not to call early_random() back to back, it may return
     * the same value if mach_absolute_time doesn't have sufficient time
     * to tick over between calls.  <rdar://problem/11597395>
     * (This is only a problem on embedded devices)
     */
    zp_poisoned_cookie = (uintptr_t) early_random();

    /*
     * Always poison zones smaller than a cacheline,
     * because it's pretty close to free
     */
    ml_cpu_info_t cpu_info;
    ml_cpu_get_info(&cpu_info);
    zp_tiny_zone_limit = (vm_size_t) cpu_info.cache_line_size;

    zp_factor = ZP_DEFAULT_SAMPLING_FACTOR;
    zp_scale  = ZP_DEFAULT_SCALE_FACTOR;

    //TODO: Bigger permutation?
    /*
     * Permute the default factor +/- 1 to make it less predictable
     * This adds or subtracts ~4 poisoned objects per 1000 frees.
     */
    if (zp_factor != 0) {
        uint32_t rand_bits = early_random() & 0x3;

        if (rand_bits == 0x1)
            zp_factor += 1;
        else if (rand_bits == 0x2)
            zp_factor -= 1;
        /* if 0x0 or 0x3, leave it alone */
    }

    /* -zp: enable poisoning for every alloc and free */
    if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
        zp_factor = 1;
    }

    /* -no-zp: disable poisoning completely even for tiny zones */
    if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
        zp_factor          = 0;
        zp_tiny_zone_limit = 0;
        printf("Zone poisoning disabled\n");
    }

    /* zp-factor=XXXX: override how often to poison freed zone elements */
    if (PE_parse_boot_argn("zp-factor", &zp_factor, sizeof(zp_factor))) {
        printf("Zone poisoning factor override: %u\n", zp_factor);
    }

    /* zp-scale=XXXX: override how much zone size scales zp-factor by */
    if (PE_parse_boot_argn("zp-scale", &zp_scale, sizeof(zp_scale))) {
        printf("Zone poisoning scale factor override: %u\n", zp_scale);
    }

    /* Initialize backup pointer random cookie for unpoisoned elements */
    zp_nopoison_cookie = (uintptr_t) early_random();

#if MACH_ASSERT
    if (zp_poisoned_cookie == zp_nopoison_cookie)
        panic("early_random() is broken: %p and %p are not random\n",
              (void *) zp_poisoned_cookie, (void *) zp_nopoison_cookie);
#endif

    /*
     * Use the last bit in the backup pointer to hint poisoning state
     * to backup_ptr_mismatch_panic. Valid zone pointers are aligned, so
     * the low bits are zero.
     */
    zp_poisoned_cookie |=   (uintptr_t)0x1ULL;
    zp_nopoison_cookie &= ~((uintptr_t)0x1ULL);

#if defined(__LP64__)
    /*
     * Make backup pointers more obvious in GDB for 64 bit
     * by making 0xFFFFFF... ^ cookie = 0xFACADE...
     * (0xFACADE = 0xFFFFFF ^ 0x053521)
     * (0xC0FFEE = 0xFFFFFF ^ 0x3f0011)
     * The high 3 bytes of a zone pointer are always 0xFFFFFF, and are checked
     * by the sanity check, so it's OK for that part of the cookie to be predictable.
     *
     * TODO: Use #defines, xors, and shifts
     */

    zp_poisoned_cookie &= 0x000000FFFFFFFFFF;
    zp_poisoned_cookie |= 0x0535210000000000; /* 0xFACADE */

    zp_nopoison_cookie &= 0x000000FFFFFFFFFF;
    zp_nopoison_cookie |= 0x3f00110000000000; /* 0xC0FFEE */
#endif
}
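
/*
 * Boot-arg quick reference for the poisoning knobs above (values are
 * examples; all four arguments are parsed in zp_init()):
 *
 *	zp-factor=16	poison roughly 1 in every 16 frees
 *	zp-scale=4	scale the poisoning frequency by element size
 *	-zp		poison on every free (zp_factor = 1)
 *	-no-zp		disable poisoning entirely, even for tiny zones
 */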

/*
 * These macros are used to keep track of the number
 * of pages being used by the zone currently. The
 * z->page_count is not protected by the zone lock.
 */
#define ZONE_PAGE_COUNT_INCR(z, count) \
{ \
    OSAddAtomic64(count, &(z->page_count)); \
}

#define ZONE_PAGE_COUNT_DECR(z, count) \
{ \
    OSAddAtomic64(-count, &(z->page_count)); \
}

vm_map_t zone_map = VM_MAP_NULL;

/* for is_sane_zone_element and garbage collection */

vm_offset_t zone_map_min_address = 0;  /* initialized in zone_init */
vm_offset_t zone_map_max_address = 0;

/* Globals for random boolean generator for elements in free list */
#define MAX_ENTROPY_PER_ZCRAM 4

/* VM region for all metadata structures */
vm_offset_t zone_metadata_region_min = 0;
vm_offset_t zone_metadata_region_max = 0;
decl_lck_mtx_data(static, zone_metadata_region_lck)
lck_attr_t zone_metadata_lock_attr;
lck_mtx_ext_t zone_metadata_region_lck_ext;

/* Helpful for walking through a zone's free element list. */
struct zone_free_element {
    struct zone_free_element *next;
    /* ... */
    /* void *backup_ptr; */
};

#if CONFIG_ZCACHE

#if !CONFIG_GZALLOC
bool use_caching = TRUE;
#else
bool use_caching = FALSE;
#endif /* !CONFIG_GZALLOC */

/*
 * Decides whether per-cpu zone caching is to be enabled for all zones.
 * Can be set to TRUE via the boot-arg '-zcache_all'.
 */
bool cache_all_zones = FALSE;

/*
 * Specifies a single zone to enable CPU caching for.
 * Can be set using boot-args: zcc_enable_for_zone_name=<zone>
 */
static char cache_zone_name[MAX_ZONE_NAME];

static inline bool
zone_caching_enabled(zone_t z)
{
    return (z->cpu_cache_enabled && !z->tags && !z->zleak_on);
}

#endif /* CONFIG_ZCACHE */

/*
 * Protects zone_array, num_zones, num_zones_in_use, and zone_empty_bitmap
 */
decl_simple_lock_data(, all_zones_lock)
unsigned int num_zones_in_use;
unsigned int num_zones;

#define MAX_ZONES 320
struct zone zone_array[MAX_ZONES];

/* Used to keep track of empty slots in the zone_array */
bitmap_t zone_empty_bitmap[BITMAP_LEN(MAX_ZONES)];

#if DEBUG || DEVELOPMENT
/*
 * Used for the sysctl kern.run_zone_test, which is not thread-safe. Ensure
 * only one thread goes through at a time; otherwise we can end up with
 * multiple test zones (if a second zinit() comes through before zdestroy()),
 * which could lead us to run out of zones.
 */
decl_simple_lock_data(, zone_test_lock)
static boolean_t zone_test_running = FALSE;
static zone_t test_zone_ptr = NULL;
#endif /* DEBUG || DEVELOPMENT */

#define PAGE_METADATA_GET_ZINDEX(page_meta) \
    (page_meta->zindex)

#define PAGE_METADATA_GET_ZONE(page_meta) \
    (&(zone_array[page_meta->zindex]))

#define PAGE_METADATA_SET_ZINDEX(page_meta, index) \
    page_meta->zindex = (index);

struct zone_page_metadata {
    queue_chain_t pages; /* linkage pointer for metadata lists */

    /* Union for maintaining start of element free list and real metadata (for multipage allocations) */
    union {
        /*
         * The start of the freelist can be maintained as a 32-bit offset instead of a pointer because
         * the free elements would be at max ZONE_MAX_ALLOC_SIZE bytes away from the metadata. Offset
         * from start of the allocation chunk to free element list head.
         */
        uint32_t freelist_offset;
        /*
         * This field is used to look up the real metadata for multipage allocations, where we mark the
         * metadata for all pages except the first as "fake" metadata using MULTIPAGE_METADATA_MAGIC.
         * Offset from this fake metadata to the real metadata of the allocation chunk (a negative offset).
         */
        uint32_t real_metadata_offset;
    };

    /*
     * For the first page in the allocation chunk, this represents the total number of free elements in
     * the chunk.
     */
    uint16_t free_count;
    unsigned zindex     : ZINDEX_BITS;    /* Zone index within the zone_array */
    unsigned page_count : PAGECOUNT_BITS; /* Count of pages within the allocation chunk */
};

/* Macro to get page index (within zone_map) of page containing element */
#define PAGE_INDEX_FOR_ELEMENT(element) \
    (((vm_offset_t)trunc_page(element) - zone_map_min_address) / PAGE_SIZE)

/* Macro to get metadata structure given a page index in zone_map */
#define PAGE_METADATA_FOR_PAGE_INDEX(index) \
    (zone_metadata_region_min + ((index) * sizeof(struct zone_page_metadata)))

/* Macro to get index (within zone_map) for given metadata */
#define PAGE_INDEX_FOR_METADATA(page_meta) \
    (((vm_offset_t)page_meta - zone_metadata_region_min) / sizeof(struct zone_page_metadata))

/* Macro to get page for given page index in zone_map */
#define PAGE_FOR_PAGE_INDEX(index) \
    (zone_map_min_address + (PAGE_SIZE * (index)))

/* Macro to get the actual metadata for a given address */
#define PAGE_METADATA_FOR_ELEMENT(element) \
    (struct zone_page_metadata *)(PAGE_METADATA_FOR_PAGE_INDEX(PAGE_INDEX_FOR_ELEMENT(element)))

/* Magic value to indicate empty element free list */
#define PAGE_METADATA_EMPTY_FREELIST ((uint32_t)(~0))
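
/*
 * Illustrative sketch (not compiled): the round trip from an element address
 * to its page metadata and back, using the macros above.  Only valid for
 * elements that live inside the zone_map; "example_" is a hypothetical name.
 */
#if 0
static struct zone_page_metadata *
example_meta_for_element(vm_offset_t element)
{
    /* page index within zone_map, then the parallel metadata slot for it */
    vm_offset_t idx = PAGE_INDEX_FOR_ELEMENT(element);
    struct zone_page_metadata *meta =
        (struct zone_page_metadata *) PAGE_METADATA_FOR_PAGE_INDEX(idx);

    /* the inverse mapping recovers the page the element lives on */
    assert(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(meta)) ==
        trunc_page(element));
    return meta;
}
#endif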

vm_map_copy_t create_vm_map_copy(vm_offset_t start_addr, vm_size_t total_size, vm_size_t used_size);
boolean_t get_zone_info(zone_t z, mach_zone_name_t *zn, mach_zone_info_t *zi);
boolean_t is_zone_map_nearing_exhaustion(void);
extern void vm_pageout_garbage_collect(int collect);

static inline void *
page_metadata_get_freelist(struct zone_page_metadata *page_meta)
{
    assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
    if (page_meta->freelist_offset == PAGE_METADATA_EMPTY_FREELIST)
        return NULL;
    else {
        if (from_zone_map(page_meta, sizeof(struct zone_page_metadata)))
            return (void *)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)) + page_meta->freelist_offset);
        else
            return (void *)((vm_offset_t)page_meta + page_meta->freelist_offset);
    }
}

static inline void
page_metadata_set_freelist(struct zone_page_metadata *page_meta, void *addr)
{
    assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
    if (addr == NULL)
        page_meta->freelist_offset = PAGE_METADATA_EMPTY_FREELIST;
    else {
        if (from_zone_map(page_meta, sizeof(struct zone_page_metadata)))
            page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)));
        else
            page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - (vm_offset_t)page_meta);
    }
}

static inline struct zone_page_metadata *
page_metadata_get_realmeta(struct zone_page_metadata *page_meta)
{
    assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC);
    return (struct zone_page_metadata *)((vm_offset_t)page_meta - page_meta->real_metadata_offset);
}

static inline void
page_metadata_set_realmeta(struct zone_page_metadata *page_meta, struct zone_page_metadata *real_meta)
{
    assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC);
    assert(PAGE_METADATA_GET_ZINDEX(real_meta) != MULTIPAGE_METADATA_MAGIC);
    assert((vm_offset_t)page_meta > (vm_offset_t)real_meta);
    vm_offset_t offset = (vm_offset_t)page_meta - (vm_offset_t)real_meta;
    assert(offset <= UINT32_MAX);
    page_meta->real_metadata_offset = (uint32_t)offset;
}

/* The backup pointer is stored in the last pointer-sized location in an element. */
static inline vm_offset_t *
get_backup_ptr(vm_size_t elem_size, vm_offset_t *element)
{
    return (vm_offset_t *)((vm_offset_t)element + elem_size - sizeof(vm_offset_t));
}

/*
 * Routine to populate a page backing metadata in the zone_metadata_region.
 * Must be called without the zone lock held, as it might potentially block.
 */
static inline void
zone_populate_metadata_page(struct zone_page_metadata *page_meta)
{
    vm_offset_t page_metadata_begin = trunc_page(page_meta);
    vm_offset_t page_metadata_end = trunc_page((vm_offset_t)page_meta + sizeof(struct zone_page_metadata));

    for (; page_metadata_begin <= page_metadata_end; page_metadata_begin += PAGE_SIZE) {
#if !KASAN
        /*
         * This can race with another thread doing a populate on the same metadata
         * page, where we see an updated pmap but unmapped KASan shadow, causing a
         * fault in the shadow when we first access the metadata page. Avoid this
         * by always synchronizing on the zone_metadata_region lock with KASan.
         */
        if (pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin))
            continue;
#endif
        /* All updates to the zone_metadata_region are done under the zone_metadata_region_lck */
        lck_mtx_lock(&zone_metadata_region_lck);
        if (0 == pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin)) {
            kern_return_t __assert_only ret = kernel_memory_populate(zone_map,
                page_metadata_begin,
                PAGE_SIZE,
                KMA_KOBJECT,
                VM_KERN_MEMORY_OSFMK);

            /* should not fail with the given arguments */
            assert(ret == KERN_SUCCESS);
        }
        lck_mtx_unlock(&zone_metadata_region_lck);
    }
    return;
}

static inline uint16_t
get_metadata_alloc_count(struct zone_page_metadata *page_meta)
{
    assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC);
    struct zone *z = PAGE_METADATA_GET_ZONE(page_meta);
    return ((page_meta->page_count * PAGE_SIZE) / z->elem_size);
}

/*
 * Routine to look up metadata for any given address.
 * If init is marked as TRUE, this should be called without holding the zone lock
 * since the initialization might block.
 */
static inline struct zone_page_metadata *
get_zone_page_metadata(struct zone_free_element *element, boolean_t init)
{
    struct zone_page_metadata *page_meta = 0;

    if (from_zone_map(element, sizeof(struct zone_free_element))) {
        page_meta = (struct zone_page_metadata *)(PAGE_METADATA_FOR_ELEMENT(element));
        if (init)
            zone_populate_metadata_page(page_meta);
    } else {
        page_meta = (struct zone_page_metadata *)(trunc_page((vm_offset_t)element));
    }
    if (init) {
        bzero((char *)page_meta, sizeof(struct zone_page_metadata));
    }
    return ((PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC) ? page_meta : page_metadata_get_realmeta(page_meta));
}

/* Routine to get the page for a given metadata */
static inline vm_offset_t
get_zone_page(struct zone_page_metadata *page_meta)
{
    if (from_zone_map(page_meta, sizeof(struct zone_page_metadata)))
        return (vm_offset_t)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)));
    else
        return (vm_offset_t)(trunc_page(page_meta));
}

/*
 * ZTAGS
 */

#if VM_MAX_TAG_ZONES

// for zones with tagging enabled:

// calculate a pointer to the tag base entry,
// holding either a uint32_t (the first tag offset for a page in the zone map),
// or two uint16_t tags if the page holds at most two elements

#define ZTAGBASE(zone, element) \
    (&((uint32_t *)zone_tagbase_min)[atop((element) - zone_map_min_address)])

// pointer to the tag for an element
#define ZTAG(zone, element) \
    ({ \
        vm_tag_t * result; \
        if ((zone)->tags_inline) { \
            result = (vm_tag_t *) ZTAGBASE((zone), (element)); \
            if ((page_mask & element) >= (zone)->elem_size) result++; \
        } else { \
            result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / (zone)->elem_size]; \
        } \
        result; \
    })
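
/*
 * Illustrative sketch (not compiled): reading and writing an element's tag
 * via ZTAG.  The low bit of the stored value is reserved as the
 * free/allocated hint, so tags are shifted by one, matching the usage in
 * try_alloc_from_zone() and zone_element_info() below.
 */
#if 0
static void
example_tag_element(zone_t z, vm_offset_t element, vm_tag_t tag)
{
    ZTAG(z, element)[0] = (vm_tag_t)(tag << 1);  /* b0 clear: in use */
}

static vm_tag_t
example_tag_of_element(zone_t z, vm_offset_t element)
{
    return ZTAG(z, element)[0] >> 1;
}
#endif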

static vm_offset_t  zone_tagbase_min;
static vm_offset_t  zone_tagbase_max;
static vm_offset_t  zone_tagbase_map_size;
static vm_map_t     zone_tagbase_map;

static vm_offset_t  zone_tags_min;
static vm_offset_t  zone_tags_max;
static vm_offset_t  zone_tags_map_size;
static vm_map_t     zone_tags_map;

// simple heap allocator for allocating the tags for new memory

decl_lck_mtx_data(, ztLock) /* heap lock */
enum
{
    ztFreeIndexCount = 8,
    ztFreeIndexMax   = (ztFreeIndexCount - 1),
    ztTagsPerBlock   = 4
};

struct ztBlock
{
#if __LITTLE_ENDIAN__
    uint64_t free:1,
             next:21,
             prev:21,
             size:21;
#else
// ztBlock needs free bit least significant
#error !__LITTLE_ENDIAN__
#endif
};
typedef struct ztBlock ztBlock;

static ztBlock * ztBlocks;
static uint32_t  ztBlocksCount;
static uint32_t  ztBlocksFree;

static uint32_t
ztLog2up(uint32_t size)
{
    if (1 == size) size = 0;
    else size = 32 - __builtin_clz(size - 1);
    return (size);
}

static uint32_t
ztLog2down(uint32_t size)
{
    size = 31 - __builtin_clz(size);
    return (size);
}
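
/*
 * Worked examples for the helpers above:
 *	ztLog2up(1) == 0,   ztLog2up(5) == 3,   ztLog2up(8) == 3
 *	ztLog2down(1) == 0, ztLog2down(5) == 2, ztLog2down(8) == 3
 * i.e. ceil(log2(n)) and floor(log2(n)) for nonzero 32-bit n.
 */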

static void
ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags)
{
    vm_map_offset_t addr = (vm_map_offset_t) address;
    vm_map_offset_t page, end;

    page = trunc_page(addr);
    end  = round_page(addr + size);

    for (; page < end; page += page_size)
    {
        if (!pmap_find_phys(kernel_pmap, page))
        {
            kern_return_t __unused
            ret = kernel_memory_populate(map, page, PAGE_SIZE,
                KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG);
            assert(ret == KERN_SUCCESS);
        }
    }
}

static boolean_t
ztPresent(const void * address, size_t size)
{
    vm_map_offset_t addr = (vm_map_offset_t) address;
    vm_map_offset_t page, end;
    boolean_t result;

    page = trunc_page(addr);
    end  = round_page(addr + size);
    for (result = TRUE; (page < end); page += page_size)
    {
        result = pmap_find_phys(kernel_pmap, page);
        if (!result) break;
    }
    return (result);
}

void __unused
ztDump(boolean_t sanity);
void __unused
ztDump(boolean_t sanity)
{
    uint32_t q, cq, p;

    for (q = 0; q <= ztFreeIndexMax; q++)
    {
        p = q;
        do
        {
            if (sanity)
            {
                cq = ztLog2down(ztBlocks[p].size);
                if (cq > ztFreeIndexMax) cq = ztFreeIndexMax;
                if (!ztBlocks[p].free
                    || ((p != q) && (q != cq))
                    || (ztBlocks[ztBlocks[p].next].prev != p)
                    || (ztBlocks[ztBlocks[p].prev].next != p))
                {
                    kprintf("zterror at %d", p);
                    ztDump(FALSE);
                    kprintf("zterror at %d", p);
                    assert(FALSE);
                }
                continue;
            }
            kprintf("zt[%03d]%c %d, %d, %d\n",
                p, ztBlocks[p].free ? 'F' : 'A',
                ztBlocks[p].next, ztBlocks[p].prev,
                ztBlocks[p].size);
            p = ztBlocks[p].next;
            if (p == q) break;
        }
        while (p != q);
        if (!sanity) printf("\n");
    }
    if (!sanity) printf("-----------------------\n");
}

#define ZTBDEQ(idx) \
    ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \
    ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev;

static void
ztFree(zone_t zone __unused, uint32_t index, uint32_t count)
{
    uint32_t q, w, p, size, merge;

    assert(count);
    ztBlocksFree += count;

    // merge with the following block (the one starting at index + count)
    merge = (index + count);
    if ((merge < ztBlocksCount)
        && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
        && ztBlocks[merge].free)
    {
        ZTBDEQ(merge);
        count += ztBlocks[merge].size;
    }

    // merge with the preceding block (its last slot, carrying its size, is at index - 1)
    merge = (index - 1);
    if ((merge > ztFreeIndexMax)
        && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
        && ztBlocks[merge].free)
    {
        size = ztBlocks[merge].size;
        count += size;
        index -= size;
        ZTBDEQ(index);
    }

    q = ztLog2down(count);
    if (q > ztFreeIndexMax) q = ztFreeIndexMax;
    w = q;
    // queue in order of size
    while (TRUE)
    {
        p = ztBlocks[w].next;
        if (p == q) break;
        if (ztBlocks[p].size >= count) break;
        w = p;
    }
    ztBlocks[p].prev = index;
    ztBlocks[w].next = index;

    // fault in first
    ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);

    // mark first & last with free flag and size
    ztBlocks[index].free = TRUE;
    ztBlocks[index].size = count;
    ztBlocks[index].prev = w;
    ztBlocks[index].next = p;
    if (count > 1)
    {
        index += (count - 1);
        // fault in last
        ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
        ztBlocks[index].free = TRUE;
        ztBlocks[index].size = count;
    }
}

static uint32_t
ztAlloc(zone_t zone, uint32_t count)
{
    uint32_t q, w, p, leftover;

    assert(count);

    q = ztLog2up(count);
    if (q > ztFreeIndexMax) q = ztFreeIndexMax;
    do
    {
        w = q;
        while (TRUE)
        {
            p = ztBlocks[w].next;
            if (p == q) break;
            if (ztBlocks[p].size >= count)
            {
                // dequeue, mark both ends allocated
                ztBlocks[w].next = ztBlocks[p].next;
                ztBlocks[ztBlocks[p].next].prev = w;
                ztBlocks[p].free = FALSE;
                ztBlocksFree -= ztBlocks[p].size;
                if (ztBlocks[p].size > 1) ztBlocks[p + ztBlocks[p].size - 1].free = FALSE;

                // fault all the allocation
                ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0);
                // mark last as allocated
                if (count > 1) ztBlocks[p + count - 1].free = FALSE;
                // free remainder
                leftover = ztBlocks[p].size - count;
                if (leftover) ztFree(zone, p + ztBlocks[p].size - leftover, leftover);

                return (p);
            }
            w = p;
        }
        q++;
    }
    while (q <= ztFreeIndexMax);

    return (-1U);
}

static void
ztInit(vm_size_t max_zonemap_size, lck_grp_t * group)
{
    kern_return_t ret;
    vm_map_kernel_flags_t vmk_flags;
    uint32_t idx;

    lck_mtx_init(&ztLock, group, LCK_ATTR_NULL);

    // allocate submaps VM_KERN_MEMORY_DIAG

    zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t);
    vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    vmk_flags.vmkf_permanent = TRUE;
    ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size,
        FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
        &zone_tagbase_map);

    if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed");
    zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size);

    zone_tags_map_size = 2048*1024 * sizeof(vm_tag_t);
    vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    vmk_flags.vmkf_permanent = TRUE;
    ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size,
        FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
        &zone_tags_map);

    if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed");
    zone_tags_max = zone_tags_min + round_page(zone_tags_map_size);

    ztBlocks = (ztBlock *) zone_tags_min;
    ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock));

    // initialize the qheads
    lck_mtx_lock(&ztLock);

    ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0);
    for (idx = 0; idx < ztFreeIndexCount; idx++)
    {
        ztBlocks[idx].free = TRUE;
        ztBlocks[idx].next = idx;
        ztBlocks[idx].prev = idx;
        ztBlocks[idx].size = 0;
    }
    // free remaining space
    ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount);

    lck_mtx_unlock(&ztLock);
}

static void
ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size)
{
    uint32_t * tagbase;
    uint32_t count, block, blocks, idx;
    size_t pages;

    pages = atop(size);
    tagbase = ZTAGBASE(zone, mem);

    lck_mtx_lock(&ztLock);

    // fault tagbase
    ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0);

    if (!zone->tags_inline)
    {
        // allocate tags
        count = (uint32_t)(size / zone->elem_size);
        blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
        block = ztAlloc(zone, blocks);
        if (-1U == block) ztDump(false);
        assert(-1U != block);
    }

    lck_mtx_unlock(&ztLock);

    if (!zone->tags_inline)
    {
        // set tag base for each page
        block *= ztTagsPerBlock;
        for (idx = 0; idx < pages; idx++)
        {
            tagbase[idx] = block + (uint32_t)((ptoa(idx) + (zone->elem_size - 1)) / zone->elem_size);
        }
    }
}

static void
ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size)
{
    uint32_t * tagbase;
    uint32_t count, block, blocks, idx;
    size_t pages;

    // set tag base for each page
    pages = atop(size);
    tagbase = ZTAGBASE(zone, mem);
    block = tagbase[0];
    for (idx = 0; idx < pages; idx++)
    {
        tagbase[idx] = 0xFFFFFFFF;
    }

    lck_mtx_lock(&ztLock);
    if (!zone->tags_inline)
    {
        count = (uint32_t)(size / zone->elem_size);
        blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
        assert(block != 0xFFFFFFFF);
        block /= ztTagsPerBlock;
        ztFree(NULL /* zone is unlocked */, block, blocks);
    }

    lck_mtx_unlock(&ztLock);
}

uint32_t
zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size)
{
    zone_t z;
    uint32_t idx;

    simple_lock(&all_zones_lock);

    for (idx = 0; idx < num_zones; idx++)
    {
        z = &(zone_array[idx]);
        if (!z->tags) continue;
        if (tag_zone_index != z->tag_zone_index) continue;
        *elem_size = z->elem_size;
        break;
    }

    simple_unlock(&all_zones_lock);

    if (idx == num_zones) idx = -1U;

    return (idx);
}

#endif /* VM_MAX_TAG_ZONES */

/*
 * Routine to get the size of a zone allocated address. If the address doesn't
 * belong to the zone_map, returns 0.
 */
vm_size_t
zone_element_size(void *addr, zone_t *z)
{
    struct zone *src_zone;
    if (from_zone_map(addr, sizeof(void *))) {
        struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
        src_zone = PAGE_METADATA_GET_ZONE(page_meta);
        if (z) {
            *z = src_zone;
        }
        return (src_zone->elem_size);
    } else {
#if CONFIG_GZALLOC
        vm_size_t gzsize;
        if (gzalloc_element_size(addr, z, &gzsize)) {
            return gzsize;
        }
#endif /* CONFIG_GZALLOC */

        return 0;
    }
}
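
/*
 * Illustrative sketch (not compiled): a hypothetical caller using
 * zone_element_size() to validate a destination buffer before copying into
 * it, in the spirit of the copyio_zalloc_check knob above.
 */
#if 0
static boolean_t
example_fits_in_element(void *addr, vm_size_t nbytes)
{
    zone_t z = ZONE_NULL;
    vm_size_t esize = zone_element_size(addr, &z);

    /* esize == 0 means addr is not a zone (or gzalloc) allocation */
    return esize != 0 && nbytes <= esize;
}
#endif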

#if DEBUG || DEVELOPMENT

vm_size_t
zone_element_info(void *addr, vm_tag_t * ptag)
{
    vm_size_t size = 0;
    vm_tag_t tag = VM_KERN_MEMORY_NONE;
    struct zone * src_zone;

    if (from_zone_map(addr, sizeof(void *))) {
        struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
        src_zone = PAGE_METADATA_GET_ZONE(page_meta);
#if VM_MAX_TAG_ZONES
        if (__improbable(src_zone->tags)) {
            tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1);
        }
#endif /* VM_MAX_TAG_ZONES */
        size = src_zone->elem_size;
    } else {
#if CONFIG_GZALLOC
        gzalloc_element_size(addr, NULL, &size);
#endif /* CONFIG_GZALLOC */
    }
    *ptag = tag;
    return size;
}

#endif /* DEBUG || DEVELOPMENT */

/*
 * Zone checking helper function.
 * A pointer that satisfies these conditions is OK to be a freelist next pointer.
 * A pointer that doesn't satisfy these conditions indicates corruption.
 */
static inline boolean_t
is_sane_zone_ptr(zone_t zone,
    vm_offset_t addr,
    size_t obj_size)
{
    /* Must be aligned to pointer boundary */
    if (__improbable((addr & (sizeof(vm_offset_t) - 1)) != 0))
        return FALSE;

    /* Must be a kernel address */
    if (__improbable(!pmap_kernel_va(addr)))
        return FALSE;

    /* Must be from zone map if the zone only uses memory from the zone_map */
    /*
     * TODO: Remove the zone->collectable check when every
     * zone using foreign memory is properly tagged with allows_foreign
     */
    if (zone->collectable && !zone->allows_foreign) {
        /* check if addr is from zone map */
        if (addr >= zone_map_min_address &&
            (addr + obj_size - 1) < zone_map_max_address)
            return TRUE;

        return FALSE;
    }

    return TRUE;
}

static inline boolean_t
is_sane_zone_page_metadata(zone_t zone,
    vm_offset_t page_meta)
{
    /* NULL page metadata structures are invalid */
    if (page_meta == 0)
        return FALSE;
    return is_sane_zone_ptr(zone, page_meta, sizeof(struct zone_page_metadata));
}

static inline boolean_t
is_sane_zone_element(zone_t zone,
    vm_offset_t addr)
{
    /* NULL is OK because it indicates the tail of the list */
    if (addr == 0)
        return TRUE;
    return is_sane_zone_ptr(zone, addr, zone->elem_size);
}

/* Someone wrote to freed memory. */
static inline void /* noreturn */
zone_element_was_modified_panic(zone_t zone,
    vm_offset_t element,
    vm_offset_t found,
    vm_offset_t expected,
    vm_offset_t offset)
{
    panic("a freed zone element has been modified in zone %s: expected %p but found %p, bits changed %p, at offset %d of %d in element %p, cookies %p %p",
        zone->zone_name,
        (void *) expected,
        (void *) found,
        (void *) (expected ^ found),
        (uint32_t) offset,
        (uint32_t) zone->elem_size,
        (void *) element,
        (void *) zp_nopoison_cookie,
        (void *) zp_poisoned_cookie);
}

/*
 * The primary and backup pointers don't match.
 * Determine which one was likely the corrupted pointer, find out what it
 * probably should have been, and panic.
 * I would like to mark this as noreturn, but panic() isn't marked noreturn.
 */
static void /* noreturn */
backup_ptr_mismatch_panic(zone_t zone,
    vm_offset_t element,
    vm_offset_t primary,
    vm_offset_t backup)
{
    vm_offset_t likely_backup;
    vm_offset_t likely_primary;

    likely_primary = primary ^ zp_nopoison_cookie;
    boolean_t sane_backup;
    boolean_t sane_primary = is_sane_zone_element(zone, likely_primary);
    boolean_t element_was_poisoned = (backup & 0x1) ? TRUE : FALSE;

#if defined(__LP64__)
    /* We can inspect the tag in the upper bits for additional confirmation */
    if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000)
        element_was_poisoned = TRUE;
    else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000)
        element_was_poisoned = FALSE;
#endif

    if (element_was_poisoned) {
        likely_backup = backup ^ zp_poisoned_cookie;
        sane_backup = is_sane_zone_element(zone, likely_backup);
    } else {
        likely_backup = backup ^ zp_nopoison_cookie;
        sane_backup = is_sane_zone_element(zone, likely_backup);
    }

    /* The primary is definitely the corrupted one */
    if (!sane_primary && sane_backup)
        zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);

    /* The backup is definitely the corrupted one */
    if (sane_primary && !sane_backup)
        zone_element_was_modified_panic(zone, element, backup,
            (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)),
            zone->elem_size - sizeof(vm_offset_t));

    /*
     * Not sure which is the corrupted one.
     * It's less likely that the backup pointer was overwritten with
     * ( (sane address) ^ (valid cookie) ), so we'll guess that the
     * primary pointer has been overwritten with a sane but incorrect address.
     */
    if (sane_primary && sane_backup)
        zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);

    /* Neither are sane, so just guess. */
    zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
}

/*
 * Adds the element to the head of the zone's free list
 * Keeps a backup next-pointer at the end of the element
 */
static inline void
free_to_zone(zone_t zone,
    vm_offset_t element,
    boolean_t poison)
{
    vm_offset_t old_head;
    struct zone_page_metadata *page_meta;

    vm_offset_t *primary = (vm_offset_t *) element;
    vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);

    page_meta = get_zone_page_metadata((struct zone_free_element *)element, FALSE);
    assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
    old_head = (vm_offset_t)page_metadata_get_freelist(page_meta);

    if (__improbable(!is_sane_zone_element(zone, old_head)))
        panic("zfree: invalid head pointer %p for freelist of zone %s\n",
            (void *) old_head, zone->zone_name);

    if (__improbable(!is_sane_zone_element(zone, element)))
        panic("zfree: freeing invalid pointer %p to zone %s\n",
            (void *) element, zone->zone_name);

    if (__improbable(old_head == element))
        panic("zfree: double free of %p to zone %s\n",
            (void *) element, zone->zone_name);
    /*
     * Always write a redundant next pointer
     * So that it is more difficult to forge, xor it with a random cookie
     * A poisoned element is indicated by using zp_poisoned_cookie
     * instead of zp_nopoison_cookie
     */

    *backup = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie);

    /*
     * Insert this element at the head of the free list. We also xor the
     * primary pointer with the zp_nopoison_cookie to make sure a free
     * element does not provide the location of the next free element directly.
     */
    *primary = old_head ^ zp_nopoison_cookie;
    page_metadata_set_freelist(page_meta, (struct zone_free_element *)element);
    page_meta->free_count++;
    if (zone->allows_foreign && !from_zone_map(element, zone->elem_size)) {
        if (page_meta->free_count == 1) {
            /* first foreign element freed on page, move from all_used */
            re_queue_tail(&zone->pages.any_free_foreign, &(page_meta->pages));
        } else {
            /* no other list transitions */
        }
    } else if (page_meta->free_count == get_metadata_alloc_count(page_meta)) {
        /* whether the page was on the intermediate or all_used queue, move it to all_free */
        re_queue_tail(&zone->pages.all_free, &(page_meta->pages));
        zone->count_all_free_pages += page_meta->page_count;
    } else if (page_meta->free_count == 1) {
        /* first free element on page, move from all_used */
        re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
    }
    zone->count--;
    zone->countfree++;

#if KASAN_ZALLOC
    kasan_poison_range(element, zone->elem_size, ASAN_HEAP_FREED);
#endif
}

/*
 * Removes an element from the zone's free list, returning 0 if the free list is empty.
 * Verifies that the next-pointer and backup next-pointer are intact,
 * and verifies that a poisoned element hasn't been modified.
 */
static inline vm_offset_t
try_alloc_from_zone(zone_t zone,
    vm_tag_t tag __unused,
    boolean_t* check_poison)
{
    vm_offset_t element;
    struct zone_page_metadata *page_meta;

    *check_poison = FALSE;

    /* if zone is empty, bail */
    if (zone->allows_foreign && !queue_empty(&zone->pages.any_free_foreign))
        page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign);
    else if (!queue_empty(&zone->pages.intermediate))
        page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate);
    else if (!queue_empty(&zone->pages.all_free)) {
        page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.all_free);
        assert(zone->count_all_free_pages >= page_meta->page_count);
        zone->count_all_free_pages -= page_meta->page_count;
    } else {
        return 0;
    }
    /* Check that page_meta passes is_sane_zone_page_metadata */
    if (__improbable(!is_sane_zone_page_metadata(zone, (vm_offset_t)page_meta)))
        panic("zalloc: invalid metadata structure %p for freelist of zone %s\n",
            (void *) page_meta, zone->zone_name);
    assert(PAGE_METADATA_GET_ZONE(page_meta) == zone);
    element = (vm_offset_t)page_metadata_get_freelist(page_meta);

    if (__improbable(!is_sane_zone_ptr(zone, element, zone->elem_size)))
        panic("zalloc: invalid head pointer %p for freelist of zone %s\n",
            (void *) element, zone->zone_name);

    vm_offset_t *primary = (vm_offset_t *) element;
    vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary);

    /*
     * Since the primary next pointer is xor'ed with zp_nopoison_cookie
     * for obfuscation, retrieve the original value back
     */
    vm_offset_t next_element = *primary ^ zp_nopoison_cookie;
    vm_offset_t next_element_primary = *primary;
    vm_offset_t next_element_backup = *backup;

    /*
     * backup_ptr_mismatch_panic will determine what next_element
     * should have been, and print it appropriately
     */
    if (__improbable(!is_sane_zone_element(zone, next_element)))
        backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);

    /* Check the backup pointer for the regular cookie */
    if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) {

        /* Check for the poisoned cookie instead */
        if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie)))
            /* Neither cookie is valid, corruption has occurred */
            backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup);

        /*
         * Element was marked as poisoned, so check its integrity before using it.
         */
        *check_poison = TRUE;
    }

    /* Make sure the page_meta is at the correct offset from the start of page */
    if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)element, FALSE)))
        panic("zalloc: Incorrect metadata %p found in zone %s page queue. Expected metadata: %p\n",
            page_meta, zone->zone_name, get_zone_page_metadata((struct zone_free_element *)element, FALSE));

    /* Make sure next_element belongs to the same page as page_meta */
    if (next_element) {
        if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)next_element, FALSE)))
            panic("zalloc: next element pointer %p for element %p points to invalid element for zone %s\n",
                (void *)next_element, (void *)element, zone->zone_name);
    }

    /* Remove this element from the free list */
    page_metadata_set_freelist(page_meta, (struct zone_free_element *)next_element);
    page_meta->free_count--;

    if (page_meta->free_count == 0) {
        /* move to all used */
        re_queue_tail(&zone->pages.all_used, &(page_meta->pages));
    } else {
        if (!zone->allows_foreign || from_zone_map(element, zone->elem_size)) {
            if (get_metadata_alloc_count(page_meta) == page_meta->free_count + 1) {
                /* remove from free, move to intermediate */
                re_queue_tail(&zone->pages.intermediate, &(page_meta->pages));
            }
        }
    }
    zone->countfree--;
    zone->count++;
    zone->sum_count++;

#if VM_MAX_TAG_ZONES
    if (__improbable(zone->tags)) {
        // set the tag with b0 clear so the block remains in use
        ZTAG(zone, element)[0] = (tag << 1);
    }
#endif /* VM_MAX_TAG_ZONES */

#if KASAN_ZALLOC
    kasan_poison_range(element, zone->elem_size, ASAN_VALID);
#endif

    return element;
}

/*
 * End of zone poisoning
 */

/*
 * Zone info options
 */
#define ZINFO_SLOTS MAX_ZONES /* for now */

zone_t zone_find_largest(void);

/*
 * Async allocation of zones
 * This mechanism allows for bootstrapping an empty zone which is setup with
 * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call
 * to zalloc_async. We perform a zalloc() (which may block) and then an immediate free.
 * This will prime the zone for the next use.
 *
 * Currently the thread_callout function (zalloc_async) will loop through all zones
 * looking for any zone with async_pending set and do the work for it.
 *
 * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call,
 * then zalloc_noblock to an empty zone may succeed.
 */
void zalloc_async(
    thread_call_param_t p0,
    thread_call_param_t p1);

static thread_call_data_t call_async_alloc;

/*
 * Align elements that use the zone page list to 32 byte boundaries.
 */
#define ZONE_ELEMENT_ALIGNMENT 32

#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
#define zone_sleep(zone) \
    (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN_ALWAYS, (event_t)(zone), THREAD_UNINT);

/*
 * The zone_locks_grp allows for collecting lock statistics.
 * All locks are associated to this group in zinit.
 * Look at tools/lockstat for debugging lock contention.
 */

lck_grp_t zone_locks_grp;
lck_grp_attr_t zone_locks_grp_attr;

#define lock_zone_init(zone) \
MACRO_BEGIN \
    lck_attr_setdefault(&(zone)->lock_attr); \
    lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \
        &zone_locks_grp, &(zone)->lock_attr); \
MACRO_END

#define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock)

/*
 * Exclude more than one concurrent garbage collection
 */
decl_lck_mtx_data(, zone_gc_lock)

lck_attr_t      zone_gc_lck_attr;
lck_grp_t       zone_gc_lck_grp;
lck_grp_attr_t  zone_gc_lck_grp_attr;
lck_mtx_ext_t   zone_gc_lck_ext;

boolean_t zone_gc_allowed = TRUE;
boolean_t panic_include_zprint = FALSE;

mach_memory_info_t *panic_kext_memory_info = NULL;
vm_size_t panic_kext_memory_size = 0;

#define ZALLOC_DEBUG_ZONEGC     0x00000001
#define ZALLOC_DEBUG_ZCRAM      0x00000002
uint32_t zalloc_debug = 0;

/*
 * Zone leak debugging code
 *
 * When enabled, this code keeps a log to track allocations to a particular zone that have not
 * yet been freed.  Examining this log will reveal the source of a zone leak.  The log is allocated
 * only when logging is enabled, so there is no effect on the system when it's turned off.  Logging is
 * off by default.
 *
 * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
 * is the name of the zone you wish to log.
 *
 * This code only tracks one zone, so you need to identify which one is leaking first.
 * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
 * garbage collector.  Note that the zone name printed in the panic message is not necessarily the one
 * containing the leak.  So do a zprint from gdb and locate the zone with the bloated size.  This
 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test.  The
 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
 * See the help in the kgmacros for usage info.
 *
 *
 * Zone corruption logging
 *
 * Logging can also be used to help identify the source of a zone corruption.  First, identify the zone
 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args.  When -zc is used in conjunction
 * with zlog, it changes the logging style to track both allocations and frees to the zone.  So when the
 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
 * and freed any particular element in the zone.  Use the findelem kgmacro with the address of the element that's been
 * corrupted to examine its history.  This should lead to the source of the corruption.
 */

static boolean_t log_records_init = FALSE;
static int log_records; /* size of the log, expressed in number of records */

#define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */

static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING;
static int num_zones_logged = 0;

static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */

/* Log allocations and frees to help debug a zone element corruption */
boolean_t corruption_debug_flag = DEBUG; /* enabled by "-zc" boot-arg */
/* Making pointer scanning leaks detection possible for all zones */

#if DEBUG || DEVELOPMENT
boolean_t leak_scan_debug_flag = FALSE; /* enabled by "-zl" boot-arg */
#endif /* DEBUG || DEVELOPMENT */


/*
 * The number of records in the log is configurable via the zrecs parameter in boot-args.  Set this to
 * the number of records you want in the log.  For example, "zrecs=10" sets it to 10 records.  Since this
 * is the number of stacks suspected of leaking, we don't need many records.
 */

#if defined(__LP64__)
#define ZRECORDS_MAX            2560 /* Max records allowed in the log */
#else
#define ZRECORDS_MAX            1536 /* Max records allowed in the log */
#endif
#define ZRECORDS_DEFAULT        1024 /* default records in log if zrecs is not specified in boot-args */

/*
 * Each record in the log contains a pointer to the zone element it refers to,
 * and a small array to hold the pc's from the stack trace.  A
 * record is added to the log each time a zalloc() is done in the zone_of_interest.  For leak debugging,
 * the record is cleared when a zfree() is done.  For corruption debugging, the log tracks both allocs and frees.
 * If the log fills, old records are replaced as if it were a circular buffer.
 */


/*
 * Decide if we want to log this zone by doing a string compare between a zone name and the name
 * of the zone to log. Return true if the strings are equal, false otherwise.  Because it's not
 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
 * match a space in the zone name.
 */

int
track_this_zone(const char *zonename, const char *logname)
{
    unsigned int len;
    const char *zc = zonename;
    const char *lc = logname;

    /*
     * Compare the strings.  We bound the compare by MAX_ZONE_NAME.
     */

    for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {

        /*
         * If the current characters don't match, check for a space in
         * the zone name and a corresponding period in the log name.
         * If that's not there, then the strings don't match.
         */

        if (*zc != *lc && !(*zc == ' ' && *lc == '.'))
            break;

        /*
         * The strings are equal so far.  If we're at the end, then it's a match.
         */

        if (*zc == '\0')
            return TRUE;
    }

    return FALSE;
}
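
/*
 * Examples of the matching rule above (periods in the boot-arg stand in for
 * spaces, since boot-args cannot contain spaces):
 *
 *	track_this_zone("kalloc.64",  "kalloc.64")  -> TRUE
 *	track_this_zone("vm objects", "vm.objects") -> TRUE
 *	track_this_zone("kalloc.64",  "kalloc.6")   -> FALSE (prefix only)
 */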

/*
 * Test if we want to log this zalloc/zfree event.  We log if this is the zone we're interested in and
 * the buffer for the records has been allocated.
 */

#define DO_LOGGING(z) (z->zone_logging == TRUE && z->zlog_btlog)

extern boolean_t kmem_alloc_ready;

#if CONFIG_ZLEAKS
#pragma mark -
#pragma mark Zone Leak Detection

/*
 * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
 * allocations made by the zone allocator.  Every zleak_sample_factor allocations in each zone, we capture a
 * backtrace.  On every free, we examine the table and, if the allocation was being tracked, stop tracking it.
 *
 * We track the allocations in the zallocations hash table, which stores the address that was returned from
 * the zone allocator.  Each stored entry in the zallocations table points to an entry in the ztraces table, which
 * stores the backtrace associated with that allocation.  This provides uniquing for the relatively large
 * backtraces - we don't store them more than once.
 *
 * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
 * a large amount of virtual space.
 */
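
/*
 * Sampling sketch (illustrative only): on the allocation path, a zone that
 * has grown past zleak_per_zone_tracking_threshold captures a backtrace on
 * every zleak_sample_factor-th allocation, e.g. with the sample_counter()
 * helper defined earlier.  The per-zone counter field named here is an
 * assumption for illustration:
 *
 *	if ((zleak_state & ZLEAK_STATE_ACTIVE) &&
 *	    sample_counter(&zone->zleak_capture, zleak_sample_factor)) {
 *		// record the backtrace in zallocations/ztraces
 *	}
 */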
#define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
#define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
#define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
#define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
uint32_t zleak_state = 0; /* State of collection, as above */

boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */
vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */
vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */
unsigned int zleak_sample_factor = 1000; /* Allocations per sample attempt */

/*
 * Counters for allocation statistics.
 */

/* Times two active records want to occupy the same spot */
unsigned int z_alloc_collisions = 0;
unsigned int z_trace_collisions = 0;

/* Times a new record lands on a spot previously occupied by a freed allocation */
unsigned int z_alloc_overwrites = 0;
unsigned int z_trace_overwrites = 0;

/* Times a new alloc or trace is put into the hash table */
unsigned int z_alloc_recorded = 0;
unsigned int z_trace_recorded = 0;

/* Times zleak_log returned false due to not being able to acquire the lock */
unsigned int z_total_conflicts = 0;


#pragma mark struct zallocation
/*
 * Structure for keeping track of an allocation
 * An allocation bucket is in use if its element is not NULL
 */
struct zallocation {
	uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
	vm_size_t za_size; /* how much memory did this allocation take up? */
	uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */
	/* TODO: #if this out */
	uint32_t za_hit_count; /* for determining effectiveness of hash function */
};
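
/*
 * Back-of-the-envelope sizing (illustrative; the real values come from
 * CONFIG_ZLEAK_ALLOCATION_MAP_NUM / CONFIG_ZLEAK_TRACE_MAP_NUM): on LP64,
 * sizeof(struct zallocation) is 24 bytes, so a hypothetical 16384-bucket
 * zallocations table costs 16384 * 24 = 384 KiB. The tables are allocated
 * lazily by zleak_activate() rather than at boot.
 */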

/* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;

vm_size_t zleak_max_zonemap_size;

/* Hashmaps of allocations and their corresponding traces */
static struct zallocation* zallocations;
static struct ztrace* ztraces;

/* not static so that panic can see this, see kern/debug.c */
struct ztrace* top_ztrace;

/* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
static lck_spin_t zleak_lock;
static lck_attr_t zleak_lock_attr;
static lck_grp_t zleak_lock_grp;
static lck_grp_attr_t zleak_lock_grp_attr;

/*
 * Initializes the zone leak monitor. Called from zone_init()
 */
static void
zleak_init(vm_size_t max_zonemap_size)
{
	char scratch_buf[16];
	boolean_t zleak_enable_flag = FALSE;

	zleak_max_zonemap_size = max_zonemap_size;
	zleak_global_tracking_threshold = max_zonemap_size / 2;
	zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;

#if CONFIG_EMBEDDED
	if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) {
		zleak_enable_flag = TRUE;
		printf("zone leak detection enabled\n");
	} else {
		zleak_enable_flag = FALSE;
		printf("zone leak detection disabled\n");
	}
#else /* CONFIG_EMBEDDED */
	/* -zleakoff (flag to disable zone leak monitor) */
	if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
		zleak_enable_flag = FALSE;
		printf("zone leak detection disabled\n");
	} else {
		zleak_enable_flag = TRUE;
		printf("zone leak detection enabled\n");
	}
#endif /* CONFIG_EMBEDDED */

	/* zfactor=XXXX (override how often to sample the zone allocator) */
	if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) {
		printf("Zone leak factor override: %u\n", zleak_sample_factor);
	}

	/* zleak-allocs=XXXX (override number of buckets in zallocations) */
	if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
		printf("Zone leak alloc buckets override: %u\n", zleak_alloc_buckets);
		/* uses the 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */
		if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets-1))) {
			printf("Override isn't a power of two, bad things might happen!\n");
		}
	}

	/* zleak-traces=XXXX (override number of buckets in ztraces) */
	if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
		printf("Zone leak trace buckets override: %u\n", zleak_trace_buckets);
		/* uses the 'is power of 2' trick: ((0x01000 & 0x00FFF) == 0) */
		if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets-1))) {
			printf("Override isn't a power of two, bad things might happen!\n");
		}
	}
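
	/*
	 * Worked example of the check above: 0x1000 is a power of two since
	 * 0x1000 & 0x0FFF == 0, while 0x1800 is not, since
	 * 0x1800 & 0x17FF == 0x1000 != 0. A non-power-of-two bucket count
	 * breaks the hash functions below, which mask with (max_size - 1)
	 * instead of taking a modulus.
	 */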

	/* allocate the zleak_lock */
	lck_grp_attr_setdefault(&zleak_lock_grp_attr);
	lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr);
	lck_attr_setdefault(&zleak_lock_attr);
	lck_spin_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);

	if (zleak_enable_flag) {
		zleak_state = ZLEAK_STATE_ENABLED;
	}
}
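
/*
 * Illustrative boot-args (the values are examples): "zfactor=100" samples
 * ten times more aggressively than the default of 1000, and
 * "zleak-allocs=32768 zleak-traces=8192" trades memory for fewer hash
 * collisions. On non-embedded configs zleak is enabled unless "-zleakoff"
 * is present; it only becomes ACTIVE once the zone map passes
 * zleak_global_tracking_threshold.
 */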

#if CONFIG_ZLEAKS

/*
 * Support for kern.zleak.active sysctl - a simplified
 * version of the zleak_state variable.
 */
int
get_zleak_state(void)
{
	if (zleak_state & ZLEAK_STATE_FAILED)
		return (-1);
	if (zleak_state & ZLEAK_STATE_ACTIVE)
		return (1);
	return (0);
}

#endif /* CONFIG_ZLEAKS */


kern_return_t
zleak_activate(void)
{
	kern_return_t retval;
	vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
	vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
	void *allocations_ptr = NULL;
	void *traces_ptr = NULL;

	/* Only one thread attempts to activate at a time */
	if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
		return KERN_SUCCESS;
	}

	/* Indicate that we're doing the setup */
	lck_spin_lock(&zleak_lock);
	if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
		lck_spin_unlock(&zleak_lock);
		return KERN_SUCCESS;
	}

	zleak_state |= ZLEAK_STATE_ACTIVATING;
	lck_spin_unlock(&zleak_lock);

	/* Allocate and zero tables */
	retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size, VM_KERN_MEMORY_OSFMK);
	if (retval != KERN_SUCCESS) {
		goto fail;
	}

	retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size, VM_KERN_MEMORY_OSFMK);
	if (retval != KERN_SUCCESS) {
		goto fail;
	}

	bzero(allocations_ptr, z_alloc_size);
	bzero(traces_ptr, z_trace_size);

	/* Everything's set. Install tables, mark active. */
	zallocations = allocations_ptr;
	ztraces = traces_ptr;

	/*
	 * Initialize the top_ztrace to the first entry in ztraces,
	 * so we don't have to check for null in zleak_log
	 */
	top_ztrace = &ztraces[0];

	/*
	 * Note that we do need a barrier between installing
	 * the tables and setting the active flag, because the zfree()
	 * path accesses the table without a lock if we're active.
	 */
	lck_spin_lock(&zleak_lock);
	zleak_state |= ZLEAK_STATE_ACTIVE;
	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
	lck_spin_unlock(&zleak_lock);

	return KERN_SUCCESS;

fail:
	/*
	 * If we fail to allocate memory, don't further tax
	 * the system by trying again.
	 */
	lck_spin_lock(&zleak_lock);
	zleak_state |= ZLEAK_STATE_FAILED;
	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
	lck_spin_unlock(&zleak_lock);

	if (allocations_ptr != NULL) {
		kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
	}

	if (traces_ptr != NULL) {
		kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size);
	}

	return retval;
}

/*
 * TODO: What about allocations that never get deallocated,
 * especially ones with unique backtraces? Should we wait to record
 * until after boot has completed?
 * (How many persistent zallocs are there?)
 */

/*
 * This function records the allocation in the allocations table,
 * and stores the associated backtrace in the traces table
 * (or just increments the refcount if the trace is already recorded).
 * If the allocation slot is already in use, the old allocation is replaced with the new allocation, and
 * the associated trace's refcount is decremented by the old allocation's size.
 * If the trace slot is already in use by a different trace, the new allocation is not recorded.
 * A trace's "refcount" is the number of outstanding bytes attributed to it, so it is
 * incremented by the amount of memory the new allocation consumes.
 * The return value is FALSE only if the lock could not be taken; the caller may then retry
 * on a later allocation.
 */
static boolean_t
zleak_log(uintptr_t* bt,
		uintptr_t addr,
		uint32_t depth,
		vm_size_t allocation_size)
{
	/* Quit if there's someone else modifying the hash tables */
	if (!lck_spin_try_lock(&zleak_lock)) {
		z_total_conflicts++;
		return FALSE;
	}

	struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];

	uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
	struct ztrace* trace = &ztraces[trace_index];

	allocation->za_hit_count++;
	trace->zt_hit_count++;

	/*
	 * If the allocation bucket we want to be in is occupied, and if the occupier
	 * has the same trace as us, just bail.
	 */
	if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
		z_alloc_collisions++;

		lck_spin_unlock(&zleak_lock);
		return TRUE;
	}

	/* STEP 1: Store the backtrace in the traces array. */
	/* A size of zero indicates that the trace bucket is free. */

	if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0) {
		/*
		 * Different unique trace with same hash!
		 * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
		 * and get out of the way for later chances
		 */
		trace->zt_collisions++;
		z_trace_collisions++;

		lck_spin_unlock(&zleak_lock);
		return TRUE;
	} else if (trace->zt_size > 0) {
		/* Same trace, already added, so increment refcount */
		trace->zt_size += allocation_size;
	} else {
		/* Found an unused trace bucket, record the trace here! */
		if (trace->zt_depth != 0) /* if this slot was previously used but not currently in use */
			z_trace_overwrites++;

		z_trace_recorded++;
		trace->zt_size = allocation_size;
		memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)));

		trace->zt_depth = depth;
		trace->zt_collisions = 0;
	}

	/* STEP 2: Store the allocation record in the allocations array. */

	if (allocation->za_element != (uintptr_t) 0) {
		/*
		 * Straight up replace any allocation record that was there. We don't want to do the work
		 * to preserve the allocation entries that were there, because we only record a subset of the
		 * allocations anyways.
		 */

		z_alloc_collisions++;

		struct ztrace* associated_trace = &ztraces[allocation->za_trace_index];
		/* Knock off old allocation's size, not the new allocation */
		associated_trace->zt_size -= allocation->za_size;
	} else if (allocation->za_trace_index != 0) {
		/* Slot previously used but not currently in use */
		z_alloc_overwrites++;
	}

	allocation->za_element = addr;
	allocation->za_trace_index = trace_index;
	allocation->za_size = allocation_size;

	z_alloc_recorded++;

	if (top_ztrace->zt_size < trace->zt_size)
		top_ztrace = trace;

	lck_spin_unlock(&zleak_lock);
	return TRUE;
}

/*
 * Free the allocation record and release the stacktrace.
 * This should be as fast as possible because it will be called for every free.
 */
static void
zleak_free(uintptr_t addr,
		vm_size_t allocation_size)
{
	if (addr == (uintptr_t) 0)
		return;

	struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];

	/* Double-checked locking: check to find out if we're interested, lock, check to make
	 * sure it hasn't changed, then modify it, and release the lock.
	 */

	if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
		/* if the allocation was the one, grab the lock, check again, then delete it */
		lck_spin_lock(&zleak_lock);

		if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
			struct ztrace *trace;

			/* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
			if (allocation->za_size != allocation_size) {
				panic("Freeing as size %lu memory that was allocated with size %lu\n",
						(uintptr_t)allocation_size, (uintptr_t)allocation->za_size);
			}

			trace = &ztraces[allocation->za_trace_index];

			/* size of 0 indicates trace bucket is unused */
			if (trace->zt_size > 0) {
				trace->zt_size -= allocation_size;
			}

			/* A NULL element means the allocation bucket is unused */
			allocation->za_element = 0;
		}
		lck_spin_unlock(&zleak_lock);
	}
}

#endif /* CONFIG_ZLEAKS */

/* These functions live outside of CONFIG_ZLEAKS because they are also used in
 * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix.
 */

/* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */
uintptr_t
hash_mix(uintptr_t x)
{
#ifndef __LP64__
	x += ~(x << 15);
	x ^= (x >> 10);
	x += (x << 3);
	x ^= (x >> 6);
	x += ~(x << 11);
	x ^= (x >> 16);
#else
	x += ~(x << 32);
	x ^= (x >> 22);
	x += ~(x << 13);
	x ^= (x >> 8);
	x += (x << 3);
	x ^= (x >> 15);
	x += ~(x << 27);
	x ^= (x >> 31);
#endif
	return x;
}

uint32_t
hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
{
	uintptr_t hash = 0;
	uintptr_t mask = max_size - 1;

	while (depth) {
		hash += bt[--depth];
	}

	hash = hash_mix(hash) & mask;

	assert(hash < max_size);

	return (uint32_t) hash;
}

/*
 * TODO: Determine how well distributed this is
 * max_size must be a power of 2, e.g. 0x10000, because 0x10000 - 1 is 0x0FFFF, which is a convenient bitmask
 */
uint32_t
hashaddr(uintptr_t pt, uint32_t max_size)
{
	uintptr_t hash = 0;
	uintptr_t mask = max_size - 1;

	hash = hash_mix(pt) & mask;

	assert(hash < max_size);

	return (uint32_t) hash;
}
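
/*
 * Illustrative use of the two hashes (the bucket counts are hypothetical):
 *
 *	uintptr_t bt[MAX_ZTRACE_DEPTH];
 *	uint32_t depth = backtrace(bt, MAX_ZTRACE_DEPTH);
 *	uint32_t ti = hashbacktrace(bt, depth, 8192);	(0 <= ti < 8192)
 *	uint32_t ai = hashaddr(addr, 16384);		(0 <= ai < 16384)
 *
 * Because both table sizes are powers of two, masking with (max_size - 1)
 * is equivalent to taking the value modulo max_size, without a division.
 */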

/* End of all leak-detection code */
#pragma mark -

#define ZONE_MAX_ALLOC_SIZE (32 * 1024)
#define ZONE_ALLOC_FRAG_PERCENT(alloc_size, ele_size) (((alloc_size % ele_size) * 100) / alloc_size)
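
/*
 * Worked example (illustrative): for 384-byte elements with 4 KiB pages,
 * a one-page chunk wastes 4096 % 384 = 256 bytes, i.e.
 * (256 * 100) / 4096 = 6%. A two-page chunk wastes 8192 % 384 = 128
 * bytes, under 2%, so zinit() below would prefer the larger alloc size.
 */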

/* Used to manage copying in of new zone names */
static vm_offset_t zone_names_start;
static vm_offset_t zone_names_next;

static vm_size_t
compute_element_size(vm_size_t requested_size)
{
	vm_size_t element_size = requested_size;

	/* Zone elements must fit both a next pointer and a backup pointer */
	vm_size_t minimum_element_size = sizeof(vm_offset_t) * 2;
	if (element_size < minimum_element_size)
		element_size = minimum_element_size;

	/*
	 * Round element size to a multiple of sizeof(pointer)
	 * This also enforces that allocations will be aligned on pointer boundaries
	 */
	element_size = ((element_size-1) + sizeof(vm_offset_t)) -
		((element_size-1) % sizeof(vm_offset_t));

	return element_size;
}
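
/*
 * Illustrative values on LP64 (sizeof(vm_offset_t) == 8): a request of
 * 20 bytes rounds up to 24; a request of 1 byte is first raised to the
 * 16-byte minimum (room for the freelist next pointer plus its backup);
 * a request of 32 stays 32. The rounding expression above is equivalent
 * to rounding up to the next multiple of sizeof(vm_offset_t).
 */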

#if KASAN_ZALLOC

/*
 * Called from zinit().
 *
 * Fixes up the zone's element size to incorporate the redzones.
 */
static void
kasan_update_element_size_for_redzone(
	zone_t zone,		/* the zone that needs to be updated */
	vm_size_t *size,	/* requested zone element size */
	vm_size_t *max,		/* maximum memory to use */
	const char *name)	/* zone name */
{
	/* Expand the zone allocation size to include the redzones. For page-multiple
	 * zones add a full guard page because they likely require alignment. kalloc
	 * and fakestack handle their own KASan state, so ignore those zones. */
	/* XXX: remove this when zinit_with_options() is a thing */
	const char *kalloc_name = "kalloc.";
	const char *fakestack_name = "fakestack.";
	if (strncmp(name, kalloc_name, strlen(kalloc_name)) == 0) {
		zone->kasan_redzone = 0;
	} else if (strncmp(name, fakestack_name, strlen(fakestack_name)) == 0) {
		zone->kasan_redzone = 0;
	} else {
		if ((*size % PAGE_SIZE) != 0) {
			zone->kasan_redzone = KASAN_GUARD_SIZE;
		} else {
			zone->kasan_redzone = PAGE_SIZE;
		}
		*max = (*max / *size) * (*size + zone->kasan_redzone * 2);
		*size += zone->kasan_redzone * 2;
	}
}
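
/*
 * Resulting element layout (sketch): each element grows a redzone on both
 * sides, so for a requested size S the zone stores S + 2 * kasan_redzone
 * bytes per element and the caller-visible pointer is advanced past the
 * left redzone by the fixup below:
 *
 *	| redzone | user payload (S bytes) | redzone |
 *	          ^-- address returned by zalloc()
 */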

/*
 * Called from zalloc_internal() to fix up the address of the newly
 * allocated element.
 *
 * Returns the element address skipping over the redzone on the left.
 */
static vm_offset_t
kasan_fixup_allocated_element_address(
	zone_t zone,		/* the zone the element belongs to */
	vm_offset_t addr)	/* address of the element, including the redzone */
{
	/* Fixup the return address to skip the redzone */
	if (zone->kasan_redzone) {
		addr = kasan_alloc(addr, zone->elem_size,
				zone->elem_size - 2 * zone->kasan_redzone, zone->kasan_redzone);
	}
	return addr;
}

/*
 * Called from zfree() to add the element being freed to the KASan quarantine.
 *
 * Returns true if the newly-freed element made it into the quarantine without
 * displacing another, false otherwise. In the latter case, addrp points to the
 * address of the displaced element, which will be freed by the zone.
 */
static bool
kasan_quarantine_freed_element(
	zone_t *zonep,		/* the zone the element is being freed to */
	void **addrp)		/* address of the element being freed */
{
	zone_t zone = *zonep;
	void *addr = *addrp;

	/*
	 * Resize back to the real allocation size and hand off to the KASan
	 * quarantine. `addr` may then point to a different allocation, if the
	 * current element replaced another in the quarantine. The zone then
	 * takes ownership of the swapped out free element.
	 */
	vm_size_t usersz = zone->elem_size - 2 * zone->kasan_redzone;
	vm_size_t sz = usersz;

	if (addr && zone->kasan_redzone) {
		kasan_check_free((vm_address_t)addr, usersz, KASAN_HEAP_ZALLOC);
		addr = (void *)kasan_dealloc((vm_address_t)addr, &sz);
		assert(sz == zone->elem_size);
	}
	if (addr && zone->kasan_quarantine) {
		kasan_free(&addr, &sz, KASAN_HEAP_ZALLOC, zonep, usersz, true);
		if (!addr) {
			return TRUE;
		}
	}
	*addrp = addr;
	return FALSE;
}

#endif /* KASAN_ZALLOC */

/*
 * zinit initializes a new zone. The zone data structures themselves
 * are stored in a zone, which is initially a static structure that
 * is initialized by zone_init.
 */

zone_t
zinit(
	vm_size_t size,		/* the size of an element */
	vm_size_t max,		/* maximum memory to use */
	vm_size_t alloc,	/* allocation size */
	const char *name)	/* a name for the zone */
{
	zone_t z;

	size = compute_element_size(size);

	simple_lock(&all_zones_lock);

	assert(num_zones < MAX_ZONES);
	assert(num_zones_in_use <= num_zones);

	/* If possible, find a previously zdestroy'ed zone in the zone_array that we can reuse instead of initializing a new zone. */
	for (int index = bitmap_first(zone_empty_bitmap, MAX_ZONES);
			index >= 0 && index < (int)num_zones;
			index = bitmap_next(zone_empty_bitmap, index)) {
		z = &(zone_array[index]);

		/*
		 * If the zone name and the element size are the same, we can just reuse the old zone struct.
		 * Otherwise hand out a new zone from the zone_array.
		 */
		if (!strcmp(z->zone_name, name)) {
			vm_size_t old_size = z->elem_size;
#if KASAN_ZALLOC
			old_size -= z->kasan_redzone * 2;
#endif
			if (old_size == size) {
				/* Clear the empty bit for this zone, increment num_zones_in_use, and mark the zone as valid again. */
				bitmap_clear(zone_empty_bitmap, index);
				num_zones_in_use++;
				z->zone_valid = TRUE;

				/* All other state is already set up since the zone was previously in use. Return early. */
				simple_unlock(&all_zones_lock);
				return (z);
			}
		}
	}

	/* If we're here, it means we didn't find a zone above that we could simply reuse. Set up a new zone. */

	/* Clear the empty bit for the new zone */
	bitmap_clear(zone_empty_bitmap, num_zones);

	z = &(zone_array[num_zones]);
	z->index = num_zones;

	num_zones++;
	num_zones_in_use++;

	/*
	 * Initialize the zone lock here before dropping the all_zones_lock. Otherwise we could race with
	 * zalloc_async() and try to grab the zone lock before it has been initialized, causing a panic.
	 */
	lock_zone_init(z);

	simple_unlock(&all_zones_lock);

#if KASAN_ZALLOC
	kasan_update_element_size_for_redzone(z, &size, &max, name);
#endif

	max = round_page(max);

	vm_size_t best_alloc = PAGE_SIZE;

	if ((size % PAGE_SIZE) == 0) {
		/* zero fragmentation by definition */
		best_alloc = size;
	} else {
		vm_size_t alloc_size;
		for (alloc_size = (2 * PAGE_SIZE); alloc_size <= ZONE_MAX_ALLOC_SIZE; alloc_size += PAGE_SIZE) {
			if (ZONE_ALLOC_FRAG_PERCENT(alloc_size, size) < ZONE_ALLOC_FRAG_PERCENT(best_alloc, size)) {
				best_alloc = alloc_size;
			}
		}
	}

	/* The caller's alloc is only a hint; we always use the chunk size with the least fragmentation computed above. */
	alloc = best_alloc;
	if (max && (max < alloc))
		max = alloc;

	z->free_elements = NULL;
	queue_init(&z->pages.any_free_foreign);
	queue_init(&z->pages.all_free);
	queue_init(&z->pages.intermediate);
	queue_init(&z->pages.all_used);
	z->cur_size = 0;
	z->page_count = 0;
	z->max_size = max;
	z->elem_size = size;
	z->alloc_size = alloc;
	z->count = 0;
	z->countfree = 0;
	z->count_all_free_pages = 0;
	z->sum_count = 0LL;
	z->doing_alloc_without_vm_priv = FALSE;
	z->doing_alloc_with_vm_priv = FALSE;
	z->exhaustible = FALSE;
	z->collectable = TRUE;
	z->allows_foreign = FALSE;
	z->expandable = TRUE;
	z->waiting = FALSE;
	z->async_pending = FALSE;
	z->caller_acct = TRUE;
	z->noencrypt = FALSE;
	z->no_callout = FALSE;
	z->async_prio_refill = FALSE;
	z->gzalloc_exempt = FALSE;
	z->alignment_required = FALSE;
	z->zone_replenishing = FALSE;
	z->prio_refill_watermark = 0;
	z->zone_replenish_thread = NULL;
	z->zp_count = 0;
	z->kasan_quarantine = TRUE;
	z->zone_valid = TRUE;
	z->cpu_cache_enabled = FALSE;

#if CONFIG_ZLEAKS
	z->zleak_capture = 0;
	z->zleak_on = FALSE;
#endif /* CONFIG_ZLEAKS */

	/*
	 * If the VM is ready to handle kmem_alloc requests, copy the zone name passed in.
	 *
	 * Else simply maintain a pointer to the name string. The only zones we'll actually have
	 * to do this for would be the VM-related zones that are created very early on before any
	 * kexts can be loaded or unloaded. So we should be fine with just a pointer in this case.
	 */
	if (kmem_alloc_ready) {
		size_t len = MIN(strlen(name)+1, MACH_ZONE_NAME_MAX_LEN);

		if (zone_names_start == 0 || ((zone_names_next - zone_names_start) + len) > PAGE_SIZE) {
			printf("zalloc: allocating memory for zone names buffer\n");
			kern_return_t retval = kmem_alloc_kobject(kernel_map, &zone_names_start,
					PAGE_SIZE, VM_KERN_MEMORY_OSFMK);
			if (retval != KERN_SUCCESS) {
				panic("zalloc: zone_names memory allocation failed");
			}
			bzero((char *)zone_names_start, PAGE_SIZE);
			zone_names_next = zone_names_start;
		}

		strlcpy((char *)zone_names_next, name, len);
		z->zone_name = (char *)zone_names_next;
		zone_names_next += len;
	} else {
		z->zone_name = name;
	}

	/*
	 * Check for and set up zone logging if requested via boot-args. We recognize two
	 * boot-args:
	 *
	 *	zlog=<zone_to_log>
	 *	zrecs=<num_records_in_log>
	 *
	 * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
	 * control the size of the log. If zrecs is not specified, a default value is used.
	 */

	if (num_zones_logged < max_num_zones_to_log) {

		int i = 1; /* zlog0 isn't allowed. */
		boolean_t zone_logging_enabled = FALSE;
		char zlog_name[MAX_ZONE_NAME] = ""; /* Temp. buffer to create the strings zlog1, zlog2 etc... */

		while (i <= max_num_zones_to_log) {

			snprintf(zlog_name, MAX_ZONE_NAME, "zlog%d", i);

			if (PE_parse_boot_argn(zlog_name, zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
				if (track_this_zone(z->zone_name, zone_name_to_log)) {
					if (z->zone_valid) {
						z->zone_logging = TRUE;
						zone_logging_enabled = TRUE;
						num_zones_logged++;
						break;
					}
				}
			}
			i++;
		}

		if (zone_logging_enabled == FALSE) {
			/*
			 * Backwards compat. with the old boot-arg used to specify single zone logging i.e. zlog
			 * Needs to happen after the newer zlogn checks because the prefix will match all the zlogn
			 * boot-args.
			 */
			if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
				if (track_this_zone(z->zone_name, zone_name_to_log)) {
					if (z->zone_valid) {
						z->zone_logging = TRUE;
						zone_logging_enabled = TRUE;
						num_zones_logged++;
					}
				}
			}
		}

		if (log_records_init == FALSE && zone_logging_enabled == TRUE) {
			if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
				/*
				 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
				 * This prevents accidentally hogging too much kernel memory and making the system
				 * unusable.
				 */

				log_records = MIN(ZRECORDS_MAX, log_records);
				log_records_init = TRUE;
			} else {
				log_records = ZRECORDS_DEFAULT;
				log_records_init = TRUE;
			}
		}

		/*
		 * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are
		 * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. kmem_alloc_ready is set to
		 * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one
		 * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
		 * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized
		 * right now.
		 */
		if (kmem_alloc_ready) {

			zone_t curr_zone = NULL;
			unsigned int max_zones = 0, zone_idx = 0;

			simple_lock(&all_zones_lock);
			max_zones = num_zones;
			simple_unlock(&all_zones_lock);

			for (zone_idx = 0; zone_idx < max_zones; zone_idx++) {

				curr_zone = &(zone_array[zone_idx]);

				if (!curr_zone->zone_valid) {
					continue;
				}

				/*
				 * We work with the zone unlocked here because we could end up needing the zone lock to
				 * enable logging for this zone e.g. need a VM object to allocate memory to enable logging for the
				 * VM objects zone.
				 *
				 * We don't expect these zones to be needed at this early a time in boot and so take this chance.
				 */
				if (curr_zone->zone_logging && curr_zone->zlog_btlog == NULL) {

					curr_zone->zlog_btlog = btlog_create(log_records, MAX_ZTRACE_DEPTH, (corruption_debug_flag == FALSE) /* caller_will_remove_entries_for_element? */);

					if (curr_zone->zlog_btlog) {
						printf("zone: logging started for zone %s\n", curr_zone->zone_name);
					} else {
						printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
						curr_zone->zone_logging = FALSE;
					}
				}
			}
		}
	}

#if CONFIG_GZALLOC
	gzalloc_zone_init(z);
#endif

#if CONFIG_ZCACHE
	/* Check if boot-arg specified it should have a cache */
	if (cache_all_zones || track_this_zone(name, cache_zone_name)) {
		zone_change(z, Z_CACHING_ENABLED, TRUE);
	}
#endif

	return(z);
}
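
/*
 * Usage sketch (hypothetical caller; "widget" is illustrative): create a
 * zone of fixed-size elements, capped at 64 KiB of backing memory, then
 * allocate and free one element. The third argument is only a hint, since
 * zinit() recomputes the chunk size above; the name is what boot-args like
 * zlog= and zcc_enable_for_zone_name= match against.
 *
 *	static zone_t widget_zone;
 *	widget_zone = zinit(sizeof(struct widget), 64 * 1024, 0, "widgets");
 *	struct widget *w = (struct widget *)zalloc(widget_zone);
 *	zfree(widget_zone, w);
 */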
unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated, zone_replenish_throttle_count;

static void zone_replenish_thread(zone_t);

/* High priority VM privileged thread used to asynchronously refill a designated
 * zone, such as the reserved VM map entry zone.
 */
__attribute__((noreturn))
static void
zone_replenish_thread(zone_t z)
{
	vm_size_t free_size;
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {
		lock_zone(z);
		assert(z->zone_valid);
		z->zone_replenishing = TRUE;
		assert(z->prio_refill_watermark != 0);
		while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) {
			assert(z->doing_alloc_without_vm_priv == FALSE);
			assert(z->doing_alloc_with_vm_priv == FALSE);
			assert(z->async_prio_refill == TRUE);

			unlock_zone(z);
			int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
			vm_offset_t space, alloc_size;
			kern_return_t kr;

			if (vm_pool_low())
				alloc_size = round_page(z->elem_size);
			else
				alloc_size = z->alloc_size;

			if (z->noencrypt)
				zflags |= KMA_NOENCRYPT;

			/* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */
			if (is_zone_map_nearing_exhaustion()) {
				thread_wakeup((event_t) &vm_pageout_garbage_collect);
			}

			kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);

			if (kr == KERN_SUCCESS) {
				zcram(z, space, alloc_size);
			} else if (kr == KERN_RESOURCE_SHORTAGE) {
				VM_PAGE_WAIT();
			} else if (kr == KERN_NO_SPACE) {
				kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
				if (kr == KERN_SUCCESS) {
					zcram(z, space, alloc_size);
				} else {
					assert_wait_timeout(&z->zone_replenish_thread, THREAD_UNINT, 1, 100 * NSEC_PER_USEC);
					thread_block(THREAD_CONTINUE_NULL);
				}
			}

			lock_zone(z);
			assert(z->zone_valid);
			zone_replenish_loops++;
		}

		z->zone_replenishing = FALSE;
		/* Signal any potential throttled consumers, terminating
		 * their timer-bounded waits.
		 */
		thread_wakeup(z);

		assert_wait(&z->zone_replenish_thread, THREAD_UNINT);
		unlock_zone(z);
		thread_block(THREAD_CONTINUE_NULL);
		zone_replenish_wakeups++;
	}
}

void
zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) {
	z->prio_refill_watermark = low_water_mark;

	z->async_prio_refill = TRUE;
	OSMemoryBarrier();
	kern_return_t tres = kernel_thread_start_priority((thread_continue_t)zone_replenish_thread, z, MAXPRI_KERNEL, &z->zone_replenish_thread);

	if (tres != KERN_SUCCESS) {
		panic("zone_prio_refill_configure, thread create: 0x%x", tres);
	}

	thread_deallocate(z->zone_replenish_thread);
}
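
/*
 * Usage sketch (hypothetical zone; the reserved VM map entry zone is the
 * real in-tree client): keep at least 128 free elements at all times, so
 * VM-privileged allocators never block on zone expansion:
 *
 *	zone_prio_refill_configure(my_reserved_zone, 128);
 *
 * The watermark is in elements; the replenish thread refills whenever the
 * free count drops below it, and zalloc() throttles non-VM-privileged
 * callers once the free count falls below half the watermark.
 */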

void
zdestroy(zone_t z)
{
	unsigned int zindex;

	assert(z != NULL);

	lock_zone(z);
	assert(z->zone_valid);

	/* Assert that the zone does not have any allocations in flight */
	assert(z->doing_alloc_without_vm_priv == FALSE);
	assert(z->doing_alloc_with_vm_priv == FALSE);
	assert(z->async_pending == FALSE);
	assert(z->waiting == FALSE);
	assert(z->async_prio_refill == FALSE);

#if !KASAN_ZALLOC
	/*
	 * Unset the valid bit. We'll hit an assert failure on further operations on this zone, until zinit() is called again.
	 * Leave the zone valid for KASan as we will see zfree's on quarantined free elements even after the zone is destroyed.
	 */
	z->zone_valid = FALSE;
#endif
	unlock_zone(z);

#if CONFIG_ZCACHE
	/* zdestroy() does not support zones with per-cpu caching enabled; the caches would have to be drained first. */
	if (zone_caching_enabled(z)) {
		panic("zdestroy: Zone caching enabled for zone %s", z->zone_name);
	}
#endif /* CONFIG_ZCACHE */

	/* Dump all the free elements */
	drop_free_elements(z);

#if CONFIG_GZALLOC
	/* If the zone is gzalloc managed dump all the elements in the free cache */
	gzalloc_empty_free_cache(z);
#endif

	lock_zone(z);

#if !KASAN_ZALLOC
	/* Assert that all counts are zero */
	assert(z->count == 0);
	assert(z->countfree == 0);
	assert(z->cur_size == 0);
	assert(z->page_count == 0);
	assert(z->count_all_free_pages == 0);

	/* Assert that all queues except the foreign queue are empty. The zone allocator doesn't know how to free up foreign memory. */
	assert(queue_empty(&z->pages.all_used));
	assert(queue_empty(&z->pages.intermediate));
	assert(queue_empty(&z->pages.all_free));
#endif

	zindex = z->index;

	unlock_zone(z);

	simple_lock(&all_zones_lock);

	assert(!bitmap_test(zone_empty_bitmap, zindex));
	/* Mark the zone as empty in the bitmap */
	bitmap_set(zone_empty_bitmap, zindex);
	num_zones_in_use--;
	assert(num_zones_in_use > 0);

	simple_unlock(&all_zones_lock);
}

/* Initialize the metadata for an allocation chunk */
static inline void
zcram_metadata_init(vm_offset_t newmem, vm_size_t size, struct zone_page_metadata *chunk_metadata)
{
	struct zone_page_metadata *page_metadata;

	/* The first page is the real metadata for this allocation chunk. We mark the others as fake metadata */
	size -= PAGE_SIZE;
	newmem += PAGE_SIZE;

	for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
		page_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE);
		assert(page_metadata != chunk_metadata);
		PAGE_METADATA_SET_ZINDEX(page_metadata, MULTIPAGE_METADATA_MAGIC);
		page_metadata_set_realmeta(page_metadata, chunk_metadata);
		page_metadata->free_count = 0;
	}
}


static void
random_free_to_zone(
	zone_t zone,
	vm_offset_t newmem,
	vm_offset_t first_element_offset,
	int element_count,
	unsigned int *entropy_buffer)
{
	vm_offset_t last_element_offset;
	vm_offset_t element_addr;
	vm_size_t elem_size;
	int index;

	assert(element_count && element_count <= ZONE_CHUNK_MAXELEMENTS);
	elem_size = zone->elem_size;
	last_element_offset = first_element_offset + ((element_count * elem_size) - elem_size);
	for (index = 0; index < element_count; index++) {
		assert(first_element_offset <= last_element_offset);
		if (
#if DEBUG || DEVELOPMENT
		leak_scan_debug_flag || __improbable(zone->tags) ||
#endif /* DEBUG || DEVELOPMENT */
		random_bool_gen_bits(&zone_bool_gen, entropy_buffer, MAX_ENTROPY_PER_ZCRAM, 1)) {
			element_addr = newmem + first_element_offset;
			first_element_offset += elem_size;
		} else {
			element_addr = newmem + last_element_offset;
			last_element_offset -= elem_size;
		}
		if (element_addr != (vm_offset_t)zone) {
			zone->count++; /* compensate for free_to_zone */
			free_to_zone(zone, element_addr, FALSE);
		}
		zone->cur_size += elem_size;
	}
}
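
/*
 * Illustrative effect of the randomization above: for a chunk holding
 * elements [A, B, C, D], each random bit picks the lowest or the highest
 * remaining element, so the freelist might be built as A, D, B, C on one
 * boot and D, C, A, B on the next. This makes the address sequence handed
 * out by zalloc() unpredictable at a cost of one entropy bit per element.
 */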

/*
 * Cram the given memory into the specified zone. Update the zone page count accordingly.
 */
void
zcram(
	zone_t zone,
	vm_offset_t newmem,
	vm_size_t size)
{
	vm_size_t elem_size;
	boolean_t from_zm = FALSE;
	int element_count;
	unsigned int entropy_buffer[MAX_ENTROPY_PER_ZCRAM] = { 0 };

	/* Basic sanity checks */
	assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
	assert(!zone->collectable || zone->allows_foreign
		|| (from_zone_map(newmem, size)));

	elem_size = zone->elem_size;

	KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, zone->index, size);

	if (from_zone_map(newmem, size))
		from_zm = TRUE;

	if (!from_zm) {
		/* We cannot support elements larger than page size for foreign memory because we
		 * put metadata on the page itself for each page of foreign memory. We need to do
		 * this in order to be able to reach the metadata when any element is freed.
		 */
		assert((zone->allows_foreign == TRUE) && (zone->elem_size <= (PAGE_SIZE - sizeof(struct zone_page_metadata))));
	}

	if (zalloc_debug & ZALLOC_DEBUG_ZCRAM)
		kprintf("zcram(%p[%s], 0x%lx%s, 0x%lx)\n", zone, zone->zone_name,
				(unsigned long)newmem, from_zm ? "" : "[F]", (unsigned long)size);

	ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));

	/*
	 * Initialize the metadata for all pages. We don't need the zone lock
	 * here because we are not manipulating any zone related state yet.
	 */

	struct zone_page_metadata *chunk_metadata;
	size_t zone_page_metadata_size = sizeof(struct zone_page_metadata);

	assert((newmem & PAGE_MASK) == 0);
	assert((size & PAGE_MASK) == 0);

	chunk_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE);
	chunk_metadata->pages.next = NULL;
	chunk_metadata->pages.prev = NULL;
	page_metadata_set_freelist(chunk_metadata, 0);
	PAGE_METADATA_SET_ZINDEX(chunk_metadata, zone->index);
	chunk_metadata->free_count = 0;
	assert((size / PAGE_SIZE) <= ZONE_CHUNK_MAXPAGES);
	chunk_metadata->page_count = (unsigned)(size / PAGE_SIZE);

	zcram_metadata_init(newmem, size, chunk_metadata);

#if VM_MAX_TAG_ZONES
	if (__improbable(zone->tags)) {
		assert(from_zm);
		ztMemoryAdd(zone, newmem, size);
	}
#endif /* VM_MAX_TAG_ZONES */

	lock_zone(zone);
	assert(zone->zone_valid);
	enqueue_tail(&zone->pages.all_used, &(chunk_metadata->pages));

	if (!from_zm) {
		for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
			vm_offset_t first_element_offset = 0;
			if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0) {
				first_element_offset = zone_page_metadata_size;
			} else {
				first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
			}
			element_count = (unsigned int)((PAGE_SIZE - first_element_offset) / elem_size);
			random_free_to_zone(zone, newmem, first_element_offset, element_count, entropy_buffer);
		}
	} else {
		element_count = (unsigned int)(size / elem_size);
		random_free_to_zone(zone, newmem, 0, element_count, entropy_buffer);
	}
	unlock_zone(zone);

	KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, zone->index);
}

/*
 * Fill a zone with enough memory to contain at least nelem elements.
 * Return the number of elements actually put into the zone, which may
 * be more than the caller asked for since the memory allocation is
 * rounded up to the next zone allocation size.
 */
int
zfill(
	zone_t zone,
	int nelem)
{
	kern_return_t kr;
	vm_offset_t memory;

	vm_size_t alloc_size = zone->alloc_size;
	vm_size_t elem_per_alloc = alloc_size / zone->elem_size;
	vm_size_t nalloc = (nelem + elem_per_alloc - 1) / elem_per_alloc;

	/* Don't mix-and-match zfill with foreign memory */
	assert(!zone->allows_foreign);

	/* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */
	if (is_zone_map_nearing_exhaustion()) {
		thread_wakeup((event_t) &vm_pageout_garbage_collect);
	}

	kr = kernel_memory_allocate(zone_map, &memory, nalloc * alloc_size, 0, KMA_KOBJECT, VM_KERN_MEMORY_ZONE);
	if (kr != KERN_SUCCESS) {
		printf("%s: kernel_memory_allocate() of %lu bytes failed\n",
				__func__, (unsigned long)(nalloc * alloc_size));
		return 0;
	}

	for (vm_size_t i = 0; i < nalloc; i++) {
		zcram(zone, memory + i * alloc_size, alloc_size);
	}

	return (int)(nalloc * elem_per_alloc);
}
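
/*
 * Worked example (illustrative numbers): with elem_size = 256 and
 * alloc_size = 4096, elem_per_alloc is 16, so zfill(zone, 100) rounds up
 * to nalloc = (100 + 15) / 16 = 7 chunks and returns 7 * 16 = 112, i.e.
 * the zone is pre-populated with 112 free elements, not 100.
 */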

/*
 * Initialize the "zone of zones" which uses fixed memory allocated
 * earlier in memory initialization. zone_bootstrap is called
 * before zone_init.
 */
void
zone_bootstrap(void)
{
	char temp_buf[16];

	if (!PE_parse_boot_argn("zalloc_debug", &zalloc_debug, sizeof(zalloc_debug)))
		zalloc_debug = 0;

	/* Set up zone element poisoning */
	zp_init();

	random_bool_init(&zone_bool_gen);

	/* should zlog log to debug zone corruption instead of leaks? */
	if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
		corruption_debug_flag = TRUE;
	}

#if DEBUG || DEVELOPMENT
	/* should perform zone element size checking in copyin/copyout? */
	if (PE_parse_boot_argn("-no-copyio-zalloc-check", temp_buf, sizeof(temp_buf))) {
		copyio_zalloc_check = FALSE;
	}
#if VM_MAX_TAG_ZONES
	/* enable tags for zones that ask for it */
	if (PE_parse_boot_argn("-zt", temp_buf, sizeof(temp_buf))) {
		zone_tagging_on = TRUE;
	}
#endif /* VM_MAX_TAG_ZONES */
	/* disable element location randomization in a page */
	if (PE_parse_boot_argn("-zl", temp_buf, sizeof(temp_buf))) {
		leak_scan_debug_flag = TRUE;
	}
#endif

	simple_lock_init(&all_zones_lock, 0);

	num_zones_in_use = 0;
	num_zones = 0;
	/* Mark all zones as empty */
	bitmap_full(zone_empty_bitmap, BITMAP_LEN(MAX_ZONES));
	zone_names_next = zone_names_start = 0;

#if DEBUG || DEVELOPMENT
	simple_lock_init(&zone_test_lock, 0);
#endif /* DEBUG || DEVELOPMENT */

	thread_call_setup(&call_async_alloc, zalloc_async, NULL);

	/* initializing global lock group for zones */
	lck_grp_attr_setdefault(&zone_locks_grp_attr);
	lck_grp_init(&zone_locks_grp, "zone_locks", &zone_locks_grp_attr);

	lck_attr_setdefault(&zone_metadata_lock_attr);
	lck_mtx_init_ext(&zone_metadata_region_lck, &zone_metadata_region_lck_ext, &zone_locks_grp, &zone_metadata_lock_attr);

#if CONFIG_ZCACHE
	/* zcc_enable_for_zone_name=<zone>: enable per-cpu zone caching for <zone>. */
	if (PE_parse_boot_arg_str("zcc_enable_for_zone_name", cache_zone_name, sizeof(cache_zone_name))) {
		printf("zcache: caching enabled for zone %s\n", cache_zone_name);
	}

	/* -zcache_all: enable per-cpu zone caching for all zones, overrides 'zcc_enable_for_zone_name'. */
	if (PE_parse_boot_argn("-zcache_all", temp_buf, sizeof(temp_buf))) {
		cache_all_zones = TRUE;
		printf("zcache: caching enabled for all zones\n");
	}
#endif /* CONFIG_ZCACHE */
}

/*
 * We're being very conservative here and picking a value of 95%. We might need to lower this if
 * we find that we're not catching the problem and are still hitting zone map exhaustion panics.
 */
#define ZONE_MAP_JETSAM_LIMIT_DEFAULT 95

/*
 * Trigger zone-map-exhaustion jetsams if the zone map is X% full, where X=zone_map_jetsam_limit.
 * Can be set via boot-arg "zone_map_jetsam_limit". Set to 95% by default.
 */
unsigned int zone_map_jetsam_limit = ZONE_MAP_JETSAM_LIMIT_DEFAULT;

/*
 * Returns pid of the task with the largest number of VM map entries.
 */
extern pid_t find_largest_process_vm_map_entries(void);

/*
 * Callout to jetsam. If pid is -1, we wake up the memorystatus thread to do asynchronous kills.
 * For any other pid we try to kill that process synchronously.
 */
boolean_t memorystatus_kill_on_zone_map_exhaustion(pid_t pid);

void get_zone_map_size(uint64_t *current_size, uint64_t *capacity)
{
	*current_size = zone_map->size;
	*capacity = vm_map_max(zone_map) - vm_map_min(zone_map);
}

void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size)
{
	zone_t largest_zone = zone_find_largest();
	strlcpy(zone_name, largest_zone->zone_name, zone_name_len);
	*zone_size = largest_zone->cur_size;
}

boolean_t is_zone_map_nearing_exhaustion(void)
{
	uint64_t size = zone_map->size;
	uint64_t capacity = vm_map_max(zone_map) - vm_map_min(zone_map);
	if (size > ((capacity * zone_map_jetsam_limit) / 100)) {
		return TRUE;
	}
	return FALSE;
}
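
/*
 * Worked example (illustrative): with a 1 GiB zone map and the default
 * zone_map_jetsam_limit of 95, the threshold is (1024 MiB * 95) / 100,
 * about 973 MiB; once zone_map->size crosses that, allocation paths wake
 * vm_pageout_garbage_collect, which can invoke the jetsam logic below.
 */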

extern zone_t vm_map_entry_zone;
extern zone_t vm_object_zone;

#define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98

/*
 * Tries to kill a single process if it can attribute one to the largest zone. If not, wakes up the memorystatus thread
 * to walk through the jetsam priority bands and kill processes.
 */
static void kill_process_in_largest_zone(void)
{
	pid_t pid = -1;
	zone_t largest_zone = zone_find_largest();

	printf("zone_map_exhaustion: Zone map size %lld, capacity %lld [jetsam limit %d%%]\n", (uint64_t)zone_map->size,
			(uint64_t)(vm_map_max(zone_map) - vm_map_min(zone_map)), zone_map_jetsam_limit);
	printf("zone_map_exhaustion: Largest zone %s, size %lu\n", largest_zone->zone_name, (uintptr_t)largest_zone->cur_size);

	/*
	 * We want to make sure we don't call this function from userspace. Or we could end up trying to synchronously kill the process
	 * whose context we're in, causing the system to hang.
	 */
	assert(current_task() == kernel_task);

	/*
	 * If vm_object_zone is the largest, check to see if the number of elements in vm_map_entry_zone is comparable. If so, consider
	 * vm_map_entry_zone as the largest. This lets us target a specific process to jetsam to quickly recover from the zone map bloat.
	 */
	if (largest_zone == vm_object_zone) {
		unsigned int vm_object_zone_count = vm_object_zone->count;
		unsigned int vm_map_entry_zone_count = vm_map_entry_zone->count;
		/* Is the VM map entries zone count >= 98% of the VM objects zone count? */
		if (vm_map_entry_zone_count >= ((vm_object_zone_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) {
			largest_zone = vm_map_entry_zone;
			printf("zone_map_exhaustion: Picking VM map entries as the zone to target, size %lu\n", (uintptr_t)largest_zone->cur_size);
		}
	}

	/* TODO: Extend this to check for the largest process in other zones as well. */
	if (largest_zone == vm_map_entry_zone) {
		pid = find_largest_process_vm_map_entries();
	} else {
		printf("zone_map_exhaustion: Nothing to do for the largest zone [%s]. Waking up memorystatus thread.\n", largest_zone->zone_name);
	}
	if (!memorystatus_kill_on_zone_map_exhaustion(pid)) {
		printf("zone_map_exhaustion: Call to memorystatus failed, victim pid: %d\n", pid);
	}
}

/* Global initialization of Zone Allocator.
 * Runs after zone_bootstrap.
 */
void
zone_init(
	vm_size_t max_zonemap_size)
{
	kern_return_t retval;
	vm_offset_t zone_min;
	vm_offset_t zone_max;
	vm_offset_t zone_metadata_space;
	unsigned int zone_pages;
	vm_map_kernel_flags_t vmk_flags;

#if VM_MAX_TAG_ZONES
	if (zone_tagging_on) ztInit(max_zonemap_size, &zone_locks_grp);
#endif

	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
			FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_ZONE,
			&zone_map);

	if (retval != KERN_SUCCESS)
		panic("zone_init: kmem_suballoc failed");
	zone_max = zone_min + round_page(max_zonemap_size);
#if CONFIG_GZALLOC
	gzalloc_init(max_zonemap_size);
#endif

	/*
	 * Setup garbage collection information:
	 */
	zone_map_min_address = zone_min;
	zone_map_max_address = zone_max;

	zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
	zone_metadata_space = round_page(zone_pages * sizeof(struct zone_page_metadata));
	retval = kernel_memory_allocate(zone_map, &zone_metadata_region_min, zone_metadata_space,
			0, KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_OSFMK);
	if (retval != KERN_SUCCESS)
		panic("zone_init: zone_metadata_region initialization failed!");
	zone_metadata_region_max = zone_metadata_region_min + zone_metadata_space;

#if defined(__LP64__)
	/*
	 * ensure that any vm_page_t that gets created from
	 * the vm_page zone can be packed properly (see vm_page.h
	 * for the packing requirements)
	 */
	if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_metadata_region_max))) != (vm_page_t)zone_metadata_region_max)
		panic("VM_PAGE_PACK_PTR failed on zone_metadata_region_max - %p", (void *)zone_metadata_region_max);

	if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_map_max_address))) != (vm_page_t)zone_map_max_address)
		panic("VM_PAGE_PACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address);
#endif

	lck_grp_attr_setdefault(&zone_gc_lck_grp_attr);
	lck_grp_init(&zone_gc_lck_grp, "zone_gc", &zone_gc_lck_grp_attr);
	lck_attr_setdefault(&zone_gc_lck_attr);
	lck_mtx_init_ext(&zone_gc_lock, &zone_gc_lck_ext, &zone_gc_lck_grp, &zone_gc_lck_attr);

#if CONFIG_ZLEAKS
	/*
	 * Initialize the zone leak monitor
	 */
	zleak_init(max_zonemap_size);
#endif /* CONFIG_ZLEAKS */

#if VM_MAX_TAG_ZONES
	if (zone_tagging_on) vm_allocation_zones_init();
#endif

	int jetsam_limit_temp = 0;
	if (PE_parse_boot_argn("zone_map_jetsam_limit", &jetsam_limit_temp, sizeof (jetsam_limit_temp)) &&
			jetsam_limit_temp > 0 && jetsam_limit_temp <= 100)
		zone_map_jetsam_limit = jetsam_limit_temp;
}

#pragma mark -
#pragma mark zalloc_canblock

extern boolean_t early_boot_complete;

void
zalloc_poison_element(boolean_t check_poison, zone_t zone, vm_offset_t addr)
{
	vm_offset_t inner_size = zone->elem_size;
	if (__improbable(check_poison && addr)) {
		vm_offset_t *element_cursor = ((vm_offset_t *) addr) + 1;
		vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *) addr);

		for ( ; element_cursor < backup ; element_cursor++)
			if (__improbable(*element_cursor != ZP_POISON))
				zone_element_was_modified_panic(zone,
						addr,
						*element_cursor,
						ZP_POISON,
						((vm_offset_t)element_cursor) - addr);
	}

	if (addr) {
		/*
		 * Clear out the old next pointer and backup to avoid leaking the cookie
		 * and so that only values on the freelist have a valid cookie
		 */
		vm_offset_t *primary = (vm_offset_t *) addr;
		vm_offset_t *backup = get_backup_ptr(inner_size, primary);

		*primary = ZP_POISON;
		*backup = ZP_POISON;
	}
}
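
/*
 * Free-element layout assumed above (sketch): word 0 holds the freelist
 * next pointer, the last word holds the obfuscated backup pointer, and a
 * poisoned element has every word in between set to ZP_POISON:
 *
 *	| next | ZP_POISON ... ZP_POISON | backup |
 *
 * Any other value between the two pointers means something wrote to the
 * element after it was freed, hence the modification panic.
 */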

/*
 * zalloc returns an element from the specified zone.
 */
static void *
zalloc_internal(
	zone_t zone,
	boolean_t canblock,
	boolean_t nopagewait,
	vm_size_t
#if !VM_MAX_TAG_ZONES
	__unused
#endif
	reqsize,
	vm_tag_t tag)
{
	vm_offset_t addr = 0;
	kern_return_t retval;
	uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */
	unsigned int numsaved = 0;
	boolean_t zone_replenish_wakeup = FALSE, zone_alloc_throttle = FALSE;
	thread_t thr = current_thread();
	boolean_t check_poison = FALSE;
	boolean_t set_doing_alloc_with_vm_priv = FALSE;

#if CONFIG_ZLEAKS
	uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */
#endif /* CONFIG_ZLEAKS */

#if KASAN
	/*
	 * KASan uses zalloc() for fakestack, which can be called anywhere. However,
	 * we make sure these calls can never block.
	 */
	boolean_t irq_safe = FALSE;
	const char *fakestack_name = "fakestack.";
	if (strncmp(zone->zone_name, fakestack_name, strlen(fakestack_name)) == 0) {
		irq_safe = TRUE;
	}
#elif MACH_ASSERT
	/* In every other case, zalloc() from interrupt context is unsafe. */
	const boolean_t irq_safe = FALSE;
#endif

	assert(zone != ZONE_NULL);
	assert(irq_safe || ml_get_interrupts_enabled() || ml_is_quiescing() || debug_mode_active() || !early_boot_complete);

#if CONFIG_GZALLOC
	addr = gzalloc_alloc(zone, canblock);
#endif
	/*
	 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
	 */
	if (__improbable(DO_LOGGING(zone)))
		numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH);

#if CONFIG_ZLEAKS
	/*
	 * Zone leak detection: capture a backtrace every zleak_sample_factor
	 * allocations in this zone.
	 */
	if (__improbable(zone->zleak_on && sample_counter(&zone->zleak_capture, zleak_sample_factor) == TRUE)) {
		/* Avoid backtracing twice if zone logging is on */
		if (numsaved == 0)
			zleak_tracedepth = backtrace(zbt, MAX_ZTRACE_DEPTH);
		else
			zleak_tracedepth = numsaved;
	}
#endif /* CONFIG_ZLEAKS */

#if VM_MAX_TAG_ZONES
	if (__improbable(zone->tags)) vm_tag_will_update_zone(tag, zone->tag_zone_index);
#endif /* VM_MAX_TAG_ZONES */

#if CONFIG_ZCACHE
	if (__probable(addr == 0)) {
		if (zone_caching_enabled(zone)) {
			addr = zcache_alloc_from_cpu_cache(zone);
			if (addr) {
#if KASAN_ZALLOC
				addr = kasan_fixup_allocated_element_address(zone, addr);
#endif
				DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
				return((void *)addr);
			}
		}
	}
#endif /* CONFIG_ZCACHE */

	lock_zone(zone);
	assert(zone->zone_valid);

	if (zone->async_prio_refill && zone->zone_replenish_thread) {
		vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size));
		vm_size_t zrefillwm = zone->prio_refill_watermark * zone->elem_size;
		zone_replenish_wakeup = (zfreec < zrefillwm);
		zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0));

		do {
			if (zone_replenish_wakeup) {
				zone_replenish_wakeups_initiated++;
				/* Signal the potentially waiting
				 * refill thread.
				 */
				thread_wakeup(&zone->zone_replenish_thread);

				/* We don't want to wait around for zone_replenish_thread to bump up the free count
				 * if we're in zone_gc(). This keeps us from deadlocking with zone_replenish_thread.
				 */
				if (thr->options & TH_OPT_ZONE_GC)
					break;

				unlock_zone(zone);
				/* Scheduling latencies etc. may prevent
				 * the refill thread from keeping up
				 * with demand. Throttle consumers
				 * when we fall below half the
				 * watermark, unless VM privileged
				 */
				if (zone_alloc_throttle) {
					zone_replenish_throttle_count++;
					assert_wait_timeout(zone, THREAD_UNINT, 1, NSEC_PER_MSEC);
					thread_block(THREAD_CONTINUE_NULL);
				}
				lock_zone(zone);
				assert(zone->zone_valid);
			}

			zfreec = (zone->cur_size - (zone->count * zone->elem_size));
			zrefillwm = zone->prio_refill_watermark * zone->elem_size;
			zone_replenish_wakeup = (zfreec < zrefillwm);
			zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0));

		} while (zone_alloc_throttle == TRUE);
	}

	if (__probable(addr == 0))
		addr = try_alloc_from_zone(zone, tag, &check_poison);

	/* If we're here because of zone_gc(), we didn't wait for zone_replenish_thread to finish.
	 * So we need to ensure that we did successfully grab an element. And we only need to assert
	 * this for zones that have a replenish thread configured (in this case, the Reserved VM map
	 * entries zone).
	 */
	if (thr->options & TH_OPT_ZONE_GC && zone->async_prio_refill)
		assert(addr != 0);

	while ((addr == 0) && canblock) {
		/*
		 * zone is empty, try to expand it
		 *
		 * Note that we now allow up to 2 threads (1 vm_privileged and 1 non-vm_privileged)
		 * to expand the zone concurrently... this is necessary to avoid stalling
		 * vm_privileged threads running critical code necessary to continue compressing/swapping
		 * pages (i.e. making new free pages) from stalling behind non-vm_privileged threads
		 * waiting to acquire free pages when the vm_page_free_count is below the
		 * vm_page_free_reserved limit.
		 */
3300 if ((zone->doing_alloc_without_vm_priv || zone->doing_alloc_with_vm_priv) &&
3301 (((thr->options & TH_OPT_VMPRIV) == 0) || zone->doing_alloc_with_vm_priv)) {
3302 /*
3303 * This is a non-vm_privileged thread and a non-vm_privileged or
3304 * a vm_privileged thread is already expanding the zone...
3305 * OR
3306 * this is a vm_privileged thread and a vm_privileged thread is
3307 * already expanding the zone...
3308 *
3309 * In either case wait for a thread to finish, then try again.
3310 */
3311 zone->waiting = TRUE;
3312 zone_sleep(zone);
3313 } else {
3314 vm_offset_t space;
3315 vm_size_t alloc_size;
3316 int retry = 0;
3317
3318 if ((zone->cur_size + zone->elem_size) >
3319 zone->max_size) {
3320 if (zone->exhaustible)
3321 break;
3322 if (zone->expandable) {
3323 /*
3324 * We're willing to overflow certain
3325 * zones, but not without complaining.
3326 *
3327 * This is best used in conjunction
3328 * with the collectable flag. What we
3329 * want is an assurance we can get the
3330 * memory back, assuming there's no
3331 * leak.
3332 */
3333 zone->max_size += (zone->max_size >> 1);
3334 } else {
3335 unlock_zone(zone);
3336
3337 panic_include_zprint = TRUE;
3338#if CONFIG_ZLEAKS
3339 if (zleak_state & ZLEAK_STATE_ACTIVE)
3340 panic_include_ztrace = TRUE;
3341#endif /* CONFIG_ZLEAKS */
3342 panic("zalloc: zone \"%s\" empty.", zone->zone_name);
3343 }
3344 }
3345 /*
3346 * It is possible that a BG thread is refilling/expanding the zone
3347 * and gets preempted during that operation. That blocks all other
3348 * threads from making progress, leading to a watchdog timeout. To
3349 * avoid that, boost the thread priority using the rwlock boost.
3350 */
3351 set_thread_rwlock_boost();
3352
3353 if ((thr->options & TH_OPT_VMPRIV)) {
3354 zone->doing_alloc_with_vm_priv = TRUE;
3355 set_doing_alloc_with_vm_priv = TRUE;
3356 } else {
3357 zone->doing_alloc_without_vm_priv = TRUE;
3358 }
3359 unlock_zone(zone);
3360
3361 for (;;) {
3362 int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
3363
3364 if (vm_pool_low() || retry >= 1)
3365 alloc_size =
3366 round_page(zone->elem_size);
3367 else
3368 alloc_size = zone->alloc_size;
3369
3370 if (zone->noencrypt)
3371 zflags |= KMA_NOENCRYPT;
3372
3373 /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */
3374 if (is_zone_map_nearing_exhaustion()) {
3375 thread_wakeup((event_t) &vm_pageout_garbage_collect);
3376 }
3377
3378 retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
3379 if (retval == KERN_SUCCESS) {
3380#if CONFIG_ZLEAKS
3381 if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) {
3382 if (zone_map->size >= zleak_global_tracking_threshold) {
3383 kern_return_t kr;
3384
3385 kr = zleak_activate();
3386 if (kr != KERN_SUCCESS) {
3387 printf("Failed to activate live zone leak debugging (%d).\n", kr);
3388 }
3389 }
3390 }
3391
3392 if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) {
3393 if (zone->cur_size > zleak_per_zone_tracking_threshold) {
3394 zone->zleak_on = TRUE;
3395 }
3396 }
3397#endif /* CONFIG_ZLEAKS */
3398 zcram(zone, space, alloc_size);
3399
3400 break;
3401 } else if (retval != KERN_RESOURCE_SHORTAGE) {
3402 retry++;
3403
3404 if (retry == 3) {
3405 panic_include_zprint = TRUE;
3406#if CONFIG_ZLEAKS
3407 if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
3408 panic_include_ztrace = TRUE;
3409 }
3410#endif /* CONFIG_ZLEAKS */
3411 if (retval == KERN_NO_SPACE) {
3412 zone_t zone_largest = zone_find_largest();
3413 panic("zalloc: zone map exhausted while allocating from zone %s, likely due to memory leak in zone %s (%lu total bytes, %d elements allocated)",
3414 zone->zone_name, zone_largest->zone_name,
3415 (unsigned long)zone_largest->cur_size, zone_largest->count);
3416
3417 }
3418 panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
3419 }
3420 } else {
3421 break;
3422 }
3423 }
3424 lock_zone(zone);
3425 assert(zone->zone_valid);
3426
3427 if (set_doing_alloc_with_vm_priv == TRUE)
3428 zone->doing_alloc_with_vm_priv = FALSE;
3429 else
3430 zone->doing_alloc_without_vm_priv = FALSE;
3431
3432 if (zone->waiting) {
3433 zone->waiting = FALSE;
3434 zone_wakeup(zone);
3435 }
3436 clear_thread_rwlock_boost();
3437
3438 addr = try_alloc_from_zone(zone, tag, &check_poison);
3439 if (addr == 0 &&
3440 retval == KERN_RESOURCE_SHORTAGE) {
3441 if (nopagewait == TRUE)
3442 break; /* out of the main while loop */
3443 unlock_zone(zone);
3444
3445 VM_PAGE_WAIT();
3446 lock_zone(zone);
3447 assert(zone->zone_valid);
3448 }
3449 }
3450 if (addr == 0)
3451 addr = try_alloc_from_zone(zone, tag, &check_poison);
3452 }
3453
3454#if CONFIG_ZLEAKS
3455 /* Zone leak detection:
3456 * If we're sampling this allocation, add it to the zleaks hash table.
3457 */
3458 if (addr && zleak_tracedepth > 0) {
3459 /* Sampling can fail if another sample is happening at the same time in a different zone. */
3460 if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
3461 /* If it failed, roll back the counter so we sample the next allocation instead. */
3462 zone->zleak_capture = zleak_sample_factor;
3463 }
3464 }
3465#endif /* CONFIG_ZLEAKS */
3466
3467
3468 if ((addr == 0) && (!canblock || nopagewait) && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
3469 zone->async_pending = TRUE;
3470 unlock_zone(zone);
3471 thread_call_enter(&call_async_alloc);
3472 lock_zone(zone);
3473 assert(zone->zone_valid);
3474 addr = try_alloc_from_zone(zone, tag, &check_poison);
3475 }
3476
3477#if VM_MAX_TAG_ZONES
3478 if (__improbable(zone->tags) && addr) {
3479 if (reqsize) reqsize = zone->elem_size - reqsize;
3480 vm_tag_update_zone_size(tag, zone->tag_zone_index, zone->elem_size, reqsize);
3481 }
3482#endif /* VM_MAX_TAG_ZONES */
3483
3484 unlock_zone(zone);
3485
3486 if (__improbable(DO_LOGGING(zone) && addr)) {
3487 btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_ALLOC, (void **)zbt, numsaved);
3488 }
3489
3490 zalloc_poison_element(check_poison, zone, addr);
3491
3492 if (addr) {
3493#if DEBUG || DEVELOPMENT
3494 if (__improbable(leak_scan_debug_flag && !(zone->elem_size & (sizeof(uintptr_t) - 1)))) {
3495 unsigned int count, idx;
3496 /* Fill element, from tail, with backtrace in reverse order */
3497 if (numsaved == 0) numsaved = backtrace(zbt, MAX_ZTRACE_DEPTH);
3498 count = (unsigned int)(zone->elem_size / sizeof(uintptr_t));
3499 if (count >= numsaved) count = numsaved - 1;
3500 for (idx = 0; idx < count; idx++) ((uintptr_t *)addr)[count - 1 - idx] = zbt[idx + 1];
3501 }
3502#endif /* DEBUG || DEVELOPMENT */
3503 }
3504
3505 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
3506
3507
3508#if KASAN_ZALLOC
3509 addr = kasan_fixup_allocated_element_address(zone, addr);
3510#endif
3511
3512 DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
3513
3514 return((void *)addr);
3515}
3516
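/*
 * Public entry points. Each wrapper below fixes the (canblock,
 * nopagewait) behavior of zalloc_internal(); the _tag variant also
 * passes through the requested size and VM tag for tag accounting.
 */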
3517void *
3518zalloc(zone_t zone)
3519{
3520 return (zalloc_internal(zone, TRUE, FALSE, 0, VM_KERN_MEMORY_NONE));
3521}
3522
3523void *
3524zalloc_noblock(zone_t zone)
3525{
3526 return (zalloc_internal(zone, FALSE, FALSE, 0, VM_KERN_MEMORY_NONE));
3527}
3528
3529void *
3530zalloc_nopagewait(zone_t zone)
3531{
3532 return (zalloc_internal(zone, TRUE, TRUE, 0, VM_KERN_MEMORY_NONE));
3533}
3534
3535void *
3536zalloc_canblock_tag(zone_t zone, boolean_t canblock, vm_size_t reqsize, vm_tag_t tag)
3537{
3538 return (zalloc_internal(zone, canblock, FALSE, reqsize, tag));
3539}
3540
3541void *
3542zalloc_canblock(zone_t zone, boolean_t canblock)
3543{
3544 return (zalloc_internal(zone, canblock, FALSE, 0, VM_KERN_MEMORY_NONE));
3545}
3546
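/*
 * zalloc_attempt: a single try against the zone freelist, applying the
 * poison bookkeeping but none of the logging, caching or refill
 * machinery of zalloc_internal().
 */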
3547void *
3548zalloc_attempt(zone_t zone)
3549{
3550 boolean_t check_poison = FALSE;
3551 vm_offset_t addr = try_alloc_from_zone(zone, VM_KERN_MEMORY_NONE, &check_poison);
3552 zalloc_poison_element(check_poison, zone, addr);
3553 return (void *)addr;
3554}
3555
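/*
 * zfree_direct: counterpart of zalloc_attempt(); poisons the element if
 * sampling selects it and pushes it straight onto the zone freelist.
 */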
3556void
3557zfree_direct(zone_t zone, vm_offset_t elem)
3558{
3559 boolean_t poison = zfree_poison_element(zone, elem);
3560 free_to_zone(zone, elem, poison);
3561}
3562
3563
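/*
 * zalloc_async: thread call that services zones which set async_pending
 * in zalloc_internal(). A blocking zalloc/zfree pair on each such zone
 * replenishes it in a context where blocking is safe.
 */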
3564void
3565zalloc_async(
3566 __unused thread_call_param_t p0,
3567 __unused thread_call_param_t p1)
3568{
3569 zone_t current_z = NULL;
3570 unsigned int max_zones, i;
3571 void *elt = NULL;
3572 boolean_t pending = FALSE;
3573
3574 simple_lock(&all_zones_lock);
3575 max_zones = num_zones;
3576 simple_unlock(&all_zones_lock);
3577 for (i = 0; i < max_zones; i++) {
3578 current_z = &(zone_array[i]);
3579
3580 if (current_z->no_callout == TRUE) {
3581 /* async_pending will never be set */
3582 continue;
3583 }
3584
3585 lock_zone(current_z);
3586 if (current_z->zone_valid && current_z->async_pending == TRUE) {
3587 current_z->async_pending = FALSE;
3588 pending = TRUE;
3589 }
3590 unlock_zone(current_z);
3591
3592 if (pending == TRUE) {
3593 elt = zalloc_canblock_tag(current_z, TRUE, 0, VM_KERN_MEMORY_OSFMK);
3594 zfree(current_z, elt);
3595 pending = FALSE;
3596 }
3597 }
3598}
3599
3600/*
3601 * zget returns an element from the specified zone without blocking;
3602 * it returns NULL immediately if no element is available.
3603 */
3604void *
3605zget(
3606 zone_t zone)
3607{
3608 return zalloc_internal(zone, FALSE, TRUE, 0, VM_KERN_MEMORY_NONE);
3609}
3610
3611/* Keep this FALSE by default. Large memory machines run orders of magnitude
3612 slower in debug mode when true. Use the debugger to enable if needed. */
3613/* static */ boolean_t zone_check = FALSE;
3614
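/*
 * Walk every freelist the zone may have (foreign pages, fully free
 * pages and intermediate pages) and panic if any entry is not a sane
 * element address or if 'elem' is already on a freelist (double free).
 */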
3615static void zone_check_freelist(zone_t zone, vm_offset_t elem)
3616{
3617 struct zone_free_element *this;
3618 struct zone_page_metadata *thispage;
3619
3620 if (zone->allows_foreign) {
3621 for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign);
3622 !queue_end(&zone->pages.any_free_foreign, &(thispage->pages));
3623 thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) {
3624 for (this = page_metadata_get_freelist(thispage);
3625 this != NULL;
3626 this = this->next) {
3627 if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem)
3628 panic("zone_check_freelist");
3629 }
3630 }
3631 }
3632 for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.all_free);
3633 !queue_end(&zone->pages.all_free, &(thispage->pages));
3634 thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) {
3635 for (this = page_metadata_get_freelist(thispage);
3636 this != NULL;
3637 this = this->next) {
3638 if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem)
3639 panic("zone_check_freelist");
3640 }
3641 }
3642 for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate);
3643 !queue_end(&zone->pages.intermediate, &(thispage->pages));
3644 thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) {
3645 for (this = page_metadata_get_freelist(thispage);
3646 this != NULL;
3647 this = this->next) {
3648 if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem)
3649 panic("zone_check_freelist");
3650 }
3651 }
3652}
3653
3654boolean_t
3655zfree_poison_element(zone_t zone, vm_offset_t elem)
3656{
3657 boolean_t poison = FALSE;
3658 if (zp_factor != 0 || zp_tiny_zone_limit != 0) {
3659 /*
3660 * Poison the memory before it ends up on the freelist to catch
3661 * use-after-free and use of uninitialized memory
3662 *
3663 * Always poison tiny zones' elements (limit is 0 if -no-zp is set)
3664 * Also poison larger elements periodically
3665 */
3666
3667 vm_offset_t inner_size = zone->elem_size;
3668
3669 uint32_t sample_factor = zp_factor + (((uint32_t)inner_size) >> zp_scale);
3670
3671 if (inner_size <= zp_tiny_zone_limit)
3672 poison = TRUE;
3673 else if (zp_factor != 0 && sample_counter(&zone->zp_count, sample_factor) == TRUE)
3674 poison = TRUE;
3675
3676 if (__improbable(poison)) {
3677
3678 /* memset_pattern{4|8} could help make this faster: <rdar://problem/4662004> */
3679 /* Poison everything but primary and backup */
3680 vm_offset_t *element_cursor = ((vm_offset_t *) elem) + 1;
3681 vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *)elem);
3682
3683 for ( ; element_cursor < backup; element_cursor++)
3684 *element_cursor = ZP_POISON;
3685 }
3686 }
3687 return poison;
3688}
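
/*
 * zfree: return an element to its zone. Verifies (where enabled) that
 * the element came from this zone, optionally poisons it, records the
 * free for logging and leak/tag accounting, and hands the element to
 * the per-CPU cache or back to the zone freelist.
 */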
3689void
3690zfree(
3691 zone_t zone,
3692 void *addr)
3693{
3694 vm_offset_t elem = (vm_offset_t) addr;
3695 uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */
3696 unsigned int numsaved = 0;
3697 boolean_t gzfreed = FALSE;
3698 boolean_t poison = FALSE;
3699#if VM_MAX_TAG_ZONES
3700 vm_tag_t tag;
3701#endif /* VM_MAX_TAG_ZONES */
3702
3703 assert(zone != ZONE_NULL);
3704 DTRACE_VM2(zfree, zone_t, zone, void*, addr);
3705#if KASAN_ZALLOC
3706 if (kasan_quarantine_freed_element(&zone, &addr)) {
3707 return;
3708 }
3709 elem = (vm_offset_t)addr;
3710#endif
3711
3712 /*
3713 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
3714 */
3715
3716 if (__improbable(DO_LOGGING(zone) && corruption_debug_flag))
3717 numsaved = OSBacktrace((void *)zbt, MAX_ZTRACE_DEPTH);
3718
3719#if MACH_ASSERT
3720 /* Basic sanity checks */
3721 if (zone == ZONE_NULL || elem == (vm_offset_t)0)
3722 panic("zfree: NULL");
3723#endif
3724
3725#if CONFIG_GZALLOC
3726 gzfreed = gzalloc_free(zone, addr);
3727#endif
3728
3729 if (!gzfreed) {
3730 struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE);
3731 if (zone != PAGE_METADATA_GET_ZONE(page_meta)) {
3732 panic("Element %p from zone %s caught being freed to wrong zone %s\n", addr, PAGE_METADATA_GET_ZONE(page_meta)->zone_name, zone->zone_name);
3733 }
3734 }
3735
3736 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr);
3737
3738 if (__improbable(!gzfreed && zone->collectable && !zone->allows_foreign &&
3739 !from_zone_map(elem, zone->elem_size))) {
3740 panic("zfree: non-allocated memory in collectable zone!");
3741 }
3742
3743 if (!gzfreed) {
3744 poison = zfree_poison_element(zone, elem);
3745 }
3746
3747 /*
3748 * See if we're doing logging on this zone. There are two styles of logging used depending on
3749 * whether we're trying to catch a leak or corruption. See comments above in zalloc for details.
3750 */
3751
3752 if (__improbable(DO_LOGGING(zone))) {
3753 if (corruption_debug_flag) {
3754 /*
3755 * We're logging to catch a corruption. Add a record of this zfree operation
3756 * to log.
3757 */
3758 btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_FREE, (void **)zbt, numsaved);
3759 } else {
3760 /*
3761 * We're logging to catch a leak. Remove any record we might have for this
3762 * element since it's being freed. Note that we may not find it if the buffer
3763 * overflowed and that's OK. Since the log is of a limited size, old records
3764 * get overwritten if there are more zallocs than zfrees.
3765 */
3766 btlog_remove_entries_for_element(zone->zlog_btlog, (void *)addr);
3767 }
3768 }
3769
3770#if CONFIG_ZCACHE
3771 if (zone_caching_enabled(zone)) {
3772 int __assert_only ret = zcache_free_to_cpu_cache(zone, addr);
3773 assert(ret != FALSE);
3774 return;
3775 }
3776#endif /* CONFIG_ZCACHE */
3777
3778 lock_zone(zone);
3779 assert(zone->zone_valid);
3780
3781 if (zone_check) {
3782 zone_check_freelist(zone, elem);
3783 }
3784
3785 if (__probable(!gzfreed)) {
3786#if VM_MAX_TAG_ZONES
3787 if (__improbable(zone->tags)) {
3788 tag = (ZTAG(zone, elem)[0] >> 1);
3789 // set the tag with b0 clear so the block remains in use
3790 ZTAG(zone, elem)[0] = 0xFFFE;
3791 }
3792#endif /* VM_MAX_TAG_ZONES */
3793 free_to_zone(zone, elem, poison);
3794 }
3795
3796 if (__improbable(zone->count < 0)) {
3797 panic("zfree: zone count underflow in zone %s while freeing element %p, possible cause: double frees or freeing memory that did not come from this zone",
3798 zone->zone_name, addr);
3799 }
3800
3801#if CONFIG_ZLEAKS
3802 /*
3803 * Zone leak detection: un-track the allocation
3804 */
3805 if (zone->zleak_on) {
3806 zleak_free(elem, zone->elem_size);
3807 }
3808#endif /* CONFIG_ZLEAKS */
3809
3810#if VM_MAX_TAG_ZONES
3811 if (__improbable(zone->tags) && __probable(!gzfreed)) {
3812 vm_tag_update_zone_size(tag, zone->tag_zone_index, -((int64_t)zone->elem_size), 0);
3813 }
3814#endif /* VM_MAX_TAG_ZONES */
3815
3816 unlock_zone(zone);
3817}
3818
3819/* Change a zone's flags.
3820 * This routine must be called immediately after zinit.
3821 */
3822void
3823zone_change(
3824 zone_t zone,
3825 unsigned int item,
3826 boolean_t value)
3827{
3828 assert( zone != ZONE_NULL );
3829 assert( value == TRUE || value == FALSE );
3830
3831 switch(item){
3832 case Z_NOENCRYPT:
3833 zone->noencrypt = value;
3834 break;
3835 case Z_EXHAUST:
3836 zone->exhaustible = value;
3837 break;
3838 case Z_COLLECT:
3839 zone->collectable = value;
3840 break;
3841 case Z_EXPAND:
3842 zone->expandable = value;
3843 break;
3844 case Z_FOREIGN:
3845 zone->allows_foreign = value;
3846 break;
3847 case Z_CALLERACCT:
3848 zone->caller_acct = value;
3849 break;
3850 case Z_NOCALLOUT:
3851 zone->no_callout = value;
3852 break;
3853 case Z_TAGS_ENABLED:
3854#if VM_MAX_TAG_ZONES
3855 {
3856 static int tag_zone_index;
3857 zone->tags = TRUE;
3858 zone->tags_inline = (((page_size + zone->elem_size - 1) / zone->elem_size) <= (sizeof(uint32_t) / sizeof(uint16_t)));
3859 zone->tag_zone_index = OSAddAtomic(1, &tag_zone_index);
3860 }
3861#endif /* VM_MAX_TAG_ZONES */
3862 break;
3863 case Z_GZALLOC_EXEMPT:
3864 zone->gzalloc_exempt = value;
3865#if CONFIG_GZALLOC
3866 gzalloc_reconfigure(zone);
3867#endif
3868 break;
3869 case Z_ALIGNMENT_REQUIRED:
3870 zone->alignment_required = value;
3871#if KASAN_ZALLOC
3872 if (zone->kasan_redzone == KASAN_GUARD_SIZE) {
3873 /* Don't disturb alignment with the redzone for zones with
3874 * specific alignment requirements. */
3875 zone->elem_size -= zone->kasan_redzone * 2;
3876 zone->kasan_redzone = 0;
3877 }
3878#endif
3879#if CONFIG_GZALLOC
3880 gzalloc_reconfigure(zone);
3881#endif
3882 break;
3883 case Z_KASAN_QUARANTINE:
3884 zone->kasan_quarantine = value;
3885 break;
3886 case Z_CACHING_ENABLED:
3887#if CONFIG_ZCACHE
3888 if (value == TRUE && use_caching) {
3889 if (zcache_ready()) {
3890 zcache_init(zone);
3891 } else {
3892 zone->cpu_cache_enable_when_ready = TRUE;
3893 }
3894
3895 }
3896#endif
3897 break;
3898 default:
3899 panic("Zone_change: Wrong Item Type!");
3900 /* break; */
3901 }
3902}
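
/*
 * For example, a caller that wants an unencrypted, non-collectable zone
 * would, immediately after zinit():
 *
 * zone_change(z, Z_NOENCRYPT, TRUE);
 * zone_change(z, Z_COLLECT, FALSE);
 */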
3903
3904/*
3905 * Return the expected number of free elements in the zone.
3906 * This calculation will be incorrect if items are zfree'd that
3907 * were never zalloc'd/zget'd. The correct way to stuff memory
3908 * into a zone is by zcram.
3909 */
3910
3911integer_t
3912zone_free_count(zone_t zone)
3913{
3914 integer_t free_count;
3915
3916 lock_zone(zone);
3917 free_count = zone->countfree;
3918 unlock_zone(zone);
3919
3920 assert(free_count >= 0);
3921
3922 return(free_count);
3923}
3924
3925/* Drops the elements in the free queue of a zone. Called by zone_gc() on each zone, and when a zone is zdestroy'ed. */
3926void
3927drop_free_elements(zone_t z)
3928{
3929 vm_size_t elt_size, size_freed;
3930 unsigned int total_freed_pages = 0;
3931 uint64_t old_all_free_count;
3932 struct zone_page_metadata *page_meta;
3933 queue_head_t page_meta_head;
3934
3935 lock_zone(z);
3936 if (queue_empty(&z->pages.all_free)) {
3937 unlock_zone(z);
3938 return;
3939 }
3940
3941 /*
3942 * Snatch all of the free elements away from the zone.
3943 */
3944 elt_size = z->elem_size;
3945 old_all_free_count = z->count_all_free_pages;
3946 queue_new_head(&z->pages.all_free, &page_meta_head, struct zone_page_metadata *, pages);
3947 queue_init(&z->pages.all_free);
3948 z->count_all_free_pages = 0;
3949 unlock_zone(z);
3950
3951 /* Iterate through all elements to find out size and count of elements we snatched */
3952 size_freed = 0;
3953 queue_iterate(&page_meta_head, page_meta, struct zone_page_metadata *, pages) {
3954 assert(from_zone_map((vm_address_t)page_meta, sizeof(*page_meta))); /* foreign elements should be in any_free_foreign */
3955 size_freed += elt_size * page_meta->free_count;
3956 }
3957
3958 /* Update the zone size and free element count */
3959 lock_zone(z);
3960 z->cur_size -= size_freed;
3961 z->countfree -= size_freed/elt_size;
3962 unlock_zone(z);
3963
3964 while ((page_meta = (struct zone_page_metadata *)dequeue_head(&page_meta_head)) != NULL) {
3965 vm_address_t free_page_address;
3966 /* Free the pages for metadata and account for them */
3967 free_page_address = get_zone_page(page_meta);
3968 ZONE_PAGE_COUNT_DECR(z, page_meta->page_count);
3969 total_freed_pages += page_meta->page_count;
3970 old_all_free_count -= page_meta->page_count;
3971#if KASAN_ZALLOC
3972 kasan_poison_range(free_page_address, page_meta->page_count * PAGE_SIZE, ASAN_VALID);
3973#endif
3974#if VM_MAX_TAG_ZONES
3975 if (z->tags) ztMemoryRemove(z, free_page_address, (page_meta->page_count * PAGE_SIZE));
3976#endif /* VM_MAX_TAG_ZONES */
3977 kmem_free(zone_map, free_page_address, (page_meta->page_count * PAGE_SIZE));
3978 if (current_thread()->options & TH_OPT_ZONE_GC) {
3979 thread_yield_to_preemption();
3980 }
3981 }
3982
3983 /* We freed all the pages from the all_free list for this zone */
3984 assert(old_all_free_count == 0);
3985
3986 if (zalloc_debug & ZALLOC_DEBUG_ZONEGC)
3987 kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n", z->zone_name, (unsigned long)size_freed/elt_size, total_freed_pages);
3988}
3989
3990/* Zone garbage collection
3991 *
3992 * zone_gc will walk through all the free elements in all the
3993 * zones that are marked collectable looking for reclaimable
3994 * pages. zone_gc is called by consider_zone_gc when the system
3995 * begins to run out of memory.
3996 *
3997 * We should ensure that zone_gc never blocks.
3998 */
3999void
4000zone_gc(boolean_t consider_jetsams)
4001{
4002 unsigned int max_zones;
4003 zone_t z;
4004 unsigned int i;
4005
4006 if (consider_jetsams) {
4007 kill_process_in_largest_zone();
4008 /*
4009 * If we do end up jetsamming something, we need to do a zone_gc so that
4010 * we can reclaim free zone elements and update the zone map size.
4011 * Fall through.
4012 */
4013 }
4014
4015 lck_mtx_lock(&zone_gc_lock);
4016
4017 current_thread()->options |= TH_OPT_ZONE_GC;
4018
4019 simple_lock(&all_zones_lock);
4020 max_zones = num_zones;
4021 simple_unlock(&all_zones_lock);
4022
4023 if (zalloc_debug & ZALLOC_DEBUG_ZONEGC)
4024 kprintf("zone_gc() starting...\n");
4025
4026 for (i = 0; i < max_zones; i++) {
4027 z = &(zone_array[i]);
4028 assert(z != ZONE_NULL);
4029
4030 if (!z->collectable) {
4031 continue;
4032 }
4033#if CONFIG_ZCACHE
4034 if (zone_caching_enabled(z)) {
4035 zcache_drain_depot(z);
4036 }
4037#endif /* CONFIG_ZCACHE */
4038 if (queue_empty(&z->pages.all_free)) {
4039 continue;
4040 }
4041
4042 drop_free_elements(z);
4043 }
4044
4045 current_thread()->options &= ~TH_OPT_ZONE_GC;
4046
4047 lck_mtx_unlock(&zone_gc_lock);
4048}
4049
4050extern vm_offset_t kmapoff_kaddr;
4051extern unsigned int kmapoff_pgcnt;
4052
4053/*
4054 * consider_zone_gc:
4055 *
4056 * Called by the pageout daemon when the system needs more free pages.
4057 */
4058
4059void
4060consider_zone_gc(boolean_t consider_jetsams)
4061{
4062 if (kmapoff_kaddr != 0) {
4063 /*
4064 * One-time reclaim of kernel_map resources we allocated in
4065 * early boot.
4066 */
4067 (void) vm_deallocate(kernel_map,
4068 kmapoff_kaddr, kmapoff_pgcnt * PAGE_SIZE_64);
4069 kmapoff_kaddr = 0;
4070 }
4071
4072 if (zone_gc_allowed)
4073 zone_gc(consider_jetsams);
4074}
4075
4076/*
4077 * Creates a vm_map_copy_t to return to the caller of mach_* MIG calls
4078 * requesting zone information.
4079 * Frees unused pages towards the end of the region, and zeroes out unused
4080 * space on the last page.
4081 */
4082vm_map_copy_t
4083create_vm_map_copy(
4084 vm_offset_t start_addr,
4085 vm_size_t total_size,
4086 vm_size_t used_size)
4087{
4088 kern_return_t kr;
4089 vm_offset_t end_addr;
4090 vm_size_t free_size;
4091 vm_map_copy_t copy;
4092
4093 if (used_size != total_size) {
4094 end_addr = start_addr + used_size;
4095 free_size = total_size - (round_page(end_addr) - start_addr);
4096
4097 if (free_size >= PAGE_SIZE) {
4098 kmem_free(ipc_kernel_map,
4099 round_page(end_addr), free_size);
4100 }
4101 bzero((char *) end_addr, round_page(end_addr) - end_addr);
4102 }
4103
4104 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)start_addr,
4105 (vm_map_size_t)used_size, TRUE, &copy);
4106 assert(kr == KERN_SUCCESS);
4107
4108 return copy;
4109}
4110
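/*
 * Snapshot a zone's name and/or statistics under the zone lock. Returns
 * FALSE, without writing either output, if the zone has already been
 * destroyed (zone_valid is cleared).
 */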
4111boolean_t
4112get_zone_info(
4113 zone_t z,
4114 mach_zone_name_t *zn,
4115 mach_zone_info_t *zi)
4116{
4117 struct zone zcopy;
4118
4119 assert(z != ZONE_NULL);
4120 lock_zone(z);
4121 if (!z->zone_valid) {
4122 unlock_zone(z);
4123 return FALSE;
4124 }
4125 zcopy = *z;
4126 unlock_zone(z);
4127
4128 if (zn != NULL) {
4129 /* assuming here the name data is static */
4130 (void) __nosan_strlcpy(zn->mzn_name, zcopy.zone_name,
4131 strlen(zcopy.zone_name)+1);
4132 }
4133
4134 if (zi != NULL) {
4135 zi->mzi_count = (uint64_t)zcopy.count;
4136 zi->mzi_cur_size = ptoa_64(zcopy.page_count);
4137 zi->mzi_max_size = (uint64_t)zcopy.max_size;
4138 zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
4139 zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
4140 zi->mzi_sum_size = zcopy.sum_count * zcopy.elem_size;
4141 zi->mzi_exhaustible = (uint64_t)zcopy.exhaustible;
4142 zi->mzi_collectable = 0;
4143 if (zcopy.collectable) {
4144 SET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable, ((uint64_t)zcopy.count_all_free_pages * PAGE_SIZE));
4145 SET_MZI_COLLECTABLE_FLAG(zi->mzi_collectable, TRUE);
4146 }
4147 }
4148
4149 return TRUE;
4150}
4151
4152kern_return_t
4153task_zone_info(
4154 __unused task_t task,
4155 __unused mach_zone_name_array_t *namesp,
4156 __unused mach_msg_type_number_t *namesCntp,
4157 __unused task_zone_info_array_t *infop,
4158 __unused mach_msg_type_number_t *infoCntp)
4159{
4160 return KERN_FAILURE;
4161}
4162
4163kern_return_t
4164mach_zone_info(
4165 host_priv_t host,
4166 mach_zone_name_array_t *namesp,
4167 mach_msg_type_number_t *namesCntp,
4168 mach_zone_info_array_t *infop,
4169 mach_msg_type_number_t *infoCntp)
4170{
4171 return (mach_memory_info(host, namesp, namesCntp, infop, infoCntp, NULL, NULL));
4172}
4173
4174
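/*
 * mach_memory_info returns one name/info pair per valid zone and, if
 * requested, the wired-memory diagnostics from vm_page_diagnose().
 * All results are handed back to the MIG caller as vm_map_copy_t
 * regions.
 */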
4175kern_return_t
4176mach_memory_info(
4177 host_priv_t host,
4178 mach_zone_name_array_t *namesp,
4179 mach_msg_type_number_t *namesCntp,
4180 mach_zone_info_array_t *infop,
4181 mach_msg_type_number_t *infoCntp,
4182 mach_memory_info_array_t *memoryInfop,
4183 mach_msg_type_number_t *memoryInfoCntp)
4184{
4185 mach_zone_name_t *names;
4186 vm_offset_t names_addr;
4187 vm_size_t names_size;
4188
4189 mach_zone_info_t *info;
4190 vm_offset_t info_addr;
4191 vm_size_t info_size;
4192
4193 mach_memory_info_t *memory_info;
4194 vm_offset_t memory_info_addr;
4195 vm_size_t memory_info_size;
4196 vm_size_t memory_info_vmsize;
4197 unsigned int num_info;
4198
4199 unsigned int max_zones, used_zones, i;
4200 mach_zone_name_t *zn;
4201 mach_zone_info_t *zi;
4202 kern_return_t kr;
4203
4204 uint64_t zones_collectable_bytes = 0;
4205
4206 if (host == HOST_NULL)
4207 return KERN_INVALID_HOST;
4208#if CONFIG_DEBUGGER_FOR_ZONE_INFO
4209 if (!PE_i_can_has_debugger(NULL))
4210 return KERN_INVALID_HOST;
4211#endif
4212
4213 /*
4214 * We assume that zones aren't freed once allocated.
4215 * We won't pick up any zones that are allocated later.
4216 */
4217
4218 simple_lock(&all_zones_lock);
4219 max_zones = (unsigned int)(num_zones);
4220 simple_unlock(&all_zones_lock);
4221
4222 names_size = round_page(max_zones * sizeof *names);
4223 kr = kmem_alloc_pageable(ipc_kernel_map,
4224 &names_addr, names_size, VM_KERN_MEMORY_IPC);
4225 if (kr != KERN_SUCCESS)
4226 return kr;
4227 names = (mach_zone_name_t *) names_addr;
4228
4229 info_size = round_page(max_zones * sizeof *info);
4230 kr = kmem_alloc_pageable(ipc_kernel_map,
4231 &info_addr, info_size, VM_KERN_MEMORY_IPC);
4232 if (kr != KERN_SUCCESS) {
4233 kmem_free(ipc_kernel_map,
4234 names_addr, names_size);
4235 return kr;
4236 }
4237 info = (mach_zone_info_t *) info_addr;
4238
4239 zn = &names[0];
4240 zi = &info[0];
4241
4242 used_zones = max_zones;
4243 for (i = 0; i < max_zones; i++) {
4244 if (!get_zone_info(&(zone_array[i]), zn, zi)) {
4245 used_zones--;
4246 continue;
4247 }
4248 zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable);
4249 zn++;
4250 zi++;
4251 }
4252
4253 *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, used_zones * sizeof *names);
4254 *namesCntp = used_zones;
4255
4256 *infop = (mach_zone_info_t *) create_vm_map_copy(info_addr, info_size, used_zones * sizeof *info);
4257 *infoCntp = used_zones;
4258
4259 num_info = 0;
4260 memory_info_addr = 0;
4261
4262 if (memoryInfop && memoryInfoCntp)
4263 {
4264 vm_map_copy_t copy;
4265 num_info = vm_page_diagnose_estimate();
4266 memory_info_size = num_info * sizeof(*memory_info);
4267 memory_info_vmsize = round_page(memory_info_size);
4268 kr = kmem_alloc_pageable(ipc_kernel_map,
4269 &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_IPC);
4270 if (kr != KERN_SUCCESS) {
4271 return kr;
4272 }
4273
4274 kr = vm_map_wire_kernel(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize,
4275 VM_PROT_READ|VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
4276 assert(kr == KERN_SUCCESS);
4277
4278 memory_info = (mach_memory_info_t *) memory_info_addr;
4279 vm_page_diagnose(memory_info, num_info, zones_collectable_bytes);
4280
4281 kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, FALSE);
4282 assert(kr == KERN_SUCCESS);
4283
4284 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)memory_info_addr,
4285 (vm_map_size_t)memory_info_size, TRUE, &copy);
4286 assert(kr == KERN_SUCCESS);
4287
4288 *memoryInfop = (mach_memory_info_t *) copy;
4289 *memoryInfoCntp = num_info;
4290 }
4291
4292 return KERN_SUCCESS;
4293}
4294
4295kern_return_t
4296mach_zone_info_for_zone(
4297 host_priv_t host,
4298 mach_zone_name_t name,
4299 mach_zone_info_t *infop)
4300{
4301 unsigned int max_zones, i;
4302 zone_t zone_ptr;
4303
4304 if (host == HOST_NULL)
4305 return KERN_INVALID_HOST;
4306#if CONFIG_DEBUGGER_FOR_ZONE_INFO
4307 if (!PE_i_can_has_debugger(NULL))
4308 return KERN_INVALID_HOST;
4309#endif
4310
4311 if (infop == NULL) {
4312 return KERN_INVALID_ARGUMENT;
4313 }
4314
4315 simple_lock(&all_zones_lock);
4316 max_zones = (unsigned int)(num_zones);
4317 simple_unlock(&all_zones_lock);
4318
4319 zone_ptr = ZONE_NULL;
4320 for (i = 0; i < max_zones; i++) {
4321 zone_t z = &(zone_array[i]);
4322 assert(z != ZONE_NULL);
4323
4324 /* Find the requested zone by name */
4325 if (track_this_zone(z->zone_name, name.mzn_name)) {
4326 zone_ptr = z;
4327 break;
4328 }
4329 }
4330
4331 /* No zones found with the requested zone name */
4332 if (zone_ptr == ZONE_NULL) {
4333 return KERN_INVALID_ARGUMENT;
4334 }
4335
4336 if (get_zone_info(zone_ptr, NULL, infop)) {
4337 return KERN_SUCCESS;
4338 }
4339 return KERN_FAILURE;
4340}
4341
4342kern_return_t
4343mach_zone_info_for_largest_zone(
4344 host_priv_t host,
4345 mach_zone_name_t *namep,
4346 mach_zone_info_t *infop)
4347{
4348 if (host == HOST_NULL)
4349 return KERN_INVALID_HOST;
4350#if CONFIG_DEBUGGER_FOR_ZONE_INFO
4351 if (!PE_i_can_has_debugger(NULL))
4352 return KERN_INVALID_HOST;
4353#endif
4354
4355 if (namep == NULL || infop == NULL) {
4356 return KERN_INVALID_ARGUMENT;
4357 }
4358
4359 if (get_zone_info(zone_find_largest(), namep, infop)) {
4360 return KERN_SUCCESS;
4361 }
4362 return KERN_FAILURE;
4363}
4364
4365uint64_t
4366get_zones_collectable_bytes(void)
4367{
4368 unsigned int i, max_zones;
4369 uint64_t zones_collectable_bytes = 0;
4370 mach_zone_info_t zi;
4371
4372 simple_lock(&all_zones_lock);
4373 max_zones = (unsigned int)(num_zones);
4374 simple_unlock(&all_zones_lock);
4375
4376 for (i = 0; i < max_zones; i++) {
4377 if (get_zone_info(&(zone_array[i]), NULL, &zi)) {
4378 zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi.mzi_collectable);
4379 }
4380 }
4381
4382 return zones_collectable_bytes;
4383}
4384
4385kern_return_t
4386mach_zone_get_zlog_zones(
4387 host_priv_t host,
4388 mach_zone_name_array_t *namesp,
4389 mach_msg_type_number_t *namesCntp)
4390{
4391#if DEBUG || DEVELOPMENT
4392 unsigned int max_zones, logged_zones, i;
4393 kern_return_t kr;
4394 zone_t zone_ptr;
4395 mach_zone_name_t *names;
4396 vm_offset_t names_addr;
4397 vm_size_t names_size;
4398
4399 if (host == HOST_NULL)
4400 return KERN_INVALID_HOST;
4401
4402 if (namesp == NULL || namesCntp == NULL)
4403 return KERN_INVALID_ARGUMENT;
4404
4405 simple_lock(&all_zones_lock);
4406 max_zones = (unsigned int)(num_zones);
4407 simple_unlock(&all_zones_lock);
4408
4409 names_size = round_page(max_zones * sizeof *names);
4410 kr = kmem_alloc_pageable(ipc_kernel_map,
4411 &names_addr, names_size, VM_KERN_MEMORY_IPC);
4412 if (kr != KERN_SUCCESS)
4413 return kr;
4414 names = (mach_zone_name_t *) names_addr;
4415
4416 zone_ptr = ZONE_NULL;
4417 logged_zones = 0;
4418 for (i = 0; i < max_zones; i++) {
4419 zone_t z = &(zone_array[i]);
4420 assert(z != ZONE_NULL);
4421
4422 /* Copy out the zone name if zone logging is enabled */
4423 if(z->zlog_btlog) {
4424 get_zone_info(z, &names[logged_zones], NULL);
4425 logged_zones++;
4426 }
4427 }
4428
4429 *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, logged_zones * sizeof *names);
4430 *namesCntp = logged_zones;
4431
4432 return KERN_SUCCESS;
4433
4434#else /* DEBUG || DEVELOPMENT */
4435#pragma unused(host, namesp, namesCntp)
4436 return KERN_FAILURE;
4437#endif /* DEBUG || DEVELOPMENT */
4438}
4439
4440kern_return_t
4441mach_zone_get_btlog_records(
4442 host_priv_t host,
4443 mach_zone_name_t name,
4444 zone_btrecord_array_t *recsp,
4445 mach_msg_type_number_t *recsCntp)
4446{
4447#if DEBUG || DEVELOPMENT
4448 unsigned int max_zones, i, numrecs = 0;
4449 zone_btrecord_t *recs;
4450 kern_return_t kr;
4451 zone_t zone_ptr;
4452 vm_offset_t recs_addr;
4453 vm_size_t recs_size;
4454
4455 if (host == HOST_NULL)
4456 return KERN_INVALID_HOST;
4457
4458 if (recsp == NULL || recsCntp == NULL)
4459 return KERN_INVALID_ARGUMENT;
4460
4461 simple_lock(&all_zones_lock);
4462 max_zones = (unsigned int)(num_zones);
4463 simple_unlock(&all_zones_lock);
4464
4465 zone_ptr = ZONE_NULL;
4466 for (i = 0; i < max_zones; i++) {
4467 zone_t z = &(zone_array[i]);
4468 assert(z != ZONE_NULL);
4469
4470 /* Find the requested zone by name */
4471 if (track_this_zone(z->zone_name, name.mzn_name)) {
4472 zone_ptr = z;
4473 break;
4474 }
4475 }
4476
4477 /* No zones found with the requested zone name */
4478 if (zone_ptr == ZONE_NULL) {
4479 return KERN_INVALID_ARGUMENT;
4480 }
4481
4482 /* Logging not turned on for the requested zone */
4483 if (!DO_LOGGING(zone_ptr)) {
4484 return KERN_FAILURE;
4485 }
4486
4487 /* Allocate memory for btlog records */
4488 numrecs = (unsigned int)(get_btlog_records_count(zone_ptr->zlog_btlog));
4489 recs_size = round_page(numrecs * sizeof *recs);
4490
4491 kr = kmem_alloc_pageable(ipc_kernel_map, &recs_addr, recs_size, VM_KERN_MEMORY_IPC);
4492 if (kr != KERN_SUCCESS) {
4493 return kr;
4494 }
4495
4496 /*
4497 * We will call get_btlog_records() below which populates this region while holding a spinlock
4498 * (the btlog lock). So these pages need to be wired.
4499 */
4500 kr = vm_map_wire_kernel(ipc_kernel_map, recs_addr, recs_addr + recs_size,
4501 VM_PROT_READ|VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
4502 assert(kr == KERN_SUCCESS);
4503
4504 recs = (zone_btrecord_t *)recs_addr;
4505 get_btlog_records(zone_ptr->zlog_btlog, recs, &numrecs);
4506
4507 kr = vm_map_unwire(ipc_kernel_map, recs_addr, recs_addr + recs_size, FALSE);
4508 assert(kr == KERN_SUCCESS);
4509
4510 *recsp = (zone_btrecord_t *) create_vm_map_copy(recs_addr, recs_size, numrecs * sizeof *recs);
4511 *recsCntp = numrecs;
4512
4513 return KERN_SUCCESS;
4514
4515#else /* DEBUG || DEVELOPMENT */
4516#pragma unused(host, name, recsp, recsCntp)
4517 return KERN_FAILURE;
4518#endif /* DEBUG || DEVELOPMENT */
4519}
4520
4521
4522#if DEBUG || DEVELOPMENT
4523
4524kern_return_t
4525mach_memory_info_check(void)
4526{
4527 mach_memory_info_t * memory_info;
4528 mach_memory_info_t * info;
4529 zone_t zone;
4530 unsigned int idx, num_info, max_zones;
4531 vm_offset_t memory_info_addr;
4532 kern_return_t kr;
4533 size_t memory_info_size, memory_info_vmsize;
4534 uint64_t top_wired, zonestotal, total;
4535
4536 num_info = vm_page_diagnose_estimate();
4537 memory_info_size = num_info * sizeof(*memory_info);
4538 memory_info_vmsize = round_page(memory_info_size);
4539 kr = kmem_alloc(kernel_map, &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_DIAG);
4540 assert (kr == KERN_SUCCESS);
4541
4542 memory_info = (mach_memory_info_t *) memory_info_addr;
4543 vm_page_diagnose(memory_info, num_info, 0);
4544
4545 simple_lock(&all_zones_lock);
4546 max_zones = num_zones;
4547 simple_unlock(&all_zones_lock);
4548
4549 top_wired = total = zonestotal = 0;
4550 for (idx = 0; idx < max_zones; idx++)
4551 {
4552 zone = &(zone_array[idx]);
4553 assert(zone != ZONE_NULL);
4554 lock_zone(zone);
4555 zonestotal += ptoa_64(zone->page_count);
4556 unlock_zone(zone);
4557 }
4558 for (idx = 0; idx < num_info; idx++)
4559 {
4560 info = &memory_info[idx];
4561 if (!info->size) continue;
4562 if (VM_KERN_COUNT_WIRED == info->site) top_wired = info->size;
4563 if (VM_KERN_SITE_HIDE & info->flags) continue;
4564 if (!(VM_KERN_SITE_WIRED & info->flags)) continue;
4565 total += info->size;
4566 }
4567 total += zonestotal;
4568
4569 printf("vm_page_diagnose_check %qd of %qd, zones %qd, short 0x%qx\n", total, top_wired, zonestotal, top_wired - total);
4570
4571 kmem_free(kernel_map, memory_info_addr, memory_info_vmsize);
4572
4573 return (kr);
4574}
4575
4576extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4577
4578#endif /* DEBUG || DEVELOPMENT */
4579
4580kern_return_t
4581mach_zone_force_gc(
4582 host_t host)
4583{
4584 if (host == HOST_NULL)
4585 return KERN_INVALID_HOST;
4586
4587#if DEBUG || DEVELOPMENT
4588 /* Callout to buffer cache GC to drop elements in the apfs zones */
4589 if (consider_buffer_cache_collect != NULL) {
4590 (void)(*consider_buffer_cache_collect)(0);
4591 }
4592 consider_zone_gc(FALSE);
4593#endif /* DEBUG || DEVELOPMENT */
4594 return (KERN_SUCCESS);
4595}
4596
4597extern unsigned int stack_total;
4598extern unsigned long long stack_allocs;
4599
4600#if defined(__i386__) || defined (__x86_64__)
4601extern unsigned int inuse_ptepages_count;
4602extern long long alloc_ptepages_count;
4603#endif
4604
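/*
 * Return the zone with the largest cur_size. The scan is advisory:
 * cur_size is read without taking each zone's lock.
 */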
4605zone_t
4606zone_find_largest(void)
4607{
4608 unsigned int i;
4609 unsigned int max_zones;
4610 zone_t the_zone;
4611 zone_t zone_largest;
4612
4613 simple_lock(&all_zones_lock);
4614 max_zones = num_zones;
4615 simple_unlock(&all_zones_lock);
4616
4617 zone_largest = &(zone_array[0]);
4618 for (i = 0; i < max_zones; i++) {
4619 the_zone = &(zone_array[i]);
4620 if (the_zone->cur_size > zone_largest->cur_size) {
4621 zone_largest = the_zone;
4622 }
4623 }
4624 return zone_largest;
4625}
4626
4627#if ZONE_DEBUG
4628
4629/* should we care about locks here? */
4630
4631#define zone_in_use(z) ( z->count || z->free_elements \
4632 || !queue_empty(&z->pages.all_free) \
4633 || !queue_empty(&z->pages.intermediate) \
4634 || (z->allows_foreign && !queue_empty(&z->pages.any_free_foreign)))
4635
4636
4637#endif /* ZONE_DEBUG */
4638
4639
4640/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4641
4642#if DEBUG || DEVELOPMENT
4643
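/*
 * Copy the address of every allocated element on the pages in 'queue'
 * into elems: enumerate every element slot on each page, then delete
 * the slots found on the page's freelist. Freelist next pointers are
 * stored XORed with zp_nopoison_cookie, hence the decode below.
 * Returns the new tail of the array.
 */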
4644static uintptr_t *
4645zone_copy_all_allocations_inqueue(zone_t z, queue_head_t * queue, uintptr_t * elems)
4646{
4647 struct zone_page_metadata *page_meta;
4648 vm_offset_t free, elements;
4649 vm_offset_t idx, numElements, freeCount, bytesAvail, metaSize;
4650
4651 queue_iterate(queue, page_meta, struct zone_page_metadata *, pages)
4652 {
4653 elements = get_zone_page(page_meta);
4654 bytesAvail = ptoa(page_meta->page_count);
4655 freeCount = 0;
4656 if (z->allows_foreign && !from_zone_map(elements, z->elem_size))
4657 {
4658 metaSize = (sizeof(struct zone_page_metadata) + ZONE_ELEMENT_ALIGNMENT - 1) & ~(ZONE_ELEMENT_ALIGNMENT - 1);
4659 bytesAvail -= metaSize;
4660 elements += metaSize;
4661 }
4662 numElements = bytesAvail / z->elem_size;
4663 // construct array of all possible elements
4664 for (idx = 0; idx < numElements; idx++)
4665 {
4666 elems[idx] = INSTANCE_PUT(elements + idx * z->elem_size);
4667 }
4668 // remove all free elements from the array
4669 free = (vm_offset_t)page_metadata_get_freelist(page_meta);
4670 while (free)
4671 {
4672 // find idx of free element
4673 for (idx = 0; (idx < numElements) && (elems[idx] != INSTANCE_PUT(free)); idx++) {}
4674 assert(idx < numElements);
4675 // remove it
4676 bcopy(&elems[idx + 1], &elems[idx], (numElements - (idx + 1)) * sizeof(elems[0]));
4677 numElements--;
4678 freeCount++;
4679 // next free element
4680 vm_offset_t *primary = (vm_offset_t *) free;
4681 free = *primary ^ zp_nopoison_cookie;
4682 }
4683 elems += numElements;
4684 }
4685
4686 return (elems);
4687}
4688
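/*
 * zone_leaks: gather the addresses of all live elements in the named
 * zone, attribute them to recorded btlog backtraces where available,
 * then fall back to any backtrace remnants left inside the elements
 * themselves; allocations with no backtrace at all are reported against
 * a single fake frame (&zalloc).
 */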
4689kern_return_t
4690zone_leaks(const char * zoneName, uint32_t nameLen, leak_site_proc proc, void * refCon)
4691{
4692 uintptr_t zbt[MAX_ZTRACE_DEPTH];
4693 zone_t zone;
4694 uintptr_t * array;
4695 uintptr_t * next;
4696 uintptr_t element, bt;
4697 uint32_t idx, count, found;
4698 uint32_t btidx, btcount, nobtcount, btfound;
4699 uint32_t elemSize;
4700 uint64_t maxElems;
4701 unsigned int max_zones;
4702 kern_return_t kr;
4703
4704 simple_lock(&all_zones_lock);
4705 max_zones = num_zones;
4706 simple_unlock(&all_zones_lock);
4707
4708 for (idx = 0; idx < max_zones; idx++)
4709 {
4710 if (!strncmp(zoneName, zone_array[idx].zone_name, nameLen)) break;
4711 }
4712 if (idx >= max_zones) return (KERN_INVALID_NAME);
4713 zone = &zone_array[idx];
4714
4715 elemSize = (uint32_t) zone->elem_size;
4716 maxElems = ptoa(zone->page_count) / elemSize;
4717
4718 if ((zone->alloc_size % elemSize)
4719 && !leak_scan_debug_flag) return (KERN_INVALID_CAPABILITY);
4720
4721 kr = kmem_alloc_kobject(kernel_map, (vm_offset_t *) &array,
4722 maxElems * sizeof(uintptr_t), VM_KERN_MEMORY_DIAG);
4723 if (KERN_SUCCESS != kr) return (kr);
4724
4725 lock_zone(zone);
4726
4727 next = array;
4728 next = zone_copy_all_allocations_inqueue(zone, &zone->pages.any_free_foreign, next);
4729 next = zone_copy_all_allocations_inqueue(zone, &zone->pages.intermediate, next);
4730 next = zone_copy_all_allocations_inqueue(zone, &zone->pages.all_used, next);
4731 count = (uint32_t)(next - array);
4732
4733 unlock_zone(zone);
4734
4735 zone_leaks_scan(array, count, (uint32_t)zone->elem_size, &found);
4736 assert(found <= count);
4737
4738 for (idx = 0; idx < count; idx++)
4739 {
4740 element = array[idx];
4741 if (kInstanceFlagReferenced & element) continue;
4742 element = INSTANCE_PUT(element) & ~kInstanceFlags;
4743 }
4744
4745 if (zone->zlog_btlog && !corruption_debug_flag)
4746 {
4747 // btlog_copy_backtraces_for_elements will set kInstanceFlagReferenced on elements it found
4748 btlog_copy_backtraces_for_elements(zone->zlog_btlog, array, &count, elemSize, proc, refCon);
4749 }
4750
4751 for (nobtcount = idx = 0; idx < count; idx++)
4752 {
4753 element = array[idx];
4754 if (!element) continue;
4755 if (kInstanceFlagReferenced & element) continue;
4756 element = INSTANCE_PUT(element) & ~kInstanceFlags;
4757
4758 // see if we can find any backtrace left in the element
4759 btcount = (typeof(btcount)) (zone->elem_size / sizeof(uintptr_t));
4760 if (btcount >= MAX_ZTRACE_DEPTH) btcount = MAX_ZTRACE_DEPTH - 1;
4761 for (btfound = btidx = 0; btidx < btcount; btidx++)
4762 {
4763 bt = ((uintptr_t *)element)[btcount - 1 - btidx];
4764 if (!VM_KERNEL_IS_SLID(bt)) break;
4765 zbt[btfound++] = bt;
4766 }
4767 if (btfound) (*proc)(refCon, 1, elemSize, &zbt[0], btfound);
4768 else nobtcount++;
4769 }
4770 if (nobtcount)
4771 {
4772 // fake backtrace when we found nothing
4773 zbt[0] = (uintptr_t) &zalloc;
4774 (*proc)(refCon, nobtcount, elemSize, &zbt[0], 1);
4775 }
4776
4777 kmem_free(kernel_map, (vm_offset_t) array, maxElems * sizeof(uintptr_t));
4778
4779 return (KERN_SUCCESS);
4780}
4781
4782boolean_t
4783kdp_is_in_zone(void *addr, const char *zone_name)
4784{
4785 zone_t z;
4786 return (zone_element_size(addr, &z) && !strcmp(z->zone_name, zone_name));
4787}
4788
4789boolean_t
4790run_zone_test(void)
4791{
4792 unsigned int i = 0, max_iter = 5;
4793 void * test_ptr;
4794 zone_t test_zone;
4795
4796 simple_lock(&zone_test_lock);
4797 if (!zone_test_running) {
4798 zone_test_running = TRUE;
4799 } else {
4800 simple_unlock(&zone_test_lock);
4801 printf("run_zone_test: Test already running.\n");
4802 return FALSE;
4803 }
4804 simple_unlock(&zone_test_lock);
4805
4806 printf("run_zone_test: Testing zinit(), zalloc(), zfree() and zdestroy() on zone \"test_zone_sysctl\"\n");
4807
4808 /* zinit() and zdestroy() a zone with the same name a bunch of times, verify that we get back the same zone each time */
4809 do {
4810 test_zone = zinit(sizeof(uint64_t), 100 * sizeof(uint64_t), sizeof(uint64_t), "test_zone_sysctl");
4811 if (test_zone == NULL) {
4812 printf("run_zone_test: zinit() failed\n");
4813 return FALSE;
4814 }
4815
4816#if KASAN_ZALLOC
4817 if (test_zone_ptr == NULL && zone_free_count(test_zone) != 0) {
4818#else
4819 if (zone_free_count(test_zone) != 0) {
4820#endif
4821 printf("run_zone_test: free count is not zero\n");
4822 return FALSE;
4823 }
4824
4825 if (test_zone_ptr == NULL) {
4826 /* Stash the zone pointer returned on the first zinit */
4827 printf("run_zone_test: zone created for the first time\n");
4828 test_zone_ptr = test_zone;
4829 } else if (test_zone != test_zone_ptr) {
4830 printf("run_zone_test: old zone pointer and new zone pointer don't match\n");
4831 return FALSE;
4832 }
4833
4834 test_ptr = zalloc(test_zone);
4835 if (test_ptr == NULL) {
4836 printf("run_zone_test: zalloc() failed\n");
4837 return FALSE;
4838 }
4839 zfree(test_zone, test_ptr);
4840
4841 zdestroy(test_zone);
4842 i++;
4843
4844 printf("run_zone_test: Iteration %d successful\n", i);
4845 } while (i < max_iter);
4846
4847 printf("run_zone_test: Test passed\n");
4848
4849 simple_lock(&zone_test_lock);
4850 zone_test_running = FALSE;
4851 simple_unlock(&zone_test_lock);
4852
4853 return TRUE;
4854}
4855
4856#endif /* DEBUG || DEVELOPMENT */
4857