1 | /* |
2 | * Copyright (c) 2006-2014 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* |
30 | * Memory allocator with per-CPU caching, derived from the kmem magazine |
31 | * concept and implementation as described in the following paper: |
32 | * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf |
33 | * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights |
34 | * reserved. Use is subject to license terms. |
35 | * |
 * There are several major differences between this and the original kmem
 * magazine: this derivative implementation allows multiple objects to be
 * allocated and freed from/to the object cache in a single call; in
 * addition, it provides greater flexibility by allowing the caller to
 * supply its own slab allocator (instead of the default zone allocator).
 * Finally, no object construction/destruction takes place at the moment,
 * although this could be added in the future to improve efficiency.
43 | */ |
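
/*
 * Illustrative usage sketch of the public interface implemented in this
 * file; the cache name, object size and variable names are hypothetical:
 *
 *	mcache_t *mycache;
 *	void *obj;
 *
 *	mycache = mcache_create("example", 128, 0, 0, MCR_SLEEP);
 *	obj = mcache_alloc(mycache, MCR_SLEEP);
 *	...
 *	mcache_free(mycache, obj);
 *	mcache_destroy(mycache);
 */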
44 | |
45 | #include <sys/param.h> |
46 | #include <sys/types.h> |
47 | #include <sys/malloc.h> |
48 | #include <sys/mbuf.h> |
49 | #include <sys/queue.h> |
50 | #include <sys/kernel.h> |
51 | #include <sys/systm.h> |
52 | |
53 | #include <kern/debug.h> |
54 | #include <kern/zalloc.h> |
55 | #include <kern/cpu_number.h> |
56 | #include <kern/locks.h> |
57 | #include <kern/thread_call.h> |
58 | |
59 | #include <libkern/libkern.h> |
60 | #include <libkern/OSAtomic.h> |
61 | #include <libkern/OSDebug.h> |
62 | |
63 | #include <mach/vm_param.h> |
64 | #include <machine/limits.h> |
65 | #include <machine/machine_routines.h> |
66 | |
67 | #include <string.h> |
68 | |
69 | #include <sys/mcache.h> |
70 | |
71 | #define MCACHE_SIZE(n) \ |
72 | __builtin_offsetof(mcache_t, mc_cpu[n]) |
73 | |
74 | /* Allocate extra in case we need to manually align the pointer */ |
75 | #define MCACHE_ALLOC_SIZE \ |
76 | (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE) |
77 | |
78 | #define MCACHE_CPU(c) \ |
79 | (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number()))) |
80 | |
81 | /* |
82 | * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used |
83 | * to serialize accesses to the global list of caches in the system. |
84 | * They also record the thread currently running in the critical |
85 | * section, so that we can avoid recursive requests to reap the |
86 | * caches when memory runs low. |
87 | */ |
88 | #define MCACHE_LIST_LOCK() { \ |
89 | lck_mtx_lock(mcache_llock); \ |
90 | mcache_llock_owner = current_thread(); \ |
91 | } |
92 | |
93 | #define MCACHE_LIST_UNLOCK() { \ |
94 | mcache_llock_owner = NULL; \ |
95 | lck_mtx_unlock(mcache_llock); \ |
96 | } |
97 | |
98 | #define MCACHE_LOCK(l) lck_mtx_lock(l) |
99 | #define MCACHE_UNLOCK(l) lck_mtx_unlock(l) |
100 | #define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l) |
101 | |
102 | static int ncpu; |
103 | static unsigned int cache_line_size; |
104 | static lck_mtx_t *mcache_llock; |
105 | static struct thread *mcache_llock_owner; |
106 | static lck_attr_t *mcache_llock_attr; |
107 | static lck_grp_t *mcache_llock_grp; |
108 | static lck_grp_attr_t *mcache_llock_grp_attr; |
109 | static struct zone *mcache_zone; |
110 | static const uint32_t mcache_reap_interval = 15; |
111 | static const uint32_t mcache_reap_interval_leeway = 2; |
112 | static UInt32 mcache_reaping; |
113 | static int mcache_ready; |
114 | static int mcache_updating; |
115 | |
116 | static int mcache_bkt_contention = 3; |
117 | #if DEBUG |
118 | static unsigned int mcache_flags = MCF_DEBUG; |
119 | #else |
120 | static unsigned int mcache_flags = 0; |
121 | #endif |
122 | |
123 | int mca_trn_max = MCA_TRN_MAX; |
124 | |
125 | #define DUMP_MCA_BUF_SIZE 512 |
126 | static char *mca_dump_buf; |
127 | |
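/*
 * Bucket types, ordered by increasing bucket size.  The columns are, in
 * order: bt_bktsize (number of objects a bucket holds), bt_minbuf and
 * bt_maxbuf (roughly the object chunk size range this bucket type is
 * used for), and bt_cache (the cache backing buckets of this size,
 * filled in by mcache_init()).
 */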
128 | static mcache_bkttype_t mcache_bkttype[] = { |
129 | { 1, 4096, 32768, NULL }, |
130 | { 3, 2048, 16384, NULL }, |
131 | { 7, 1024, 12288, NULL }, |
132 | { 15, 256, 8192, NULL }, |
133 | { 31, 64, 4096, NULL }, |
134 | { 47, 0, 2048, NULL }, |
135 | { 63, 0, 1024, NULL }, |
136 | { 95, 0, 512, NULL }, |
137 | { 143, 0, 256, NULL }, |
138 | { 165, 0, 0, NULL }, |
139 | }; |
140 | |
141 | static mcache_t *mcache_create_common(const char *, size_t, size_t, |
142 | mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t, |
143 | mcache_notifyfn_t, void *, u_int32_t, int, int); |
144 | static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***, |
145 | unsigned int, int); |
146 | static void mcache_slab_free(void *, mcache_obj_t *, boolean_t); |
147 | static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t); |
148 | static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int); |
149 | static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *, |
150 | mcache_bkttype_t **); |
151 | static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *); |
152 | static void mcache_cache_bkt_enable(mcache_t *); |
153 | static void mcache_bkt_purge(mcache_t *); |
154 | static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *, |
155 | mcache_bkt_t *, int); |
156 | static void mcache_bkt_ws_update(mcache_t *); |
157 | static void mcache_bkt_ws_zero(mcache_t *); |
158 | static void mcache_bkt_ws_reap(mcache_t *); |
159 | static void mcache_dispatch(void (*)(void *), void *); |
160 | static void mcache_cache_reap(mcache_t *); |
161 | static void mcache_cache_update(mcache_t *); |
162 | static void mcache_cache_bkt_resize(void *); |
163 | static void mcache_cache_enable(void *); |
164 | static void mcache_update(thread_call_param_t __unused, thread_call_param_t __unused); |
165 | static void mcache_update_timeout(void *); |
166 | static void mcache_applyall(void (*)(mcache_t *)); |
167 | static void mcache_reap_start(void *); |
168 | static void mcache_reap_done(void *); |
169 | static void mcache_reap_timeout(thread_call_param_t __unused, thread_call_param_t); |
170 | static void mcache_notify(mcache_t *, u_int32_t); |
171 | static void mcache_purge(void *); |
172 | |
173 | static LIST_HEAD(, mcache) mcache_head; |
174 | mcache_t *mcache_audit_cache; |
175 | |
176 | static thread_call_t mcache_reap_tcall; |
177 | static thread_call_t mcache_update_tcall; |
178 | |
179 | /* |
180 | * Initialize the framework; this is currently called as part of BSD init. |
181 | */ |
182 | __private_extern__ void |
183 | mcache_init(void) |
184 | { |
185 | mcache_bkttype_t *btp; |
186 | unsigned int i; |
187 | char name[32]; |
188 | |
189 | VERIFY(mca_trn_max >= 2); |
190 | |
191 | ncpu = ml_get_max_cpus(); |
192 | (void) mcache_cache_line_size(); /* prime it */ |
193 | |
194 | mcache_llock_grp_attr = lck_grp_attr_alloc_init(); |
	mcache_llock_grp = lck_grp_alloc_init("mcache.list",
196 | mcache_llock_grp_attr); |
197 | mcache_llock_attr = lck_attr_alloc_init(); |
198 | mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr); |
199 | |
200 | mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL); |
201 | mcache_update_tcall = thread_call_allocate(mcache_update, NULL); |
202 | if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL) |
		panic("mcache_init: thread_call_allocate failed");
204 | |
205 | mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE, |
	    PAGE_SIZE, "mcache");
207 | if (mcache_zone == NULL) |
		panic("mcache_init: failed to allocate mcache zone\n");
209 | zone_change(mcache_zone, Z_CALLERACCT, FALSE); |
210 | |
211 | LIST_INIT(&mcache_head); |
212 | |
213 | for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) { |
214 | btp = &mcache_bkttype[i]; |
		(void) snprintf(name, sizeof (name), "bkt_%d",
216 | btp->bt_bktsize); |
217 | btp->bt_cache = mcache_create(name, |
218 | (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP); |
219 | } |
220 | |
	PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof(mcache_flags));
222 | mcache_flags &= MCF_FLAGS_MASK; |
223 | |
	mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
225 | 0, 0, MCR_SLEEP); |
226 | |
227 | mcache_applyall(mcache_cache_bkt_enable); |
228 | mcache_ready = 1; |
229 | |
	printf("mcache: %d CPU(s), %d bytes CPU cache line size\n",
231 | ncpu, CPU_CACHE_LINE_SIZE); |
232 | } |
233 | |
234 | /* |
235 | * Return the global mcache flags. |
236 | */ |
237 | __private_extern__ unsigned int |
238 | mcache_getflags(void) |
239 | { |
240 | return (mcache_flags); |
241 | } |
242 | |
243 | /* |
244 | * Return the CPU cache line size. |
245 | */ |
246 | __private_extern__ unsigned int |
247 | mcache_cache_line_size(void) |
248 | { |
249 | if (cache_line_size == 0) { |
250 | ml_cpu_info_t cpu_info; |
251 | ml_cpu_get_info(&cpu_info); |
252 | cache_line_size = cpu_info.cache_line_size; |
253 | } |
254 | return (cache_line_size); |
255 | } |
256 | |
257 | /* |
258 | * Create a cache using the zone allocator as the backend slab allocator. |
259 | * The caller may specify any alignment for the object; if it specifies 0 |
260 | * the default alignment (MCACHE_ALIGN) will be used. |
261 | */ |
262 | __private_extern__ mcache_t * |
263 | mcache_create(const char *name, size_t bufsize, size_t align, |
264 | u_int32_t flags, int wait) |
265 | { |
266 | return (mcache_create_common(name, bufsize, align, mcache_slab_alloc, |
267 | mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1, |
268 | wait)); |
269 | } |
270 | |
271 | /* |
272 | * Create a cache using a custom backend slab allocator. Since the caller |
273 | * is responsible for allocation, no alignment guarantee will be provided |
274 | * by this framework. |
275 | */ |
276 | __private_extern__ mcache_t * |
277 | mcache_create_ext(const char *name, size_t bufsize, |
278 | mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn, |
279 | mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg, |
280 | u_int32_t flags, int wait) |
281 | { |
282 | return (mcache_create_common(name, bufsize, 0, allocfn, |
283 | freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait)); |
284 | } |
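
/*
 * A sketch of a custom backend for mcache_create_ext(), assuming the
 * mcache_allocfn_t and mcache_freefn_t callback types mirror the
 * signatures of mcache_slab_alloc() and mcache_slab_free() below;
 * "my_alloc", "my_free" and "my_arg" are hypothetical:
 *
 *	static unsigned int
 *	my_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
 *	{
 *		// Allocate up to num objects from the backing store, chain
 *		// them via obj_next onto *plist, and return the count.
 *	}
 *
 *	static void
 *	my_free(void *arg, mcache_obj_t *list, boolean_t purged)
 *	{
 *		// Walk the obj_next chain and return each object.
 *	}
 *
 *	cp = mcache_create_ext("example", 128, my_alloc, my_free,
 *	    NULL, NULL, NULL, my_arg, 0, MCR_SLEEP);
 */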
285 | |
286 | /* |
287 | * Common cache creation routine. |
288 | */ |
289 | static mcache_t * |
290 | mcache_create_common(const char *name, size_t bufsize, size_t align, |
291 | mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn, |
292 | mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg, |
293 | u_int32_t flags, int need_zone, int wait) |
294 | { |
295 | mcache_bkttype_t *btp; |
296 | mcache_t *cp = NULL; |
297 | size_t chunksize; |
298 | void *buf, **pbuf; |
299 | int c; |
300 | char lck_name[64]; |
301 | |
	/* If auditing is on and the dump buffer is NULL, allocate it now */
303 | if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) { |
304 | int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK; |
305 | MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP, |
306 | malloc_wait | M_ZERO); |
307 | if (mca_dump_buf == NULL) |
308 | return (NULL); |
309 | } |
310 | |
311 | buf = zalloc(mcache_zone); |
312 | if (buf == NULL) |
313 | goto fail; |
314 | |
315 | bzero(buf, MCACHE_ALLOC_SIZE); |
316 | |
317 | /* |
	 * In case we didn't get cache-aligned memory, round the address
	 * up accordingly.  This is needed in order to get the rest of
	 * the structure members aligned properly.  It also means that
	 * the memory span gets shifted due to the round-up, but that is
	 * okay since we've allocated extra space for this.
323 | */ |
324 | cp = (mcache_t *) |
325 | P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_LINE_SIZE); |
326 | pbuf = (void **)((intptr_t)cp - sizeof (void *)); |
327 | *pbuf = buf; |
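	/*
	 * Illustration with hypothetical addresses and a 64-byte cache
	 * line: if zalloc() returns buf == 0x1008, cp is rounded up to
	 * 0x1040 and the pointer-sized slot at 0x1038 stores buf, so
	 * that mcache_destroy() can later recover it for zfree().
	 */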
328 | |
329 | /* |
330 | * Guaranteed alignment is valid only when we use the internal |
331 | * slab allocator (currently set to use the zone allocator). |
332 | */ |
333 | if (!need_zone) { |
334 | align = 1; |
335 | } else { |
336 | /* Enforce 64-bit minimum alignment for zone-based buffers */ |
337 | if (align == 0) |
338 | align = MCACHE_ALIGN; |
339 | align = P2ROUNDUP(align, MCACHE_ALIGN); |
340 | } |
341 | |
342 | if ((align & (align - 1)) != 0) |
		panic("mcache_create: bad alignment %lu", align);
344 | |
345 | cp->mc_align = align; |
346 | cp->mc_slab_alloc = allocfn; |
347 | cp->mc_slab_free = freefn; |
348 | cp->mc_slab_audit = auditfn; |
349 | cp->mc_slab_log = logfn; |
350 | cp->mc_slab_notify = notifyfn; |
351 | cp->mc_private = need_zone ? cp : arg; |
352 | cp->mc_bufsize = bufsize; |
353 | cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags; |
354 | |
	(void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);
356 | |
	(void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
358 | cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init(); |
359 | cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name, |
360 | cp->mc_cpu_lock_grp_attr); |
361 | cp->mc_cpu_lock_attr = lck_attr_alloc_init(); |
362 | |
363 | /* |
364 | * Allocation chunk size is the object's size plus any extra size |
365 | * needed to satisfy the object's alignment. It is enforced to be |
366 | * at least the size of an LP64 pointer to simplify auditing and to |
367 | * handle multiple-element allocation requests, where the elements |
368 | * returned are linked together in a list. |
369 | */ |
370 | chunksize = MAX(bufsize, sizeof (u_int64_t)); |
371 | if (need_zone) { |
372 | VERIFY(align != 0 && (align % MCACHE_ALIGN) == 0); |
373 | chunksize += sizeof (uint64_t) + align; |
374 | chunksize = P2ROUNDUP(chunksize, align); |
375 | if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu, |
376 | PAGE_SIZE, cp->mc_name)) == NULL) |
377 | goto fail; |
378 | zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE); |
379 | } |
380 | cp->mc_chunksize = chunksize; |
381 | |
382 | /* |
383 | * Initialize the bucket layer. |
384 | */ |
	(void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
386 | cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init(); |
387 | cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name, |
388 | cp->mc_bkt_lock_grp_attr); |
389 | cp->mc_bkt_lock_attr = lck_attr_alloc_init(); |
390 | lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp, |
391 | cp->mc_bkt_lock_attr); |
392 | |
	(void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
394 | cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init(); |
395 | cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name, |
396 | cp->mc_sync_lock_grp_attr); |
397 | cp->mc_sync_lock_attr = lck_attr_alloc_init(); |
398 | lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp, |
399 | cp->mc_sync_lock_attr); |
400 | |
401 | for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++) |
402 | continue; |
403 | |
404 | cp->cache_bkttype = btp; |
405 | |
406 | /* |
407 | * Initialize the CPU layer. Each per-CPU structure is aligned |
408 | * on the CPU cache line boundary to prevent false sharing. |
409 | */ |
410 | for (c = 0; c < ncpu; c++) { |
411 | mcache_cpu_t *ccp = &cp->mc_cpu[c]; |
412 | |
413 | VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE)); |
414 | lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp, |
415 | cp->mc_cpu_lock_attr); |
416 | ccp->cc_objs = -1; |
417 | ccp->cc_pobjs = -1; |
418 | } |
419 | |
420 | if (mcache_ready) |
421 | mcache_cache_bkt_enable(cp); |
422 | |
423 | /* TODO: dynamically create sysctl for stats */ |
424 | |
425 | MCACHE_LIST_LOCK(); |
426 | LIST_INSERT_HEAD(&mcache_head, cp, mc_list); |
427 | MCACHE_LIST_UNLOCK(); |
428 | |
429 | /* |
430 | * If cache buckets are enabled and this is the first cache |
431 | * created, start the periodic cache update. |
432 | */ |
433 | if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) { |
434 | mcache_updating = 1; |
435 | mcache_update_timeout(NULL); |
436 | } |
437 | if (cp->mc_flags & MCF_DEBUG) { |
438 | printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu " |
		    "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
440 | arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize); |
441 | } |
442 | return (cp); |
443 | |
444 | fail: |
445 | if (buf != NULL) |
446 | zfree(mcache_zone, buf); |
447 | return (NULL); |
448 | } |
449 | |
450 | /* |
451 | * Allocate one or more objects from a cache. |
452 | */ |
453 | __private_extern__ unsigned int |
454 | mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait) |
455 | { |
456 | mcache_cpu_t *ccp; |
457 | mcache_obj_t **top = &(*list); |
458 | mcache_bkt_t *bkt; |
459 | unsigned int need = num; |
460 | boolean_t nwretry = FALSE; |
461 | |
462 | /* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */ |
463 | VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK)); |
464 | |
465 | ASSERT(list != NULL); |
466 | *list = NULL; |
467 | |
468 | if (num == 0) |
469 | return (0); |
470 | |
471 | retry_alloc: |
	/* We may not always be running on the same CPU in case of retries */
473 | ccp = MCACHE_CPU(cp); |
474 | |
475 | MCACHE_LOCK(&ccp->cc_lock); |
476 | for (;;) { |
477 | /* |
478 | * If we have an object in the current CPU's filled bucket, |
479 | * chain the object to any previous objects and return if |
480 | * we've satisfied the number of requested objects. |
481 | */ |
482 | if (ccp->cc_objs > 0) { |
483 | mcache_obj_t *tail; |
484 | int objs; |
485 | |
486 | /* |
487 | * Objects in the bucket are already linked together |
488 | * with the most recently freed object at the head of |
489 | * the list; grab as many objects as we can. |
490 | */ |
491 | objs = MIN((unsigned int)ccp->cc_objs, need); |
492 | *list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1]; |
493 | ccp->cc_objs -= objs; |
494 | ccp->cc_alloc += objs; |
495 | |
496 | tail = ccp->cc_filled->bkt_obj[ccp->cc_objs]; |
497 | list = &tail->obj_next; |
498 | *list = NULL; |
499 | |
500 | /* If we got them all, return to caller */ |
501 | if ((need -= objs) == 0) { |
502 | MCACHE_UNLOCK(&ccp->cc_lock); |
503 | |
504 | if (!(cp->mc_flags & MCF_NOLEAKLOG) && |
505 | cp->mc_slab_log != NULL) |
506 | (*cp->mc_slab_log)(num, *top, TRUE); |
507 | |
508 | if (cp->mc_flags & MCF_DEBUG) |
509 | goto debug_alloc; |
510 | |
511 | return (num); |
512 | } |
513 | } |
514 | |
515 | /* |
516 | * The CPU's filled bucket is empty. If the previous filled |
517 | * bucket was full, exchange and try again. |
518 | */ |
519 | if (ccp->cc_pobjs > 0) { |
520 | mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs); |
521 | continue; |
522 | } |
523 | |
524 | /* |
525 | * If the bucket layer is disabled, allocate from slab. This |
526 | * can happen either because MCF_NOCPUCACHE is set, or because |
527 | * the bucket layer is currently being resized. |
528 | */ |
529 | if (ccp->cc_bktsize == 0) |
530 | break; |
531 | |
532 | /* |
533 | * Both of the CPU's buckets are empty; try to get a full |
534 | * bucket from the bucket layer. Upon success, refill this |
535 | * CPU and place any empty bucket into the empty list. |
536 | */ |
537 | bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL); |
538 | if (bkt != NULL) { |
539 | if (ccp->cc_pfilled != NULL) |
540 | mcache_bkt_free(cp, &cp->mc_empty, |
541 | ccp->cc_pfilled); |
542 | mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize); |
543 | continue; |
544 | } |
545 | |
546 | /* |
547 | * The bucket layer has no full buckets; allocate the |
548 | * object(s) directly from the slab layer. |
549 | */ |
550 | break; |
551 | } |
552 | MCACHE_UNLOCK(&ccp->cc_lock); |
553 | |
554 | need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait); |
555 | |
556 | /* |
557 | * If this is a blocking allocation, or if it is non-blocking and |
558 | * the cache's full bucket is non-empty, then retry the allocation. |
559 | */ |
560 | if (need > 0) { |
561 | if (!(wait & MCR_NONBLOCKING)) { |
562 | atomic_add_32(&cp->mc_wretry_cnt, 1); |
563 | goto retry_alloc; |
564 | } else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) && |
565 | !mcache_bkt_isempty(cp)) { |
566 | if (!nwretry) |
567 | nwretry = TRUE; |
568 | atomic_add_32(&cp->mc_nwretry_cnt, 1); |
569 | goto retry_alloc; |
570 | } else if (nwretry) { |
571 | atomic_add_32(&cp->mc_nwfail_cnt, 1); |
572 | } |
573 | } |
574 | |
575 | if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) |
576 | (*cp->mc_slab_log)((num - need), *top, TRUE); |
577 | |
578 | if (!(cp->mc_flags & MCF_DEBUG)) |
579 | return (num - need); |
580 | |
581 | debug_alloc: |
582 | if (cp->mc_flags & MCF_DEBUG) { |
583 | mcache_obj_t **o = top; |
584 | unsigned int n; |
585 | |
586 | n = 0; |
587 | /* |
		 * Verify that the chain of objects has the same count as
589 | * what we are about to report to the caller. Any mismatch |
590 | * here means that the object list is insanely broken and |
591 | * therefore we must panic. |
592 | */ |
593 | while (*o != NULL) { |
594 | o = &(*o)->obj_next; |
595 | ++n; |
596 | } |
597 | if (n != (num - need)) { |
598 | panic("mcache_alloc_ext: %s cp %p corrupted list " |
			    "(got %d actual %d)\n", cp->mc_name,
600 | (void *)cp, num - need, n); |
601 | } |
602 | } |
603 | |
604 | /* Invoke the slab layer audit callback if auditing is enabled */ |
605 | if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) |
606 | (*cp->mc_slab_audit)(cp->mc_private, *top, TRUE); |
607 | |
608 | return (num - need); |
609 | } |
610 | |
611 | /* |
612 | * Allocate a single object from a cache. |
613 | */ |
614 | __private_extern__ void * |
615 | mcache_alloc(mcache_t *cp, int wait) |
616 | { |
617 | mcache_obj_t *buf; |
618 | |
619 | (void) mcache_alloc_ext(cp, &buf, 1, wait); |
620 | return (buf); |
621 | } |
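
/*
 * A sketch of a batched transaction; "mycache" is hypothetical:
 *
 *	mcache_obj_t *list, *o;
 *	unsigned int n;
 *
 *	n = mcache_alloc_ext(mycache, &list, 16, MCR_NOSLEEP);
 *	for (o = list; o != NULL; o = o->obj_next)
 *		;			// the n objects are chained via obj_next
 *	mcache_free_ext(mycache, list);	// frees the entire chain
 */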
622 | |
623 | __private_extern__ void |
624 | mcache_waiter_inc(mcache_t *cp) |
625 | { |
626 | atomic_add_32(&cp->mc_waiter_cnt, 1); |
627 | } |
628 | |
629 | __private_extern__ void |
630 | mcache_waiter_dec(mcache_t *cp) |
631 | { |
632 | atomic_add_32(&cp->mc_waiter_cnt, -1); |
633 | } |
634 | |
635 | __private_extern__ boolean_t |
636 | mcache_bkt_isempty(mcache_t *cp) |
637 | { |
638 | /* |
639 | * This isn't meant to accurately tell whether there are |
640 | * any full buckets in the cache; it is simply a way to |
641 | * obtain "hints" about the state of the cache. |
642 | */ |
643 | return (cp->mc_full.bl_total == 0); |
644 | } |
645 | |
646 | /* |
647 | * Notify the slab layer about an event. |
648 | */ |
649 | static void |
650 | mcache_notify(mcache_t *cp, u_int32_t event) |
651 | { |
652 | if (cp->mc_slab_notify != NULL) |
653 | (*cp->mc_slab_notify)(cp->mc_private, event); |
654 | } |
655 | |
656 | /* |
657 | * Purge the cache and disable its buckets. |
658 | */ |
659 | static void |
660 | mcache_purge(void *arg) |
661 | { |
662 | mcache_t *cp = arg; |
663 | |
664 | mcache_bkt_purge(cp); |
665 | /* |
666 | * We cannot simply call mcache_cache_bkt_enable() from here as |
667 | * a bucket resize may be in flight and we would cause the CPU |
668 | * layers of the cache to point to different sizes. Therefore, |
669 | * we simply increment the enable count so that during the next |
670 | * periodic cache update the buckets can be reenabled. |
671 | */ |
672 | lck_mtx_lock_spin(&cp->mc_sync_lock); |
673 | cp->mc_enable_cnt++; |
674 | lck_mtx_unlock(&cp->mc_sync_lock); |
675 | } |
676 | |
677 | __private_extern__ boolean_t |
678 | mcache_purge_cache(mcache_t *cp, boolean_t async) |
679 | { |
680 | /* |
681 | * Purging a cache that has no per-CPU caches or is already |
682 | * in the process of being purged is rather pointless. |
683 | */ |
684 | if (cp->mc_flags & MCF_NOCPUCACHE) |
685 | return (FALSE); |
686 | |
687 | lck_mtx_lock_spin(&cp->mc_sync_lock); |
688 | if (cp->mc_purge_cnt > 0) { |
689 | lck_mtx_unlock(&cp->mc_sync_lock); |
690 | return (FALSE); |
691 | } |
692 | cp->mc_purge_cnt++; |
693 | lck_mtx_unlock(&cp->mc_sync_lock); |
694 | |
695 | if (async) |
696 | mcache_dispatch(mcache_purge, cp); |
697 | else |
698 | mcache_purge(cp); |
699 | |
700 | return (TRUE); |
701 | } |
702 | |
703 | /* |
704 | * Free a single object to a cache. |
705 | */ |
706 | __private_extern__ void |
707 | mcache_free(mcache_t *cp, void *buf) |
708 | { |
709 | ((mcache_obj_t *)buf)->obj_next = NULL; |
710 | mcache_free_ext(cp, (mcache_obj_t *)buf); |
711 | } |
712 | |
713 | /* |
714 | * Free one or more objects to a cache. |
715 | */ |
716 | __private_extern__ void |
717 | mcache_free_ext(mcache_t *cp, mcache_obj_t *list) |
718 | { |
719 | mcache_cpu_t *ccp = MCACHE_CPU(cp); |
720 | mcache_bkttype_t *btp; |
721 | mcache_obj_t *nlist; |
722 | mcache_bkt_t *bkt; |
723 | |
724 | if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) |
725 | (*cp->mc_slab_log)(0, list, FALSE); |
726 | |
727 | /* Invoke the slab layer audit callback if auditing is enabled */ |
728 | if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) |
729 | (*cp->mc_slab_audit)(cp->mc_private, list, FALSE); |
730 | |
731 | MCACHE_LOCK(&ccp->cc_lock); |
732 | for (;;) { |
733 | /* |
734 | * If there is space in the current CPU's filled bucket, put |
735 | * the object there and return once all objects are freed. |
736 | * Note the cast to unsigned integer takes care of the case |
737 | * where the bucket layer is disabled (when cc_objs is -1). |
738 | */ |
739 | if ((unsigned int)ccp->cc_objs < |
740 | (unsigned int)ccp->cc_bktsize) { |
741 | /* |
742 | * Reverse the list while we place the object into the |
743 | * bucket; this effectively causes the most recently |
744 | * freed object(s) to be reused during allocation. |
745 | */ |
746 | nlist = list->obj_next; |
747 | list->obj_next = (ccp->cc_objs == 0) ? NULL : |
748 | ccp->cc_filled->bkt_obj[ccp->cc_objs - 1]; |
749 | ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list; |
750 | ccp->cc_free++; |
751 | |
752 | if ((list = nlist) != NULL) |
753 | continue; |
754 | |
755 | /* We are done; return to caller */ |
756 | MCACHE_UNLOCK(&ccp->cc_lock); |
757 | |
758 | /* If there is a waiter below, notify it */ |
759 | if (cp->mc_waiter_cnt > 0) |
760 | mcache_notify(cp, MCN_RETRYALLOC); |
761 | return; |
762 | } |
763 | |
764 | /* |
765 | * The CPU's filled bucket is full. If the previous filled |
766 | * bucket was empty, exchange and try again. |
767 | */ |
768 | if (ccp->cc_pobjs == 0) { |
769 | mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs); |
770 | continue; |
771 | } |
772 | |
773 | /* |
774 | * If the bucket layer is disabled, free to slab. This can |
775 | * happen either because MCF_NOCPUCACHE is set, or because |
776 | * the bucket layer is currently being resized. |
777 | */ |
778 | if (ccp->cc_bktsize == 0) |
779 | break; |
780 | |
781 | /* |
782 | * Both of the CPU's buckets are full; try to get an empty |
783 | * bucket from the bucket layer. Upon success, empty this |
784 | * CPU and place any full bucket into the full list. |
785 | */ |
786 | bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp); |
787 | if (bkt != NULL) { |
788 | if (ccp->cc_pfilled != NULL) |
789 | mcache_bkt_free(cp, &cp->mc_full, |
790 | ccp->cc_pfilled); |
791 | mcache_cpu_refill(ccp, bkt, 0); |
792 | continue; |
793 | } |
794 | |
795 | /* |
796 | * We need an empty bucket to put our freed objects into |
797 | * but couldn't get an empty bucket from the bucket layer; |
798 | * attempt to allocate one. We do not want to block for |
799 | * allocation here, and if the bucket allocation fails |
800 | * we will simply fall through to the slab layer. |
801 | */ |
802 | MCACHE_UNLOCK(&ccp->cc_lock); |
803 | bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP); |
804 | MCACHE_LOCK(&ccp->cc_lock); |
805 | |
806 | if (bkt != NULL) { |
807 | /* |
808 | * We have an empty bucket, but since we drop the |
809 | * CPU lock above, the cache's bucket size may have |
810 | * changed. If so, free the bucket and try again. |
811 | */ |
812 | if (ccp->cc_bktsize != btp->bt_bktsize) { |
813 | MCACHE_UNLOCK(&ccp->cc_lock); |
814 | mcache_free(btp->bt_cache, bkt); |
815 | MCACHE_LOCK(&ccp->cc_lock); |
816 | continue; |
817 | } |
818 | |
819 | /* |
820 | * We have an empty bucket of the right size; |
821 | * add it to the bucket layer and try again. |
822 | */ |
823 | mcache_bkt_free(cp, &cp->mc_empty, bkt); |
824 | continue; |
825 | } |
826 | |
827 | /* |
828 | * The bucket layer has no empty buckets; free the |
829 | * object(s) directly to the slab layer. |
830 | */ |
831 | break; |
832 | } |
833 | MCACHE_UNLOCK(&ccp->cc_lock); |
834 | |
835 | /* If there is a waiter below, notify it */ |
836 | if (cp->mc_waiter_cnt > 0) |
837 | mcache_notify(cp, MCN_RETRYALLOC); |
838 | |
839 | /* Advise the slab layer to purge the object(s) */ |
840 | (*cp->mc_slab_free)(cp->mc_private, list, |
841 | (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt); |
842 | } |
843 | |
844 | /* |
845 | * Cache destruction routine. |
846 | */ |
847 | __private_extern__ void |
848 | mcache_destroy(mcache_t *cp) |
849 | { |
850 | void **pbuf; |
851 | |
852 | MCACHE_LIST_LOCK(); |
853 | LIST_REMOVE(cp, mc_list); |
854 | MCACHE_LIST_UNLOCK(); |
855 | |
856 | mcache_bkt_purge(cp); |
857 | |
858 | /* |
	 * This cache is dead; there should be no further transactions.
	 * If it's still used after this point, make sure it induces a fault.
861 | */ |
862 | cp->mc_slab_alloc = NULL; |
863 | cp->mc_slab_free = NULL; |
864 | cp->mc_slab_audit = NULL; |
865 | |
866 | lck_attr_free(cp->mc_bkt_lock_attr); |
867 | lck_grp_free(cp->mc_bkt_lock_grp); |
868 | lck_grp_attr_free(cp->mc_bkt_lock_grp_attr); |
869 | |
870 | lck_attr_free(cp->mc_cpu_lock_attr); |
871 | lck_grp_free(cp->mc_cpu_lock_grp); |
872 | lck_grp_attr_free(cp->mc_cpu_lock_grp_attr); |
873 | |
874 | lck_attr_free(cp->mc_sync_lock_attr); |
875 | lck_grp_free(cp->mc_sync_lock_grp); |
876 | lck_grp_attr_free(cp->mc_sync_lock_grp_attr); |
877 | |
878 | /* |
	 * TODO: We need to destroy the zone here, but cannot do it
	 * because the zone allocator provides no way to do so.  Until
	 * then, the memory allocated for the zone structure is leaked.
	 * Once it becomes possible, uncomment these lines:
883 | * |
884 | * if (cp->mc_slab_zone != NULL) { |
885 | * zdestroy(cp->mc_slab_zone); |
886 | * cp->mc_slab_zone = NULL; |
887 | * } |
888 | */ |
889 | |
890 | /* Get the original address since we're about to free it */ |
891 | pbuf = (void **)((intptr_t)cp - sizeof (void *)); |
892 | |
893 | zfree(mcache_zone, *pbuf); |
894 | } |
895 | |
896 | /* |
897 | * Internal slab allocator used as a backend for simple caches. The current |
 * implementation uses the zone allocator for simplicity.
899 | */ |
900 | static unsigned int |
901 | mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, |
902 | int wait) |
903 | { |
904 | #pragma unused(wait) |
905 | mcache_t *cp = arg; |
906 | unsigned int need = num; |
907 | size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t)); |
908 | u_int32_t flags = cp->mc_flags; |
909 | void *buf, *base, **pbuf; |
910 | mcache_obj_t **list = *plist; |
911 | |
912 | *list = NULL; |
913 | |
914 | for (;;) { |
915 | buf = zalloc(cp->mc_slab_zone); |
916 | if (buf == NULL) |
917 | break; |
918 | |
919 | /* Get the aligned base address for this object */ |
920 | base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), |
921 | cp->mc_align); |
922 | |
923 | /* |
924 | * Wind back a pointer size from the aligned base and |
925 | * save the original address so we can free it later. |
926 | */ |
927 | pbuf = (void **)((intptr_t)base - sizeof (void *)); |
928 | *pbuf = buf; |
929 | |
		VERIFY(((intptr_t)base + cp->mc_bufsize) <=
931 | ((intptr_t)buf + cp->mc_chunksize)); |
932 | |
933 | /* |
934 | * If auditing is enabled, patternize the contents of |
935 | * the buffer starting from the 64-bit aligned base to |
936 | * the end of the buffer; the length is rounded up to |
		 * the nearest 64-bit multiple; this is because we use
		 * 64-bit memory accesses to set/check the pattern.
939 | */ |
940 | if (flags & MCF_DEBUG) { |
941 | VERIFY(((intptr_t)base + rsize) <= |
942 | ((intptr_t)buf + cp->mc_chunksize)); |
943 | mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize); |
944 | } |
945 | |
946 | VERIFY(IS_P2ALIGNED(base, cp->mc_align)); |
947 | *list = (mcache_obj_t *)base; |
948 | |
949 | (*list)->obj_next = NULL; |
950 | list = *plist = &(*list)->obj_next; |
951 | |
952 | /* If we got them all, return to mcache */ |
953 | if (--need == 0) |
954 | break; |
955 | } |
956 | |
957 | return (num - need); |
958 | } |
959 | |
960 | /* |
961 | * Internal slab deallocator used as a backend for simple caches. |
962 | */ |
963 | static void |
964 | mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged) |
965 | { |
966 | mcache_t *cp = arg; |
967 | mcache_obj_t *nlist; |
968 | size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t)); |
969 | u_int32_t flags = cp->mc_flags; |
970 | void *base; |
971 | void **pbuf; |
972 | |
973 | for (;;) { |
974 | nlist = list->obj_next; |
975 | list->obj_next = NULL; |
976 | |
977 | base = list; |
978 | VERIFY(IS_P2ALIGNED(base, cp->mc_align)); |
979 | |
980 | /* Get the original address since we're about to free it */ |
981 | pbuf = (void **)((intptr_t)base - sizeof (void *)); |
982 | |
983 | VERIFY(((intptr_t)base + cp->mc_bufsize) <= |
984 | ((intptr_t)*pbuf + cp->mc_chunksize)); |
985 | |
986 | if (flags & MCF_DEBUG) { |
987 | VERIFY(((intptr_t)base + rsize) <= |
988 | ((intptr_t)*pbuf + cp->mc_chunksize)); |
989 | mcache_audit_free_verify(NULL, base, 0, rsize); |
990 | } |
991 | |
992 | /* Free it to zone */ |
993 | zfree(cp->mc_slab_zone, *pbuf); |
994 | |
995 | /* No more objects to free; return to mcache */ |
996 | if ((list = nlist) == NULL) |
997 | break; |
998 | } |
999 | } |
1000 | |
1001 | /* |
1002 | * Internal slab auditor for simple caches. |
1003 | */ |
1004 | static void |
1005 | mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) |
1006 | { |
1007 | mcache_t *cp = arg; |
1008 | size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t)); |
1009 | void *base, **pbuf; |
1010 | |
1011 | while (list != NULL) { |
1012 | mcache_obj_t *next = list->obj_next; |
1013 | |
1014 | base = list; |
1015 | VERIFY(IS_P2ALIGNED(base, cp->mc_align)); |
1016 | |
1017 | /* Get the original address */ |
1018 | pbuf = (void **)((intptr_t)base - sizeof (void *)); |
1019 | |
1020 | VERIFY(((intptr_t)base + rsize) <= |
1021 | ((intptr_t)*pbuf + cp->mc_chunksize)); |
1022 | |
1023 | if (!alloc) |
1024 | mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize); |
1025 | else |
1026 | mcache_audit_free_verify_set(NULL, base, 0, rsize); |
1027 | |
1028 | list = list->obj_next = next; |
1029 | } |
1030 | } |
1031 | |
1032 | /* |
1033 | * Refill the CPU's filled bucket with bkt and save the previous one. |
1034 | */ |
1035 | static void |
1036 | mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs) |
1037 | { |
1038 | ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) || |
1039 | (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize)); |
1040 | ASSERT(ccp->cc_bktsize > 0); |
1041 | |
1042 | ccp->cc_pfilled = ccp->cc_filled; |
1043 | ccp->cc_pobjs = ccp->cc_objs; |
1044 | ccp->cc_filled = bkt; |
1045 | ccp->cc_objs = objs; |
1046 | } |
1047 | |
1048 | /* |
1049 | * Allocate a bucket from the bucket layer. |
1050 | */ |
1051 | static mcache_bkt_t * |
1052 | mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp) |
1053 | { |
1054 | mcache_bkt_t *bkt; |
1055 | |
1056 | if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) { |
1057 | /* |
1058 | * The bucket layer lock is held by another CPU; increase |
		 * the contention count so that we can later adjust the
		 * bucket size accordingly.
1061 | */ |
1062 | MCACHE_LOCK(&cp->mc_bkt_lock); |
1063 | cp->mc_bkt_contention++; |
1064 | } |
1065 | |
1066 | if ((bkt = blp->bl_list) != NULL) { |
1067 | blp->bl_list = bkt->bkt_next; |
1068 | if (--blp->bl_total < blp->bl_min) |
1069 | blp->bl_min = blp->bl_total; |
1070 | blp->bl_alloc++; |
1071 | } |
1072 | |
1073 | if (btp != NULL) |
1074 | *btp = cp->cache_bkttype; |
1075 | |
1076 | MCACHE_UNLOCK(&cp->mc_bkt_lock); |
1077 | |
1078 | return (bkt); |
1079 | } |
1080 | |
1081 | /* |
1082 | * Free a bucket to the bucket layer. |
1083 | */ |
1084 | static void |
1085 | mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt) |
1086 | { |
1087 | MCACHE_LOCK(&cp->mc_bkt_lock); |
1088 | |
1089 | bkt->bkt_next = blp->bl_list; |
1090 | blp->bl_list = bkt; |
1091 | blp->bl_total++; |
1092 | |
1093 | MCACHE_UNLOCK(&cp->mc_bkt_lock); |
1094 | } |
1095 | |
1096 | /* |
1097 | * Enable the bucket layer of a cache. |
1098 | */ |
1099 | static void |
1100 | mcache_cache_bkt_enable(mcache_t *cp) |
1101 | { |
1102 | mcache_cpu_t *ccp; |
1103 | int cpu; |
1104 | |
1105 | if (cp->mc_flags & MCF_NOCPUCACHE) |
1106 | return; |
1107 | |
1108 | for (cpu = 0; cpu < ncpu; cpu++) { |
1109 | ccp = &cp->mc_cpu[cpu]; |
1110 | MCACHE_LOCK(&ccp->cc_lock); |
1111 | ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize; |
1112 | MCACHE_UNLOCK(&ccp->cc_lock); |
1113 | } |
1114 | } |
1115 | |
1116 | /* |
1117 | * Purge all buckets from a cache and disable its bucket layer. |
1118 | */ |
1119 | static void |
1120 | mcache_bkt_purge(mcache_t *cp) |
1121 | { |
1122 | mcache_cpu_t *ccp; |
1123 | mcache_bkt_t *bp, *pbp; |
1124 | mcache_bkttype_t *btp; |
1125 | int cpu, objs, pobjs; |
1126 | |
1127 | for (cpu = 0; cpu < ncpu; cpu++) { |
1128 | ccp = &cp->mc_cpu[cpu]; |
1129 | |
1130 | MCACHE_LOCK(&ccp->cc_lock); |
1131 | |
1132 | btp = cp->cache_bkttype; |
1133 | bp = ccp->cc_filled; |
1134 | pbp = ccp->cc_pfilled; |
1135 | objs = ccp->cc_objs; |
1136 | pobjs = ccp->cc_pobjs; |
1137 | ccp->cc_filled = NULL; |
1138 | ccp->cc_pfilled = NULL; |
1139 | ccp->cc_objs = -1; |
1140 | ccp->cc_pobjs = -1; |
1141 | ccp->cc_bktsize = 0; |
1142 | |
1143 | MCACHE_UNLOCK(&ccp->cc_lock); |
1144 | |
1145 | if (bp != NULL) |
1146 | mcache_bkt_destroy(cp, btp, bp, objs); |
1147 | if (pbp != NULL) |
1148 | mcache_bkt_destroy(cp, btp, pbp, pobjs); |
1149 | } |
1150 | |
1151 | mcache_bkt_ws_zero(cp); |
1152 | mcache_bkt_ws_reap(cp); |
1153 | } |
1154 | |
1155 | /* |
1156 | * Free one or more objects in the bucket to the slab layer, |
1157 | * and also free the bucket itself. |
1158 | */ |
1159 | static void |
1160 | mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt, |
1161 | int nobjs) |
1162 | { |
1163 | if (nobjs > 0) { |
1164 | mcache_obj_t *top = bkt->bkt_obj[nobjs - 1]; |
1165 | |
1166 | if (cp->mc_flags & MCF_DEBUG) { |
1167 | mcache_obj_t *o = top; |
1168 | int cnt = 0; |
1169 | |
1170 | /* |
1171 | * Verify that the chain of objects in the bucket is |
1172 | * valid. Any mismatch here means a mistake when the |
1173 | * object(s) were freed to the CPU layer, so we panic. |
1174 | */ |
1175 | while (o != NULL) { |
1176 | o = o->obj_next; |
1177 | ++cnt; |
1178 | } |
1179 | if (cnt != nobjs) { |
1180 | panic("mcache_bkt_destroy: %s cp %p corrupted " |
			    "list in bkt %p (nobjs %d actual %d)\n",
1182 | cp->mc_name, (void *)cp, (void *)bkt, |
1183 | nobjs, cnt); |
1184 | } |
1185 | } |
1186 | |
1187 | /* Advise the slab layer to purge the object(s) */ |
1188 | (*cp->mc_slab_free)(cp->mc_private, top, |
1189 | (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt); |
1190 | } |
1191 | mcache_free(btp->bt_cache, bkt); |
1192 | } |
1193 | |
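/*
 * Working set accounting for the bucket layer: during an interval,
 * bl_min tracks the smallest length each bucket list reached, i.e. how
 * many buckets were never needed.  mcache_bkt_ws_update() snapshots
 * that value into bl_reaplimit and restarts the tracking, while
 * mcache_bkt_ws_reap() destroys up to MIN(bl_reaplimit, bl_min)
 * buckets.  For example, if the full-bucket list has not dropped below
 * 5 buckets since the start of the previous update interval, a reap
 * may destroy up to 5 of those buckets.
 */
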
1194 | /* |
1195 | * Update the bucket layer working set statistics. |
1196 | */ |
1197 | static void |
1198 | mcache_bkt_ws_update(mcache_t *cp) |
1199 | { |
1200 | MCACHE_LOCK(&cp->mc_bkt_lock); |
1201 | |
1202 | cp->mc_full.bl_reaplimit = cp->mc_full.bl_min; |
1203 | cp->mc_full.bl_min = cp->mc_full.bl_total; |
1204 | cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min; |
1205 | cp->mc_empty.bl_min = cp->mc_empty.bl_total; |
1206 | |
1207 | MCACHE_UNLOCK(&cp->mc_bkt_lock); |
1208 | } |
1209 | |
1210 | /* |
1211 | * Mark everything as eligible for reaping (working set is zero). |
1212 | */ |
1213 | static void |
1214 | mcache_bkt_ws_zero(mcache_t *cp) |
1215 | { |
1216 | MCACHE_LOCK(&cp->mc_bkt_lock); |
1217 | |
1218 | cp->mc_full.bl_reaplimit = cp->mc_full.bl_total; |
1219 | cp->mc_full.bl_min = cp->mc_full.bl_total; |
1220 | cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_total; |
1221 | cp->mc_empty.bl_min = cp->mc_empty.bl_total; |
1222 | |
1223 | MCACHE_UNLOCK(&cp->mc_bkt_lock); |
1224 | } |
1225 | |
1226 | /* |
1227 | * Reap all buckets that are beyond the working set. |
1228 | */ |
1229 | static void |
1230 | mcache_bkt_ws_reap(mcache_t *cp) |
1231 | { |
1232 | long reap; |
1233 | mcache_bkt_t *bkt; |
1234 | mcache_bkttype_t *btp; |
1235 | |
1236 | reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min); |
1237 | while (reap-- && |
1238 | (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL) |
1239 | mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize); |
1240 | |
1241 | reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min); |
1242 | while (reap-- && |
1243 | (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL) |
1244 | mcache_bkt_destroy(cp, btp, bkt, 0); |
1245 | } |
1246 | |
1247 | static void |
1248 | mcache_reap_timeout(thread_call_param_t dummy __unused, |
1249 | thread_call_param_t arg) |
1250 | { |
1251 | volatile UInt32 *flag = arg; |
1252 | |
1253 | ASSERT(flag == &mcache_reaping); |
1254 | |
1255 | *flag = 0; |
1256 | } |
1257 | |
1258 | static void |
1259 | mcache_reap_done(void *flag) |
1260 | { |
1261 | uint64_t deadline, leeway; |
1262 | |
1263 | clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC, |
1264 | &deadline); |
1265 | clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway, |
1266 | NSEC_PER_SEC, &leeway); |
1267 | thread_call_enter_delayed_with_leeway(mcache_reap_tcall, flag, |
1268 | deadline, leeway, THREAD_CALL_DELAY_LEEWAY); |
1269 | } |
1270 | |
1271 | static void |
1272 | mcache_reap_start(void *arg) |
1273 | { |
1274 | UInt32 *flag = arg; |
1275 | |
1276 | ASSERT(flag == &mcache_reaping); |
1277 | |
1278 | mcache_applyall(mcache_cache_reap); |
1279 | mcache_dispatch(mcache_reap_done, flag); |
1280 | } |
1281 | |
1282 | __private_extern__ void |
1283 | mcache_reap(void) |
1284 | { |
1285 | UInt32 *flag = &mcache_reaping; |
1286 | |
1287 | if (mcache_llock_owner == current_thread() || |
1288 | !OSCompareAndSwap(0, 1, flag)) |
1289 | return; |
1290 | |
1291 | mcache_dispatch(mcache_reap_start, flag); |
1292 | } |
1293 | |
1294 | __private_extern__ void |
1295 | mcache_reap_now(mcache_t *cp, boolean_t purge) |
1296 | { |
1297 | if (purge) { |
1298 | mcache_bkt_purge(cp); |
1299 | mcache_cache_bkt_enable(cp); |
1300 | } else { |
1301 | mcache_bkt_ws_zero(cp); |
1302 | mcache_bkt_ws_reap(cp); |
1303 | } |
1304 | } |
1305 | |
1306 | static void |
1307 | mcache_cache_reap(mcache_t *cp) |
1308 | { |
1309 | mcache_bkt_ws_reap(cp); |
1310 | } |
1311 | |
1312 | /* |
 * Performs periodic maintenance on a cache.
1314 | */ |
1315 | static void |
1316 | mcache_cache_update(mcache_t *cp) |
1317 | { |
1318 | int need_bkt_resize = 0; |
1319 | int need_bkt_reenable = 0; |
1320 | |
1321 | lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED); |
1322 | |
1323 | mcache_bkt_ws_update(cp); |
1324 | |
1325 | /* |
1326 | * Cache resize and post-purge reenable are mutually exclusive. |
	 * If the cache was previously purged, there is no point in
	 * increasing the bucket size, as the purge was an indication
	 * of memory pressure on the system.
1330 | */ |
1331 | lck_mtx_lock_spin(&cp->mc_sync_lock); |
1332 | if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt) |
1333 | need_bkt_reenable = 1; |
1334 | lck_mtx_unlock(&cp->mc_sync_lock); |
1335 | |
1336 | MCACHE_LOCK(&cp->mc_bkt_lock); |
1337 | /* |
1338 | * If the contention count is greater than the threshold, and if |
1339 | * we are not already at the maximum bucket size, increase it. |
1340 | * Otherwise, if this cache was previously purged by the user |
1341 | * then we simply reenable it. |
1342 | */ |
1343 | if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf && |
1344 | (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) > |
1345 | mcache_bkt_contention && !need_bkt_reenable) |
1346 | need_bkt_resize = 1; |
1347 | |
	cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
1349 | MCACHE_UNLOCK(&cp->mc_bkt_lock); |
1350 | |
1351 | if (need_bkt_resize) |
1352 | mcache_dispatch(mcache_cache_bkt_resize, cp); |
1353 | else if (need_bkt_reenable) |
1354 | mcache_dispatch(mcache_cache_enable, cp); |
1355 | } |
1356 | |
1357 | /* |
1358 | * Recompute a cache's bucket size. This is an expensive operation |
 * and should not be done frequently; larger buckets provide a higher
 * transfer rate between the CPU and bucket layers, while smaller
 * buckets reduce memory consumption.
1362 | */ |
1363 | static void |
1364 | mcache_cache_bkt_resize(void *arg) |
1365 | { |
1366 | mcache_t *cp = arg; |
1367 | mcache_bkttype_t *btp = cp->cache_bkttype; |
1368 | |
1369 | if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) { |
1370 | mcache_bkt_purge(cp); |
1371 | |
1372 | /* |
1373 | * Upgrade to the next bucket type with larger bucket size; |
1374 | * temporarily set the previous contention snapshot to a |
1375 | * negative number to prevent unnecessary resize request. |
1376 | */ |
1377 | MCACHE_LOCK(&cp->mc_bkt_lock); |
1378 | cp->cache_bkttype = ++btp; |
		cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
1380 | MCACHE_UNLOCK(&cp->mc_bkt_lock); |
1381 | |
1382 | mcache_cache_enable(cp); |
1383 | } |
1384 | } |
1385 | |
1386 | /* |
1387 | * Reenable a previously disabled cache due to purge. |
1388 | */ |
1389 | static void |
1390 | mcache_cache_enable(void *arg) |
1391 | { |
1392 | mcache_t *cp = arg; |
1393 | |
1394 | lck_mtx_lock_spin(&cp->mc_sync_lock); |
1395 | cp->mc_purge_cnt = 0; |
1396 | cp->mc_enable_cnt = 0; |
1397 | lck_mtx_unlock(&cp->mc_sync_lock); |
1398 | |
1399 | mcache_cache_bkt_enable(cp); |
1400 | } |
1401 | |
1402 | static void |
1403 | mcache_update_timeout(__unused void *arg) |
1404 | { |
1405 | uint64_t deadline, leeway; |
1406 | |
1407 | clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC, |
1408 | &deadline); |
1409 | clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway, |
1410 | NSEC_PER_SEC, &leeway); |
1411 | thread_call_enter_delayed_with_leeway(mcache_update_tcall, NULL, |
1412 | deadline, leeway, THREAD_CALL_DELAY_LEEWAY); |
1413 | } |
1414 | |
1415 | static void |
1416 | mcache_update(thread_call_param_t arg __unused, |
1417 | thread_call_param_t dummy __unused) |
1418 | { |
1419 | mcache_applyall(mcache_cache_update); |
1420 | mcache_update_timeout(NULL); |
1421 | } |
1422 | |
1423 | static void |
1424 | mcache_applyall(void (*func)(mcache_t *)) |
1425 | { |
1426 | mcache_t *cp; |
1427 | |
1428 | MCACHE_LIST_LOCK(); |
1429 | LIST_FOREACH(cp, &mcache_head, mc_list) { |
1430 | func(cp); |
1431 | } |
1432 | MCACHE_LIST_UNLOCK(); |
1433 | } |
1434 | |
1435 | static void |
1436 | mcache_dispatch(void (*func)(void *), void *arg) |
1437 | { |
1438 | ASSERT(func != NULL); |
1439 | timeout(func, arg, hz/1000); |
1440 | } |
1441 | |
1442 | __private_extern__ void |
1443 | mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp, |
1444 | struct timeval *base_ts) |
1445 | { |
1446 | struct timeval now, base = { 0, 0 }; |
1447 | void *stack[MCACHE_STACK_DEPTH + 1]; |
1448 | struct mca_trn *transaction; |
1449 | |
1450 | transaction = &mca->mca_trns[mca->mca_next_trn]; |
1451 | |
1452 | mca->mca_addr = addr; |
1453 | mca->mca_cache = cp; |
1454 | |
1455 | transaction->mca_thread = current_thread(); |
1456 | |
1457 | bzero(stack, sizeof (stack)); |
1458 | transaction->mca_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1; |
1459 | bcopy(&stack[1], transaction->mca_stack, |
1460 | sizeof (transaction->mca_stack)); |
1461 | |
1462 | microuptime(&now); |
1463 | if (base_ts != NULL) |
1464 | base = *base_ts; |
1465 | /* tstamp is in ms relative to base_ts */ |
1466 | transaction->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000); |
1467 | if ((now.tv_sec - base.tv_sec) > 0) |
1468 | transaction->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000); |
1469 | |
1470 | mca->mca_next_trn = |
1471 | (mca->mca_next_trn + 1) % mca_trn_max; |
1472 | } |
1473 | |
1474 | __private_extern__ void |
1475 | mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size) |
1476 | { |
1477 | u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size)); |
1478 | u_int64_t *buf = (u_int64_t *)buf_arg; |
1479 | |
1480 | VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t))); |
1481 | VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t))); |
1482 | |
1483 | while (buf < buf_end) |
1484 | *buf++ = pattern; |
1485 | } |
1486 | |
1487 | __private_extern__ void * |
1488 | mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size) |
1489 | { |
1490 | u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size)); |
1491 | u_int64_t *buf; |
1492 | |
1493 | VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t))); |
1494 | VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t))); |
1495 | |
1496 | for (buf = buf_arg; buf < buf_end; buf++) { |
1497 | if (*buf != pattern) |
1498 | return (buf); |
1499 | } |
1500 | return (NULL); |
1501 | } |
1502 | |
1503 | __private_extern__ void * |
1504 | mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg, |
1505 | size_t size) |
1506 | { |
1507 | u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size)); |
1508 | u_int64_t *buf; |
1509 | |
1510 | VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t))); |
1511 | VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t))); |
1512 | |
1513 | for (buf = buf_arg; buf < buf_end; buf++) { |
1514 | if (*buf != old) { |
1515 | mcache_set_pattern(old, buf_arg, |
1516 | (uintptr_t)buf - (uintptr_t)buf_arg); |
1517 | return (buf); |
1518 | } |
1519 | *buf = new; |
1520 | } |
1521 | return (NULL); |
1522 | } |
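
/*
 * A debugging sketch: fill a free, 64-bit aligned buffer with the free
 * pattern and later verify that it was not modified while free; "buf"
 * and "size" are hypothetical:
 *
 *	mcache_set_pattern(MCACHE_FREE_PATTERN, buf, size);
 *	...
 *	if (mcache_verify_pattern(MCACHE_FREE_PATTERN, buf, size) != NULL)
 *		panic("buffer modified while free");
 */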
1523 | |
1524 | __private_extern__ void |
1525 | mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset, |
1526 | size_t size) |
1527 | { |
1528 | void *addr; |
1529 | u_int64_t *oaddr64; |
1530 | mcache_obj_t *next; |
1531 | |
1532 | addr = (void *)((uintptr_t)base + offset); |
1533 | next = ((mcache_obj_t *)addr)->obj_next; |
1534 | |
1535 | /* For the "obj_next" pointer in the buffer */ |
1536 | oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t)); |
1537 | *oaddr64 = MCACHE_FREE_PATTERN; |
1538 | |
1539 | if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN, |
1540 | (caddr_t)base, size)) != NULL) { |
1541 | mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base, |
1542 | (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64); |
1543 | /* NOTREACHED */ |
1544 | } |
1545 | ((mcache_obj_t *)addr)->obj_next = next; |
1546 | } |
1547 | |
1548 | __private_extern__ void |
1549 | mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset, |
1550 | size_t size) |
1551 | { |
1552 | void *addr; |
1553 | u_int64_t *oaddr64; |
1554 | mcache_obj_t *next; |
1555 | |
1556 | addr = (void *)((uintptr_t)base + offset); |
1557 | next = ((mcache_obj_t *)addr)->obj_next; |
1558 | |
1559 | /* For the "obj_next" pointer in the buffer */ |
1560 | oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t)); |
1561 | *oaddr64 = MCACHE_FREE_PATTERN; |
1562 | |
1563 | if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN, |
1564 | MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) { |
1565 | mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base, |
1566 | (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64); |
1567 | /* NOTREACHED */ |
1568 | } |
1569 | ((mcache_obj_t *)addr)->obj_next = next; |
1570 | } |
1571 | |
1572 | #undef panic |
1573 | |
1574 | #define DUMP_TRN_FMT() \ |
1575 | "%s transaction thread %p saved PC stack (%d deep):\n" \ |
1576 | "\t%p, %p, %p, %p, %p, %p, %p, %p\n" \ |
1577 | "\t%p, %p, %p, %p, %p, %p, %p, %p\n" |
1578 | |
1579 | #define DUMP_TRN_FIELDS(s, x) \ |
1580 | s, \ |
1581 | mca->mca_trns[x].mca_thread, mca->mca_trns[x].mca_depth, \ |
1582 | mca->mca_trns[x].mca_stack[0], mca->mca_trns[x].mca_stack[1], \ |
1583 | mca->mca_trns[x].mca_stack[2], mca->mca_trns[x].mca_stack[3], \ |
1584 | mca->mca_trns[x].mca_stack[4], mca->mca_trns[x].mca_stack[5], \ |
1585 | mca->mca_trns[x].mca_stack[6], mca->mca_trns[x].mca_stack[7], \ |
1586 | mca->mca_trns[x].mca_stack[8], mca->mca_trns[x].mca_stack[9], \ |
1587 | mca->mca_trns[x].mca_stack[10], mca->mca_trns[x].mca_stack[11], \ |
1588 | mca->mca_trns[x].mca_stack[12], mca->mca_trns[x].mca_stack[13], \ |
1589 | mca->mca_trns[x].mca_stack[14], mca->mca_trns[x].mca_stack[15] |
1590 | |
1591 | #define MCA_TRN_LAST ((mca->mca_next_trn + mca_trn_max) % mca_trn_max) |
1592 | #define MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max) |
1593 | |
1594 | __private_extern__ char * |
1595 | mcache_dump_mca(mcache_audit_t *mca) |
1596 | { |
1597 | if (mca_dump_buf == NULL) |
1598 | return (NULL); |
1599 | |
1600 | snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE, |
1601 | "mca %p: addr %p, cache %p (%s) nxttrn %d\n" |
1602 | DUMP_TRN_FMT() |
1603 | DUMP_TRN_FMT(), |
1604 | |
1605 | mca, mca->mca_addr, mca->mca_cache, |
	    mca->mca_cache ? mca->mca_cache->mc_name : "?",
1607 | mca->mca_next_trn, |
1608 | |
	    DUMP_TRN_FIELDS("last", MCA_TRN_LAST),
	    DUMP_TRN_FIELDS("previous", MCA_TRN_PREV));
1611 | |
1612 | return (mca_dump_buf); |
1613 | } |
1614 | |
1615 | __private_extern__ void |
1616 | mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset, |
1617 | int64_t expected, int64_t got) |
1618 | { |
1619 | if (mca == NULL) { |
1620 | panic("mcache_audit: buffer %p modified after free at " |
		    "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
1622 | offset, got, expected); |
1623 | /* NOTREACHED */ |
1624 | } |
1625 | |
1626 | panic("mcache_audit: buffer %p modified after free at offset 0x%lx " |
	    "(0x%llx instead of 0x%llx)\n%s\n",
1628 | addr, offset, got, expected, mcache_dump_mca(mca)); |
1629 | /* NOTREACHED */ |
1630 | } |
1631 | |
1632 | __private_extern__ int |
1633 | assfail(const char *a, const char *f, int l) |
1634 | { |
	panic("assertion failed: %s, file: %s, line: %d", a, f, l);
1636 | return (0); |
1637 | } |
1638 | |