/*
 * Copyright (c) 2016-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * A region represents a collection of one or more similarly-sized memory
 * segments, each of which is a contiguous range of integers. A segment
 * is either allocated or free, and is treated as disjoint from all other
 * segments. That is, the contiguity applies only at the segment level,
 * and a region with multiple segments is not contiguous at the region level.
 * A segment always belongs to either the segment freelist or the
 * allocated-address hash chain, as described below.
 *
 * The optional SKMEM_REGION_CR_NOREDIRECT flag indicates that the region
 * stays intact even after a defunct. Otherwise, the segments belonging
 * to the region will be freed at defunct time, and the span covered by
 * the region will be redirected to zero-filled anonymous memory.
 *
 * Memory for a region is always created as pageable and purgeable. It is
 * the client's responsibility to prepare (wire) it, and optionally insert
 * it into the IOMMU, at segment construction time. When the segment is
 * freed, the client is responsible for removing it from the IOMMU (if
 * needed) and for completing (unwiring) it.
 *
 * When the region is created with SKMEM_REGION_CR_PERSISTENT, the memory
 * is immediately wired upon allocation (segment removed from the freelist).
 * It gets unwired when the memory is discarded (segment inserted into the
 * freelist).
 *
 * The chronological life cycle of a segment is as follows:
 *
 *	SKSEG_STATE_DETACHED
 *	SKSEG_STATE_{MAPPED,MAPPED_WIRED}
 *	[segment allocated, usable by client]
 *	...
 *	[client frees segment]
 *	SKSEG_STATE_{MAPPED,MAPPED_WIRED}
 *	[reclaim]
 *	SKSEG_STATE_DETACHED
 *
 * The region can also be marked as user-mappable (SKMEM_REGION_CR_MMAPOK);
 * this allows it to be further marked with SKMEM_REGION_CR_UREADONLY to
 * prevent modifications by the user task. Only user-mappable regions will
 * be considered for inclusion during skmem_arena_mmap().
 *
 * Every skmem allocator has a region as its slab supplier. Each slab is
 * exactly a segment. The allocator uses skmem_region_{alloc,free}() to
 * create and destroy slabs.
 *
 * A region may be mirrored by another region; the latter acts as the master
 * controller for both regions. Mirrored (slave) regions cannot be used
 * directly by the skmem allocator. The region mirroring technique is used
 * for managing shadow objects {umd,kmd} and {usd,ksd}, where an object in
 * one region has the same size and lifetime as its shadow counterpart.
 *
 * CREATION/DESTRUCTION:
 *
 * At creation time, all segments are allocated and are immediately inserted
 * into the freelist. Allocating a purgeable segment has very little cost,
 * as it is not backed by physical memory until it is accessed. Immediate
 * insertion into the freelist then causes the mapping to be torn down.
 *
 * At destruction time, the freelist is emptied, and each segment is then
 * destroyed. The system will assert if it detects that there are outstanding
 * segments not yet returned to the region (not freed by the client).
 *
 * ALLOCATION:
 *
 * Allocating involves searching the freelist for a segment; if found, the
 * segment is removed from the freelist and is inserted into the allocated-
 * address hash chain. The address of the memory object represented by
 * the segment is used as the hash key. The allocated-address hash chain
 * is needed since we return the address of the memory object, and not the
 * segment itself, to the client.
 *
 * DEALLOCATION:
 *
 * Freeing a memory object causes the chain to be searched for a matching
 * segment. The system will assert if a segment cannot be found, since
 * that indicates that the memory object address is invalid. Once found,
 * the segment is removed from the allocated-address hash chain, and is
 * inserted into the freelist.
 *
 * Segment allocation and deallocation can be expensive. Because of this,
 * we expect that most clients will utilize the skmem_cache slab allocator
 * as the frontend instead.
 */
/* END CSTYLED */

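/*
 * Illustrative usage sketch (comment only, not part of the build): how a
 * hypothetical client might drive the routines declared in this file. The
 * real clients are the arena and skmem_cache layers; the parameter values,
 * the "demo" label and the my_seg_ctor/my_seg_dtor callbacks below are made
 * up, the remaining srp fields would normally come from the skmem_regions[]
 * defaults, and error handling is omitted.
 *
 *	struct skmem_region_params srp;      // preconfigured template in practice
 *	struct skmem_region *skr;
 *	void *obj;
 *
 *	srp.srp_r_obj_size = 2048;           // requested object size
 *	srp.srp_r_obj_cnt = 1024;            // requested object count
 *	srp.srp_cflags |= SKMEM_REGION_CR_NOMAGAZINES;
 *	skmem_region_params_config(&srp);    // compute effective geometry
 *
 *	// ctor/dtor are where the client wires/unwires each segment
 *	skr = skmem_region_create("demo", &srp, my_seg_ctor, my_seg_dtor, NULL);
 *
 *	// each allocation returns one segment-sized memory object
 *	obj = skmem_region_alloc(skr, NULL, NULL, NULL, SKMEM_SLEEP);
 *	...
 *	skmem_region_free(skr, obj, NULL);
 *
 *	// drop the creator's reference; the region is destroyed on last release
 *	(void) skmem_region_release(skr);
 */
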
114#include <skywalk/os_skywalk_private.h>
115#define _FN_KPRINTF /* don't redefine kprintf() */
116#include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
117
118static void skmem_region_destroy(struct skmem_region *skr);
119static void skmem_region_depopulate(struct skmem_region *);
120static int sksegment_cmp(const struct sksegment *, const struct sksegment *);
121static struct sksegment *sksegment_create(struct skmem_region *, uint32_t);
122static void sksegment_destroy(struct skmem_region *, struct sksegment *);
123static void sksegment_freelist_insert(struct skmem_region *,
124 struct sksegment *, boolean_t);
125static struct sksegment *sksegment_freelist_remove(struct skmem_region *,
126 struct sksegment *, uint32_t, boolean_t);
127static struct sksegment *sksegment_freelist_grow(struct skmem_region *);
128static struct sksegment *sksegment_alloc_with_idx(struct skmem_region *,
129 uint32_t);
130static void *skmem_region_alloc_common(struct skmem_region *,
131 struct sksegment *);
132static void *skmem_region_mirror_alloc(struct skmem_region *,
133 struct sksegment *, struct sksegment **);
134static void skmem_region_applyall(void (*)(struct skmem_region *));
135static void skmem_region_update(struct skmem_region *);
136static void skmem_region_update_func(thread_call_param_t, thread_call_param_t);
137static inline void skmem_region_retain_locked(struct skmem_region *);
138static inline boolean_t skmem_region_release_locked(struct skmem_region *);
139static int skmem_region_mib_get_sysctl SYSCTL_HANDLER_ARGS;
140
141RB_PROTOTYPE_PREV(segtfreehead, sksegment, sg_node, sksegment_cmp);
142RB_GENERATE_PREV(segtfreehead, sksegment, sg_node, sksegment_cmp);
143
144SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, region,
145 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
146 0, 0, skmem_region_mib_get_sysctl, "S,sk_stats_region",
147 "Skywalk region statistics");
148
149static LCK_ATTR_DECLARE(skmem_region_lock_attr, 0, 0);
150static LCK_GRP_DECLARE(skmem_region_lock_grp, "skmem_region");
151static LCK_MTX_DECLARE_ATTR(skmem_region_lock, &skmem_region_lock_grp,
152 &skmem_region_lock_attr);
153
154/* protected by skmem_region_lock */
155static TAILQ_HEAD(, skmem_region) skmem_region_head;
156
157static thread_call_t skmem_region_update_tc;
158
159#define SKMEM_REGION_UPDATE_INTERVAL 13 /* 13 seconds */
160static uint32_t skmem_region_update_interval = SKMEM_REGION_UPDATE_INTERVAL;
161
162#define SKMEM_WDT_MAXTIME 30 /* # of secs before watchdog */
163#define SKMEM_WDT_PURGE 3 /* retry purge threshold */
164
165#if (DEVELOPMENT || DEBUG)
166/* Mean Time Between Failures (ms) */
167static volatile uint64_t skmem_region_mtbf;
168
169static int skmem_region_mtbf_sysctl(struct sysctl_oid *, void *, int,
170 struct sysctl_req *);
171
172SYSCTL_PROC(_kern_skywalk_mem, OID_AUTO, region_mtbf,
173 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0,
174 skmem_region_mtbf_sysctl, "Q", "Region MTBF (ms)");
175
176SYSCTL_UINT(_kern_skywalk_mem, OID_AUTO, region_update_interval,
177 CTLFLAG_RW | CTLFLAG_LOCKED, &skmem_region_update_interval,
178 SKMEM_REGION_UPDATE_INTERVAL, "Region update interval (sec)");
179#endif /* (DEVELOPMENT || DEBUG) */
180
181#define SKMEM_REGION_LOCK() \
182 lck_mtx_lock(&skmem_region_lock)
183#define SKMEM_REGION_LOCK_ASSERT_HELD() \
184 LCK_MTX_ASSERT(&skmem_region_lock, LCK_MTX_ASSERT_OWNED)
185#define SKMEM_REGION_LOCK_ASSERT_NOTHELD() \
186 LCK_MTX_ASSERT(&skmem_region_lock, LCK_MTX_ASSERT_NOTOWNED)
187#define SKMEM_REGION_UNLOCK() \
188 lck_mtx_unlock(&skmem_region_lock)
189
190/*
191 * Hash table bounds. Start with the initial value, and rescale up to
192 * the specified limit. Ideally we don't need a limit, but in practice
193 * this helps guard against runaways. These values should be revisited
 * in the future and adjusted as needed.
195 */
196#define SKMEM_REGION_HASH_INITIAL 32 /* initial hash table size */
197#define SKMEM_REGION_HASH_LIMIT 4096 /* hash table size limit */
198
199#define SKMEM_REGION_HASH_INDEX(_a, _s, _m) \
200 (((_a) + ((_a) >> (_s)) + ((_a) >> ((_s) << 1))) & (_m))
201#define SKMEM_REGION_HASH(_skr, _addr) \
202 (&(_skr)->skr_hash_table[SKMEM_REGION_HASH_INDEX((uintptr_t)_addr, \
203 (_skr)->skr_hash_shift, (_skr)->skr_hash_mask)])
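/*
 * Worked example (hypothetical values): with a 32KB segment size,
 * skr_hash_shift is flsll(0x8000) - 1 = 15 and the initial skr_hash_mask
 * is 31 (32 buckets). For an object address of 0x120018000:
 *
 *	0x120018000 + (0x120018000 >> 15) + (0x120018000 >> 30)
 *	    = 0x120018000 + 0x24003 + 0x4 = 0x12003c007
 *
 * and 0x12003c007 & 31 = 7, so the object hashes to bucket 7. Folding in
 * the shifted copies lets the segment-granularity bits of the address
 * influence the bucket index even though the low bits repeat per segment.
 */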
204
205static SKMEM_TYPE_DEFINE(skr_zone, struct skmem_region);
206
207static unsigned int sg_size; /* size of zone element */
208static struct skmem_cache *skmem_sg_cache; /* cache for sksegment */
209
210static uint32_t skmem_seg_size = SKMEM_SEG_SIZE;
211static uint32_t skmem_md_seg_size = SKMEM_MD_SEG_SIZE;
212static uint32_t skmem_drv_buf_seg_size = SKMEM_DRV_BUF_SEG_SIZE;
213static uint32_t skmem_drv_buf_seg_eff_size = SKMEM_DRV_BUF_SEG_SIZE;
214uint32_t skmem_usr_buf_seg_size = SKMEM_USR_BUF_SEG_SIZE;
215
216#define SKMEM_TAG_SEGMENT_BMAP "com.apple.skywalk.segment.bmap"
217static SKMEM_TAG_DEFINE(skmem_tag_segment_bmap, SKMEM_TAG_SEGMENT_BMAP);
218
219#define SKMEM_TAG_SEGMENT_HASH "com.apple.skywalk.segment.hash"
220static SKMEM_TAG_DEFINE(skmem_tag_segment_hash, SKMEM_TAG_SEGMENT_HASH);
221
222#define SKMEM_TAG_REGION_MIB "com.apple.skywalk.region.mib"
223static SKMEM_TAG_DEFINE(skmem_tag_region_mib, SKMEM_TAG_REGION_MIB);
224
225#define BMAPSZ 64
226
227/* 64-bit mask with range */
228#define BMASK64(_beg, _end) \
229 ((((uint64_t)-1) >> ((BMAPSZ - 1) - (_end))) & ~((1ULL << (_beg)) - 1))
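/*
 * For example, BMASK64(4, 7) == 0xf0 (bits 4 through 7 set, inclusive) and
 * BMASK64(0, 63) == ~0ULL: the right shift by (63 - _end) keeps bits
 * 0.._end, while the ~((1ULL << (_beg)) - 1) term clears the bits below
 * _beg.
 */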
230
231static int __skmem_region_inited = 0;
232
233void
234skmem_region_init(void)
235{
236 boolean_t randomize_seg_size;
237
238 _CASSERT(sizeof(bitmap_t) == sizeof(uint64_t));
239 _CASSERT(BMAPSZ == (sizeof(bitmap_t) << 3));
240 _CASSERT((SKMEM_SEG_SIZE % SKMEM_PAGE_SIZE) == 0);
241 _CASSERT(SKMEM_REGION_HASH_LIMIT >= SKMEM_REGION_HASH_INITIAL);
242 ASSERT(!__skmem_region_inited);
243
244 /* enforce the ordering here */
245 _CASSERT(SKMEM_REGION_GUARD_HEAD == 0);
246 _CASSERT(SKMEM_REGION_SCHEMA == 1);
247 _CASSERT(SKMEM_REGION_RING == 2);
248 _CASSERT(SKMEM_REGION_BUF_DEF == 3);
249 _CASSERT(SKMEM_REGION_BUF_LARGE == 4);
250 _CASSERT(SKMEM_REGION_RXBUF_DEF == 5);
251 _CASSERT(SKMEM_REGION_RXBUF_LARGE == 6);
252 _CASSERT(SKMEM_REGION_TXBUF_DEF == 7);
253 _CASSERT(SKMEM_REGION_TXBUF_LARGE == 8);
254 _CASSERT(SKMEM_REGION_UMD == 9);
255 _CASSERT(SKMEM_REGION_TXAUSD == 10);
256 _CASSERT(SKMEM_REGION_RXFUSD == 11);
257 _CASSERT(SKMEM_REGION_UBFT == 12);
258 _CASSERT(SKMEM_REGION_USTATS == 13);
259 _CASSERT(SKMEM_REGION_FLOWADV == 14);
260 _CASSERT(SKMEM_REGION_NEXUSADV == 15);
261 _CASSERT(SKMEM_REGION_SYSCTLS == 16);
262 _CASSERT(SKMEM_REGION_GUARD_TAIL == 17);
263 _CASSERT(SKMEM_REGION_KMD == 18);
264 _CASSERT(SKMEM_REGION_RXKMD == 19);
265 _CASSERT(SKMEM_REGION_TXKMD == 20);
266 _CASSERT(SKMEM_REGION_KBFT == 21);
267 _CASSERT(SKMEM_REGION_RXKBFT == 22);
268 _CASSERT(SKMEM_REGION_TXKBFT == 23);
269 _CASSERT(SKMEM_REGION_TXAKSD == 24);
270 _CASSERT(SKMEM_REGION_RXFKSD == 25);
271 _CASSERT(SKMEM_REGION_KSTATS == 26);
272 _CASSERT(SKMEM_REGION_INTRINSIC == 27);
273
274 _CASSERT(SREG_GUARD_HEAD == SKMEM_REGION_GUARD_HEAD);
275 _CASSERT(SREG_SCHEMA == SKMEM_REGION_SCHEMA);
276 _CASSERT(SREG_RING == SKMEM_REGION_RING);
277 _CASSERT(SREG_BUF_DEF == SKMEM_REGION_BUF_DEF);
278 _CASSERT(SREG_BUF_LARGE == SKMEM_REGION_BUF_LARGE);
279 _CASSERT(SREG_RXBUF_DEF == SKMEM_REGION_RXBUF_DEF);
280 _CASSERT(SREG_RXBUF_LARGE == SKMEM_REGION_RXBUF_LARGE);
281 _CASSERT(SREG_TXBUF_DEF == SKMEM_REGION_TXBUF_DEF);
282 _CASSERT(SREG_TXBUF_LARGE == SKMEM_REGION_TXBUF_LARGE);
283 _CASSERT(SREG_UMD == SKMEM_REGION_UMD);
284 _CASSERT(SREG_TXAUSD == SKMEM_REGION_TXAUSD);
285 _CASSERT(SREG_RXFUSD == SKMEM_REGION_RXFUSD);
286 _CASSERT(SREG_UBFT == SKMEM_REGION_UBFT);
287 _CASSERT(SREG_USTATS == SKMEM_REGION_USTATS);
288 _CASSERT(SREG_FLOWADV == SKMEM_REGION_FLOWADV);
289 _CASSERT(SREG_NEXUSADV == SKMEM_REGION_NEXUSADV);
290 _CASSERT(SREG_SYSCTLS == SKMEM_REGION_SYSCTLS);
291 _CASSERT(SREG_GUARD_TAIL == SKMEM_REGION_GUARD_TAIL);
292 _CASSERT(SREG_KMD == SKMEM_REGION_KMD);
293 _CASSERT(SREG_RXKMD == SKMEM_REGION_RXKMD);
294 _CASSERT(SREG_TXKMD == SKMEM_REGION_TXKMD);
295 _CASSERT(SREG_KBFT == SKMEM_REGION_KBFT);
296 _CASSERT(SREG_RXKBFT == SKMEM_REGION_RXKBFT);
297 _CASSERT(SREG_TXKBFT == SKMEM_REGION_TXKBFT);
298 _CASSERT(SREG_TXAKSD == SKMEM_REGION_TXAKSD);
299 _CASSERT(SREG_RXFKSD == SKMEM_REGION_RXFKSD);
300 _CASSERT(SREG_KSTATS == SKMEM_REGION_KSTATS);
301
302 _CASSERT(SKR_MODE_NOREDIRECT == SREG_MODE_NOREDIRECT);
303 _CASSERT(SKR_MODE_MMAPOK == SREG_MODE_MMAPOK);
304 _CASSERT(SKR_MODE_UREADONLY == SREG_MODE_UREADONLY);
305 _CASSERT(SKR_MODE_KREADONLY == SREG_MODE_KREADONLY);
306 _CASSERT(SKR_MODE_PERSISTENT == SREG_MODE_PERSISTENT);
307 _CASSERT(SKR_MODE_MONOLITHIC == SREG_MODE_MONOLITHIC);
308 _CASSERT(SKR_MODE_NOMAGAZINES == SREG_MODE_NOMAGAZINES);
309 _CASSERT(SKR_MODE_NOCACHE == SREG_MODE_NOCACHE);
310 _CASSERT(SKR_MODE_IODIR_IN == SREG_MODE_IODIR_IN);
311 _CASSERT(SKR_MODE_IODIR_OUT == SREG_MODE_IODIR_OUT);
312 _CASSERT(SKR_MODE_GUARD == SREG_MODE_GUARD);
313 _CASSERT(SKR_MODE_SEGPHYSCONTIG == SREG_MODE_SEGPHYSCONTIG);
314 _CASSERT(SKR_MODE_SHAREOK == SREG_MODE_SHAREOK);
315 _CASSERT(SKR_MODE_PUREDATA == SREG_MODE_PUREDATA);
316 _CASSERT(SKR_MODE_PSEUDO == SREG_MODE_PSEUDO);
317 _CASSERT(SKR_MODE_THREADSAFE == SREG_MODE_THREADSAFE);
318 _CASSERT(SKR_MODE_SLAB == SREG_MODE_SLAB);
319 _CASSERT(SKR_MODE_MIRRORED == SREG_MODE_MIRRORED);
320
	(void) PE_parse_boot_argn("skmem_seg_size", &skmem_seg_size,
	    sizeof(skmem_seg_size));
323 if (skmem_seg_size < SKMEM_MIN_SEG_SIZE) {
324 skmem_seg_size = SKMEM_MIN_SEG_SIZE;
325 }
326 skmem_seg_size = (uint32_t)P2ROUNDUP(skmem_seg_size,
327 SKMEM_MIN_SEG_SIZE);
328 VERIFY(skmem_seg_size != 0 && (skmem_seg_size % SKMEM_PAGE_SIZE) == 0);
329
	(void) PE_parse_boot_argn("skmem_md_seg_size", &skmem_md_seg_size,
	    sizeof(skmem_md_seg_size));
332 if (skmem_md_seg_size < skmem_seg_size) {
333 skmem_md_seg_size = skmem_seg_size;
334 }
335 skmem_md_seg_size = (uint32_t)P2ROUNDUP(skmem_md_seg_size,
336 SKMEM_MIN_SEG_SIZE);
337 VERIFY((skmem_md_seg_size % SKMEM_PAGE_SIZE) == 0);
338
339 /*
340 * If set via boot-args, honor it and don't randomize.
341 */
	randomize_seg_size = !PE_parse_boot_argn("skmem_drv_buf_seg_size",
	    &skmem_drv_buf_seg_size, sizeof(skmem_drv_buf_seg_size));
344 if (skmem_drv_buf_seg_size < skmem_seg_size) {
345 skmem_drv_buf_seg_size = skmem_seg_size;
346 }
347 skmem_drv_buf_seg_size = skmem_drv_buf_seg_eff_size =
348 (uint32_t)P2ROUNDUP(skmem_drv_buf_seg_size, SKMEM_MIN_SEG_SIZE);
349 VERIFY((skmem_drv_buf_seg_size % SKMEM_PAGE_SIZE) == 0);
350
351 /*
352 * Randomize the driver buffer segment size; here we choose
353 * a SKMEM_MIN_SEG_SIZE multiplier to bump up the value to.
354 * Set this as the effective driver buffer segment size.
355 */
356 if (randomize_seg_size) {
357 uint32_t sm;
		read_frandom(&sm, sizeof(sm));
359 skmem_drv_buf_seg_eff_size +=
360 (SKMEM_MIN_SEG_SIZE * (sm % SKMEM_DRV_BUF_SEG_MULTIPLIER));
361 VERIFY((skmem_drv_buf_seg_eff_size % SKMEM_MIN_SEG_SIZE) == 0);
362 }
363 VERIFY(skmem_drv_buf_seg_eff_size >= skmem_drv_buf_seg_size);
364
	(void) PE_parse_boot_argn("skmem_usr_buf_seg_size",
	    &skmem_usr_buf_seg_size, sizeof(skmem_usr_buf_seg_size));
367 if (skmem_usr_buf_seg_size < skmem_seg_size) {
368 skmem_usr_buf_seg_size = skmem_seg_size;
369 }
370 skmem_usr_buf_seg_size = (uint32_t)P2ROUNDUP(skmem_usr_buf_seg_size,
371 SKMEM_MIN_SEG_SIZE);
372 VERIFY((skmem_usr_buf_seg_size % SKMEM_PAGE_SIZE) == 0);
373
374 SK_ERR("seg_size %u, md_seg_size %u, drv_buf_seg_size %u [eff %u], "
375 "usr_buf_seg_size %u", skmem_seg_size, skmem_md_seg_size,
376 skmem_drv_buf_seg_size, skmem_drv_buf_seg_eff_size,
377 skmem_usr_buf_seg_size);
378
379 TAILQ_INIT(&skmem_region_head);
380
	skmem_region_update_tc =
	    thread_call_allocate_with_options(skmem_region_update_func,
	    NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
384 if (skmem_region_update_tc == NULL) {
385 panic("%s: thread_call_allocate failed", __func__);
386 /* NOTREACHED */
387 __builtin_unreachable();
388 }
389
390 sg_size = sizeof(struct sksegment);
391 skmem_sg_cache = skmem_cache_create("sg", sg_size,
392 sizeof(uint64_t), NULL, NULL, NULL, NULL, NULL, 0);
393
394 /* and start the periodic region update machinery */
395 skmem_dispatch(skmem_region_update_tc, NULL,
396 (skmem_region_update_interval * NSEC_PER_SEC));
397
398 __skmem_region_inited = 1;
399}
400
401void
402skmem_region_fini(void)
403{
404 if (__skmem_region_inited) {
405 ASSERT(TAILQ_EMPTY(&skmem_region_head));
406
407 if (skmem_region_update_tc != NULL) {
			(void) thread_call_cancel_wait(skmem_region_update_tc);
			(void) thread_call_free(skmem_region_update_tc);
410 skmem_region_update_tc = NULL;
411 }
412
413 if (skmem_sg_cache != NULL) {
414 skmem_cache_destroy(skmem_sg_cache);
415 skmem_sg_cache = NULL;
416 }
417
418 __skmem_region_inited = 0;
419 }
420}
421
422/*
423 * Reap internal caches.
424 */
425void
426skmem_region_reap_caches(boolean_t purge)
427{
428 skmem_cache_reap_now(skmem_sg_cache, purge);
429}
430
431/*
432 * Configure and compute the parameters of a region.
433 */
434void
435skmem_region_params_config(struct skmem_region_params *srp)
436{
437 uint32_t cache_line_size = skmem_cpu_cache_line_size();
438 size_t seglim, segsize, segcnt;
439 size_t objsize, objcnt;
440
441 ASSERT(srp->srp_id < SKMEM_REGIONS);
442
443 /*
	 * If the magazines layer is disabled system-wide, override
445 * the region parameter here. This will effectively reduce
446 * the number of requested objects computed below. Note that
447 * the region may have already been configured to exclude
448 * magazines in the default skmem_regions[] array.
449 */
450 if (!skmem_allow_magazines()) {
451 srp->srp_cflags |= SKMEM_REGION_CR_NOMAGAZINES;
452 }
453
454 objsize = srp->srp_r_obj_size;
455 ASSERT(objsize != 0);
456 objcnt = srp->srp_r_obj_cnt;
457 ASSERT(objcnt != 0);
458
459 if (srp->srp_cflags & SKMEM_REGION_CR_PSEUDO) {
460 size_t align = srp->srp_align;
461
462 VERIFY(align != 0 && (align % SKMEM_CACHE_ALIGN) == 0);
463 VERIFY(powerof2(align));
464 objsize = MAX(objsize, sizeof(uint64_t));
465#if KASAN
466 /*
467 * When KASAN is enabled, the zone allocator adjusts the
468 * element size to include the redzone regions, in which
469 * case we assume that the elements won't start on the
470 * alignment boundary and thus need to do some fix-ups.
471 * These include increasing the effective object size
472 * which adds at least 16 bytes to the original size.
473 */
474 objsize += sizeof(uint64_t) + align;
475#endif /* KASAN */
476 objsize = P2ROUNDUP(objsize, align);
477
478 segsize = objsize;
479 srp->srp_r_seg_size = (uint32_t)segsize;
480 segcnt = objcnt;
481 goto done;
482 } else {
483 /* objects are always aligned at CPU cache line size */
484 srp->srp_align = cache_line_size;
485 }
486
487 /*
488 * Start with default segment size for the region, and compute the
489 * effective segment size (to nearest SKMEM_MIN_SEG_SIZE). If the
490 * object size is greater, then we adjust the segment size to next
491 * multiple of the effective size larger than the object size.
492 */
493 if (srp->srp_r_seg_size == 0) {
494 switch (srp->srp_id) {
495 case SKMEM_REGION_UMD:
496 case SKMEM_REGION_KMD:
497 case SKMEM_REGION_RXKMD:
498 case SKMEM_REGION_TXKMD:
499 srp->srp_r_seg_size = skmem_md_seg_size;
500 break;
501
502 case SKMEM_REGION_BUF_DEF:
503 case SKMEM_REGION_RXBUF_DEF:
504 case SKMEM_REGION_TXBUF_DEF:
505 /*
506 * Use the effective driver buffer segment size,
507 * since it reflects any randomization done at
508 * skmem_region_init() time.
509 */
510 srp->srp_r_seg_size = skmem_drv_buf_seg_eff_size;
511 break;
512
513 default:
514 srp->srp_r_seg_size = skmem_seg_size;
515 break;
516 }
517 } else {
518 srp->srp_r_seg_size = (uint32_t)P2ROUNDUP(srp->srp_r_seg_size,
519 SKMEM_MIN_SEG_SIZE);
520 }
521
522 seglim = srp->srp_r_seg_size;
523 VERIFY(seglim != 0 && (seglim % SKMEM_PAGE_SIZE) == 0);
524
525 SK_DF(SK_VERB_MEM, "%s: seglim %zu objsize %zu objcnt %zu",
526 srp->srp_name, seglim, objsize, objcnt);
527
528 /*
529 * Make sure object size is multiple of CPU cache line
530 * size, and that we can evenly divide the segment size.
531 */
532 if (!((objsize < cache_line_size) && (objsize < seglim) &&
533 ((cache_line_size % objsize) == 0) && ((seglim % objsize) == 0))) {
534 objsize = P2ROUNDUP(objsize, cache_line_size);
535 while (objsize < seglim && (seglim % objsize) != 0) {
536 SK_DF(SK_VERB_MEM, "%s: objsize %zu -> %zu",
537 srp->srp_name, objsize, objsize + cache_line_size);
538 objsize += cache_line_size;
539 }
540 }
541
542 /* segment must be larger than object */
543 while (objsize > seglim) {
544 SK_DF(SK_VERB_MEM, "%s: seglim %zu -> %zu", srp->srp_name,
545 seglim, seglim + SKMEM_MIN_SEG_SIZE);
546 seglim += SKMEM_MIN_SEG_SIZE;
547 }
548
549 /*
550 * Take into account worst-case per-CPU cached
551 * objects if this region is configured for it.
552 */
553 if (!(srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES)) {
554 uint32_t magazine_max_objs =
555 skmem_cache_magazine_max((uint32_t)objsize);
556 SK_DF(SK_VERB_MEM, "%s: objcnt %zu -> %zu", srp->srp_name,
557 objcnt, objcnt + magazine_max_objs);
558 objcnt += magazine_max_objs;
559 }
560
561 SK_DF(SK_VERB_MEM, "%s: seglim %zu objsize %zu "
562 "objcnt %zu", srp->srp_name, seglim, objsize, objcnt);
563
564 segsize = P2ROUNDUP(objsize * objcnt, SKMEM_MIN_SEG_SIZE);
565 if (seglim > segsize) {
566 /*
567 * If the segment limit is larger than what we need,
568 * avoid memory wastage by shrinking it.
569 */
570 while (seglim > segsize && seglim > SKMEM_MIN_SEG_SIZE) {
571 VERIFY(seglim >= SKMEM_MIN_SEG_SIZE);
572 SK_DF(SK_VERB_MEM,
573 "%s: segsize %zu (%zu*%zu) seglim [-] %zu -> %zu",
574 srp->srp_name, segsize, objsize, objcnt, seglim,
575 P2ROUNDUP(seglim - SKMEM_MIN_SEG_SIZE,
576 SKMEM_MIN_SEG_SIZE));
577 seglim = P2ROUNDUP(seglim - SKMEM_MIN_SEG_SIZE,
578 SKMEM_MIN_SEG_SIZE);
579 }
580
581 /* adjust segment size */
582 segsize = seglim;
583 } else if (seglim < segsize) {
584 size_t oseglim = seglim;
585 /*
586 * If the segment limit is less than the segment size,
587 * see if increasing it slightly (up to 1.5x the segment
588 * size) would allow us to avoid allocating too many
589 * extra objects (due to excessive segment count).
590 */
591 while (seglim < segsize && (segsize % seglim) != 0) {
592 SK_DF(SK_VERB_MEM,
593 "%s: segsize %zu (%zu*%zu) seglim [+] %zu -> %zu",
594 srp->srp_name, segsize, objsize, objcnt, seglim,
595 (seglim + SKMEM_MIN_SEG_SIZE));
596 seglim += SKMEM_MIN_SEG_SIZE;
597 if (seglim >= (oseglim + (oseglim >> 1))) {
598 break;
599 }
600 }
601
		/* can't use P2ROUNDUP since seglim may not be a power of 2 */
603 segsize = SK_ROUNDUP(segsize, seglim);
604 }
605 ASSERT(segsize != 0 && (segsize % seglim) == 0);
606
607 SK_DF(SK_VERB_MEM, "%s: segsize %zu seglim %zu",
608 srp->srp_name, segsize, seglim);
609
610 /* compute segment count, and recompute segment size */
611 if (srp->srp_cflags & SKMEM_REGION_CR_MONOLITHIC) {
612 segcnt = 1;
613 } else {
614 /*
615 * The adjustments above were done in increments of
616 * SKMEM_MIN_SEG_SIZE. If the object size is greater
617 * than that, ensure that the segment size is a multiple
618 * of the object size.
619 */
620 if (objsize > SKMEM_MIN_SEG_SIZE) {
621 ASSERT(seglim >= objsize);
622 if ((seglim % objsize) != 0) {
623 seglim += (seglim - objsize);
624 }
625 /* recompute segsize; see SK_ROUNDUP comment above */
626 segsize = SK_ROUNDUP(segsize, seglim);
627 }
628
629 segcnt = MAX(1, (segsize / seglim));
630 segsize /= segcnt;
631 }
632
633 SK_DF(SK_VERB_MEM, "%s: segcnt %zu segsize %zu",
634 srp->srp_name, segcnt, segsize);
635
636 /* recompute object count to avoid wastage */
637 objcnt = (segsize * segcnt) / objsize;
638 ASSERT(objcnt != 0);
639done:
640 srp->srp_c_obj_size = (uint32_t)objsize;
641 srp->srp_c_obj_cnt = (uint32_t)objcnt;
642 srp->srp_c_seg_size = (uint32_t)segsize;
643 srp->srp_seg_cnt = (uint32_t)segcnt;
644
645 SK_DF(SK_VERB_MEM, "%s: objsize %zu objcnt %zu segcnt %zu segsize %zu",
646 srp->srp_name, objsize, objcnt, segcnt, segsize);
647
648#if SK_LOG
649 if (__improbable(sk_verbose != 0)) {
650 char label[32];
651 (void) snprintf(label, sizeof(label), "REGION_%s:",
652 skmem_region_id2name(srp->srp_id));
653 SK_D("%-16s o:[%4u x %6u -> %4u x %6u]", label,
654 (uint32_t)srp->srp_r_obj_cnt,
655 (uint32_t)srp->srp_r_obj_size,
656 (uint32_t)srp->srp_c_obj_cnt,
657 (uint32_t)srp->srp_c_obj_size);
658 }
659#endif /* SK_LOG */
660}
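/*
 * Worked example (hypothetical numbers, assuming SKMEM_MIN_SEG_SIZE is 16KB
 * and the default segment size resolves to 32KB): a request for 64 objects
 * of 2048 bytes with magazines disabled keeps objsize at 2048 (it already
 * divides 32KB evenly) and computes segsize = P2ROUNDUP(2048 * 64, 16KB) =
 * 128KB. Since 128KB is an exact multiple of the 32KB segment limit, the
 * result is segcnt = 4 and segsize = 32KB, i.e. 4 segments of 16 objects
 * each, with the recomputed objcnt = (32KB * 4) / 2048 = 64.
 */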
661
662/*
663 * Create a region.
664 */
665struct skmem_region *
666skmem_region_create(const char *name, struct skmem_region_params *srp,
667 sksegment_ctor_fn_t ctor, sksegment_dtor_fn_t dtor, void *private)
668{
669 boolean_t pseudo = (srp->srp_cflags & SKMEM_REGION_CR_PSEUDO);
670 uint32_t cflags = srp->srp_cflags;
671 struct skmem_region *skr;
672 uint32_t i;
673
674 ASSERT(srp->srp_id < SKMEM_REGIONS);
675 ASSERT(srp->srp_c_seg_size != 0 &&
676 (pseudo || (srp->srp_c_seg_size % SKMEM_PAGE_SIZE) == 0));
677 ASSERT(srp->srp_seg_cnt != 0);
678 ASSERT(srp->srp_c_obj_cnt == 1 ||
679 (srp->srp_c_seg_size % srp->srp_c_obj_size) == 0);
680 ASSERT(srp->srp_c_obj_size <= srp->srp_c_seg_size);
681
682 skr = zalloc_flags(skr_zone, Z_WAITOK | Z_ZERO);
683 skr->skr_params.srp_r_seg_size = srp->srp_r_seg_size;
684 skr->skr_seg_size = srp->srp_c_seg_size;
685 skr->skr_size = (srp->srp_c_seg_size * srp->srp_seg_cnt);
686 skr->skr_seg_objs = (srp->srp_c_seg_size / srp->srp_c_obj_size);
687
688 if (!pseudo) {
689 skr->skr_seg_max_cnt = srp->srp_seg_cnt;
690
691 /* set alignment to CPU cache line size */
692 skr->skr_params.srp_align = skmem_cpu_cache_line_size();
693
694 /* allocate the allocated-address hash chain */
695 skr->skr_hash_initial = SKMEM_REGION_HASH_INITIAL;
696 skr->skr_hash_limit = SKMEM_REGION_HASH_LIMIT;
697 skr->skr_hash_table = sk_alloc_type_array(struct sksegment_bkt,
698 skr->skr_hash_initial, Z_WAITOK | Z_NOFAIL,
699 skmem_tag_segment_hash);
700 skr->skr_hash_mask = (skr->skr_hash_initial - 1);
701 skr->skr_hash_shift = flsll(srp->srp_c_seg_size) - 1;
702
703 for (i = 0; i < (skr->skr_hash_mask + 1); i++) {
704 TAILQ_INIT(&skr->skr_hash_table[i].sgb_head);
705 }
706 } else {
707 /* this upper bound doesn't apply */
708 skr->skr_seg_max_cnt = 0;
709
710 /* pick up value set by skmem_regions_params_config() */
711 skr->skr_params.srp_align = srp->srp_align;
712 }
713
714 skr->skr_r_obj_size = srp->srp_r_obj_size;
715 skr->skr_r_obj_cnt = srp->srp_r_obj_cnt;
716 skr->skr_c_obj_size = srp->srp_c_obj_size;
717 skr->skr_c_obj_cnt = srp->srp_c_obj_cnt;
718
719 skr->skr_params.srp_md_type = srp->srp_md_type;
720 skr->skr_params.srp_md_subtype = srp->srp_md_subtype;
721 skr->skr_params.srp_max_frags = srp->srp_max_frags;
722
723 skr->skr_seg_ctor = ctor;
724 skr->skr_seg_dtor = dtor;
725 skr->skr_private = private;
726
	lck_mtx_init(&skr->skr_lock, &skmem_region_lock_grp,
	    &skmem_region_lock_attr);
729
730 TAILQ_INIT(&skr->skr_seg_free);
731 RB_INIT(&skr->skr_seg_tfree);
732
733 skr->skr_id = srp->srp_id;
	uuid_generate_random(skr->skr_uuid);
	(void) snprintf(skr->skr_name, sizeof(skr->skr_name),
	    "%s.%s.%s", SKMEM_REGION_PREFIX, srp->srp_name, name);
737
738 SK_DF(SK_VERB_MEM_REGION, "\"%s\": skr 0x%llx ",
739 skr->skr_name, SK_KVA(skr));
740
741 /* sanity check */
742 ASSERT(!(cflags & SKMEM_REGION_CR_GUARD) ||
743 !(cflags & (SKMEM_REGION_CR_KREADONLY | SKMEM_REGION_CR_UREADONLY |
744 SKMEM_REGION_CR_PERSISTENT | SKMEM_REGION_CR_SHAREOK |
745 SKMEM_REGION_CR_IODIR_IN | SKMEM_REGION_CR_IODIR_OUT |
746 SKMEM_REGION_CR_PUREDATA)));
747
748 skr->skr_cflags = cflags;
749 if (cflags & SKMEM_REGION_CR_NOREDIRECT) {
750 skr->skr_mode |= SKR_MODE_NOREDIRECT;
751 }
752 if (cflags & SKMEM_REGION_CR_MMAPOK) {
753 skr->skr_mode |= SKR_MODE_MMAPOK;
754 }
755 if ((cflags & SKMEM_REGION_CR_MMAPOK) &&
756 (cflags & SKMEM_REGION_CR_UREADONLY)) {
757 skr->skr_mode |= SKR_MODE_UREADONLY;
758 }
759 if (cflags & SKMEM_REGION_CR_KREADONLY) {
760 skr->skr_mode |= SKR_MODE_KREADONLY;
761 }
762 if (cflags & SKMEM_REGION_CR_PERSISTENT) {
763 skr->skr_mode |= SKR_MODE_PERSISTENT;
764 }
765 if (cflags & SKMEM_REGION_CR_MONOLITHIC) {
766 skr->skr_mode |= SKR_MODE_MONOLITHIC;
767 }
768 if (cflags & SKMEM_REGION_CR_NOMAGAZINES) {
769 skr->skr_mode |= SKR_MODE_NOMAGAZINES;
770 }
771 if (cflags & SKMEM_REGION_CR_NOCACHE) {
772 skr->skr_mode |= SKR_MODE_NOCACHE;
773 }
774 if (cflags & SKMEM_REGION_CR_SEGPHYSCONTIG) {
775 skr->skr_mode |= SKR_MODE_SEGPHYSCONTIG;
776 }
777 if (cflags & SKMEM_REGION_CR_SHAREOK) {
778 skr->skr_mode |= SKR_MODE_SHAREOK;
779 }
780 if (cflags & SKMEM_REGION_CR_IODIR_IN) {
781 skr->skr_mode |= SKR_MODE_IODIR_IN;
782 }
783 if (cflags & SKMEM_REGION_CR_IODIR_OUT) {
784 skr->skr_mode |= SKR_MODE_IODIR_OUT;
785 }
786 if (cflags & SKMEM_REGION_CR_GUARD) {
787 skr->skr_mode |= SKR_MODE_GUARD;
788 }
789 if (cflags & SKMEM_REGION_CR_PUREDATA) {
790 skr->skr_mode |= SKR_MODE_PUREDATA;
791 }
792 if (cflags & SKMEM_REGION_CR_PSEUDO) {
793 skr->skr_mode |= SKR_MODE_PSEUDO;
794 }
795 if (cflags & SKMEM_REGION_CR_THREADSAFE) {
796 skr->skr_mode |= SKR_MODE_THREADSAFE;
797 }
798 if (cflags & SKMEM_REGION_CR_MEMTAG) {
799 skr->skr_mode |= SKR_MODE_MEMTAG;
800 }
801
802#if XNU_TARGET_OS_OSX
803 /*
804 * Mark all regions as persistent except for the guard and Intrinsic
805 * regions.
806 * This is to ensure that kernel threads won't be faulting-in while
807 * accessing these memory regions. We have observed various kinds of
808 * kernel panics due to kernel threads faulting on non-wired memory
809 * access when the VM subsystem is not in a state to swap-in the page.
810 */
811 if (!((skr->skr_mode & SKR_MODE_PSEUDO) ||
812 (skr->skr_mode & SKR_MODE_GUARD))) {
813 skr->skr_mode |= SKR_MODE_PERSISTENT;
814 }
815#endif /* XNU_TARGET_OS_OSX */
816
817 /* SKR_MODE_UREADONLY only takes effect for user task mapping */
818 skr->skr_bufspec.user_writable = !(skr->skr_mode & SKR_MODE_UREADONLY);
819 skr->skr_bufspec.kernel_writable = !(skr->skr_mode & SKR_MODE_KREADONLY);
820 skr->skr_bufspec.purgeable = TRUE;
821 skr->skr_bufspec.inhibitCache = !!(skr->skr_mode & SKR_MODE_NOCACHE);
822 skr->skr_bufspec.physcontig = (skr->skr_mode & SKR_MODE_SEGPHYSCONTIG);
823 skr->skr_bufspec.iodir_in = !!(skr->skr_mode & SKR_MODE_IODIR_IN);
824 skr->skr_bufspec.iodir_out = !!(skr->skr_mode & SKR_MODE_IODIR_OUT);
825 skr->skr_bufspec.puredata = !!(skr->skr_mode & SKR_MODE_PUREDATA);
826 skr->skr_bufspec.threadSafe = !!(skr->skr_mode & SKR_MODE_THREADSAFE);
827 skr->skr_regspec.noRedirect = !!(skr->skr_mode & SKR_MODE_NOREDIRECT);
828
829 /* allocate segment bitmaps */
830 if (!(skr->skr_mode & SKR_MODE_PSEUDO)) {
831 ASSERT(skr->skr_seg_max_cnt != 0);
832 skr->skr_seg_bmap_len = BITMAP_LEN(skr->skr_seg_max_cnt);
833 skr->skr_seg_bmap = sk_alloc_data(BITMAP_SIZE(skr->skr_seg_max_cnt),
834 Z_WAITOK | Z_NOFAIL, skmem_tag_segment_bmap);
835 ASSERT(BITMAP_SIZE(skr->skr_seg_max_cnt) ==
836 (skr->skr_seg_bmap_len * sizeof(*skr->skr_seg_bmap)));
837
838 /* mark all bitmaps as free (bit set) */
		bitmap_full(skr->skr_seg_bmap, skr->skr_seg_max_cnt);
840 }
841
842 /*
843 * Populate the freelist by allocating all segments for the
844 * region, which will be mapped but not faulted-in, and then
845 * immediately insert each to the freelist. That will in
846 * turn unmap the segment's memory object.
847 */
848 SKR_LOCK(skr);
849 if (skr->skr_mode & SKR_MODE_PSEUDO) {
850 char zone_name[64];
		(void) snprintf(zone_name, sizeof(zone_name), "%s.reg.%s",
		    SKMEM_ZONE_PREFIX, name);
		skr->skr_zreg = zone_create(zone_name, skr->skr_c_obj_size,
		    ZC_ZFREE_CLEARMEM | ZC_DESTRUCTIBLE);
855 } else {
856 /* create a backing IOSKRegion object */
		if ((skr->skr_reg = IOSKRegionCreate(&skr->skr_regspec,
		    (IOSKSize)skr->skr_seg_size,
		    (IOSKCount)skr->skr_seg_max_cnt)) == NULL) {
			SK_ERR("\"%s\": [%u * %u] cflags 0x%b skr_reg failed",
			    skr->skr_name, (uint32_t)skr->skr_seg_size,
			    (uint32_t)skr->skr_seg_max_cnt, skr->skr_cflags,
			    SKMEM_REGION_CR_BITS);
864 goto failed;
865 }
866 }
867
868 ASSERT(skr->skr_seg_objs != 0);
869
870 ++skr->skr_refcnt; /* for caller */
871 SKR_UNLOCK(skr);
872
873 SKMEM_REGION_LOCK();
874 TAILQ_INSERT_TAIL(&skmem_region_head, skr, skr_link);
875 SKMEM_REGION_UNLOCK();
876
877 SK_DF(SK_VERB_MEM_REGION,
878 " [TOTAL] seg (%u*%u) obj (%u*%u) cflags 0x%b",
879 (uint32_t)skr->skr_seg_size, (uint32_t)skr->skr_seg_max_cnt,
880 (uint32_t)skr->skr_c_obj_size, (uint32_t)skr->skr_c_obj_cnt,
881 skr->skr_cflags, SKMEM_REGION_CR_BITS);
882
883 return skr;
884
885failed:
886 SKR_LOCK_ASSERT_HELD(skr);
887 skmem_region_destroy(skr);
888
889 return NULL;
890}
891
892/*
893 * Destroy a region.
894 */
895static void
896skmem_region_destroy(struct skmem_region *skr)
897{
898 struct skmem_region *mskr;
899
900 SKR_LOCK_ASSERT_HELD(skr);
901
902 SK_DF(SK_VERB_MEM_REGION, "\"%s\": skr 0x%llx",
903 skr->skr_name, SK_KVA(skr));
904
905 /*
906 * Panic if we detect there are unfreed segments; the caller
907 * destroying this region is responsible for ensuring that all
908 * allocated segments have been freed prior to getting here.
909 */
910 ASSERT(skr->skr_refcnt == 0);
911 if (skr->skr_seginuse != 0) {
912 panic("%s: '%s' (%p) not empty (%u unfreed)",
913 __func__, skr->skr_name, (void *)skr, skr->skr_seginuse);
914 /* NOTREACHED */
915 __builtin_unreachable();
916 }
917
918 if (skr->skr_link.tqe_next != NULL || skr->skr_link.tqe_prev != NULL) {
919 SKR_UNLOCK(skr);
920 SKMEM_REGION_LOCK();
921 TAILQ_REMOVE(&skmem_region_head, skr, skr_link);
922 SKMEM_REGION_UNLOCK();
923 SKR_LOCK(skr);
924 ASSERT(skr->skr_refcnt == 0);
925 }
926
927 /*
928 * Undo what's done earlier at region creation time.
929 */
930 skmem_region_depopulate(skr);
931 ASSERT(TAILQ_EMPTY(&skr->skr_seg_free));
932 ASSERT(RB_EMPTY(&skr->skr_seg_tfree));
933 ASSERT(skr->skr_seg_free_cnt == 0);
934
935 if (skr->skr_reg != NULL) {
936 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
		IOSKRegionDestroy(skr->skr_reg);
938 skr->skr_reg = NULL;
939 }
940
941 if (skr->skr_zreg != NULL) {
942 ASSERT(skr->skr_mode & SKR_MODE_PSEUDO);
		zdestroy(skr->skr_zreg);
944 skr->skr_zreg = NULL;
945 }
946
947 if (skr->skr_seg_bmap != NULL) {
948 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
949#if (DEBUG || DEVELOPMENT)
950 ASSERT(skr->skr_seg_bmap_len != 0);
951 /* must have been set to vacant (bit set) by now */
952 assert(bitmap_is_full(skr->skr_seg_bmap, skr->skr_seg_max_cnt));
953#endif /* DEBUG || DEVELOPMENT */
954
955 sk_free_data(skr->skr_seg_bmap, BITMAP_SIZE(skr->skr_seg_max_cnt));
956 skr->skr_seg_bmap = NULL;
957 skr->skr_seg_bmap_len = 0;
958 }
959 ASSERT(skr->skr_seg_bmap_len == 0);
960
961 if (skr->skr_hash_table != NULL) {
962 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
963#if (DEBUG || DEVELOPMENT)
964 for (uint32_t i = 0; i < (skr->skr_hash_mask + 1); i++) {
965 ASSERT(TAILQ_EMPTY(&skr->skr_hash_table[i].sgb_head));
966 }
967#endif /* DEBUG || DEVELOPMENT */
968
969 sk_free_type_array(struct sksegment_bkt, skr->skr_hash_mask + 1,
970 skr->skr_hash_table);
971 skr->skr_hash_table = NULL;
972 }
973 if ((mskr = skr->skr_mirror) != NULL) {
974 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
975 skr->skr_mirror = NULL;
976 mskr->skr_mode &= ~SKR_MODE_MIRRORED;
977 }
978 SKR_UNLOCK(skr);
979
980 if (mskr != NULL) {
981 skmem_region_release(mskr);
982 }
983
	lck_mtx_destroy(&skr->skr_lock, &skmem_region_lock_grp);
985
986 zfree(skr_zone, skr);
987}
988
989/*
990 * Mirror mskr (slave) to skr (master).
991 */
992void
993skmem_region_mirror(struct skmem_region *skr, struct skmem_region *mskr)
994{
995 SK_DF(SK_VERB_MEM_REGION, "skr master 0x%llx, slave 0x%llx ",
996 SK_KVA(skr), SK_KVA(mskr));
997
998 SKR_LOCK(skr);
999 ASSERT(!(skr->skr_mode & SKR_MODE_MIRRORED));
1000 ASSERT(!(mskr->skr_mode & SKR_MODE_MIRRORED));
1001 ASSERT(skr->skr_mirror == NULL);
1002
1003 /* both regions must share identical parameters */
1004 ASSERT(skr->skr_size == mskr->skr_size);
1005 ASSERT(skr->skr_seg_size == mskr->skr_seg_size);
1006 ASSERT(skr->skr_seg_free_cnt == mskr->skr_seg_free_cnt);
1007
1008 skr->skr_mirror = mskr;
1009 skmem_region_retain(mskr);
1010 mskr->skr_mode |= SKR_MODE_MIRRORED;
1011 SKR_UNLOCK(skr);
1012}
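/*
 * Illustrative sketch (comment only): pairing two regions created with
 * identical geometry, e.g. a metadata region and its shadow counterpart.
 * The "master_skr"/"slave_skr" names are hypothetical; whichever region is
 * passed as the first argument becomes the master and is the one used with
 * the allocator afterwards, and each skmem_region_alloc() on the master
 * also returns the address of the shadow object from the slave.
 *
 *	skmem_region_mirror(master_skr, slave_skr);
 *
 *	void *sobj;	// shadow object from the slave region
 *	void *obj = skmem_region_alloc(master_skr, &sobj, NULL, NULL,
 *	    SKMEM_SLEEP);
 *	...
 *	skmem_region_free(master_skr, obj, sobj);	// frees both objects
 */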
1013
1014void
1015skmem_region_slab_config(struct skmem_region *skr, struct skmem_cache *skm,
1016 bool attach)
1017{
1018 int i;
1019
1020 SKR_LOCK(skr);
1021 if (attach) {
1022 for (i = 0; i < SKR_MAX_CACHES && skr->skr_cache[i] != NULL;
1023 i++) {
1024 ;
1025 }
1026 VERIFY(i < SKR_MAX_CACHES);
1027 ASSERT(skr->skr_cache[i] == NULL);
1028 skr->skr_mode |= SKR_MODE_SLAB;
1029 skr->skr_cache[i] = skm;
1030 skmem_region_retain_locked(skr);
1031 SKR_UNLOCK(skr);
1032 } else {
1033 ASSERT(skr->skr_mode & SKR_MODE_SLAB);
1034 for (i = 0; i < SKR_MAX_CACHES && skr->skr_cache[i] != skm;
1035 i++) {
1036 ;
1037 }
1038 VERIFY(i < SKR_MAX_CACHES);
1039 ASSERT(skr->skr_cache[i] == skm);
1040 skr->skr_cache[i] = NULL;
1041 for (i = 0; i < SKR_MAX_CACHES && skr->skr_cache[i] == NULL;
1042 i++) {
1043 ;
1044 }
1045 if (i == SKR_MAX_CACHES) {
1046 skr->skr_mode &= ~SKR_MODE_SLAB;
1047 }
1048 if (!skmem_region_release_locked(skr)) {
1049 SKR_UNLOCK(skr);
1050 }
1051 }
1052}
1053
1054/*
1055 * Common routines for skmem_region_{alloc,mirror_alloc}.
1056 */
1057static void *
1058skmem_region_alloc_common(struct skmem_region *skr, struct sksegment *sg)
1059{
1060 struct sksegment_bkt *sgb;
1061 void *addr;
1062
1063 SKR_LOCK_ASSERT_HELD(skr);
1064
1065 ASSERT(sg->sg_md != NULL);
1066 ASSERT(sg->sg_start != 0 && sg->sg_end != 0);
1067 addr = (void *)sg->sg_start;
1068 sgb = SKMEM_REGION_HASH(skr, addr);
1069 ASSERT(sg->sg_link.tqe_next == NULL);
1070 ASSERT(sg->sg_link.tqe_prev == NULL);
1071 TAILQ_INSERT_HEAD(&sgb->sgb_head, sg, sg_link);
1072
1073 skr->skr_seginuse++;
1074 skr->skr_meminuse += skr->skr_seg_size;
1075 if (sg->sg_state == SKSEG_STATE_MAPPED_WIRED) {
1076 skr->skr_w_meminuse += skr->skr_seg_size;
1077 }
1078 skr->skr_alloc++;
1079
1080 return addr;
1081}
1082
1083/*
1084 * Allocate a segment from the region.
1085 */
1086void *
1087skmem_region_alloc(struct skmem_region *skr, void **maddr,
1088 struct sksegment **retsg, struct sksegment **retsgm, uint32_t skmflag)
1089{
1090 struct sksegment *sg = NULL;
1091 struct sksegment *sg1 = NULL;
1092 void *addr = NULL, *addr1 = NULL;
1093 uint32_t retries = 0;
1094
1095 VERIFY(!(skr->skr_mode & SKR_MODE_GUARD));
1096
1097 if (retsg != NULL) {
1098 *retsg = NULL;
1099 }
1100 if (retsgm != NULL) {
1101 *retsgm = NULL;
1102 }
1103
1104 /* SKMEM_NOSLEEP and SKMEM_FAILOK are mutually exclusive */
1105 VERIFY((skmflag & (SKMEM_NOSLEEP | SKMEM_FAILOK)) !=
1106 (SKMEM_NOSLEEP | SKMEM_FAILOK));
1107
1108 SKR_LOCK(skr);
1109 while (sg == NULL) {
1110 /* see if there's a segment in the freelist */
1111 sg = TAILQ_FIRST(&skr->skr_seg_free);
1112 if (sg == NULL) {
1113 /* see if we can grow the freelist */
1114 sg = sksegment_freelist_grow(skr);
1115 if (sg != NULL) {
1116 break;
1117 }
1118
1119 if (skr->skr_mode & SKR_MODE_SLAB) {
1120 SKR_UNLOCK(skr);
1121 /*
1122 * None found; it's possible that the slab
				 * layer is caching an extra amount, so ask
1124 * skmem_cache to reap/purge its caches.
1125 */
1126 for (int i = 0; i < SKR_MAX_CACHES; i++) {
1127 if (skr->skr_cache[i] == NULL) {
1128 continue;
1129 }
1130 skmem_cache_reap_now(skr->skr_cache[i],
1131 TRUE);
1132 }
1133 SKR_LOCK(skr);
1134 /*
1135 * If we manage to get some freed, try again.
1136 */
1137 if (TAILQ_FIRST(&skr->skr_seg_free) != NULL) {
1138 continue;
1139 }
1140 }
1141
1142 /*
1143 * Give up if this is a non-blocking allocation,
1144 * or if this is a blocking allocation but the
1145 * caller is willing to retry.
1146 */
1147 if (skmflag & (SKMEM_NOSLEEP | SKMEM_FAILOK)) {
1148 break;
1149 }
1150
1151 /* otherwise we wait until one is available */
1152 ++skr->skr_seg_waiters;
			(void) msleep(&skr->skr_seg_free, &skr->skr_lock,
			    (PZERO - 1), skr->skr_name, NULL);
1155 }
1156 }
1157
1158 SKR_LOCK_ASSERT_HELD(skr);
1159
1160 if (sg != NULL) {
1161retry:
1162 /*
1163 * We have a segment; remove it from the freelist and
1164 * insert it into the allocated-address hash chain.
1165 * Note that this may return NULL if we can't allocate
1166 * the memory descriptor.
1167 */
1168 if (sksegment_freelist_remove(skr, sg, skmflag,
1169 FALSE) == NULL) {
1170 ASSERT(sg->sg_state == SKSEG_STATE_DETACHED);
1171 ASSERT(sg->sg_md == NULL);
1172 ASSERT(sg->sg_start == 0 && sg->sg_end == 0);
1173
1174 /*
1175 * If it's non-blocking allocation, simply just give
1176 * up and let the caller decide when to retry. Else,
1177 * it gets a bit complicated due to the contract we
1178 * have for blocking allocations with the client; the
1179 * most sensible thing to do here is to retry the
1180 * allocation ourselves. Note that we keep using the
1181 * same segment we originally got, since we only need
1182 * the memory descriptor to be allocated for it; thus
1183 * we make sure we don't release the region lock when
1184 * retrying allocation. Doing so is crucial when the
1185 * region is mirrored, since the segment indices on
1186 * both regions need to match.
1187 */
1188 if (skmflag & SKMEM_NOSLEEP) {
1189 SK_ERR("\"%s\": failed to allocate segment "
1190 "(non-sleeping mode)", skr->skr_name);
1191 sg = NULL;
1192 } else {
1193 if (++retries > SKMEM_WDT_MAXTIME) {
1194 panic_plain("\"%s\": failed to "
1195 "allocate segment (sleeping mode) "
1196 "after %u retries\n\n%s",
1197 skr->skr_name, SKMEM_WDT_MAXTIME,
1198 skmem_dump(skr));
1199 /* NOTREACHED */
1200 __builtin_unreachable();
1201 } else {
1202 SK_ERR("\"%s\": failed to allocate "
1203 "segment (sleeping mode): %u "
1204 "retries", skr->skr_name, retries);
1205 }
1206 if (skr->skr_mode & SKR_MODE_SLAB) {
1207 /*
1208 * We can't get any memory descriptor
1209 * for this segment; reap extra cached
1210 * objects from the slab layer and hope
1211 * that we get lucky next time around.
1212 *
1213 * XXX adi@apple.com: perhaps also
1214 * trigger the zone allocator to do
1215 * its garbage collection here?
1216 */
1217 skmem_cache_reap();
1218 }
				delay(1 * USEC_PER_SEC); /* 1 sec */
1220 goto retry;
1221 }
1222 }
1223
1224 if (sg != NULL) {
1225 /* insert to allocated-address hash chain */
1226 addr = skmem_region_alloc_common(skr, sg);
1227 }
1228 }
1229
1230 if (sg == NULL) {
1231 VERIFY(skmflag & (SKMEM_NOSLEEP | SKMEM_FAILOK));
1232 if (skmflag & SKMEM_PANIC) {
1233 VERIFY((skmflag & (SKMEM_NOSLEEP | SKMEM_FAILOK)) ==
1234 SKMEM_NOSLEEP);
1235 /*
			 * If this is a failed non-blocking alloc and the caller
1237 * insists that it must be successful, then panic.
1238 */
1239 panic_plain("\"%s\": skr 0x%p unable to satisfy "
1240 "mandatory allocation\n", skr->skr_name, skr);
1241 /* NOTREACHED */
1242 __builtin_unreachable();
1243 } else {
1244 /*
1245 * Give up if this is a non-blocking allocation,
1246 * or one where the caller is willing to handle
1247 * allocation failures.
1248 */
1249 goto done;
1250 }
1251 }
1252
1253 ASSERT((mach_vm_address_t)addr == sg->sg_start);
1254
1255#if SK_LOG
1256 SK_DF(SK_VERB_MEM_REGION, "skr 0x%llx sg 0x%llx",
1257 SK_KVA(skr), SK_KVA(sg));
1258 if (skr->skr_mirror == NULL ||
1259 !(skr->skr_mirror->skr_mode & SKR_MODE_MIRRORED)) {
1260 SK_DF(SK_VERB_MEM_REGION, " [%u] [0x%llx-0x%llx)",
1261 sg->sg_index, SK_KVA(sg->sg_start), SK_KVA(sg->sg_end));
1262 } else {
1263 SK_DF(SK_VERB_MEM_REGION, " [%u] [0x%llx-0x%llx) mirrored",
1264 sg->sg_index, SK_KVA(sg), SK_KVA(sg->sg_start),
1265 SK_KVA(sg->sg_end));
1266 }
1267#endif /* SK_LOG */
1268
1269 /*
1270 * If mirroring, allocate shadow object from slave region.
1271 */
1272 if (skr->skr_mirror != NULL) {
1273 ASSERT(skr->skr_mirror != skr);
1274 ASSERT(!(skr->skr_mode & SKR_MODE_MIRRORED));
1275 ASSERT(skr->skr_mirror->skr_mode & SKR_MODE_MIRRORED);
1276 addr1 = skmem_region_mirror_alloc(skr->skr_mirror, sg, &sg1);
1277 ASSERT(addr1 != NULL);
1278 ASSERT(sg1 != NULL && sg1 != sg);
1279 ASSERT(sg1->sg_index == sg->sg_index);
1280 }
1281
1282done:
1283 SKR_UNLOCK(skr);
1284
1285 /* return segment metadata to caller if asked (reference not needed) */
1286 if (addr != NULL) {
1287 if (retsg != NULL) {
1288 *retsg = sg;
1289 }
1290 if (retsgm != NULL) {
1291 *retsgm = sg1;
1292 }
1293 }
1294
1295 if (maddr != NULL) {
1296 *maddr = addr1;
1297 }
1298
1299 return addr;
1300}
1301
1302/*
1303 * Allocate a segment from a mirror region at the same index. While it
 * is a somewhat simplified variant of skmem_region_alloc, keeping it
1305 * separate allows us to avoid further convoluting that routine.
1306 */
1307static void *
1308skmem_region_mirror_alloc(struct skmem_region *skr, struct sksegment *sg0,
1309 struct sksegment **retsg)
1310{
1311 struct sksegment sg_key = { .sg_index = sg0->sg_index };
1312 struct sksegment *sg = NULL;
1313 void *addr = NULL;
1314
1315 ASSERT(skr->skr_mode & SKR_MODE_MIRRORED);
1316 ASSERT(skr->skr_mirror == NULL);
1317 ASSERT(sg0->sg_type == SKSEG_TYPE_ALLOC);
1318
1319 if (retsg != NULL) {
1320 *retsg = NULL;
1321 }
1322
1323 SKR_LOCK(skr);
1324
1325 /*
1326 * See if we can find one in the freelist first. Otherwise,
1327 * create a new segment of the same index and add that to the
1328 * freelist. We would always get a segment since both regions
1329 * are synchronized when it comes to the indices of allocated
1330 * segments.
1331 */
1332 sg = RB_FIND(segtfreehead, &skr->skr_seg_tfree, &sg_key);
1333 if (sg == NULL) {
1334 sg = sksegment_alloc_with_idx(skr, sg0->sg_index);
1335 VERIFY(sg != NULL);
1336 }
1337 VERIFY(sg->sg_index == sg0->sg_index);
1338
1339 /*
1340 * We have a segment; remove it from the freelist and insert
1341 * it into the allocated-address hash chain. This either
1342 * succeeds or panics (SKMEM_PANIC) when a memory descriptor
1343 * can't be allocated.
1344 *
1345 * TODO: consider retrying IOBMD allocation attempts if needed.
1346 */
1347 sg = sksegment_freelist_remove(skr, sg, SKMEM_PANIC, FALSE);
1348 VERIFY(sg != NULL);
1349
1350 /* insert to allocated-address hash chain */
1351 addr = skmem_region_alloc_common(skr, sg);
1352
1353#if SK_LOG
1354 SK_DF(SK_VERB_MEM_REGION, "skr 0x%llx sg 0x%llx",
1355 SK_KVA(skr), SK_KVA(sg));
1356 SK_DF(SK_VERB_MEM_REGION, " [%u] [0x%llx-0x%llx)",
1357 sg->sg_index, SK_KVA(sg->sg_start), SK_KVA(sg->sg_end));
1358#endif /* SK_LOG */
1359
1360 SKR_UNLOCK(skr);
1361
1362 /* return segment metadata to caller if asked (reference not needed) */
1363 if (retsg != NULL) {
1364 *retsg = sg;
1365 }
1366
1367 return addr;
1368}
1369
1370/*
1371 * Free a segment to the region.
1372 */
1373void
1374skmem_region_free(struct skmem_region *skr, void *addr, void *maddr)
1375{
1376 struct sksegment_bkt *sgb;
1377 struct sksegment *sg, *tsg;
1378
1379 VERIFY(!(skr->skr_mode & SKR_MODE_GUARD));
1380
1381 /*
1382 * Search the hash chain to find a matching segment for the
1383 * given address. If found, remove the segment from the
1384 * hash chain and insert it into the freelist. Otherwise,
1385 * we panic since the caller has given us a bogus address.
1386 */
1387 SKR_LOCK(skr);
1388 sgb = SKMEM_REGION_HASH(skr, addr);
1389 TAILQ_FOREACH_SAFE(sg, &sgb->sgb_head, sg_link, tsg) {
1390 ASSERT(sg->sg_start != 0 && sg->sg_end != 0);
1391 if (sg->sg_start == (mach_vm_address_t)addr) {
1392 TAILQ_REMOVE(&sgb->sgb_head, sg, sg_link);
1393 sg->sg_link.tqe_next = NULL;
1394 sg->sg_link.tqe_prev = NULL;
1395 break;
1396 }
1397 }
1398
1399 ASSERT(sg != NULL);
1400 if (sg->sg_state == SKSEG_STATE_MAPPED_WIRED) {
1401 ASSERT(skr->skr_w_meminuse >= skr->skr_seg_size);
1402 skr->skr_w_meminuse -= skr->skr_seg_size;
1403 }
1404 sksegment_freelist_insert(skr, sg, FALSE);
1405
1406 ASSERT(skr->skr_seginuse != 0);
1407 skr->skr_seginuse--;
1408 skr->skr_meminuse -= skr->skr_seg_size;
1409 skr->skr_free++;
1410
1411#if SK_LOG
1412 SK_DF(SK_VERB_MEM_REGION, "skr 0x%llx sg 0x%llx",
1413 SK_KVA(skr), SK_KVA(sg));
1414 if (skr->skr_mirror == NULL ||
1415 !(skr->skr_mirror->skr_mode & SKR_MODE_MIRRORED)) {
1416 SK_DF(SK_VERB_MEM_REGION, " [%u] [0x%llx-0x%llx)",
1417 sg->sg_index, SK_KVA(addr),
1418 SK_KVA((uintptr_t)addr + skr->skr_seg_size));
1419 } else {
1420 SK_DF(SK_VERB_MEM_REGION, " [%u] [0x%llx-0x%llx) mirrored",
1421 sg->sg_index, SK_KVA(sg), SK_KVA(addr),
1422 SK_KVA((uintptr_t)addr + skr->skr_seg_size));
1423 }
1424#endif /* SK_LOG */
1425
1426 /*
1427 * If mirroring, also free shadow object in slave region.
1428 */
1429 if (skr->skr_mirror != NULL) {
1430 ASSERT(maddr != NULL);
1431 ASSERT(skr->skr_mirror != skr);
1432 ASSERT(!(skr->skr_mode & SKR_MODE_MIRRORED));
1433 ASSERT(skr->skr_mirror->skr_mode & SKR_MODE_MIRRORED);
		skmem_region_free(skr->skr_mirror, maddr, NULL);
1435 }
1436
1437 /* wake up any blocked threads waiting for a segment */
1438 if (skr->skr_seg_waiters != 0) {
1439 SK_DF(SK_VERB_MEM_REGION,
1440 "sg 0x%llx waking up %u waiters", SK_KVA(sg),
1441 skr->skr_seg_waiters);
1442 skr->skr_seg_waiters = 0;
		wakeup(&skr->skr_seg_free);
1444 }
1445 SKR_UNLOCK(skr);
1446}
1447
1448__attribute__((always_inline))
1449static inline void
1450skmem_region_retain_locked(struct skmem_region *skr)
1451{
1452 SKR_LOCK_ASSERT_HELD(skr);
1453 skr->skr_refcnt++;
1454 ASSERT(skr->skr_refcnt != 0);
1455}
1456
1457/*
 * Retain a region.
1459 */
1460void
1461skmem_region_retain(struct skmem_region *skr)
1462{
1463 SKR_LOCK(skr);
1464 skmem_region_retain_locked(skr);
1465 SKR_UNLOCK(skr);
1466}
1467
1468__attribute__((always_inline))
1469static inline boolean_t
1470skmem_region_release_locked(struct skmem_region *skr)
1471{
1472 SKR_LOCK_ASSERT_HELD(skr);
1473 ASSERT(skr->skr_refcnt != 0);
1474 if (--skr->skr_refcnt == 0) {
1475 skmem_region_destroy(skr);
1476 return TRUE;
1477 }
1478 return FALSE;
1479}
1480
1481/*
 * Release (and potentially destroy) a region.
1483 */
1484boolean_t
1485skmem_region_release(struct skmem_region *skr)
1486{
1487 boolean_t lastref;
1488
1489 SKR_LOCK(skr);
1490 if (!(lastref = skmem_region_release_locked(skr))) {
1491 SKR_UNLOCK(skr);
1492 }
1493
1494 return lastref;
1495}
1496
1497/*
1498 * Depopulate the segment freelist.
1499 */
1500static void
1501skmem_region_depopulate(struct skmem_region *skr)
1502{
1503 struct sksegment *sg, *tsg;
1504
1505 SK_DF(SK_VERB_MEM_REGION, "\"%s\": skr 0x%llx ",
1506 skr->skr_name, SK_KVA(skr));
1507
1508 SKR_LOCK_ASSERT_HELD(skr);
1509 ASSERT(skr->skr_seg_bmap_len != 0 || (skr->skr_mode & SKR_MODE_PSEUDO));
1510
1511 TAILQ_FOREACH_SAFE(sg, &skr->skr_seg_free, sg_link, tsg) {
1512 struct sksegment *sg0;
1513 uint32_t i;
1514
1515 i = sg->sg_index;
1516 sg0 = sksegment_freelist_remove(skr, sg, 0, TRUE);
1517 VERIFY(sg0 == sg);
1518
1519 sksegment_destroy(skr, sg);
1520 ASSERT(bit_test(skr->skr_seg_bmap[i / BMAPSZ], i % BMAPSZ));
1521 }
1522}
1523
1524/*
1525 * Free tree segment compare routine.
1526 */
1527static int
1528sksegment_cmp(const struct sksegment *sg1, const struct sksegment *sg2)
1529{
1530 return sg1->sg_index - sg2->sg_index;
1531}
1532
1533/*
1534 * Create a segment.
1535 *
1536 * Upon success, clear the bit for the segment's index in skr_seg_bmap bitmap.
1537 */
1538static struct sksegment *
1539sksegment_create(struct skmem_region *skr, uint32_t i)
1540{
1541 struct sksegment *sg = NULL;
1542 bitmap_t *bmap;
1543
1544 SKR_LOCK_ASSERT_HELD(skr);
1545
1546 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
1547 ASSERT(i < skr->skr_seg_max_cnt);
1548 ASSERT(skr->skr_reg != NULL);
1549 ASSERT(skr->skr_seg_size == round_page(skr->skr_seg_size));
1550
1551 bmap = &skr->skr_seg_bmap[i / BMAPSZ];
1552 ASSERT(bit_test(*bmap, i % BMAPSZ));
1553
1554 sg = skmem_cache_alloc(skmem_sg_cache, SKMEM_SLEEP);
	bzero(sg, sg_size);
1556
1557 sg->sg_region = skr;
1558 sg->sg_index = i;
1559 sg->sg_state = SKSEG_STATE_DETACHED;
1560
1561 /* claim it (clear bit) */
1562 bit_clear(*bmap, i % BMAPSZ);
1563
1564 SK_DF(SK_VERB_MEM_REGION, " [%u] [0x%llx-0x%llx) 0x%b", i,
1565 SK_KVA(sg->sg_start), SK_KVA(sg->sg_end), skr->skr_mode,
1566 SKR_MODE_BITS);
1567
1568 return sg;
1569}
1570
1571/*
1572 * Destroy a segment.
1573 *
1574 * Set the bit for the segment's index in skr_seg_bmap bitmap,
1575 * indicating that it is now vacant.
1576 */
1577static void
1578sksegment_destroy(struct skmem_region *skr, struct sksegment *sg)
1579{
1580 uint32_t i = sg->sg_index;
1581 bitmap_t *bmap;
1582
1583 SKR_LOCK_ASSERT_HELD(skr);
1584
1585 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
1586 ASSERT(skr == sg->sg_region);
1587 ASSERT(skr->skr_reg != NULL);
1588 ASSERT(sg->sg_type == SKSEG_TYPE_DESTROYED);
1589 ASSERT(i < skr->skr_seg_max_cnt);
1590
1591 bmap = &skr->skr_seg_bmap[i / BMAPSZ];
1592 ASSERT(!bit_test(*bmap, i % BMAPSZ));
1593
1594 SK_DF(SK_VERB_MEM_REGION, " [%u] [0x%llx-0x%llx) 0x%b",
1595 i, SK_KVA(sg->sg_start), SK_KVA(sg->sg_end),
1596 skr->skr_mode, SKR_MODE_BITS);
1597
1598 /*
1599 * Undo what's done earlier at segment creation time.
1600 */
1601
1602 ASSERT(sg->sg_md == NULL);
1603 ASSERT(sg->sg_start == 0 && sg->sg_end == 0);
1604 ASSERT(sg->sg_state == SKSEG_STATE_DETACHED);
1605
1606 /* release it (set bit) */
1607 bit_set(*bmap, i % BMAPSZ);
1608
1609 skmem_cache_free(skmem_sg_cache, sg);
1610}
1611
1612/*
1613 * Insert a segment into freelist (freeing the segment).
1614 */
1615static void
1616sksegment_freelist_insert(struct skmem_region *skr, struct sksegment *sg,
1617 boolean_t populating)
1618{
1619 SKR_LOCK_ASSERT_HELD(skr);
1620
1621 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
1622 ASSERT(sg->sg_type != SKSEG_TYPE_FREE);
1623 ASSERT(skr == sg->sg_region);
1624 ASSERT(skr->skr_reg != NULL);
1625 ASSERT(sg->sg_index < skr->skr_seg_max_cnt);
1626
1627 /*
1628 * If the region is being populated, then we're done.
1629 */
1630 if (__improbable(populating)) {
1631 ASSERT(sg->sg_md == NULL);
1632 ASSERT(sg->sg_start == 0 && sg->sg_end == 0);
1633 ASSERT(sg->sg_state == SKSEG_STATE_DETACHED);
1634 } else {
1635 IOSKMemoryBufferRef md;
1636 IOReturn err;
1637
1638 ASSERT(sg->sg_md != NULL);
1639 ASSERT(sg->sg_start != 0 && sg->sg_end != 0);
1640
1641 /*
1642 * Let the client remove the memory from IOMMU, and unwire it.
1643 */
1644 if (skr->skr_seg_dtor != NULL) {
1645 skr->skr_seg_dtor(sg, sg->sg_md, skr->skr_private);
1646 }
1647
1648 ASSERT(sg->sg_state == SKSEG_STATE_MAPPED ||
1649 sg->sg_state == SKSEG_STATE_MAPPED_WIRED);
1650
		IOSKRegionClearBufferDebug(skr->skr_reg, sg->sg_index, &md);
1652 VERIFY(sg->sg_md == md);
1653
1654 /* if persistent, unwire this memory now */
1655 if (skr->skr_mode & SKR_MODE_PERSISTENT) {
			err = IOSKMemoryUnwire(md);
1657 if (err != kIOReturnSuccess) {
1658 panic("Fail to unwire md %p, err %d", md, err);
1659 }
1660 }
1661
1662 /* mark memory as empty/discarded for consistency */
		err = IOSKMemoryDiscard(md);
1664 if (err != kIOReturnSuccess) {
1665 panic("Fail to discard md %p, err %d", md, err);
1666 }
1667
		IOSKMemoryDestroy(md);
1669 sg->sg_md = NULL;
1670 sg->sg_start = sg->sg_end = 0;
1671 sg->sg_state = SKSEG_STATE_DETACHED;
1672
1673 ASSERT(skr->skr_memtotal >= skr->skr_seg_size);
1674 skr->skr_memtotal -= skr->skr_seg_size;
1675 }
1676
1677 sg->sg_type = SKSEG_TYPE_FREE;
1678 ASSERT(sg->sg_link.tqe_next == NULL);
1679 ASSERT(sg->sg_link.tqe_prev == NULL);
1680 TAILQ_INSERT_TAIL(&skr->skr_seg_free, sg, sg_link);
1681 ASSERT(sg->sg_node.rbe_left == NULL);
1682 ASSERT(sg->sg_node.rbe_right == NULL);
1683 ASSERT(sg->sg_node.rbe_parent == NULL);
1684 RB_INSERT(segtfreehead, &skr->skr_seg_tfree, sg);
1685 ++skr->skr_seg_free_cnt;
1686 ASSERT(skr->skr_seg_free_cnt <= skr->skr_seg_max_cnt);
1687}
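
/*
 * Illustrative sketch (not part of the build): the freelist maintained
 * above is kept in two shapes at once -- a tail queue (skr_seg_free) for
 * cheap FIFO reuse, and a red-black tree (skr_seg_tfree) which, judging
 * from the sg_index-keyed RB_FIND below, is ordered by segment index.
 * The sketch shows how a caller might pick either the least recently
 * freed segment or the lowest-indexed free segment; which one the real
 * allocation path prefers is decided elsewhere in this file.
 */
#if 0
static struct sksegment *
sksegment_freelist_peek(struct skmem_region *skr, boolean_t lowest_index)
{
	SKR_LOCK_ASSERT_HELD(skr);

	if (lowest_index) {
		/* leftmost tree node == smallest sg_index among free segments */
		return RB_MIN(segtfreehead, &skr->skr_seg_tfree);
	}
	/* head of the tail queue == segment freed least recently */
	return TAILQ_FIRST(&skr->skr_seg_free);
}
#endif /* illustrative sketch */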
1688
1689/*
1690 * Remove a segment from the freelist (allocating the segment).
1691 */
1692static struct sksegment *
1693sksegment_freelist_remove(struct skmem_region *skr, struct sksegment *sg,
1694 uint32_t skmflag, boolean_t purging)
1695{
1696#pragma unused(skmflag)
1697 mach_vm_address_t segstart;
1698 IOReturn err;
1699
1700 SKR_LOCK_ASSERT_HELD(skr);
1701
1702 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
1703 ASSERT(sg != NULL);
1704 ASSERT(skr == sg->sg_region);
1705 ASSERT(skr->skr_reg != NULL);
1706 ASSERT(sg->sg_type == SKSEG_TYPE_FREE);
1707 ASSERT(sg->sg_index < skr->skr_seg_max_cnt);
1708
1709#if (DEVELOPMENT || DEBUG)
1710 uint64_t mtbf = skmem_region_get_mtbf();
1711 /*
1712	 * MTBF doesn't apply when SKMEM_PANIC is set, as the caller would assert.
1713 */
1714 if (__improbable(mtbf != 0 && !purging &&
1715 (net_uptime_ms() % mtbf) == 0 &&
1716 !(skmflag & SKMEM_PANIC))) {
1717 SK_ERR("skr \"%s\" 0x%llx sg 0x%llx MTBF failure",
1718 skr->skr_name, SK_KVA(skr), SK_KVA(sg));
1719 net_update_uptime();
1720 return NULL;
1721 }
1722#endif /* (DEVELOPMENT || DEBUG) */
1723
1724 TAILQ_REMOVE(&skr->skr_seg_free, sg, sg_link);
1725 sg->sg_link.tqe_next = NULL;
1726 sg->sg_link.tqe_prev = NULL;
1727 RB_REMOVE(segtfreehead, &skr->skr_seg_tfree, sg);
1728 sg->sg_node.rbe_left = NULL;
1729 sg->sg_node.rbe_right = NULL;
1730 sg->sg_node.rbe_parent = NULL;
1731
1732 ASSERT(skr->skr_seg_free_cnt != 0);
1733 --skr->skr_seg_free_cnt;
1734
1735 /*
1736 * If the region is being depopulated, then we're done.
1737 */
1738 if (__improbable(purging)) {
1739 ASSERT(sg->sg_md == NULL);
1740 ASSERT(sg->sg_start == 0 && sg->sg_end == 0);
1741 ASSERT(sg->sg_state == SKSEG_STATE_DETACHED);
1742 sg->sg_type = SKSEG_TYPE_DESTROYED;
1743 return sg;
1744 }
1745
1746 ASSERT(sg->sg_md == NULL);
1747 ASSERT(sg->sg_start == 0 && sg->sg_end == 0);
1748 ASSERT(sg->sg_state == SKSEG_STATE_DETACHED);
1749
1750 /* created as non-volatile (mapped) upon success */
1751	if ((sg->sg_md = IOSKMemoryBufferCreate(skr->skr_seg_size,
1752	    &skr->skr_bufspec, &segstart)) == NULL) {
1753 ASSERT(sg->sg_type == SKSEG_TYPE_FREE);
1754 if (skmflag & SKMEM_PANIC) {
1755		/* if the caller insists on success, then panic */
1756 panic_plain("\"%s\": skr 0x%p sg 0x%p (idx %u) unable "
1757 "to satisfy mandatory allocation\n", skr->skr_name,
1758 skr, sg, sg->sg_index);
1759 /* NOTREACHED */
1760 __builtin_unreachable();
1761 }
1762 /* reinsert this segment to freelist */
1763 ASSERT(sg->sg_link.tqe_next == NULL);
1764 ASSERT(sg->sg_link.tqe_prev == NULL);
1765 TAILQ_INSERT_HEAD(&skr->skr_seg_free, sg, sg_link);
1766 ASSERT(sg->sg_node.rbe_left == NULL);
1767 ASSERT(sg->sg_node.rbe_right == NULL);
1768 ASSERT(sg->sg_node.rbe_parent == NULL);
1769 RB_INSERT(segtfreehead, &skr->skr_seg_tfree, sg);
1770 ++skr->skr_seg_free_cnt;
1771 return NULL;
1772 }
1773
1774 sg->sg_start = segstart;
1775 sg->sg_end = (segstart + skr->skr_seg_size);
1776 ASSERT(sg->sg_start != 0 && sg->sg_end != 0);
1777
1778 /* mark memory as non-volatile just to be consistent */
1779	err = IOSKMemoryReclaim(sg->sg_md);
1780	if (err != kIOReturnSuccess) {
1781		panic("Failed to reclaim md %p, err %d", sg->sg_md, err);
1782 }
1783
1784 /* if persistent, wire down its memory now */
1785 if (skr->skr_mode & SKR_MODE_PERSISTENT) {
1786		err = IOSKMemoryWire(sg->sg_md);
1787		if (err != kIOReturnSuccess) {
1788			panic("Failed to wire md %p, err %d", sg->sg_md, err);
1789 }
1790 }
1791
1792	err = IOSKRegionSetBuffer(skr->skr_reg, sg->sg_index, sg->sg_md);
1793	if (err != kIOReturnSuccess) {
1794		panic("Failed to set md %p, err %d", sg->sg_md, err);
1795 }
1796
1797 /*
1798	 * Let the client wire it and insert it into the IOMMU, if applicable,
1799	 * then check whether it ended up wired and set the state accordingly.
1800 */
1801 if (skr->skr_seg_ctor != NULL) {
1802 skr->skr_seg_ctor(sg, sg->sg_md, skr->skr_private);
1803 }
1804
1805	sg->sg_state = IOSKBufferIsWired(sg->sg_md) ?
1806 SKSEG_STATE_MAPPED_WIRED : SKSEG_STATE_MAPPED;
1807
1808 skr->skr_memtotal += skr->skr_seg_size;
1809
1810 ASSERT(sg->sg_md != NULL);
1811 ASSERT(sg->sg_start != 0 && sg->sg_end != 0);
1812
1813 sg->sg_type = SKSEG_TYPE_ALLOC;
1814 return sg;
1815}
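
/*
 * Illustrative sketch (not part of the build): what a client-supplied
 * segment constructor/destructor pair, as invoked through skr_seg_ctor
 * and skr_seg_dtor above, might look like.  The signature is inferred
 * from the call sites; "struct example_driver" and the example_iommu_*
 * helpers are hypothetical placeholders.  The only contract shown is
 * the one the comments above describe: wire the buffer (for a region
 * without SKR_MODE_PERSISTENT) and optionally map it into the IOMMU at
 * construction time, and undo both at destruction time.
 */
#if 0
static void
example_seg_ctor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
{
	struct example_driver *drv = arg;	/* passed via skr_private */

	/* wire the buffer so it is safe to hand to the device */
	if (IOSKMemoryWire(md) != kIOReturnSuccess) {
		panic("example: failed to wire segment %u", sg->sg_index);
	}
	/* hypothetical: map the wired range into the device IOMMU */
	example_iommu_map(drv, sg->sg_start, sg->sg_end - sg->sg_start);
}

static void
example_seg_dtor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
{
	struct example_driver *drv = arg;

	/* undo the constructor, in reverse order */
	example_iommu_unmap(drv, sg->sg_start, sg->sg_end - sg->sg_start);
	(void) IOSKMemoryUnwire(md);
}
#endif /* illustrative sketch */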
1816
1817/*
1818 * Find the first available index and allocate a segment at that index.
1819 */
1820static struct sksegment *
1821sksegment_freelist_grow(struct skmem_region *skr)
1822{
1823 struct sksegment *sg = NULL;
1824 uint32_t i, j, idx;
1825
1826 SKR_LOCK_ASSERT_HELD(skr);
1827
1828 ASSERT(!(skr->skr_mode & SKR_MODE_PSEUDO));
1829 ASSERT(skr->skr_seg_bmap_len != 0);
1830 ASSERT(skr->skr_seg_max_cnt != 0);
1831
1832 for (i = 0; i < skr->skr_seg_bmap_len; i++) {
1833 bitmap_t *bmap, mask;
1834 uint32_t end = (BMAPSZ - 1);
1835
1836 if (i == (skr->skr_seg_bmap_len - 1)) {
1837 end = (skr->skr_seg_max_cnt - 1) % BMAPSZ;
1838 }
1839
1840 bmap = &skr->skr_seg_bmap[i];
1841 mask = BMASK64(0, end);
1842
1843 j = ffsll((*bmap) & mask);
1844 if (j == 0) {
1845 continue;
1846 }
1847
1848 --j;
1849 idx = (i * BMAPSZ) + j;
1850
1851 sg = sksegment_alloc_with_idx(skr, idx);
1852
1853 /* we're done */
1854 break;
1855 }
1856
1857 ASSERT((sg != NULL) || (skr->skr_seginuse == skr->skr_seg_max_cnt));
1858 return sg;
1859}
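
/*
 * Illustrative sketch (not part of the build): a worked example of the
 * bitmap scan above, assuming BMAPSZ == 64 and BMASK64(a, b) producing a
 * mask covering bits a..b inclusive (as its use with "end" suggests).
 * With skr_seg_max_cnt == 70, the last word is scanned with
 * BMASK64(0, (70 - 1) % 64) == BMASK64(0, 5), so only bits 0..5 are
 * candidates; ffsll() is 1-based and returns 0 when no bit is set,
 * hence the "--j" above.
 */
#if 0
	bitmap_t word = (1ULL << 3);		/* only index 67 is vacant */
	bitmap_t mask = BMASK64(0, 5);		/* valid indices 64..69 */
	uint32_t j = ffsll(word & mask);	/* j == 4 (1-based) */
	uint32_t idx = (1 * BMAPSZ) + (j - 1);	/* idx == 67 */
#endif /* illustrative sketch */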
1860
1861/*
1862 * Create a single segment at a specific index and add it to the freelist.
1863 */
1864static struct sksegment *
1865sksegment_alloc_with_idx(struct skmem_region *skr, uint32_t idx)
1866{
1867 struct sksegment *sg;
1868
1869 SKR_LOCK_ASSERT_HELD(skr);
1870
1871 if (!bit_test(skr->skr_seg_bmap[idx / BMAPSZ], idx % BMAPSZ)) {
1872 panic("%s: '%s' (%p) idx %u (out of %u) is already allocated",
1873 __func__, skr->skr_name, (void *)skr, idx,
1874 (skr->skr_seg_max_cnt - 1));
1875 /* NOTREACHED */
1876 __builtin_unreachable();
1877 }
1878
1879 /* must not fail, blocking alloc */
1880	sg = sksegment_create(skr, idx);
1881 VERIFY(sg != NULL);
1882 VERIFY(!bit_test(skr->skr_seg_bmap[idx / BMAPSZ], idx % BMAPSZ));
1883
1884 /* populate the freelist */
1885 sksegment_freelist_insert(skr, sg, TRUE);
1886 ASSERT(sg == TAILQ_LAST(&skr->skr_seg_free, segfreehead));
1887#if (DEVELOPMENT || DEBUG)
1888 struct sksegment sg_key = { .sg_index = sg->sg_index };
1889 ASSERT(sg == RB_FIND(segtfreehead, &skr->skr_seg_tfree, &sg_key));
1890#endif /* (DEVELOPMENT || DEBUG) */
1891
1892 SK_DF(SK_VERB_MEM_REGION, "sg %u/%u", (idx + 1), skr->skr_seg_max_cnt);
1893
1894 return sg;
1895}
1896
1897/*
1898 * Rescale the region's allocated-address hash table.
1899 */
1900static void
1901skmem_region_hash_rescale(struct skmem_region *skr)
1902{
1903 struct sksegment_bkt *old_table, *new_table;
1904 size_t old_size, new_size;
1905 uint32_t i, moved = 0;
1906
1907 if (skr->skr_mode & SKR_MODE_PSEUDO) {
1908 ASSERT(skr->skr_hash_table == NULL);
1909		/* this is a no-op for pseudo regions */
1910 return;
1911 }
1912
1913 ASSERT(skr->skr_hash_table != NULL);
1914 /* insist that we are executing in the update thread call context */
1915 ASSERT(sk_is_region_update_protected());
1916
1917 /*
1918	 * To get a small average lookup time (lookup depth near 1.0), the hash
1919	 * table size should be roughly the same as (though not necessarily equal
1920	 * to) the region size; a worked sizing example follows this function.
1921 */
1922 new_size = MAX(skr->skr_hash_initial,
1923 (1 << (flsll(3 * skr->skr_seginuse + 4) - 2)));
1924 new_size = MIN(skr->skr_hash_limit, new_size);
1925 old_size = (skr->skr_hash_mask + 1);
1926
1927 if ((old_size >> 1) <= new_size && new_size <= (old_size << 1)) {
1928 return;
1929 }
1930
1931 new_table = sk_alloc_type_array(struct sksegment_bkt, new_size,
1932 Z_NOWAIT, skmem_tag_segment_hash);
1933 if (__improbable(new_table == NULL)) {
1934 return;
1935 }
1936
1937 for (i = 0; i < new_size; i++) {
1938 TAILQ_INIT(&new_table[i].sgb_head);
1939 }
1940
1941 SKR_LOCK(skr);
1942
1943 old_size = (skr->skr_hash_mask + 1);
1944 old_table = skr->skr_hash_table;
1945
1946 skr->skr_hash_mask = (uint32_t)(new_size - 1);
1947 skr->skr_hash_table = new_table;
1948 skr->skr_rescale++;
1949
1950 for (i = 0; i < old_size; i++) {
1951 struct sksegment_bkt *sgb = &old_table[i];
1952 struct sksegment_bkt *new_sgb;
1953 struct sksegment *sg;
1954
1955 while ((sg = TAILQ_FIRST(&sgb->sgb_head)) != NULL) {
1956 TAILQ_REMOVE(&sgb->sgb_head, sg, sg_link);
1957 ASSERT(sg->sg_start != 0 && sg->sg_end != 0);
1958 new_sgb = SKMEM_REGION_HASH(skr, sg->sg_start);
1959 TAILQ_INSERT_TAIL(&new_sgb->sgb_head, sg, sg_link);
1960 ++moved;
1961 }
1962 ASSERT(TAILQ_EMPTY(&sgb->sgb_head));
1963 }
1964
1965 SK_DF(SK_VERB_MEM_REGION,
1966 "skr 0x%llx old_size %u new_size %u [%u moved]", SK_KVA(skr),
1967 (uint32_t)old_size, (uint32_t)new_size, moved);
1968
1969 SKR_UNLOCK(skr);
1970
1971 sk_free_type_array(struct sksegment_bkt, old_size, old_table);
1972}
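
/*
 * Illustrative sketch (not part of the build): a worked example of the
 * sizing rule used above.  With skr_seginuse == 1000, 3 * 1000 + 4 ==
 * 3004 has its highest set bit at position 12 (1-based), so the
 * candidate size is 1 << (12 - 2) == 1024 -- roughly one bucket per
 * in-use segment.  A 256-bucket table is more than a factor of two away
 * from 1024 and gets rebuilt; a 512- or 1024-bucket table would be left
 * alone by the "within a factor of two" check above.
 */
#if 0
	uint32_t seginuse = 1000;
	size_t cand = 1 << (flsll(3 * seginuse + 4) - 2);	/* 1024 */
	size_t old = 256;
	boolean_t keep = ((old >> 1) <= cand && cand <= (old << 1));
	/* 128 <= 1024 but 1024 > 512, so keep == FALSE: rescale to 1024 */
#endif /* illustrative sketch */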
1973
1974/*
1975 * Apply a function to operate on all regions.
1976 */
1977static void
1978skmem_region_applyall(void (*func)(struct skmem_region *))
1979{
1980 struct skmem_region *skr;
1981
1982 net_update_uptime();
1983
1984 SKMEM_REGION_LOCK();
1985 TAILQ_FOREACH(skr, &skmem_region_head, skr_link) {
1986 func(skr);
1987 }
1988 SKMEM_REGION_UNLOCK();
1989}
1990
1991static void
1992skmem_region_update(struct skmem_region *skr)
1993{
1994 SKMEM_REGION_LOCK_ASSERT_HELD();
1995
1996 /* insist that we are executing in the update thread call context */
1997 ASSERT(sk_is_region_update_protected());
1998
1999 SKR_LOCK(skr);
2000 /*
2001 * If there are threads blocked waiting for an available
2002 * segment, wake them up periodically so they can issue
2003 * another skmem_cache_reap() to reclaim resources cached
2004 * by skmem_cache.
2005 */
2006 if (skr->skr_seg_waiters != 0) {
2007 SK_DF(SK_VERB_MEM_REGION,
2008 "waking up %u waiters to reclaim", skr->skr_seg_waiters);
2009 skr->skr_seg_waiters = 0;
2010		wakeup(&skr->skr_seg_free);
2011 }
2012 SKR_UNLOCK(skr);
2013
2014 /*
2015 * Rescale the hash table if needed.
2016 */
2017 skmem_region_hash_rescale(skr);
2018}
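
/*
 * Illustrative sketch (not part of the build): the waiter side that the
 * wakeup above pairs with.  The real waiting logic lives in the segment
 * allocation path elsewhere in this file; the lock member name, wait
 * message and the no-argument skmem_cache_reap() call are assumptions
 * based on the comment above.  The point of the periodic wakeup is that
 * a woken thread gets another chance to push cached objects back into
 * the region before it sleeps again.
 */
#if 0
	SKR_LOCK_ASSERT_HELD(skr);
	while (TAILQ_EMPTY(&skr->skr_seg_free)) {
		skr->skr_seg_waiters++;
		(void) msleep(&skr->skr_seg_free, &skr->skr_lock,
		    (PZERO - 1), "skrsegwait", NULL);
		/* reclaim objects cached by skmem_cache, then retry */
		SKR_UNLOCK(skr);
		skmem_cache_reap();
		SKR_LOCK(skr);
	}
#endif /* illustrative sketch */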
2019
2020/*
2021 * Thread call callback for update.
2022 */
2023static void
2024skmem_region_update_func(thread_call_param_t dummy, thread_call_param_t arg)
2025{
2026#pragma unused(dummy, arg)
2027 sk_protect_t protect;
2028
2029 protect = sk_region_update_protect();
2030	skmem_region_applyall(skmem_region_update);
2031 sk_region_update_unprotect(protect);
2032
2033 skmem_dispatch(skmem_region_update_tc, NULL,
2034 (skmem_region_update_interval * NSEC_PER_SEC));
2035}
2036
2037boolean_t
2038skmem_region_for_pp(skmem_region_id_t id)
2039{
2040 int i;
2041
2042 for (i = 0; i < SKMEM_PP_REGIONS; i++) {
2043 if (id == skmem_pp_region_ids[i]) {
2044 return TRUE;
2045 }
2046 }
2047 return FALSE;
2048}
2049
2050void
2051skmem_region_get_stats(struct skmem_region *skr, struct sk_stats_region *sreg)
2052{
2053	bzero(sreg, sizeof(*sreg));
2054
2055	(void) snprintf(sreg->sreg_name, sizeof(sreg->sreg_name),
2056 "%s", skr->skr_name);
2057	uuid_copy(sreg->sreg_uuid, skr->skr_uuid);
2058 sreg->sreg_id = (sk_stats_region_id_t)skr->skr_id;
2059 sreg->sreg_mode = skr->skr_mode;
2060
2061 sreg->sreg_r_seg_size = skr->skr_params.srp_r_seg_size;
2062 sreg->sreg_c_seg_size = skr->skr_seg_size;
2063 sreg->sreg_seg_cnt = skr->skr_seg_max_cnt;
2064 sreg->sreg_seg_objs = skr->skr_seg_objs;
2065 sreg->sreg_r_obj_size = skr->skr_r_obj_size;
2066 sreg->sreg_r_obj_cnt = skr->skr_r_obj_cnt;
2067 sreg->sreg_c_obj_size = skr->skr_c_obj_size;
2068 sreg->sreg_c_obj_cnt = skr->skr_c_obj_cnt;
2069 sreg->sreg_align = skr->skr_align;
2070 sreg->sreg_max_frags = skr->skr_max_frags;
2071
2072 sreg->sreg_meminuse = skr->skr_meminuse;
2073 sreg->sreg_w_meminuse = skr->skr_w_meminuse;
2074 sreg->sreg_memtotal = skr->skr_memtotal;
2075 sreg->sreg_seginuse = skr->skr_seginuse;
2076 sreg->sreg_rescale = skr->skr_rescale;
2077 sreg->sreg_hash_size = (skr->skr_hash_mask + 1);
2078 sreg->sreg_alloc = skr->skr_alloc;
2079 sreg->sreg_free = skr->skr_free;
2080}
2081
2082static size_t
2083skmem_region_mib_get_stats(struct skmem_region *skr, void *out, size_t len)
2084{
2085 size_t actual_space = sizeof(struct sk_stats_region);
2086 struct sk_stats_region *sreg = out;
2087
2088 if (out == NULL || len < actual_space) {
2089 goto done;
2090 }
2091
2092 skmem_region_get_stats(skr, sreg);
2093
2094done:
2095 return actual_space;
2096}
2097
2098static int
2099skmem_region_mib_get_sysctl SYSCTL_HANDLER_ARGS
2100{
2101#pragma unused(arg1, arg2, oidp)
2102 struct skmem_region *skr;
2103 size_t actual_space;
2104 size_t buffer_space;
2105 size_t allocated_space;
2106 caddr_t buffer = NULL;
2107 caddr_t scan;
2108 int error = 0;
2109
2110	if (!kauth_cred_issuser(kauth_cred_get())) {
2111 return EPERM;
2112 }
2113
2114 net_update_uptime();
2115 buffer_space = req->oldlen;
2116 if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
2117 if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
2118 buffer_space = SK_SYSCTL_ALLOC_MAX;
2119 }
2120 allocated_space = buffer_space;
2121 buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_region_mib);
2122 if (__improbable(buffer == NULL)) {
2123 return ENOBUFS;
2124 }
2125 } else if (req->oldptr == USER_ADDR_NULL) {
2126 buffer_space = 0;
2127 }
2128 actual_space = 0;
2129 scan = buffer;
2130
2131 SKMEM_REGION_LOCK();
2132 TAILQ_FOREACH(skr, &skmem_region_head, skr_link) {
2133		size_t size = skmem_region_mib_get_stats(skr, scan, buffer_space);
2134 if (scan != NULL) {
2135 if (buffer_space < size) {
2136 /* supplied buffer too small, stop copying */
2137 error = ENOMEM;
2138 break;
2139 }
2140 scan += size;
2141 buffer_space -= size;
2142 }
2143 actual_space += size;
2144 }
2145 SKMEM_REGION_UNLOCK();
2146
2147 if (actual_space != 0) {
2148 int out_error = SYSCTL_OUT(req, buffer, actual_space);
2149 if (out_error != 0) {
2150 error = out_error;
2151 }
2152 }
2153 if (buffer != NULL) {
2154 sk_free_data(buffer, allocated_space);
2155 }
2156
2157 return error;
2158}
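
/*
 * Illustrative sketch (not part of the build): how a userspace consumer
 * might read the per-region stats exported by the handler above.  The
 * MIB name is a placeholder (the actual node is registered elsewhere in
 * skmem), and the sk_stats_region definition is assumed to be available
 * from the appropriate Skywalk header; the two-pass pattern (probe the
 * size with a NULL buffer, then fetch) matches how the handler reports
 * actual_space via SYSCTL_OUT.
 */
#if 0
#include <sys/sysctl.h>
#include <stdlib.h>

static void
example_dump_region_stats(void)
{
	const char *mib = "kern.skywalk.stats.region";	/* placeholder */
	size_t len = 0;

	/* first pass: learn how many bytes the kernel would return */
	if (sysctlbyname(mib, NULL, &len, NULL, 0) != 0 || len == 0) {
		return;
	}

	struct sk_stats_region *sreg = malloc(len);
	if (sreg == NULL) {
		return;
	}

	/* second pass: fetch one sk_stats_region per region */
	if (sysctlbyname(mib, sreg, &len, NULL, 0) == 0) {
		size_t n = len / sizeof(*sreg);
		/* ... iterate over sreg[0..n-1] ... */
		(void) n;
	}
	free(sreg);
}
#endif /* illustrative sketch */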
2159
2160#if SK_LOG
2161const char *
2162skmem_region_id2name(skmem_region_id_t id)
2163{
2164 const char *name;
2165 switch (id) {
2166 case SKMEM_REGION_SCHEMA:
2167 name = "SCHEMA";
2168 break;
2169
2170 case SKMEM_REGION_RING:
2171 name = "RING";
2172 break;
2173
2174 case SKMEM_REGION_BUF_DEF:
2175 name = "BUF_DEF";
2176 break;
2177
2178 case SKMEM_REGION_BUF_LARGE:
2179 name = "BUF_LARGE";
2180 break;
2181
2182 case SKMEM_REGION_RXBUF_DEF:
2183 name = "RXBUF_DEF";
2184 break;
2185
2186 case SKMEM_REGION_RXBUF_LARGE:
2187 name = "RXBUF_LARGE";
2188 break;
2189
2190 case SKMEM_REGION_TXBUF_DEF:
2191 name = "TXBUF_DEF";
2192 break;
2193
2194 case SKMEM_REGION_TXBUF_LARGE:
2195 name = "TXBUF_LARGE";
2196 break;
2197
2198 case SKMEM_REGION_UMD:
2199 name = "UMD";
2200 break;
2201
2202 case SKMEM_REGION_TXAUSD:
2203 name = "TXAUSD";
2204 break;
2205
2206 case SKMEM_REGION_RXFUSD:
2207 name = "RXFUSD";
2208 break;
2209
2210 case SKMEM_REGION_USTATS:
2211 name = "USTATS";
2212 break;
2213
2214 case SKMEM_REGION_FLOWADV:
2215 name = "FLOWADV";
2216 break;
2217
2218 case SKMEM_REGION_NEXUSADV:
2219 name = "NEXUSADV";
2220 break;
2221
2222 case SKMEM_REGION_SYSCTLS:
2223 name = "SYSCTLS";
2224 break;
2225
2226 case SKMEM_REGION_GUARD_HEAD:
2227 name = "HEADGUARD";
2228 break;
2229
2230 case SKMEM_REGION_GUARD_TAIL:
2231 name = "TAILGUARD";
2232 break;
2233
2234 case SKMEM_REGION_KMD:
2235 name = "KMD";
2236 break;
2237
2238 case SKMEM_REGION_RXKMD:
2239 name = "RXKMD";
2240 break;
2241
2242 case SKMEM_REGION_TXKMD:
2243 name = "TXKMD";
2244 break;
2245
2246 case SKMEM_REGION_TXAKSD:
2247 name = "TXAKSD";
2248 break;
2249
2250 case SKMEM_REGION_RXFKSD:
2251 name = "RXFKSD";
2252 break;
2253
2254 case SKMEM_REGION_KSTATS:
2255 name = "KSTATS";
2256 break;
2257
2258 case SKMEM_REGION_KBFT:
2259 name = "KBFT";
2260 break;
2261
2262 case SKMEM_REGION_UBFT:
2263 name = "UBFT";
2264 break;
2265
2266 case SKMEM_REGION_RXKBFT:
2267 name = "RXKBFT";
2268 break;
2269
2270 case SKMEM_REGION_TXKBFT:
2271 name = "TXKBFT";
2272 break;
2273
2274 case SKMEM_REGION_INTRINSIC:
2275 name = "INTRINSIC";
2276 break;
2277
2278 default:
2279 name = "UNKNOWN";
2280 break;
2281 }
2282
2283 return name;
2284}
2285#endif /* SK_LOG */
2286
2287#if (DEVELOPMENT || DEBUG)
2288uint64_t
2289skmem_region_get_mtbf(void)
2290{
2291 return skmem_region_mtbf;
2292}
2293
2294void
2295skmem_region_set_mtbf(uint64_t newval)
2296{
2297 if (newval < SKMEM_REGION_MTBF_MIN) {
2298 if (newval != 0) {
2299 newval = SKMEM_REGION_MTBF_MIN;
2300 }
2301 } else if (newval > SKMEM_REGION_MTBF_MAX) {
2302 newval = SKMEM_REGION_MTBF_MAX;
2303 }
2304
2305 if (skmem_region_mtbf != newval) {
2306 os_atomic_store(&skmem_region_mtbf, newval, release);
2307 SK_ERR("MTBF set to %llu msec", skmem_region_mtbf);
2308 }
2309}
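
/*
 * Illustrative sketch (not part of the build): how the clamping above
 * behaves and how the resulting value is consumed by
 * sksegment_freelist_remove().  The actual bounds are whatever
 * SKMEM_REGION_MTBF_{MIN,MAX} are defined to be; the calls below are
 * only for illustration.
 */
#if 0
	skmem_region_set_mtbf(1);	/* below MIN: raised to SKMEM_REGION_MTBF_MIN */
	skmem_region_set_mtbf(0);	/* zero stays zero: MTBF injection disabled */

	/*
	 * When nonzero, a non-purging, non-SKMEM_PANIC segment allocation
	 * fails whenever net_uptime_ms() is a multiple of the MTBF value,
	 * simulating sporadic buffer-creation failures.
	 */
	uint64_t mtbf = skmem_region_get_mtbf();
	boolean_t would_fail = (mtbf != 0 && (net_uptime_ms() % mtbf) == 0);
	(void) would_fail;
#endif /* illustrative sketch */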
2310
2311static int
2312skmem_region_mtbf_sysctl(struct sysctl_oid *oidp, void *arg1, int arg2,
2313 struct sysctl_req *req)
2314{
2315#pragma unused(oidp, arg1, arg2)
2316 int changed, error;
2317 uint64_t newval;
2318
2319 _CASSERT(sizeof(skmem_region_mtbf) == sizeof(uint64_t));
2320 if ((error = sysctl_io_number(req, skmem_region_mtbf,
2321 sizeof(uint64_t), &newval, &changed)) == 0) {
2322 if (changed) {
2323 skmem_region_set_mtbf(newval);
2324 }
2325 }
2326 return error;
2327}
2328#endif /* (DEVELOPMENT || DEBUG) */
2329