1/*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/* BEGIN CSTYLED */
30/*
31 * SKMEM_ARENA_TYPE_NEXUS:
32 *
33 * This arena represents the memory subsystem of a nexus adapter. It consists
34 * of a collection of memory regions that are usable by the nexus, as well
35 * as the various caches for objects in those regions.
36 *
37 * (1 per nexus adapter)
38 * +=======================+
39 * | skmem_arena |
40 * +-----------------------+ (backing regions)
41 * | ar_regions[0] | +=======================+
42 * : ... : ------->> | skmem_region |===+
43 * | ar_regions[n] | +=======================+ |===+
44 * +=======================+ +=======================+ |
45 * | arn_{caches,pp} | ---+ +=======================+
46 * +-----------------------+ |
47 * | arn_stats_obj | |
48 * | arn_flowadv_obj | | (cache frontends)
49 * | arn_nexusadv_obj | | +=======================+
50 * +-----------------------+ +--->> | skmem_cache |===+
51 * +=======================+ |===+
52 * +=======================+ |
53 * +=======================+
54 *
55 * Three regions {umd,kmd,buf} are used for the packet buffer pool, which
56 * may be external to the nexus adapter, e.g. created by the driver or an
57 * external entity. If not supplied, we create these regions along with
58 * the packet buffer pool ourselves. The rest of the regions (unrelated
59 * to the packet buffer pool) are unique to the arena and are allocated at
60 * arena creation time.
61 *
62 * An arena may be mapped to a user task/process as many times as needed.
63 * The result of each mapping is a contiguous range within the address space
64 * of that task, described by the span [ami_mapaddr, ami_mapaddr + ami_mapsize).
65 * This is achieved by leveraging the mapper memory object ar_mapper that
66 * "stitches" the disjoint segments together. Only user-mappable regions,
67 * i.e. those marked with SKR_MODE_MMAPOK, will be included in this span.
68 *
69 * Nexus adapters that are eligible for defunct will trigger the arena to
70 * undergo memory redirection for all regions except those that are marked
71 * with SKR_MODE_NOREDIRECT. This happens when all of the channels opened
72 * to the adapter are defunct. Upon completion, those redirected regions
73 * will be torn down in order to reduce their memory footprints. When this
74 * happens the adapter and its arena are no longer active or in service.
75 *
76 * The arena exposes caches for allocating and freeing most region objects.
77 * These slab-allocator based caches act as front-ends to the regions; only
78 * the metadata cache (for kern_packet_t) utilizes the magazines layer. All
79 * others simply use skmem_cache for slab-based allocations.
80 *
81 * Certain regions contain singleton objects that are simple enough to not
82 * require the slab allocator, such as the ones used for statistics and flow
83 * advisories. Because of this, we directly allocate from those regions
84 * and store the objects in the arena.
85 *
86 * SKMEM_ARENA_TYPE_NECP:
87 *
88 * This arena represents the memory subsystem of an NECP file descriptor
89 * object. It consists of a memory region for per-flow statistics, as well
90 * as a cache front-end for that region.
91 *
92 * SKMEM_ARENA_SYSTEM:
93 *
94 * This arena represents general, system-wide objects. It currently
95 * consists of the sysctls region that's created once at init time.
96 */
97/* END CSTYLED */
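/*
 * A minimal life-cycle sketch of a nexus arena (illustrative only, not
 * compiled), assuming a caller that already has a nexus adapter "na",
 * region parameters "srp", an optional nexus advisory "nxv" and a mapping
 * process "p"; those names are placeholders for this example rather than
 * definitions in this file:
 *
 *	struct kern_pbufpool *tx_pp = NULL, *rx_pp = NULL;
 *	struct skmem_arena_mmap_info ami;
 *	struct skmem_arena *ar;
 *	boolean_t need_defunct;
 *	int err;
 *
 *	bzero(&ami, sizeof(ami));
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    FALSE, FALSE, nxv, &err);
 *	err = skmem_arena_mmap(ar, p, &ami);
 *	...
 *	err = skmem_arena_mredirect(ar, &ami, p, &need_defunct);
 *	if (need_defunct) {
 *		(void) skmem_arena_defunct(ar);
 *	}
 *	skmem_arena_munmap(ar, &ami);
 *	(void) skmem_arena_release(ar);
 */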
98
99#include <skywalk/os_skywalk_private.h>
100#include <net/necp.h>
101
102static void skmem_arena_destroy(struct skmem_arena *);
103static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
104static int skmem_arena_create_finalize(struct skmem_arena *);
105static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
106static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
107static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
108static struct skmem_arena *skmem_arena_alloc(skmem_arena_type_t,
109 const char *);
110static void skmem_arena_free(struct skmem_arena *);
111static void skmem_arena_retain_locked(struct skmem_arena *);
112static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
113static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
114 struct skmem_arena_mmap_info *);
115#if SK_LOG
116static void skmem_arena_create_region_log(struct skmem_arena *);
117#endif /* SK_LOG */
118static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;
119
120SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
121 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
122 0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
123 "Skywalk arena statistics");
124
125static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
126static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);
127
128static TAILQ_HEAD(, skmem_arena) skmem_arena_head = TAILQ_HEAD_INITIALIZER(skmem_arena_head);
129
130#define SKMEM_ARENA_LOCK() \
131 lck_mtx_lock(&skmem_arena_lock)
132#define SKMEM_ARENA_LOCK_ASSERT_HELD() \
133 LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
134#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD() \
135 LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
136#define SKMEM_ARENA_UNLOCK() \
137 lck_mtx_unlock(&skmem_arena_lock)
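/*
 * A minimal usage sketch for the macros above (illustrative only): the
 * global arena list declared above is always walked under skmem_arena_lock.
 *
 *	struct skmem_arena *ar;
 *
 *	SKMEM_ARENA_LOCK();
 *	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
 *		...
 *	}
 *	SKMEM_ARENA_UNLOCK();
 */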
138
139#define AR_NEXUS_SIZE sizeof(struct skmem_arena_nexus)
140static SKMEM_TYPE_DEFINE(ar_nexus_zone, struct skmem_arena_nexus);
141
142#define AR_NECP_SIZE sizeof(struct skmem_arena_necp)
143static SKMEM_TYPE_DEFINE(ar_necp_zone, struct skmem_arena_necp);
144
145#define AR_SYSTEM_SIZE sizeof(struct skmem_arena_system)
146static SKMEM_TYPE_DEFINE(ar_system_zone, struct skmem_arena_system);
147
148#define SKMEM_TAG_ARENA_MIB "com.apple.skywalk.arena.mib"
149static SKMEM_TAG_DEFINE(skmem_tag_arena_mib, SKMEM_TAG_ARENA_MIB);
150
151static_assert(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
152static_assert(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
153static_assert(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);
154
155SK_NO_INLINE_ATTRIBUTE
156static int
157skmem_arena_sd_setup(const struct nexus_adapter *na,
158 struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
159 boolean_t kernel_only, boolean_t tx)
160{
161 struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
162 struct skmem_cache **cachep;
163 struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
164 const char *name = na->na_name;
165 char cname[64];
166 skmem_region_id_t usd_type, ksd_type;
167 int err = 0;
168
171 if (tx) {
172 usd_type = SKMEM_REGION_TXAUSD;
173 ksd_type = SKMEM_REGION_TXAKSD;
174 cachep = &arn->arn_txaksd_cache;
175 } else {
176 usd_type = SKMEM_REGION_RXFUSD;
177 ksd_type = SKMEM_REGION_RXFKSD;
178 cachep = &arn->arn_rxfksd_cache;
179 }
180 ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
181 if (ksd_skr == NULL) {
182 SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
183 "create %s region", ar->ar_name, SK_KVA(ar),
184 ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
185 err = ENOMEM;
186 goto failed;
187 }
188 ar->ar_regions[ksd_type] = ksd_skr;
189 if (!kernel_only) {
190 usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
191 NULL, NULL);
192 if (usd_skr == NULL) {
193 err = ENOMEM;
194 goto failed;
195 }
196 ar->ar_regions[usd_type] = usd_skr;
197 skmem_region_mirror(ksd_skr, usd_skr);
198 }
199 snprintf(cname, sizeof(cname), tx ? "txa_ksd.%s" : "rxf_ksd.%s", name);
200 ASSERT(ar->ar_regions[ksd_type] != NULL);
201 *cachep = skmem_cache_create(cname,
202 srp[ksd_type].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
203 ar->ar_regions[ksd_type], SKMEM_CR_NOMAGAZINES);
204 if (*cachep == NULL) {
205 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
206 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
207 err = ENOMEM;
208 goto failed;
209 }
210 return 0;
211
212failed:
213 if (ksd_skr != NULL) {
214 skmem_region_release(ksd_skr);
215 ar->ar_regions[ksd_type] = NULL;
216 }
217 if (usd_skr != NULL) {
218 /*
219 * Decrement the refcnt incremented by skmem_region_mirror().
220 * This is not needed when skmem_cache_create() succeeds,
221 * because skmem_cache_destroy() does the release.
222 */
223 skmem_region_release(usd_skr);
224
225 /* decrements the region's own refcnt */
226 skmem_region_release(usd_skr);
227 ar->ar_regions[usd_type] = NULL;
228 }
229 return err;
230}
231
232SK_NO_INLINE_ATTRIBUTE
233static void
234skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
235{
236 struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
237 struct skmem_cache **cachep;
238 struct skmem_region **ksd_rp, **usd_rp;
239
240 if (tx) {
241 cachep = &arn->arn_txaksd_cache;
242 ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
243 usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
244 } else {
245 cachep = &arn->arn_rxfksd_cache;
246 ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
247 usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
248 }
249 if (*cachep != NULL) {
250 skmem_cache_destroy(*cachep);
251 *cachep = NULL;
252 }
253 if (*usd_rp != NULL) {
254 skmem_region_release(*usd_rp);
255 *usd_rp = NULL;
256 }
257 if (*ksd_rp != NULL) {
258 skmem_region_release(*ksd_rp);
259 *ksd_rp = NULL;
260 }
261}
262
263static bool
264skmem_arena_pp_setup(struct skmem_arena *ar,
265 struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
266 struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
267 boolean_t kernel_only, boolean_t pp_truncated_buf)
268{
269 struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
270
271 if (rx_pp == NULL && tx_pp == NULL) {
272 uint32_t ppcreatef = 0;
273 if (pp_truncated_buf) {
274 ppcreatef |= PPCREATEF_TRUNCATED_BUF;
275 }
276 if (kernel_only) {
277 ppcreatef |= PPCREATEF_KERNEL_ONLY;
278 }
279 if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
280 ppcreatef |= PPCREATEF_ONDEMAND_BUF;
281 }
282 /* callee retains pp upon success */
283 rx_pp = pp_create(name, srp, NULL, NULL, NULL, NULL, NULL,
284 ppcreatef);
285 if (rx_pp == NULL) {
286 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
287 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
288 return false;
289 }
290 pp_retain(rx_pp);
291 tx_pp = rx_pp;
292 } else {
293 if (rx_pp == NULL) {
294 rx_pp = tx_pp;
295 } else if (tx_pp == NULL) {
296 tx_pp = rx_pp;
297 }
298
299 ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
300 ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
301 ASSERT(!(!kernel_only &&
302 (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
303 arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
304 pp_retain(rx_pp);
305 pp_retain(tx_pp);
306 }
307
308 arn->arn_rx_pp = rx_pp;
309 arn->arn_tx_pp = tx_pp;
310 if (rx_pp == tx_pp) {
311 skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
312 if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
313 skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
314 }
315 ar->ar_regions[SKMEM_REGION_BUF_DEF] = PP_BUF_REGION_DEF(rx_pp);
316 ar->ar_regions[SKMEM_REGION_BUF_LARGE] =
317 PP_BUF_REGION_LARGE(rx_pp);
318 ar->ar_regions[SKMEM_REGION_RXBUF_DEF] = NULL;
319 ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] = NULL;
320 ar->ar_regions[SKMEM_REGION_TXBUF_DEF] = NULL;
321 ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] = NULL;
322 skmem_region_retain(rx_pp->pp_kmd_region);
323 ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
324 ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
325 ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
326 if (rx_pp->pp_kbft_region != NULL) {
327 skmem_region_retain(rx_pp->pp_kbft_region);
328 ar->ar_regions[SKMEM_REGION_KBFT] =
329 rx_pp->pp_kbft_region;
330 }
331 ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
332 ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
333 } else {
334 ASSERT(kernel_only); /* split userspace pools not supported */
335 ar->ar_regions[SKMEM_REGION_BUF_DEF] = NULL;
336 ar->ar_regions[SKMEM_REGION_BUF_LARGE] = NULL;
337 skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
338 ar->ar_regions[SKMEM_REGION_RXBUF_DEF] =
339 PP_BUF_REGION_DEF(rx_pp);
340 ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] =
341 PP_BUF_REGION_LARGE(rx_pp);
342 if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
343 skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
344 }
345 skmem_region_retain(PP_BUF_REGION_DEF(tx_pp));
346 ar->ar_regions[SKMEM_REGION_TXBUF_DEF] =
347 PP_BUF_REGION_DEF(tx_pp);
348 ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] =
349 PP_BUF_REGION_LARGE(tx_pp);
350 if (PP_BUF_REGION_LARGE(tx_pp) != NULL) {
351 skmem_region_retain(PP_BUF_REGION_LARGE(tx_pp));
352 }
353 ar->ar_regions[SKMEM_REGION_KMD] = NULL;
354 skmem_region_retain(rx_pp->pp_kmd_region);
355 ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
356 skmem_region_retain(tx_pp->pp_kmd_region);
357 ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
358 ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
359 if (rx_pp->pp_kbft_region != NULL) {
360 ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
361 skmem_region_retain(rx_pp->pp_kbft_region);
362 ar->ar_regions[SKMEM_REGION_RXKBFT] =
363 rx_pp->pp_kbft_region;
364 }
365 if (tx_pp->pp_kbft_region != NULL) {
366 ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
367 skmem_region_retain(tx_pp->pp_kbft_region);
368 ar->ar_regions[SKMEM_REGION_TXKBFT] =
369 tx_pp->pp_kbft_region;
370 }
371 }
372
373 if (kernel_only) {
374 if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
375 ASSERT(PP_KERNEL_ONLY(rx_pp));
376 ASSERT(PP_KERNEL_ONLY(tx_pp));
377 ASSERT(rx_pp->pp_umd_region == NULL);
378 ASSERT(tx_pp->pp_umd_region == NULL);
379 ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
380 ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
381 ASSERT(rx_pp->pp_ubft_region == NULL);
382 ASSERT(tx_pp->pp_ubft_region == NULL);
383 if (rx_pp->pp_kbft_region != NULL) {
384 ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
385 NULL);
386 }
387 if (tx_pp->pp_kbft_region != NULL) {
388 ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
389 NULL);
390 }
391 }
392 } else {
393 ASSERT(rx_pp == tx_pp);
394 ASSERT(!PP_KERNEL_ONLY(rx_pp));
395 ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
396 ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
397 ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
398 skmem_region_retain(rx_pp->pp_umd_region);
399 if (rx_pp->pp_kbft_region != NULL) {
400 ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
401 ASSERT(rx_pp->pp_ubft_region != NULL);
402 ASSERT(rx_pp->pp_ubft_region->skr_mode &
403 SKR_MODE_MIRRORED);
404 ar->ar_regions[SKMEM_REGION_UBFT] =
405 rx_pp->pp_ubft_region;
406 skmem_region_retain(rx_pp->pp_ubft_region);
407 }
408 }
409
410 arn->arn_md_type = rx_pp->pp_md_type;
411 arn->arn_md_subtype = rx_pp->pp_md_subtype;
412 return true;
413}
414
415/*
416 * Create a nexus adapter arena.
417 */
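/*
 * A minimal calling sketch (illustrative only): passing NULL packet pool
 * pointers asks this routine to create the buffer pool and return it with
 * a reference held for the caller; passing existing pools instead marks
 * the arena as using an external packet pool.
 *
 *	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
 *	int err;
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    FALSE, kernel_only, NULL, &err);
 *	if (ar == NULL) {
 *		return err;
 *	}
 */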
418struct skmem_arena *
419skmem_arena_create_for_nexus(const struct nexus_adapter *na,
420 struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
421 struct kern_pbufpool **rx_pp, boolean_t pp_truncated_buf,
422 boolean_t kernel_only, struct kern_nexus_advisory *nxv, int *perr)
423{
424#define SRP_CFLAGS(_id) (srp[_id].srp_cflags)
425 struct skmem_arena_nexus *arn;
426 struct skmem_arena *ar;
427 char cname[64];
428 uint32_t i;
429 const char *name = na->na_name;
430
431 *perr = 0;
432
433 ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NEXUS, name);
434 ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);
435 arn = (struct skmem_arena_nexus *)ar;
436
437 /* these regions must not be readable/writeable */
438 ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
439 ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);
440
441 /* these regions must be read-only */
442 ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
443 ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
444 ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
445 if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
446 ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
447 SKMEM_REGION_CR_UREADONLY);
448 ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
449 SKMEM_REGION_CR_UREADONLY);
450 } else {
451 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
452 SKMEM_REGION_CR_UREADONLY));
453 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
454 SKMEM_REGION_CR_UREADONLY));
455 }
456
457 /* these regions must be user-mappable */
458 ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
459 ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
460 ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
461 ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_MMAPOK);
462 ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_MMAPOK);
463 ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
464 ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
465 ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
466 ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
467 ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
468 ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
469 ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
470 ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);
471
472 /* these must not be user-mappable */
473 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
474 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
475 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
476 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
477 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
478 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
479 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
480 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
481 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));
482
483 /* these regions must be shareable */
484 ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_SHAREOK);
485 ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
486 ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
487 ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
488 ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
489 ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
490
491 /* these regions must not be shareable */
492 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
493 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
494 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
495 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
496 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
497 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
498 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
499 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
500 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
501 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
502 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
503 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
504 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
505 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
506 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
507 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
508 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
509 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
510 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
511 ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));
512
513 /* these must stay active */
514 ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
515 ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
516 ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);
517
518 /* no kstats for nexus */
519 ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);
520
521 AR_LOCK(ar);
522 if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
523 (tx_pp ? *tx_pp : NULL), kernel_only, pp_truncated_buf)) {
524 goto failed;
525 }
526
527 if (nxv != NULL && nxv->nxv_reg != NULL) {
528 struct skmem_region *svr = nxv->nxv_reg;
529
530 ASSERT(svr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
531 ASSERT(svr->skr_seg_max_cnt == 1);
532 ar->ar_regions[SKMEM_REGION_NEXUSADV] = svr;
533 skmem_region_retain(svr);
534
535 ASSERT(nxv->nxv_adv != NULL);
536 if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
537 VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
538 NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
539 } else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
540 VERIFY(nxv->netif_nxv_adv->nna_version ==
541 NX_NETIF_ADVISORY_CURRENT_VERSION);
542 } else {
543 panic_plain("%s: invalid advisory type %d",
544 __func__, nxv->nxv_adv_type);
545 /* NOTREACHED */
546 }
547 arn->arn_nexusadv_obj = nxv->nxv_adv;
548 } else {
549 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
550 ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
551 }
552
553 if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
554 goto failed;
555 }
556
557 if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
558 goto failed;
559 }
560
561 for (i = 0; i < SKMEM_REGIONS; i++) {
562 /* skip if already created */
563 if (ar->ar_regions[i] != NULL) {
564 continue;
565 }
566
567 /* skip external regions from packet pool */
568 if (skmem_region_for_pp(i)) {
569 continue;
570 }
571
572 /* skip slot descriptor regions */
573 if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
574 i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
575 continue;
576 }
577
578 /* skip if region is configured to be empty */
579 if (srp[i].srp_c_obj_cnt == 0) {
580 ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
581 i == SKMEM_REGION_USTATS ||
582 i == SKMEM_REGION_KSTATS ||
583 i == SKMEM_REGION_INTRINSIC ||
584 i == SKMEM_REGION_FLOWADV ||
585 i == SKMEM_REGION_NEXUSADV ||
586 i == SKMEM_REGION_SYSCTLS ||
587 i == SKMEM_REGION_GUARD_TAIL);
588 continue;
589 }
590
591 ASSERT(srp[i].srp_id == i);
592
593 /*
594 * Skip {SCHEMA, RING, GUARD} for kernel-only arena. Note
595 * that this is assuming kernel-only arena is always used
596 * for kernel-only nexus adapters (never used directly by
597 * user process.)
598 *
599 * XXX adi@apple.com - see comments in kern_pbufpool_create().
600 * We need to revisit this logic for "direct channel" access,
601 * perhaps via a separate adapter flag.
602 */
603 if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
604 i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
605 i == SKMEM_REGION_GUARD_TAIL)) {
606 continue;
607 }
608
609 /* not for nexus, or for us to create here */
610 ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
611 ASSERT(i != SKMEM_REGION_NEXUSADV);
612 ASSERT(i != SKMEM_REGION_SYSCTLS);
613 ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
614 ASSERT(i != SKMEM_REGION_KSTATS);
615 ASSERT(i != SKMEM_REGION_INTRINSIC);
616
617 /* otherwise create it */
618 if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
619 NULL, NULL, NULL)) == NULL) {
620 SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
621 "create %s region", ar->ar_name, SK_KVA(ar),
622 ar->ar_flags, ARF_BITS, srp[i].srp_name);
623 goto failed;
624 }
625 }
626
627 /* create skmem_cache for schema (without magazines) */
628 ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
629 if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
630 (void) snprintf(cname, sizeof(cname), "schema.%s", name);
631 if ((arn->arn_schema_cache = skmem_cache_create(cname,
632 srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL, NULL,
633 NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
634 SKMEM_CR_NOMAGAZINES)) == NULL) {
635 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
636 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
637 cname);
638 goto failed;
639 }
640 }
641
642 /* create skmem_cache for rings (without magazines) */
643 (void) snprintf(cname, sizeof(cname), "ring.%s", name);
644 ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
645 if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
646 (arn->arn_ring_cache = skmem_cache_create(cname,
647 srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
648 ar->ar_regions[SKMEM_REGION_RING], SKMEM_CR_NOMAGAZINES)) == NULL) {
649 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
650 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
651 goto failed;
652 }
653
654 /*
655 * If the stats region is present, allocate a single object directly
656 * from the region; we don't need to create an skmem_cache for this,
657 * as the object is allocated (and freed) only once.
658 */
659 if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
660 struct skmem_region *str = ar->ar_regions[SKMEM_REGION_USTATS];
661
662 /* no kstats for nexus */
663 ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
664 ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
665 ASSERT(str->skr_seg_max_cnt == 1);
666
667 if ((arn->arn_stats_obj = skmem_region_alloc(str, NULL,
668 NULL, NULL, SKMEM_SLEEP)) == NULL) {
669 SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
670 "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
671 ARF_BITS);
672 goto failed;
673 }
674 }
675 ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
676
677 /*
678 * If the flowadv region is present, allocate a single object directly
679 * from the region; we don't need to create an skmem_cache for this,
680 * as the object is allocated (and freed) only once.
681 */
682 if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
683 struct skmem_region *str =
684 ar->ar_regions[SKMEM_REGION_FLOWADV];
685
686 ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
687 ASSERT(str->skr_seg_max_cnt == 1);
688
689 if ((arn->arn_flowadv_obj = skmem_region_alloc(str, NULL,
690 NULL, NULL, SKMEM_SLEEP)) == NULL) {
691 SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
692 "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
693 ARF_BITS);
694 goto failed;
695 }
696 }
697
698 if (skmem_arena_create_finalize(ar) != 0) {
699 SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
700 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
701 goto failed;
702 }
703
704 ++ar->ar_refcnt; /* for caller */
705 AR_UNLOCK(ar);
706
707 SKMEM_ARENA_LOCK();
708 TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
709 SKMEM_ARENA_UNLOCK();
710
711 /* caller didn't give us one, but would like us to return it? */
712 if (rx_pp != NULL && *rx_pp == NULL) {
713 *rx_pp = arn->arn_rx_pp;
714 pp_retain(*rx_pp);
715 }
716 if (tx_pp != NULL && *tx_pp == NULL) {
717 *tx_pp = arn->arn_tx_pp;
718 pp_retain(*tx_pp); /* for caller */
719 }
720
721#if SK_LOG
722 if (__improbable(sk_verbose != 0)) {
723 skmem_arena_create_region_log(ar);
724 }
725#endif /* SK_LOG */
726
727 return ar;
728
729failed:
730 AR_LOCK_ASSERT_HELD(ar);
731 skmem_arena_destroy(ar);
732 *perr = ENOMEM;
733
734 return NULL;
735#undef SRP_CFLAGS
736}
737
738void
739skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
740{
741 struct skmem_arena *ar = &arn->arn_cmn;
742
743 AR_LOCK(ar);
744 arn->arn_ksd_nodefunct += cnt;
745 VERIFY(arn->arn_ksd_nodefunct >= 0);
746 AR_UNLOCK(ar);
747}
748
749boolean_t
750skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
751{
752 struct skmem_arena *ar = &arn->arn_cmn;
753 boolean_t idle;
754
755 AR_LOCK(ar);
756 VERIFY(arn->arn_ksd_nodefunct >= 0);
757 idle = (arn->arn_ksd_nodefunct == 0);
758 AR_UNLOCK(ar);
759
760 return idle;
761}
762
763static void
764skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
765{
766 struct skmem_arena *ar = &arn->arn_cmn;
767 struct skmem_region *skr;
768 int i;
769
770 AR_LOCK_ASSERT_HELD(ar);
771 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
772
773 /* these should never be set for nexus arena */
774 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
775 ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
776 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
777 ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
778 ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
779
780 if (arn->arn_stats_obj != NULL) {
781 skr = ar->ar_regions[SKMEM_REGION_USTATS];
782 ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
783 skmem_region_free(skr, arn->arn_stats_obj, NULL);
784 arn->arn_stats_obj = NULL;
785 skmem_region_release(skr);
786 ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
787 }
788 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
789 ASSERT(arn->arn_stats_obj == NULL);
790
791 if (arn->arn_flowadv_obj != NULL) {
792 skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
793 ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
794 skmem_region_free(skr, arn->arn_flowadv_obj, NULL);
795 arn->arn_flowadv_obj = NULL;
796 skmem_region_release(skr);
797 ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
798 }
799 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
800 ASSERT(arn->arn_flowadv_obj == NULL);
801
802 if (arn->arn_nexusadv_obj != NULL) {
803 skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
804 ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
805 /* we didn't allocate this, so just nullify it */
806 arn->arn_nexusadv_obj = NULL;
807 skmem_region_release(skr);
808 ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
809 }
810 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
811 ASSERT(arn->arn_nexusadv_obj == NULL);
812
813 ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
814 if (arn->arn_rx_pp != NULL) {
815 for (i = 0; i < SKMEM_PP_REGIONS; i++) {
816 skmem_region_id_t reg = skmem_pp_region_ids[i];
817 skr = ar->ar_regions[reg];
818 if (skr != NULL) {
819 ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
820 skmem_region_release(skr);
821 ar->ar_regions[reg] = NULL;
822 }
823 }
824 pp_release(arn->arn_rx_pp);
825 pp_release(arn->arn_tx_pp);
826 arn->arn_rx_pp = NULL;
827 arn->arn_tx_pp = NULL;
828 }
829 for (i = 0; i < SKMEM_PP_REGIONS; i++) {
830 ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
831 }
832 ASSERT(arn->arn_rx_pp == NULL);
833 ASSERT(arn->arn_tx_pp == NULL);
834
835 if (arn->arn_ring_cache != NULL) {
836 skr = ar->ar_regions[SKMEM_REGION_RING];
837 ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
838 skmem_cache_destroy(arn->arn_ring_cache);
839 arn->arn_ring_cache = NULL;
840 skmem_region_release(skr);
841 ar->ar_regions[SKMEM_REGION_RING] = NULL;
842 }
843 ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
844 ASSERT(arn->arn_ring_cache == NULL);
845
846 /*
847 * Stop here if we're in the defunct context, and we're asked
848 * to keep the slot descriptor regions alive as they are still
849 * being referred to by the nexus owner (driver).
850 */
851 if (defunct && arn->arn_ksd_nodefunct != 0) {
852 ASSERT(arn->arn_ksd_nodefunct > 0);
853 return;
854 }
855
856 ASSERT(arn->arn_ksd_nodefunct == 0);
857 skmem_arena_sd_teardown(ar, TRUE);
858 skmem_arena_sd_teardown(ar, FALSE);
859
860 /* stop here if we're in the defunct context */
861 if (defunct) {
862 return;
863 }
864 if (arn->arn_schema_cache != NULL) {
865 skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
866 ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
867 skmem_cache_destroy(arn->arn_schema_cache);
868 arn->arn_schema_cache = NULL;
869 skmem_region_release(skr);
870 ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
871 }
872 ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
873 ASSERT(arn->arn_schema_cache == NULL);
874
875 if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
876 ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
877 skmem_region_release(skr);
878 ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
879 }
880 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
881 if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
882 ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
883 skmem_region_release(skr);
884 ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
885 }
886 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
887}
888
889/*
890 * Create an NECP arena.
891 */
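/*
 * A minimal calling sketch (illustrative only); "srp_ustats" and
 * "srp_kstats" stand for caller-prepared region parameters, which must
 * describe the same object size and count since the two regions are
 * mirrored onto each other:
 *
 *	struct skmem_arena *ar;
 *	int err;
 *
 *	ar = skmem_arena_create_for_necp(name, srp_ustats, srp_kstats, &err);
 *	if (ar == NULL) {
 *		return err;
 *	}
 */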
892struct skmem_arena *
893skmem_arena_create_for_necp(const char *name,
894 struct skmem_region_params *srp_ustats,
895 struct skmem_region_params *srp_kstats, int *perr)
896{
897 struct skmem_arena_necp *arc;
898 struct skmem_arena *ar;
899 char cname[64];
900
901 *perr = 0;
902
903 ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NECP, name);
904 ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);
905 arc = (struct skmem_arena_necp *)ar;
906
907 /*
908 * Must be stats region, and must be user-mappable;
909 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
910 * as the client might want multi-segment mode.
911 */
912 ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
913 ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
914 ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
915 ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
916 ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
917 ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
918 ASSERT(srp_ustats->srp_c_obj_size != 0);
919 ASSERT(srp_kstats->srp_c_obj_size != 0);
920 ASSERT(srp_ustats->srp_c_obj_cnt != 0);
921 ASSERT(srp_kstats->srp_c_obj_cnt != 0);
922 ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
923 ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
924 ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
925 ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);
926
927 AR_LOCK(ar);
928
929 if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
930 srp_ustats, NULL, NULL, NULL)) == NULL) {
931 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
932 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
933 srp_ustats->srp_name);
934 goto failed;
935 }
936
937 if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
938 srp_kstats, NULL, NULL, NULL)) == NULL) {
939 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
940 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
941 srp_kstats->srp_name);
942 goto failed;
943 }
944
945 skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
946 ar->ar_regions[SKMEM_REGION_USTATS]);
947
948 /* create skmem_cache for kernel stats (without magazines) */
949 (void) snprintf(cname, sizeof(cname), "kstats.%s", name);
950 if ((arc->arc_kstats_cache = skmem_cache_create(cname,
951 srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL, NULL,
952 ar->ar_regions[SKMEM_REGION_KSTATS],
953 SKMEM_CR_NOMAGAZINES)) == NULL) {
954 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
955 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
956 goto failed;
957 }
958
959 if (skmem_arena_create_finalize(ar) != 0) {
960 SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
961 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
962 goto failed;
963 }
964
965 /*
966 * These must never be configured for NECP arena.
967 *
968 * XXX: In theory we can add guard pages to this arena,
969 * but for now leave that as an exercise for the future.
970 */
971 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
972 ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
973 ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
974 ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
975 ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
976 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
977 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
978 ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
979 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
980 ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
981 ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
982 ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
983 for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
984 ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
985 }
986
987 /* these must be configured for NECP arena */
988 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
989 ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);
990
991 ++ar->ar_refcnt; /* for caller */
992 AR_UNLOCK(ar);
993
994 SKMEM_ARENA_LOCK();
995 TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
996 SKMEM_ARENA_UNLOCK();
997
998#if SK_LOG
999 if (__improbable(sk_verbose != 0)) {
1000 skmem_arena_create_region_log(ar);
1001 }
1002#endif /* SK_LOG */
1003
1004 return ar;
1005
1006failed:
1007 AR_LOCK_ASSERT_HELD(ar);
1008 skmem_arena_destroy(ar);
1009 *perr = ENOMEM;
1010
1011 return NULL;
1012}
1013
1014static void
1015skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
1016{
1017#pragma unused(defunct)
1018 struct skmem_arena *ar = &arc->arc_cmn;
1019 struct skmem_region *skr;
1020
1021 AR_LOCK_ASSERT_HELD(ar);
1022 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);
1023
1024 /* these must never be configured for NECP arena */
1025 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
1026 ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
1027 ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
1028 ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
1029 ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
1030 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1031 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1032 ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
1033 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
1034 ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
1035 ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
1036 ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1037 for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
1038 ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
1039 }
1040
1041 if (arc->arc_kstats_cache != NULL) {
1042 skr = ar->ar_regions[SKMEM_REGION_KSTATS];
1043 ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
1044 skmem_cache_destroy(arc->arc_kstats_cache);
1045 arc->arc_kstats_cache = NULL;
1046 skmem_region_release(skr);
1047 ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;
1048
1049 skr = ar->ar_regions[SKMEM_REGION_USTATS];
1050 ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
1051 skmem_region_release(skr);
1052 ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
1053 }
1054 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1055 ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
1056 ASSERT(arc->arc_kstats_cache == NULL);
1057}
1058
1059/*
1060 * Given an arena, return its NECP variant (if applicable).
1061 */
1062struct skmem_arena_necp *
1063skmem_arena_necp(struct skmem_arena *ar)
1064{
1065 if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
1066 return NULL;
1067 }
1068
1069 return (struct skmem_arena_necp *)ar;
1070}
1071
1072/*
1073 * Create a System arena.
1074 */
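/*
 * A minimal calling sketch (illustrative only); the arena name here is an
 * arbitrary example, and the only backing region is the system-wide
 * sysctls region retained below:
 *
 *	struct skmem_arena *ar;
 *	int err;
 *
 *	if ((ar = skmem_arena_create_for_system("example", &err)) == NULL) {
 *		return err;
 *	}
 */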
1075struct skmem_arena *
1076skmem_arena_create_for_system(const char *name, int *perr)
1077{
1078 struct skmem_region *skrsys;
1079 struct skmem_arena_system *ars;
1080 struct skmem_arena *ar;
1081
1082 *perr = 0;
1083
1084 ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_SYSTEM, name);
1085 ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);
1086 ars = (struct skmem_arena_system *)ar;
1087
1088 AR_LOCK(ar);
1089 /* retain system-wide sysctls region */
1090 skrsys = skmem_get_sysctls_region();
1091 ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
1092 ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
1093 SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
1094 SKR_MODE_SHAREOK)) ==
1095 (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
1096 SKR_MODE_MONOLITHIC));
1097 ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
1098 skmem_region_retain(skrsys);
1099
1100 /* object is valid as long as the sysctls region is retained */
1101 ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
1102 ASSERT(ars->ars_sysctls_obj != NULL);
1103 ASSERT(ars->ars_sysctls_objsize != 0);
1104
1105 if (skmem_arena_create_finalize(ar) != 0) {
1106 SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
1107 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
1108 goto failed;
1109 }
1110
1111 /*
1112 * These must never be configured for system arena.
1113 *
1114 * XXX: In theory we can add guard pages to this arena,
1115 * but for now leave that as an exercise for the future.
1116 */
1117 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
1118 ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
1119 ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
1120 ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
1121 ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
1122 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1123 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1124 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1125 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
1126 ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
1127 ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
1128 ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
1129 ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1130 for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
1131 ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
1132 }
1133
1134 /* these must be configured for system arena */
1135 ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);
1136
1137 ++ar->ar_refcnt; /* for caller */
1138 AR_UNLOCK(ar);
1139
1140 SKMEM_ARENA_LOCK();
1141 TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
1142 SKMEM_ARENA_UNLOCK();
1143
1144#if SK_LOG
1145 if (__improbable(sk_verbose != 0)) {
1146 skmem_arena_create_region_log(ar);
1147 }
1148#endif /* SK_LOG */
1149
1150 return ar;
1151
1152failed:
1153 AR_LOCK_ASSERT_HELD(ar);
1154 skmem_arena_destroy(ar);
1155 *perr = ENOMEM;
1156
1157 return NULL;
1158}
1159
1160static void
1161skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
1162{
1163 struct skmem_arena *ar = &ars->ars_cmn;
1164 struct skmem_region *skr;
1165
1166 AR_LOCK_ASSERT_HELD(ar);
1167 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
1168
1169 /* these must never be configured for system arena */
1170 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
1171 ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
1172 ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
1173 ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
1174 ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
1175 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1176 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1177 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1178 ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
1179 ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
1180 ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
1181 ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
1182 ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1183 for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
1184 ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
1185 }
1186
1187 /* nothing to do here for now during defunct, just return */
1188 if (defunct) {
1189 return;
1190 }
1191
1192 if (ars->ars_sysctls_obj != NULL) {
1193 skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
1194 ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
1195 /* we didn't allocate this, so don't free it */
1196 ars->ars_sysctls_obj = NULL;
1197 ars->ars_sysctls_objsize = 0;
1198 skmem_region_release(skr);
1199 ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
1200 }
1201 ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
1202 ASSERT(ars->ars_sysctls_obj == NULL);
1203 ASSERT(ars->ars_sysctls_objsize == 0);
1204}
1205
1206/*
1207 * Given an arena, return its System variant (if applicable).
1208 */
1209struct skmem_arena_system *
1210skmem_arena_system(struct skmem_arena *ar)
1211{
1212 if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
1213 return NULL;
1214 }
1215
1216 return (struct skmem_arena_system *)ar;
1217}
1218
1219void *
1220skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
1221{
1222 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
1223 return skmem_arena_system(ar)->ars_sysctls_obj;
1224}
1225
1226size_t
1227skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
1228{
1229 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
1230 return skmem_arena_system(ar)->ars_sysctls_objsize;
1231}
1232
1233/*
1234 * Destroy an arena.
1235 */
1236static void
1237skmem_arena_destroy(struct skmem_arena *ar)
1238{
1239 AR_LOCK_ASSERT_HELD(ar);
1240
1241 SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
1242 ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
1243
1244 ASSERT(ar->ar_refcnt == 0);
1245 if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
1246 AR_UNLOCK(ar);
1247 SKMEM_ARENA_LOCK();
1248 TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
1249 SKMEM_ARENA_UNLOCK();
1250 AR_LOCK(ar);
1251 ASSERT(ar->ar_refcnt == 0);
1252 }
1253
1254 /* teardown all remaining memory regions and associated resources */
1255 skmem_arena_teardown(ar, FALSE);
1256
1257 if (ar->ar_ar != NULL) {
1258 IOSKArenaDestroy(ar->ar_ar);
1259 ar->ar_ar = NULL;
1260 }
1261
1262 if (ar->ar_flags & ARF_ACTIVE) {
1263 ar->ar_flags &= ~ARF_ACTIVE;
1264 }
1265
1266 AR_UNLOCK(ar);
1267
1268 skmem_arena_free(ar);
1269}
1270
1271/*
1272 * Teardown (or defunct) an arena.
1273 */
1274static void
1275skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
1276{
1277 uint32_t i;
1278
1279 switch (ar->ar_type) {
1280 case SKMEM_ARENA_TYPE_NEXUS:
1281 skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
1282 defunct);
1283 break;
1284
1285 case SKMEM_ARENA_TYPE_NECP:
1286 skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
1287 defunct);
1288 break;
1289
1290 case SKMEM_ARENA_TYPE_SYSTEM:
1291 skmem_arena_system_teardown((struct skmem_arena_system *)ar,
1292 defunct);
1293 break;
1294
1295 default:
1296 VERIFY(0);
1297 /* NOTREACHED */
1298 __builtin_unreachable();
1299 }
1300
1301 /* stop here if we're in the defunct context */
1302 if (defunct) {
1303 return;
1304 }
1305
1306 /* take care of any remaining ones */
1307 for (i = 0; i < SKMEM_REGIONS; i++) {
1308 if (ar->ar_regions[i] == NULL) {
1309 continue;
1310 }
1311
1312 skmem_region_release(ar->ar_regions[i]);
1313 ar->ar_regions[i] = NULL;
1314 }
1315}
1316
1317static int
1318skmem_arena_create_finalize(struct skmem_arena *ar)
1319{
1320 IOSKRegionRef reg[SKMEM_REGIONS];
1321 uint32_t i, regcnt = 0;
1322 int err = 0;
1323
1324 AR_LOCK_ASSERT_HELD(ar);
1325
1326 ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1327
1328 /*
1329 * Prepare an array of regions that can be mapped to user task;
1330 * exclude regions that aren't eligible for user task mapping.
1331 */
1332 bzero(&reg, sizeof(reg));
1333 for (i = 0; i < SKMEM_REGIONS; i++) {
1334 struct skmem_region *skr = ar->ar_regions[i];
1335 if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
1336 continue;
1337 }
1338
1339 ASSERT(skr->skr_reg != NULL);
1340 reg[regcnt++] = skr->skr_reg;
1341 }
1342 ASSERT(regcnt != 0);
1343
1344 /*
1345 * Create backing IOSKArena handle.
1346 */
1347 ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
1348 if (ar->ar_ar == NULL) {
1349 SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
1350 "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
1351 ar->ar_flags, ARF_BITS, regcnt);
1352 err = ENOMEM;
1353 goto failed;
1354 }
1355
1356 ar->ar_flags |= ARF_ACTIVE;
1357
1358failed:
1359 return err;
1360}
1361
1362static inline struct kalloc_type_view *
1363skmem_arena_zone(skmem_arena_type_t type)
1364{
1365 switch (type) {
1366 case SKMEM_ARENA_TYPE_NEXUS:
1367 return ar_nexus_zone;
1368
1369 case SKMEM_ARENA_TYPE_NECP:
1370 return ar_necp_zone;
1371
1372 case SKMEM_ARENA_TYPE_SYSTEM:
1373 return ar_system_zone;
1374
1375 default:
1376 VERIFY(0);
1377 /* NOTREACHED */
1378 __builtin_unreachable();
1379 }
1380}
1381
1382static struct skmem_arena *
1383skmem_arena_alloc(skmem_arena_type_t type, const char *name)
1384{
1385 const char *ar_str = NULL;
1386 struct skmem_arena *ar;
1387 size_t ar_zsize = 0;
1388
1389 switch (type) {
1390 case SKMEM_ARENA_TYPE_NEXUS:
1391 ar_zsize = AR_NEXUS_SIZE;
1392 ar_str = "nexus";
1393 break;
1394
1395 case SKMEM_ARENA_TYPE_NECP:
1396 ar_zsize = AR_NECP_SIZE;
1397 ar_str = "necp";
1398 break;
1399
1400 case SKMEM_ARENA_TYPE_SYSTEM:
1401 ar_zsize = AR_SYSTEM_SIZE;
1402 ar_str = "system";
1403 break;
1404
1405 default:
1406 VERIFY(0);
1407 /* NOTREACHED */
1408 __builtin_unreachable();
1409 }
1410
1411 ar = zalloc_flags(skmem_arena_zone(type), Z_WAITOK | Z_ZERO | Z_NOFAIL);
1412 ar->ar_type = type;
1413 ar->ar_zsize = ar_zsize;
1414
1415 lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
1416 LCK_ATTR_NULL);
1417 (void) snprintf(ar->ar_name, sizeof(ar->ar_name),
1418 "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);
1419
1420 return ar;
1421}
1422
1423static void
1424skmem_arena_free(struct skmem_arena *ar)
1425{
1426#if DEBUG || DEVELOPMENT
1427 ASSERT(ar->ar_refcnt == 0);
1428 ASSERT(!(ar->ar_flags & ARF_ACTIVE));
1429 ASSERT(ar->ar_ar == NULL);
1430 ASSERT(ar->ar_mapcnt == 0);
1431 ASSERT(SLIST_EMPTY(&ar->ar_map_head));
1432 for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
1433 ASSERT(ar->ar_regions[i] == NULL);
1434 }
1435#endif /* DEBUG || DEVELOPMENT */
1436
1437 lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
1438 zfree(skmem_arena_zone(ar->ar_type), ar);
1439}
1440
1441/*
1442 * Retain an arena.
1443 */
1444__attribute__((always_inline))
1445static inline void
1446skmem_arena_retain_locked(struct skmem_arena *ar)
1447{
1448 AR_LOCK_ASSERT_HELD(ar);
1449 ar->ar_refcnt++;
1450 ASSERT(ar->ar_refcnt != 0);
1451}
1452
1453void
1454skmem_arena_retain(struct skmem_arena *ar)
1455{
1456 AR_LOCK(ar);
1457 skmem_arena_retain_locked(ar);
1458 AR_UNLOCK(ar);
1459}
1460
1461/*
1462 * Release (and potentially destroy) an arena.
1463 */
1464__attribute__((always_inline))
1465static inline boolean_t
1466skmem_arena_release_locked(struct skmem_arena *ar)
1467{
1468 boolean_t lastref = FALSE;
1469
1470 AR_LOCK_ASSERT_HELD(ar);
1471 ASSERT(ar->ar_refcnt != 0);
1472 if (--ar->ar_refcnt == 0) {
1473 skmem_arena_destroy(ar);
1474 lastref = TRUE;
1475 } else {
1476 lastref = FALSE;
1477 }
1478
1479 return lastref;
1480}
1481
1482boolean_t
1483skmem_arena_release(struct skmem_arena *ar)
1484{
1485 boolean_t lastref;
1486
1487 AR_LOCK(ar);
1488 /* unlock only if this isn't the last reference */
1489 if (!(lastref = skmem_arena_release_locked(ar))) {
1490 AR_UNLOCK(ar);
1491 }
1492
1493 return lastref;
1494}
1495
1496/*
1497 * Map an arena to the task's address space.
1498 */
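/*
 * A minimal calling sketch (illustrative only); the mmap_info structure is
 * expected to be zero-filled before the first call, and the resulting span
 * in the task's address space is returned in ami_mapaddr/ami_mapsize:
 *
 *	struct skmem_arena_mmap_info ami;
 *	int err;
 *
 *	bzero(&ami, sizeof(ami));
 *	err = skmem_arena_mmap(ar, p, &ami);
 *	if (err == 0) {
 *		...	use [ami.ami_mapaddr, ami.ami_mapaddr + ami.ami_mapsize)
 *	}
 */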
1499int
1500skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1501 struct skmem_arena_mmap_info *ami)
1502{
1503 task_t task = proc_task(p);
1504 IOReturn ioerr;
1505 int err = 0;
1506
1507 ASSERT(task != kernel_task && task != TASK_NULL);
1508 ASSERT(ami->ami_arena == NULL);
1509 ASSERT(ami->ami_mapref == NULL);
1510 ASSERT(ami->ami_maptask == TASK_NULL);
1511 ASSERT(!ami->ami_redirect);
1512
1513 AR_LOCK(ar);
1514 if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1515 err = ENODEV;
1516 goto failed;
1517 }
1518
1519 ASSERT(ar->ar_ar != NULL);
1520 if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1521 err = ENOMEM;
1522 goto failed;
1523 }
1524
1525 ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1526 &ami->ami_mapsize);
1527 VERIFY(ioerr == kIOReturnSuccess);
1528
1529 ami->ami_arena = ar;
1530 skmem_arena_retain_locked(ar);
1531 SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1532
1533 ami->ami_maptask = task;
1534 ar->ar_mapcnt++;
1535 if (ar->ar_mapcnt == 1) {
1536 ar->ar_mapsize = ami->ami_mapsize;
1537 }
1538
1539 ASSERT(ami->ami_mapref != NULL);
1540 ASSERT(ami->ami_arena == ar);
1541 AR_UNLOCK(ar);
1542
1543 return 0;
1544
1545failed:
1546 AR_UNLOCK(ar);
1547 skmem_arena_munmap(ar, ami);
1548 VERIFY(err != 0);
1549
1550 return err;
1551}
1552
1553/*
1554 * Remove arena's memory mapping from task's address space (common code).
1555 * Returns true if caller needs to perform a deferred defunct.
1556 */
1557static boolean_t
1558skmem_arena_munmap_common(struct skmem_arena *ar,
1559 struct skmem_arena_mmap_info *ami)
1560{
1561 boolean_t need_defunct = FALSE;
1562
1563 AR_LOCK(ar);
1564 if (ami->ami_mapref != NULL) {
1565 IOSKMapperDestroy(ami->ami_mapref);
1566 ami->ami_mapref = NULL;
1567
1568 VERIFY(ar->ar_mapcnt != 0);
1569 ar->ar_mapcnt--;
1570 if (ar->ar_mapcnt == 0) {
1571 ar->ar_mapsize = 0;
1572 }
1573
1574 VERIFY(ami->ami_arena == ar);
1575 SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1576 ami_link);
1577
1578 /*
1579 * We expect the caller to hold an extra reference on the
1580 * arena, in addition to the one in mmap_info.
1581 */
1582 VERIFY(ar->ar_refcnt > 1);
1583 (void) skmem_arena_release_locked(ar);
1584 ami->ami_arena = NULL;
1585
1586 if (ami->ami_redirect) {
1587 /*
1588 * This mapper has been redirected; decrement
1589 * the redirect count associated with it.
1590 */
1591 VERIFY(ar->ar_maprdrcnt != 0);
1592 ar->ar_maprdrcnt--;
1593 } else if (ar->ar_maprdrcnt != 0 &&
1594 ar->ar_maprdrcnt == ar->ar_mapcnt) {
1595 /*
1596 * There are other mappers for this arena that have
1597 * all been redirected, but the arena wasn't marked
1598 * inactive by skmem_arena_mredirect() last time since
1599 * this particular mapper that we just destroyed
1600 * was using it. Now that it's gone, finish the
1601 * postponed work below once we return to caller.
1602 */
1603 ASSERT(ar->ar_flags & ARF_ACTIVE);
1604 ar->ar_flags &= ~ARF_ACTIVE;
1605 need_defunct = TRUE;
1606 }
1607 }
1608 ASSERT(ami->ami_mapref == NULL);
1609 ASSERT(ami->ami_arena == NULL);
1610
1611 ami->ami_maptask = TASK_NULL;
1612 ami->ami_mapaddr = 0;
1613 ami->ami_mapsize = 0;
1614 ami->ami_redirect = FALSE;
1615
1616 AR_UNLOCK(ar);
1617
1618 return need_defunct;
1619}
1620
1621/*
1622 * Remove arena's memory mapping from task's address space (channel version).
1623 * Will perform a deferred defunct if needed.
1624 */
1625void
1626skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1627{
1628 SK_LOCK_ASSERT_HELD();
1629 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1630
1631 /*
1632 * If this is on a channel that was holding the last
1633 * active reference count on the arena, and there are
1634 * other defunct channels pointing to that arena, perform the
1635 * actual arena defunct now.
1636 */
1637 if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1638 struct kern_nexus *nx = ch->ch_nexus;
1639 struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1640
1641 /*
1642 * Similar to kern_channel_defunct(), where we let the
1643 * domain provider complete the defunct. At this point
1644 * both sk_lock and the channel locks are held, and so
1645 * we indicate that to the callee.
1646 */
1647 nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1648 nx, ch, TRUE);
1649 }
1650}
1651
/*
 * Remove arena's memory mapping from task's address space (generic).
 * This routine should only be called on non-channel related arenas.
 */
void
skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
{
	(void) skmem_arena_munmap_common(ar, ami);
}

/*
 * Redirect eligible memory regions in the task's memory map so that
 * they get overwritten and backed with anonymous (zero-filled) pages.
 */
int
skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
    struct proc *p, boolean_t *need_defunct)
{
#pragma unused(p)
	int err = 0;

	*need_defunct = FALSE;

	AR_LOCK(ar);
	ASSERT(ar->ar_ar != NULL);
	if (ami->ami_redirect) {
		err = EALREADY;
	} else if (ami->ami_mapref == NULL) {
		err = ENXIO;
	} else {
		VERIFY(ar->ar_mapcnt != 0);
		ASSERT(ar->ar_flags & ARF_ACTIVE);
		VERIFY(ami->ami_arena == ar);
		/*
		 * This effectively overwrites the mappings for all
		 * redirectable memory regions (i.e. those without the
		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
		 * protection flags.  Accesses to these regions will be
		 * redirected to anonymous, zero-filled pages.
		 */
		IOSKMapperRedirect(ami->ami_mapref);
		ami->ami_redirect = TRUE;

		/*
		 * Mark the arena as inactive if all mapper instances are
		 * redirected; otherwise, we do this later during unmap.
		 * Once inactive, the arena will not allow further mmap,
		 * and it is ready to be defunct later.
		 */
		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
			ar->ar_flags &= ~ARF_ACTIVE;
			*need_defunct = TRUE;
		}
	}
	AR_UNLOCK(ar);

	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
	    *need_defunct, err);

	return err;
}

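/*
 * Illustrative sketch (hypothetical caller, error handling elided): a
 * client that wants to quiesce its mapping typically redirects it and
 * then acts on the deferred-defunct hint; the provider-specific defunct
 * finalize step happens elsewhere.
 *
 *	boolean_t need_defunct = FALSE;
 *	int err = skmem_arena_mredirect(ar, ami, p, &need_defunct);
 *	if (err == 0 && need_defunct) {
 *		// all mappers are now redirected; the arena is inactive
 *		// and is ready to be defuncted
 *	}
 */
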
/*
 * Defunct the arena.
 */
int
skmem_arena_defunct(struct skmem_arena *ar)
{
	AR_LOCK(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags 0x%b", ar->ar_name,
	    SK_KVA(ar), ar->ar_flags, ARF_BITS);

	if (ar->ar_flags & ARF_DEFUNCT) {
		AR_UNLOCK(ar);
		return EALREADY;
	} else if (ar->ar_flags & ARF_ACTIVE) {
		AR_UNLOCK(ar);
		return EBUSY;
	}

	/* purge the caches now */
	skmem_arena_reap_locked(ar, TRUE);

	/* tear down eligible memory regions and associated resources */
	skmem_arena_teardown(ar, TRUE);

	ar->ar_flags |= ARF_DEFUNCT;

	AR_UNLOCK(ar);

	return 0;
}

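/*
 * Note on ordering: skmem_arena_defunct() requires that the arena has
 * already been marked inactive, which happens once every mapper has been
 * redirected -- either directly in skmem_arena_mredirect(), or in
 * skmem_arena_munmap_common() when the last non-redirected mapper goes
 * away while others remain redirected.  Calling it on an arena that is
 * still active returns EBUSY; calling it a second time returns EALREADY.
 */
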
/*
 * Retrieve total and in-use memory statistics of regions in the arena.
 */
void
skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
    uint64_t *mem_inuse)
{
	uint32_t i;

	if (mem_total != NULL) {
		*mem_total = 0;
	}
	if (mem_inuse != NULL) {
		*mem_inuse = 0;
	}

	AR_LOCK(ar);
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		if (mem_total != NULL) {
			*mem_total += AR_MEM_TOTAL(ar, i);
		}
		if (mem_inuse != NULL) {
			*mem_inuse += AR_MEM_INUSE(ar, i);
		}
	}
	AR_UNLOCK(ar);
}

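/*
 * Illustrative usage sketch; either pointer may be NULL when the caller
 * only needs one of the two values:
 *
 *	uint64_t total = 0, inuse = 0;
 *	skmem_arena_get_stats(ar, &total, &inuse);
 *	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" total %llu inuse %llu",
 *	    ar->ar_name, total, inuse);
 */
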
/*
 * Retrieve the offset of a particular region (identified by its ID)
 * from the base of the arena.
 */
mach_vm_offset_t
skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
{
	mach_vm_offset_t offset = 0;
	uint32_t i;

	ASSERT(id < SKMEM_REGIONS);

	AR_LOCK(ar);
	for (i = 0; i < id; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		offset += ar->ar_regions[i]->skr_size;
	}
	AR_UNLOCK(ar);

	return offset;
}

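/*
 * Worked example (hypothetical sizes): if regions 0 and 2 are populated
 * with skr_size values of 64KB and 128KB, and region 1 is absent, then
 * the offset returned for id 3 is 64KB + 128KB = 192KB -- the sum of the
 * sizes of all populated regions with a lower region ID.
 */
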
static void
skmem_reap_pbufpool_caches(struct kern_pbufpool *pp, boolean_t purge)
{
	if (pp->pp_kmd_cache != NULL) {
		skmem_cache_reap_now(pp->pp_kmd_cache, purge);
	}
	if (PP_BUF_CACHE_DEF(pp) != NULL) {
		skmem_cache_reap_now(PP_BUF_CACHE_DEF(pp), purge);
	}
	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
		skmem_cache_reap_now(PP_BUF_CACHE_LARGE(pp), purge);
	}
	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
		skmem_cache_reap_now(PP_KBFT_CACHE_DEF(pp), purge);
	}
	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
		skmem_cache_reap_now(PP_KBFT_CACHE_LARGE(pp), purge);
	}
}

/*
 * Reap all of the configured caches in the arena, so that any excess
 * amount outside of their working sets gets released to their respective
 * backing regions.  If purging is specified, we empty the caches' working
 * sets, including everything that's cached at the CPU layer.
 */
static void
skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
{
	struct skmem_arena_nexus *arn;
	struct skmem_arena_necp *arc;
	struct kern_pbufpool *pp;

	AR_LOCK_ASSERT_HELD(ar);

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		arn = (struct skmem_arena_nexus *)ar;
		if (arn->arn_schema_cache != NULL) {
			skmem_cache_reap_now(arn->arn_schema_cache, purge);
		}
		if (arn->arn_ring_cache != NULL) {
			skmem_cache_reap_now(arn->arn_ring_cache, purge);
		}
		if ((pp = arn->arn_rx_pp) != NULL) {
			skmem_reap_pbufpool_caches(pp, purge);
		}
		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
			skmem_reap_pbufpool_caches(pp, purge);
		}
		break;

	case SKMEM_ARENA_TYPE_NECP:
		arc = (struct skmem_arena_necp *)ar;
		if (arc->arc_kstats_cache != NULL) {
			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
		}
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		break;
	}
}

void
skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
{
	AR_LOCK(ar);
	skmem_arena_reap_locked(ar, purge);
	AR_UNLOCK(ar);
}

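/*
 * Illustrative sketch of the two reap modes; "ar" stands for any arena
 * handle the caller already owns:
 *
 *	// trim excess objects beyond the caches' working sets
 *	skmem_arena_reap(ar, FALSE);
 *
 *	// empty the working sets as well, including the CPU-layer caches
 *	skmem_arena_reap(ar, TRUE);
 */
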
#if SK_LOG
SK_LOG_ATTRIBUTE
static void
skmem_arena_create_region_log(struct skmem_arena *ar)
{
	char label[32];
	int i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
		break;

	case SKMEM_ARENA_TYPE_NECP:
	case SKMEM_ARENA_TYPE_SYSTEM:
		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
		break;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		(void) snprintf(label, sizeof(label), "REGION_%s:",
		    skmem_region_id2name(i));
		SK_D(" %-16s %6u KB s:[%2u x %6u KB] "
		    "o:[%4u x %6u -> %4u x %6u]", label,
		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
		    (uint32_t)AR_MEM_SEGCNT(ar, i),
		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
	}
}
#endif /* SK_LOG */

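/*
 * Fill out one sk_stats_arena record for this arena.  Returns the space
 * required for the record; if "out" is NULL or "len" is too small, nothing
 * is written and only the required size is returned, which lets the caller
 * size its buffer before copying.
 */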
static size_t
skmem_arena_mib_get_stats(struct skmem_arena *ar, void *out, size_t len)
{
	size_t actual_space = sizeof(struct sk_stats_arena);
	struct sk_stats_arena *sar = out;
	struct skmem_arena_mmap_info *ami = NULL;
	pid_t proc_pid;
	int i;

	if (out == NULL || len < actual_space) {
		goto done;
	}

	AR_LOCK(ar);
	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
	    "%s", ar->ar_name);
	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
	i = 0;
	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
			struct kern_channel *ch;
			ch = container_of(ami, struct kern_channel, ch_mmap);
			proc_pid = ch->ch_pid;
		} else {
			ASSERT((ami->ami_arena->ar_type ==
			    SKMEM_ARENA_TYPE_NECP) ||
			    (ami->ami_arena->ar_type ==
			    SKMEM_ARENA_TYPE_SYSTEM));
			proc_pid =
			    necp_client_get_proc_pid_from_arena_info(ami);
		}
		sar->sar_mapped_pids[i++] = proc_pid;
		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
			break;
		}
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];

		if (skr == NULL) {
			uuid_clear(*sreg_uuid);
			continue;
		}

		uuid_copy(*sreg_uuid, skr->skr_uuid);
	}
	AR_UNLOCK(ar);

done:
	return actual_space;
}

static int
skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	struct skmem_arena *ar;
	size_t actual_space;
	size_t buffer_space;
	size_t allocated_space;
	caddr_t buffer = NULL;
	caddr_t scan;
	int error = 0;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK,
		    skmem_tag_arena_mib);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;

	SKMEM_ARENA_LOCK();
	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
		if (scan != NULL) {
			if (buffer_space < size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += size;
			buffer_space -= size;
		}
		actual_space += size;
	}
	SKMEM_ARENA_UNLOCK();

	if (actual_space != 0) {
		int out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

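/*
 * Userspace consumption sketch (illustrative only; the MIB name under
 * which this handler is registered -- assumed here to be
 * "kern.skywalk.stats.arena" -- lives elsewhere).  The handler supports
 * the usual two-pass sysctl protocol: probe with a NULL buffer to learn
 * the required size, then fetch the records.
 *
 *	size_t len = 0;
 *	if (sysctlbyname("kern.skywalk.stats.arena", NULL, &len,
 *	    NULL, 0) == 0 && len != 0) {
 *		struct sk_stats_arena *sar = malloc(len);
 *		if (sar != NULL &&
 *		    sysctlbyname("kern.skywalk.stats.arena", sar, &len,
 *		    NULL, 0) == 0) {
 *			// len / sizeof(*sar) records are now available
 *		}
 *		free(sar);
 *	}
 */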