1/*
2 * Copyright (c) 2016-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <skywalk/os_skywalk_private.h>
30#include <skywalk/packet/pbufpool_var.h>
31#include <sys/sdt.h>
32
33static struct kern_pbufpool *pp_alloc(zalloc_flags_t);
34static void pp_free(struct kern_pbufpool *);
35static uint32_t pp_alloc_packet_common(struct kern_pbufpool *, uint16_t,
36 uint64_t *, uint32_t, boolean_t, alloc_cb_func_t, const void *, uint32_t);
37static void pp_free_packet_array(struct kern_pbufpool *, uint64_t *, uint32_t);
38static int pp_metadata_ctor_no_buflet(struct skmem_obj_info *,
39 struct skmem_obj_info *, void *, uint32_t);
40static int pp_metadata_ctor_max_buflet(struct skmem_obj_info *,
41 struct skmem_obj_info *, void *, uint32_t);
42static void pp_metadata_dtor(void *, void *);
43static int pp_metadata_construct(struct __kern_quantum *,
44 struct __user_quantum *, obj_idx_t, struct kern_pbufpool *, uint32_t,
45 uint16_t, bool, struct skmem_obj **);
46static void pp_metadata_destruct(struct __kern_quantum *,
47 struct kern_pbufpool *, bool);
48static struct __kern_quantum *pp_metadata_init(struct __metadata_preamble *,
49 struct kern_pbufpool *, uint16_t, uint32_t, struct skmem_obj **);
50static struct __metadata_preamble *pp_metadata_fini(struct __kern_quantum *,
51 struct kern_pbufpool *, struct mbuf **, struct __kern_packet **,
52 struct skmem_obj **, struct skmem_obj **);
53static void pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid);
54static void pp_buf_seg_ctor(struct sksegment *, IOSKMemoryBufferRef, void *);
55static void pp_buf_seg_dtor(struct sksegment *, IOSKMemoryBufferRef, void *);
56static void pp_destroy_upp_locked(struct kern_pbufpool *);
57static void pp_destroy_upp_bft_locked(struct kern_pbufpool *);
58static int pp_init_upp_bft_locked(struct kern_pbufpool *, boolean_t);
59static void pp_free_buflet_common(const kern_pbufpool_t, kern_buflet_t);
60static mach_vm_address_t pp_alloc_buffer_common(const kern_pbufpool_t pp,
61 struct skmem_obj_info *oi, uint32_t skmflag, bool large);
62static inline uint32_t
63pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
64 uint32_t num, uint32_t skmflag, bool large);
65
66#define KERN_PBUFPOOL_U_HASH_SIZE 64 /* hash table size */
67
/*
 * Since the inputs are small (indices into the metadata region), we can use
 * Knuth's multiplicative hash method, which is fast and good enough.  Here
 * we multiply the input by a constant derived from the golden ratio
 * (2^32 / phi).  See "The Art of Computer Programming", section 6.4.
 */
74#define KERN_PBUFPOOL_U_HASH_INDEX(_i, _m) \
75 (((_i) * 2654435761U) & (_m))
76#define KERN_PBUFPOOL_U_HASH(_pp, _i) \
77 (&(_pp)->pp_u_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
78 KERN_PBUFPOOL_U_HASH_SIZE - 1)])
79#define KERN_PBUFPOOL_U_BFT_HASH(_pp, _i) \
80 (&(_pp)->pp_u_bft_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
81 KERN_PBUFPOOL_U_HASH_SIZE - 1)])
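/*
 * For example, with KERN_PBUFPOOL_U_HASH_SIZE of 64, a metadata index of 7
 * maps to ((7 * 2654435761U) & 63), i.e. bucket 23.
 */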
82
83static SKMEM_TYPE_DEFINE(pp_zone, struct kern_pbufpool);
84
85struct kern_pbufpool_u_htbl {
86 struct kern_pbufpool_u_bkt upp_hash[KERN_PBUFPOOL_U_HASH_SIZE];
87};
88
89#define PP_U_HTBL_SIZE sizeof(struct kern_pbufpool_u_htbl)
90static SKMEM_TYPE_DEFINE(pp_u_htbl_zone, struct kern_pbufpool_u_htbl);
91
92static struct skmem_cache *pp_opt_cache; /* cache for __packet_opt */
93static struct skmem_cache *pp_flow_cache; /* cache for __flow */
94static struct skmem_cache *pp_compl_cache; /* cache for __packet_compl */
95
96static int __pp_inited = 0;
97
98int
99pp_init(void)
100{
101 _CASSERT(KPKT_SC_UNSPEC == MBUF_SC_UNSPEC);
102 _CASSERT(KPKT_SC_BK_SYS == MBUF_SC_BK_SYS);
103 _CASSERT(KPKT_SC_BK == MBUF_SC_BK);
104 _CASSERT(KPKT_SC_BE == MBUF_SC_BE);
105 _CASSERT(KPKT_SC_RD == MBUF_SC_RD);
106 _CASSERT(KPKT_SC_OAM == MBUF_SC_OAM);
107 _CASSERT(KPKT_SC_AV == MBUF_SC_AV);
108 _CASSERT(KPKT_SC_RV == MBUF_SC_RV);
109 _CASSERT(KPKT_SC_VI == MBUF_SC_VI);
110 _CASSERT(KPKT_SC_SIG == MBUF_SC_SIG);
111 _CASSERT(KPKT_SC_VO == MBUF_SC_VO);
112 _CASSERT(KPKT_SC_CTL == MBUF_SC_CTL);
113
114 _CASSERT(KPKT_SC_BK_SYS == PKT_SC_BK_SYS);
115 _CASSERT(KPKT_SC_BK == PKT_SC_BK);
116 _CASSERT(KPKT_SC_BE == PKT_SC_BE);
117 _CASSERT(KPKT_SC_RD == PKT_SC_RD);
118 _CASSERT(KPKT_SC_OAM == PKT_SC_OAM);
119 _CASSERT(KPKT_SC_AV == PKT_SC_AV);
120 _CASSERT(KPKT_SC_RV == PKT_SC_RV);
121 _CASSERT(KPKT_SC_VI == PKT_SC_VI);
122 _CASSERT(KPKT_SC_SIG == PKT_SC_SIG);
123 _CASSERT(KPKT_SC_VO == PKT_SC_VO);
124 _CASSERT(KPKT_SC_CTL == PKT_SC_CTL);
125 _CASSERT(KPKT_SC_MAX_CLASSES == MBUF_SC_MAX_CLASSES);
126
127 _CASSERT(KPKT_TC_UNSPEC == MBUF_TC_UNSPEC);
128 _CASSERT(KPKT_TC_BE == MBUF_TC_BE);
129 _CASSERT(KPKT_TC_BK == MBUF_TC_BK);
130 _CASSERT(KPKT_TC_VI == MBUF_TC_VI);
131 _CASSERT(KPKT_TC_VO == MBUF_TC_VO);
132 _CASSERT(KPKT_TC_MAX == MBUF_TC_MAX);
133
134 _CASSERT(KPKT_TC_BE == PKT_TC_BE);
135 _CASSERT(KPKT_TC_BK == PKT_TC_BK);
136 _CASSERT(KPKT_TC_VI == PKT_TC_VI);
137 _CASSERT(KPKT_TC_VO == PKT_TC_VO);
138
139 _CASSERT(PKT_SCVAL_BK_SYS == SCVAL_BK_SYS);
140 _CASSERT(PKT_SCVAL_BK == SCVAL_BK);
141 _CASSERT(PKT_SCVAL_BE == SCVAL_BE);
142 _CASSERT(PKT_SCVAL_RD == SCVAL_RD);
143 _CASSERT(PKT_SCVAL_OAM == SCVAL_OAM);
144 _CASSERT(PKT_SCVAL_AV == SCVAL_AV);
145 _CASSERT(PKT_SCVAL_RV == SCVAL_RV);
146 _CASSERT(PKT_SCVAL_VI == SCVAL_VI);
147 _CASSERT(PKT_SCVAL_VO == SCVAL_VO);
148 _CASSERT(PKT_SCVAL_CTL == SCVAL_CTL);
149
	/*
	 * Assert that the values of the packet flags shared between mbuf
	 * and skywalk packets match, and that they are all covered by
	 * PKT_F_COMMON_MASK.
	 */
154 _CASSERT(PKT_F_BACKGROUND == PKTF_SO_BACKGROUND);
155 _CASSERT(PKT_F_REALTIME == PKTF_SO_REALTIME);
156 _CASSERT(PKT_F_REXMT == PKTF_TCP_REXMT);
157 _CASSERT(PKT_F_LAST_PKT == PKTF_LAST_PKT);
158 _CASSERT(PKT_F_FLOW_ID == PKTF_FLOW_ID);
159 _CASSERT(PKT_F_FLOW_ADV == PKTF_FLOW_ADV);
160 _CASSERT(PKT_F_TX_COMPL_TS_REQ == PKTF_TX_COMPL_TS_REQ);
161 _CASSERT(PKT_F_TS_VALID == PKTF_TS_VALID);
162 _CASSERT(PKT_F_NEW_FLOW == PKTF_NEW_FLOW);
163 _CASSERT(PKT_F_START_SEQ == PKTF_START_SEQ);
164 _CASSERT(PKT_F_KEEPALIVE == PKTF_KEEPALIVE);
165 _CASSERT(PKT_F_WAKE_PKT == PKTF_WAKE_PKT);
166 _CASSERT(PKT_F_COMMON_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
167 PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_FLOW_ID | PKT_F_FLOW_ADV |
168 PKT_F_TX_COMPL_TS_REQ | PKT_F_TS_VALID | PKT_F_NEW_FLOW |
169 PKT_F_START_SEQ | PKT_F_KEEPALIVE | PKT_F_WAKE_PKT));
170 /*
171 * Assert packet flags shared with userland.
172 */
173 _CASSERT(PKT_F_USER_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
174 PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_OPT_DATA | PKT_F_PROMISC |
175 PKT_F_TRUNCATED | PKT_F_WAKE_PKT | PKT_F_L4S));
176
177 _CASSERT(offsetof(struct __kern_quantum, qum_len) ==
178 offsetof(struct __kern_packet, pkt_length));
179
	/*
	 * Due to the use of tagged pointers, the size of the metadata
	 * preamble structure must be a multiple of 16.  See the
	 * SK_PTR_TAG() definition for details.
	 */
185 _CASSERT(sizeof(struct __metadata_preamble) != 0 &&
186 (sizeof(struct __metadata_preamble) % 16) == 0);
187
188 _CASSERT(NX_PBUF_FRAGS_MIN == 1 &&
189 NX_PBUF_FRAGS_MIN == NX_PBUF_FRAGS_DEFAULT);
190
191 /*
192 * Batch alloc/free requires linking the objects together;
193 * make sure that the fields are at the same offset since
194 * we cast the object to struct skmem_obj.
195 */
196 _CASSERT(offsetof(struct __metadata_preamble, _mdp_next) ==
197 offsetof(struct skmem_obj, mo_next));
198 _CASSERT(offsetof(struct __buflet, __buflet_next) ==
199 offsetof(struct skmem_obj, mo_next));
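	/*
	 * In other words, an object chain produced by a batch allocation can
	 * be walked generically (illustrative sketch only):
	 *
	 *	struct skmem_obj *obj = blist;
	 *	while (obj != NULL)
	 *		obj = obj->mo_next;
	 *
	 * regardless of whether the objects are metadata or buflets.
	 */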
200
201 SK_LOCK_ASSERT_HELD();
202 ASSERT(!__pp_inited);
203
204 pp_opt_cache = skmem_cache_create("pkt.opt",
205 sizeof(struct __packet_opt), sizeof(uint64_t),
206 NULL, NULL, NULL, NULL, NULL, 0);
207 pp_flow_cache = skmem_cache_create("pkt.flow",
208 sizeof(struct __flow), 16, /* 16-bytes aligned */
209 NULL, NULL, NULL, NULL, NULL, 0);
210 pp_compl_cache = skmem_cache_create("pkt.compl",
211 sizeof(struct __packet_compl), sizeof(uint64_t),
212 NULL, NULL, NULL, NULL, NULL, 0);
213
214 return 0;
215}
216
217void
218pp_fini(void)
219{
220 SK_LOCK_ASSERT_HELD();
221
222 if (__pp_inited) {
223 if (pp_compl_cache != NULL) {
224 skmem_cache_destroy(pp_compl_cache);
225 pp_compl_cache = NULL;
226 }
227 if (pp_flow_cache != NULL) {
228 skmem_cache_destroy(pp_flow_cache);
229 pp_flow_cache = NULL;
230 }
231 if (pp_opt_cache != NULL) {
232 skmem_cache_destroy(pp_opt_cache);
233 pp_opt_cache = NULL;
234 }
235
236 __pp_inited = 0;
237 }
238}
239
240static struct kern_pbufpool *
241pp_alloc(zalloc_flags_t how)
242{
243 struct kern_pbufpool *pp = zalloc_flags(pp_zone, how | Z_ZERO);
244
245 if (pp) {
		lck_mtx_init(&pp->pp_lock, &skmem_lock_grp, &skmem_lock_attr);
247 }
248 return pp;
249}
250
251static void
252pp_free(struct kern_pbufpool *pp)
253{
254 PP_LOCK_ASSERT_HELD(pp);
255
256 pp_destroy(pp);
257 PP_UNLOCK(pp);
258
259 SK_DF(SK_VERB_MEM, "pp 0x%llx FREE", SK_KVA(pp));
	lck_mtx_destroy(&pp->pp_lock, &skmem_lock_grp);
261 zfree(pp_zone, pp);
262}
263
264void
265pp_retain_locked(struct kern_pbufpool *pp)
266{
267 PP_LOCK_ASSERT_HELD(pp);
268
269 pp->pp_refcnt++;
270 ASSERT(pp->pp_refcnt != 0);
271}
272
273void
274pp_retain(struct kern_pbufpool *pp)
275{
276 PP_LOCK(pp);
277 pp_retain_locked(pp);
278 PP_UNLOCK(pp);
279}
280
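/*
 * Drop a reference; returns TRUE if this was the last one.  Note that
 * pp_free() is invoked with pp_lock held and drops the lock on its way
 * out, so on a TRUE return the caller must neither touch pp nor attempt
 * to unlock it (see pp_release() and pp_close() below).
 */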
281boolean_t
282pp_release_locked(struct kern_pbufpool *pp)
283{
284 uint32_t oldref = pp->pp_refcnt;
285
286 PP_LOCK_ASSERT_HELD(pp);
287
288 ASSERT(pp->pp_refcnt != 0);
289 if (--pp->pp_refcnt == 0) {
290 pp_free(pp);
291 }
292
293 return oldref == 1;
294}
295
296boolean_t
297pp_release(struct kern_pbufpool *pp)
298{
299 boolean_t lastref;
300
301 PP_LOCK(pp);
302 if (!(lastref = pp_release_locked(pp))) {
303 PP_UNLOCK(pp);
304 }
305
306 return lastref;
307}
308
309void
310pp_close(struct kern_pbufpool *pp)
311{
312 PP_LOCK(pp);
313 ASSERT(pp->pp_refcnt > 0);
314 ASSERT(!(pp->pp_flags & PPF_CLOSED));
315 pp->pp_flags |= PPF_CLOSED;
316 if (!pp_release_locked(pp)) {
317 PP_UNLOCK(pp);
318 }
319}
320
321void
322pp_regions_params_adjust(struct skmem_region_params *srp_array,
323 nexus_meta_type_t md_type, nexus_meta_subtype_t md_subtype, uint32_t md_cnt,
324 uint16_t max_frags, uint32_t buf_size, uint32_t large_buf_size,
325 uint32_t buf_cnt, uint32_t buf_seg_size, uint32_t flags)
326{
327 struct skmem_region_params *srp, *kmd_srp, *buf_srp, *kbft_srp,
328 *lbuf_srp;
329 uint32_t md_size = 0;
330 bool kernel_only = ((flags & PP_REGION_CONFIG_KERNEL_ONLY) != 0);
331 bool md_persistent = ((flags & PP_REGION_CONFIG_MD_PERSISTENT) != 0);
332 bool buf_persistent = ((flags & PP_REGION_CONFIG_BUF_PERSISTENT) != 0);
333 bool config_buflet = ((flags & PP_REGION_CONFIG_BUFLET) != 0);
334 bool md_magazine_enable = ((flags &
335 PP_REGION_CONFIG_MD_MAGAZINE_ENABLE) != 0);
336
337 ASSERT(max_frags != 0);
338
339 switch (md_type) {
340 case NEXUS_META_TYPE_QUANTUM:
341 md_size = NX_METADATA_QUANTUM_SZ;
342 break;
343 case NEXUS_META_TYPE_PACKET:
344 md_size = NX_METADATA_PACKET_SZ(max_frags);
345 break;
346 default:
347 VERIFY(0);
348 /* NOTREACHED */
349 __builtin_unreachable();
350 }
351
352 switch (flags & PP_REGION_CONFIG_BUF_IODIR_BIDIR) {
353 case PP_REGION_CONFIG_BUF_IODIR_IN:
354 kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
355 buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
356 lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
357 kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
358 break;
359 case PP_REGION_CONFIG_BUF_IODIR_OUT:
360 kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
361 buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
362 lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
363 kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
364 break;
365 case PP_REGION_CONFIG_BUF_IODIR_BIDIR:
366 default:
367 kmd_srp = &srp_array[SKMEM_REGION_KMD];
368 buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
369 lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
370 kbft_srp = &srp_array[SKMEM_REGION_KBFT];
371 break;
372 }
373
374 /* add preamble size to metadata obj size */
375 md_size += METADATA_PREAMBLE_SZ;
376 ASSERT(md_size >= NX_METADATA_OBJ_MIN_SZ);
377
378 /* configure kernel metadata region */
379 kmd_srp->srp_md_type = md_type;
380 kmd_srp->srp_md_subtype = md_subtype;
381 kmd_srp->srp_r_obj_cnt = md_cnt;
382 kmd_srp->srp_r_obj_size = md_size;
383 kmd_srp->srp_max_frags = max_frags;
384 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
385 if (md_persistent) {
386 kmd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
387 }
388 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
389 if (md_magazine_enable) {
390 kmd_srp->srp_cflags &= ~SKMEM_REGION_CR_NOMAGAZINES;
391 }
392 skmem_region_params_config(kmd_srp);
393
394 /* configure user metadata region */
395 srp = &srp_array[SKMEM_REGION_UMD];
396 if (!kernel_only) {
397 srp->srp_md_type = kmd_srp->srp_md_type;
398 srp->srp_md_subtype = kmd_srp->srp_md_subtype;
399 srp->srp_r_obj_cnt = kmd_srp->srp_c_obj_cnt;
400 srp->srp_r_obj_size = kmd_srp->srp_c_obj_size;
401 srp->srp_max_frags = kmd_srp->srp_max_frags;
402 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
403 if (md_persistent) {
404 srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
405 }
406 /*
407 * UMD is a mirrored region and object allocation operations
408 * are performed on the KMD objects.
409 */
410 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
411 skmem_region_params_config(srp);
412 ASSERT(srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
413 } else {
414 ASSERT(srp->srp_r_obj_cnt == 0);
415 ASSERT(srp->srp_r_obj_size == 0);
416 }
417
418 /* configure buffer region */
419 buf_srp->srp_r_obj_cnt = MAX(buf_cnt, kmd_srp->srp_c_obj_cnt);
420 buf_srp->srp_r_obj_size = buf_size;
421 buf_srp->srp_cflags &= ~SKMEM_REGION_CR_MONOLITHIC;
422 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
423 if (buf_persistent) {
424 buf_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
425 }
426 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
427 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_UREADONLY) == 0);
428 if ((flags & PP_REGION_CONFIG_BUF_UREADONLY) != 0) {
429 buf_srp->srp_cflags |= SKMEM_REGION_CR_UREADONLY;
430 }
431 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_KREADONLY) == 0);
432 if ((flags & PP_REGION_CONFIG_BUF_KREADONLY) != 0) {
433 buf_srp->srp_cflags |= SKMEM_REGION_CR_KREADONLY;
434 }
435 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_MONOLITHIC) == 0);
436 if ((flags & PP_REGION_CONFIG_BUF_MONOLITHIC) != 0) {
437 buf_srp->srp_cflags |= SKMEM_REGION_CR_MONOLITHIC;
438 }
	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_SEGPHYSCONTIG) == 0);
440 if ((flags & PP_REGION_CONFIG_BUF_SEGPHYSCONTIG) != 0) {
441 buf_srp->srp_cflags |= SKMEM_REGION_CR_SEGPHYSCONTIG;
442 }
443 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOCACHE) == 0);
444 if ((flags & PP_REGION_CONFIG_BUF_NOCACHE) != 0) {
445 buf_srp->srp_cflags |= SKMEM_REGION_CR_NOCACHE;
446 }
447 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_THREADSAFE) == 0);
448 if ((flags & PP_REGION_CONFIG_BUF_THREADSAFE) != 0) {
449 buf_srp->srp_cflags |= SKMEM_REGION_CR_THREADSAFE;
450 }
451 if (buf_seg_size != 0) {
452 buf_srp->srp_r_seg_size = buf_seg_size;
453 }
454 skmem_region_params_config(buf_srp);
455
456 /* configure large buffer region */
457 if (large_buf_size != 0) {
458 lbuf_srp->srp_r_obj_cnt = buf_srp->srp_r_obj_cnt;
459 lbuf_srp->srp_r_obj_size = large_buf_size;
460 lbuf_srp->srp_r_seg_size = buf_srp->srp_r_seg_size;
461 lbuf_srp->srp_cflags = buf_srp->srp_cflags;
462 skmem_region_params_config(lbuf_srp);
463 }
464
465 /* configure kernel buflet region */
466 if (config_buflet) {
467 ASSERT(md_type == NEXUS_META_TYPE_PACKET);
		/*
		 * Ideally we want the number of buflets to be
		 * "kmd_srp->srp_c_obj_cnt * (kmd_srp->srp_max_frags - 1)",
		 * so that there are enough buflets when multi-buflet and
		 * shared buffer objects are in use.  Currently, multi-buflet
		 * is used only by the user pool, which doesn't support
		 * shared buffer objects; hence, to reduce the number of
		 * objects, we restrict the number of buflets to the number
		 * of buffers.
		 */
478 kbft_srp->srp_r_obj_cnt = buf_srp->srp_c_obj_cnt +
479 lbuf_srp->srp_c_obj_cnt;
480 kbft_srp->srp_r_obj_size = MAX(sizeof(struct __kern_buflet_ext),
481 sizeof(struct __user_buflet));
482 kbft_srp->srp_cflags = kmd_srp->srp_cflags;
483 skmem_region_params_config(kbft_srp);
484 ASSERT(kbft_srp->srp_c_obj_cnt >= buf_srp->srp_c_obj_cnt +
485 lbuf_srp->srp_c_obj_cnt);
486 } else {
487 ASSERT(kbft_srp->srp_r_obj_cnt == 0);
488 ASSERT(kbft_srp->srp_r_obj_size == 0);
489 }
490
491 /* configure user buflet region */
492 srp = &srp_array[SKMEM_REGION_UBFT];
493 if (config_buflet && !kernel_only) {
494 srp->srp_r_obj_cnt = kbft_srp->srp_c_obj_cnt;
495 srp->srp_r_obj_size = kbft_srp->srp_c_obj_size;
496 srp->srp_cflags = srp_array[SKMEM_REGION_UMD].srp_cflags;
497 skmem_region_params_config(srp);
498 ASSERT(srp->srp_c_obj_cnt == kbft_srp->srp_c_obj_cnt);
499 } else {
500 ASSERT(srp->srp_r_obj_cnt == 0);
501 ASSERT(srp->srp_r_obj_size == 0);
502 }
503
	/* make sure each metadata object can be paired with a buffer */
505 ASSERT(kmd_srp->srp_c_obj_cnt <= buf_srp->srp_c_obj_cnt);
506}
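/*
 * A hypothetical caller sizes the regions before creating the pool, e.g.
 * (illustrative sketch only; actual counts and flags are nexus-specific):
 *
 *	struct skmem_region_params srp[SKMEM_REGIONS];
 *	...
 *	pp_regions_params_adjust(srp, NEXUS_META_TYPE_PACKET,
 *	    NEXUS_META_SUBTYPE_RAW, md_cnt, max_frags, buf_size, 0,
 *	    buf_cnt, 0, PP_REGION_CONFIG_KERNEL_ONLY);
 *
 * and then passes the adjusted srp array to pp_create() below.
 */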
507
508SK_NO_INLINE_ATTRIBUTE
509static int
510pp_metadata_construct(struct __kern_quantum *kqum, struct __user_quantum *uqum,
511 obj_idx_t midx, struct kern_pbufpool *pp, uint32_t skmflag, uint16_t bufcnt,
512 bool raw, struct skmem_obj **blist)
513{
514 struct __kern_buflet *kbuf;
515 mach_vm_address_t baddr = 0;
516 uint16_t *pbufs_cnt, *pbufs_max;
517 uint16_t i;
518
519 ASSERT(bufcnt == 1 || PP_HAS_BUFFER_ON_DEMAND(pp));
520
521 /* construct {user,kernel} metadata */
522 switch (pp->pp_md_type) {
523 case NEXUS_META_TYPE_PACKET: {
524 struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
525 struct __user_packet *upkt = SK_PTR_ADDR_UPKT(uqum);
526 struct __packet_opt *opt;
527 struct __flow *flow;
528 struct __packet_compl *compl;
529 uint64_t pflags;
530
531 if (raw) {
532 opt = skmem_cache_alloc(pp_opt_cache, SKMEM_SLEEP);
533 flow = skmem_cache_alloc(pp_flow_cache, SKMEM_SLEEP);
534 compl = skmem_cache_alloc(pp_compl_cache, SKMEM_SLEEP);
535 pflags = (PKT_F_OPT_ALLOC | PKT_F_FLOW_ALLOC |
536 PKT_F_TX_COMPL_ALLOC);
537 } else {
538 ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
539 kpkt->pkt_com_opt != NULL);
540 opt = kpkt->pkt_com_opt;
541 ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
542 kpkt->pkt_flow != NULL);
543 flow = kpkt->pkt_flow;
544 ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
545 kpkt->pkt_tx_compl != NULL);
546 compl = kpkt->pkt_tx_compl;
547 pflags = kpkt->pkt_pflags;
548 }
549 /* will be adjusted below as part of allocating buffer(s) */
550 _CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
551 _CASSERT(sizeof(kpkt->pkt_bufs_max) == sizeof(uint16_t));
552 pbufs_cnt = __DECONST(uint16_t *, &kpkt->pkt_bufs_cnt);
553 pbufs_max = __DECONST(uint16_t *, &kpkt->pkt_bufs_max);
554
555 /* kernel (and user) packet */
556 KPKT_CTOR(kpkt, pflags, opt, flow, compl, midx,
557 upkt, pp, 0, pp->pp_max_frags, 0);
558 break;
559 }
560 default:
561 ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
562 VERIFY(bufcnt == 1);
563 /* TODO: point these to quantum's once they're defined */
564 pbufs_cnt = pbufs_max = NULL;
565 /* kernel quantum */
566 KQUM_CTOR(kqum, midx, uqum, pp, 0);
567 break;
568 }
569
570 kbuf = kqum->qum_buf;
571 for (i = 0; i < bufcnt; i++) {
572 struct skmem_obj_info oib;
573
574 if (!PP_HAS_BUFFER_ON_DEMAND(pp)) {
575 ASSERT(i == 0);
576 ASSERT(*blist == NULL);
577 /*
578 * quantum has a native buflet, so we only need a
579 * buffer to be allocated and attached to the buflet.
580 */
			baddr = pp_alloc_buffer_common(pp, &oib, skmflag,
			    false);
583 if (__improbable(baddr == 0)) {
584 goto fail;
585 }
586 KBUF_CTOR(kbuf, baddr, SKMEM_OBJ_IDX_REG(&oib),
587 SKMEM_OBJ_BUFCTL(&oib), pp, false);
588 baddr = 0;
589 } else {
590 /*
591 * we use pre-constructed buflets with attached buffers.
592 */
593 struct __kern_buflet *pkbuf = kbuf;
594 struct skmem_obj *blistn;
595
596 ASSERT(pkbuf != NULL);
597 kbuf = (kern_buflet_t)*blist;
598 if (__improbable(kbuf == NULL)) {
599 SK_DF(SK_VERB_MEM, "failed to get buflet,"
600 " pp 0x%llx", SK_KVA(pp));
601 goto fail;
602 }
603
604#if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
605 /* Checking to ensure the object address is tagged */
606 ASSERT((vm_offset_t)kbuf !=
607 vm_memtag_canonicalize_address((vm_offset_t)kbuf));
608#endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */
609
610 blistn = (*blist)->mo_next;
611 (*blist)->mo_next = NULL;
612
613 KBUF_EXT_INIT(kbuf, pp);
614 KBUF_LINK(pkbuf, kbuf);
615 *blist = blistn;
616 }
617
618 /* adjust buffer count accordingly */
619 if (__probable(pbufs_cnt != NULL)) {
620 *pbufs_cnt += 1;
621 ASSERT(*pbufs_cnt <= *pbufs_max);
622 }
623 }
624
625 ASSERT(!PP_KERNEL_ONLY(pp) || (kqum->qum_qflags & QUM_F_KERNEL_ONLY));
626 ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
627 SK_DF(SK_VERB_MEM, "pp 0x%llx pkt 0x%llx bufcnt %d buf 0x%llx",
628 SK_KVA(pp), SK_KVA(kqum), bufcnt, SK_KVA(baddr));
629 return 0;
630
631fail:
632 ASSERT(bufcnt != 0 && baddr == 0);
633 pp_metadata_destruct(kqum, pp, raw);
634 return ENOMEM;
635}
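/*
 * pp_metadata_ctor_common() drives the constructor above: it resolves the
 * kernel (and, for shared pools, user) object addresses from the skmem
 * object info, pre-allocates the buflet chain when the pool uses buffers
 * on demand, and initializes the user metadata redzone.
 */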
636
637static int
638pp_metadata_ctor_common(struct skmem_obj_info *oi0,
639 struct skmem_obj_info *oim0, struct kern_pbufpool *pp, uint32_t skmflag,
640 bool no_buflet)
641{
642 struct skmem_obj_info _oi, _oim;
643 struct skmem_obj_info *oi, *oim;
644 struct __kern_quantum *kqum;
645 struct __user_quantum *uqum;
646 uint16_t bufcnt = (no_buflet ? 0 : pp->pp_max_frags);
647 struct skmem_obj *blist = NULL;
648 int error;
649
650#if (DEVELOPMENT || DEBUG)
651 uint64_t mtbf = skmem_region_get_mtbf();
652 /*
653 * MTBF is applicable only for non-blocking allocations here.
654 */
655 if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
656 (skmflag & SKMEM_NOSLEEP))) {
657 SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
658 net_update_uptime();
659 return ENOMEM;
660 }
661#endif /* (DEVELOPMENT || DEBUG) */
662
663 /*
664 * Note that oi0 and oim0 may be stored inside the object itself;
665 * if so, copy them to local variables before constructing. We
666 * don't use PPF_BATCH to test as the allocator may be allocating
667 * storage space differently depending on the number of objects.
668 */
669 if (__probable((uintptr_t)oi0 >= (uintptr_t)SKMEM_OBJ_ADDR(oi0) &&
670 ((uintptr_t)oi0 + sizeof(*oi0)) <=
671 ((uintptr_t)SKMEM_OBJ_ADDR(oi0) + SKMEM_OBJ_SIZE(oi0)))) {
672 oi = &_oi;
673 *oi = *oi0;
674 if (__probable(oim0 != NULL)) {
675 oim = &_oim;
676 *oim = *oim0;
677 } else {
678 oim = NULL;
679 }
680 } else {
681 oi = oi0;
682 oim = oim0;
683 }
684
685 kqum = SK_PTR_ADDR_KQUM((uintptr_t)SKMEM_OBJ_ADDR(oi) +
686 METADATA_PREAMBLE_SZ);
687
688 if (__probable(!PP_KERNEL_ONLY(pp))) {
689 ASSERT(oim != NULL && SKMEM_OBJ_ADDR(oim) != NULL);
690 ASSERT(SKMEM_OBJ_SIZE(oi) == SKMEM_OBJ_SIZE(oim));
691 uqum = SK_PTR_ADDR_UQUM((uintptr_t)SKMEM_OBJ_ADDR(oim) +
692 METADATA_PREAMBLE_SZ);
693 } else {
694 ASSERT(oim == NULL);
695 uqum = NULL;
696 }
697
698 if (oim != NULL) {
699 /* initialize user metadata redzone */
700 struct __metadata_preamble *mdp = SKMEM_OBJ_ADDR(oim);
701 mdp->mdp_redzone =
702 (SKMEM_OBJ_ROFF(oim) + METADATA_PREAMBLE_SZ) ^
703 __ch_umd_redzone_cookie;
704 }
705
706 /* allocate (constructed) buflet(s) with buffer(s) attached */
707 if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0) {
		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
		    bufcnt, skmflag);
710 }
711
	error = pp_metadata_construct(kqum, uqum, SKMEM_OBJ_IDX_REG(oi), pp,
	    skmflag, bufcnt, TRUE, &blist);
714 if (__improbable(blist != NULL)) {
715 skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist);
716 blist = NULL;
717 }
718 return error;
719}
720
721static int
722pp_metadata_ctor_no_buflet(struct skmem_obj_info *oi0,
723 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
724{
	return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, true);
726}
727
728static int
729pp_metadata_ctor_max_buflet(struct skmem_obj_info *oi0,
730 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
731{
	return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, false);
733}
734
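/*
 * Tear down a constructed metadata object: detach its buflet chain onto the
 * caller-supplied default/large free lists so they can be batch-freed, and,
 * when the object is headed back to the slab ("raw"), also release the
 * per-packet option, flow and completion areas.
 */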
735__attribute__((always_inline))
736static void
737pp_metadata_destruct_common(struct __kern_quantum *kqum,
738 struct kern_pbufpool *pp, bool raw, struct skmem_obj **blist_def,
739 struct skmem_obj **blist_large)
740{
741 struct __kern_buflet *kbuf, *nbuf;
742 struct skmem_obj *p_blist_def = NULL, *p_blist_large = NULL;
743 struct skmem_obj **pp_blist_def = &p_blist_def;
744 struct skmem_obj **pp_blist_large = &p_blist_large;
745 uint16_t bufcnt, i = 0;
746 bool first_buflet_empty;
747
748 ASSERT(blist_def != NULL);
749 ASSERT(blist_large != NULL);
750
751 switch (pp->pp_md_type) {
752 case NEXUS_META_TYPE_PACKET: {
753 struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
754
755 ASSERT(kpkt->pkt_user != NULL || PP_KERNEL_ONLY(pp));
756 ASSERT(kpkt->pkt_qum.qum_pp == pp);
757 ASSERT(METADATA_TYPE(kpkt) == pp->pp_md_type);
758 ASSERT(METADATA_SUBTYPE(kpkt) == pp->pp_md_subtype);
759 ASSERT(METADATA_IDX(kpkt) != OBJ_IDX_NONE);
760 ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
761 ASSERT(kpkt->pkt_bufs_cnt <= kpkt->pkt_bufs_max);
762 ASSERT(kpkt->pkt_bufs_max == pp->pp_max_frags);
763 _CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
764 bufcnt = kpkt->pkt_bufs_cnt;
765 kbuf = &kqum->qum_buf[0];
766 /*
767 * special handling for empty first buflet.
768 */
769 first_buflet_empty = (kbuf->buf_addr == 0);
770 *__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = 0;
771 break;
772 }
773 default:
774 ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
775 ASSERT(kqum->qum_user != NULL || PP_KERNEL_ONLY(pp));
776 ASSERT(kqum->qum_pp == pp);
777 ASSERT(METADATA_TYPE(kqum) == pp->pp_md_type);
778 ASSERT(METADATA_SUBTYPE(kqum) == pp->pp_md_subtype);
779 ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
780 ASSERT(kqum->qum_ksd == NULL);
781 kbuf = &kqum->qum_buf[0];
782 /*
783 * XXX: Special handling for quantum as we don't currently
784 * define bufs_{cnt,max} there. Given that we support at
785 * most only 1 buflet for now, check if buf_addr is non-NULL.
786 * See related code in pp_metadata_construct().
787 */
788 first_buflet_empty = (kbuf->buf_addr == 0);
789 bufcnt = first_buflet_empty ? 0 : 1;
790 break;
791 }
792
793 nbuf = __DECONST(struct __kern_buflet *, kbuf->buf_nbft_addr);
794 BUF_NBFT_ADDR(kbuf, 0);
795 BUF_NBFT_IDX(kbuf, OBJ_IDX_NONE);
796 if (!first_buflet_empty) {
797 pp_free_buflet_common(pp, kbuf);
798 ++i;
799 }
800
801 while (nbuf != NULL) {
802 if (BUFLET_HAS_LARGE_BUF(nbuf)) {
803 *pp_blist_large = (struct skmem_obj *)(void *)nbuf;
804 pp_blist_large =
805 &((struct skmem_obj *)(void *)nbuf)->mo_next;
806 } else {
807 *pp_blist_def = (struct skmem_obj *)(void *)nbuf;
808 pp_blist_def =
809 &((struct skmem_obj *)(void *)nbuf)->mo_next;
810 }
811 BUF_NBFT_IDX(nbuf, OBJ_IDX_NONE);
812 nbuf = __DECONST(struct __kern_buflet *, nbuf->buf_nbft_addr);
813 ++i;
814 }
815
816 ASSERT(i == bufcnt);
817
818 if (p_blist_def != NULL) {
819 *pp_blist_def = *blist_def;
820 *blist_def = p_blist_def;
821 }
822 if (p_blist_large != NULL) {
823 *pp_blist_large = *blist_large;
824 *blist_large = p_blist_large;
825 }
826
827 /* if we're about to return this object to the slab, clean it up */
828 if (raw) {
829 switch (pp->pp_md_type) {
830 case NEXUS_META_TYPE_PACKET: {
831 struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
832
833 ASSERT(kpkt->pkt_com_opt != NULL ||
834 !(kpkt->pkt_pflags & PKT_F_OPT_ALLOC));
835 if (kpkt->pkt_com_opt != NULL) {
836 ASSERT(kpkt->pkt_pflags & PKT_F_OPT_ALLOC);
837 skmem_cache_free(pp_opt_cache,
838 kpkt->pkt_com_opt);
839 kpkt->pkt_com_opt = NULL;
840 }
841 ASSERT(kpkt->pkt_flow != NULL ||
842 !(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC));
843 if (kpkt->pkt_flow != NULL) {
844 ASSERT(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC);
845 skmem_cache_free(pp_flow_cache, kpkt->pkt_flow);
846 kpkt->pkt_flow = NULL;
847 }
848 ASSERT(kpkt->pkt_tx_compl != NULL ||
849 !(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC));
850 if (kpkt->pkt_tx_compl != NULL) {
851 ASSERT(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC);
852 skmem_cache_free(pp_compl_cache,
853 kpkt->pkt_tx_compl);
854 kpkt->pkt_tx_compl = NULL;
855 }
856 kpkt->pkt_pflags = 0;
857 break;
858 }
859 default:
860 ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_QUANTUM);
861 /* nothing to do for quantum (yet) */
862 break;
863 }
864 }
865}
866
867__attribute__((always_inline))
868static void
869pp_metadata_destruct(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
870 bool raw)
871{
872 struct skmem_obj *blist_def = NULL, *blist_large = NULL;
873
	pp_metadata_destruct_common(kqum, pp, raw, &blist_def, &blist_large);
875 if (blist_def != NULL) {
876 skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
877 }
878 if (blist_large != NULL) {
879 skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
880 }
881}
882
883static void
884pp_metadata_dtor(void *addr, void *arg)
885{
	pp_metadata_destruct(SK_PTR_ADDR_KQUM((uintptr_t)addr +
	    METADATA_PREAMBLE_SZ), arg, TRUE);
888}
889
890static void
891pp_buf_seg_ctor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
892{
893 struct kern_pbufpool *pp = arg;
894
895 if (pp->pp_pbuf_seg_ctor != NULL) {
896 pp->pp_pbuf_seg_ctor(pp, sg, md);
897 }
898}
899
900static void
901pp_buf_seg_dtor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
902{
903 struct kern_pbufpool *pp = arg;
904
905 if (pp->pp_pbuf_seg_dtor != NULL) {
906 pp->pp_pbuf_seg_dtor(pp, sg, md);
907 }
908}
909
910static int
911pp_buflet_metadata_ctor_common(struct skmem_obj_info *oi0,
912 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag, bool large)
913{
914#pragma unused (skmflag)
915 struct kern_pbufpool *pp = (struct kern_pbufpool *)arg;
916 struct __kern_buflet *kbft;
917 struct __user_buflet *ubft;
918 struct skmem_obj_info oib;
919 mach_vm_address_t baddr;
920 obj_idx_t oi_idx_reg;
921
	baddr = pp_alloc_buffer_common(pp, &oib, skmflag, large);
923 if (__improbable(baddr == 0)) {
924 return ENOMEM;
925 }
	/*
	 * Note that oi0 and oim0 may be stored inside the object itself,
	 * so copy what is required to local variables before constructing.
	 */
930 oi_idx_reg = SKMEM_OBJ_IDX_REG(oi0);
931 kbft = SKMEM_OBJ_ADDR(oi0);
932
933 if (__probable(!PP_KERNEL_ONLY(pp))) {
934 ASSERT(oim0 != NULL && SKMEM_OBJ_ADDR(oim0) != NULL);
935 ASSERT(SKMEM_OBJ_SIZE(oi0) == SKMEM_OBJ_SIZE(oim0));
936 ASSERT(oi_idx_reg == SKMEM_OBJ_IDX_REG(oim0));
937 ASSERT(SKMEM_OBJ_IDX_SEG(oi0) == SKMEM_OBJ_IDX_SEG(oim0));
938 ubft = SKMEM_OBJ_ADDR(oim0);
939 } else {
940 ASSERT(oim0 == NULL);
941 ubft = NULL;
942 }
943 KBUF_EXT_CTOR(kbft, ubft, baddr, SKMEM_OBJ_IDX_REG(&oib),
944 SKMEM_OBJ_BUFCTL(&oib), oi_idx_reg, pp, large);
945 return 0;
946}
947
948static int
949pp_buflet_default_buffer_metadata_ctor(struct skmem_obj_info *oi0,
950 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
951{
952 return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, false);
953}
954
955static int
956pp_buflet_large_buffer_metadata_ctor(struct skmem_obj_info *oi0,
957 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
958{
959 return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, true);
960}
961
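/*
 * Buflet destructor: detach the buffer from the buflet.  Since a buffer may
 * be shared by multiple buflets, the backing object is returned to its cache
 * only once KBUF_DTOR() reports a use count of zero.
 */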
962static void
963pp_buflet_metadata_dtor(void *addr, void *arg)
964{
965 struct __kern_buflet *kbft = addr;
966 void *objaddr = kbft->buf_objaddr;
967 struct kern_pbufpool *pp = arg;
968 uint32_t usecnt = 0;
969 bool large = BUFLET_HAS_LARGE_BUF(kbft);
970
971 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
	/*
	 * Don't assert (buf_nbft_addr == 0) here, as a constructed buflet
	 * may have this field set to a non-zero value; buf_nbft_addr
	 * (__buflet_next) is used by the skmem batch alloc for chaining
	 * buflets.  To ensure that the freed buflet was not part of a
	 * chain, we instead assert (buf_nbft_idx == OBJ_IDX_NONE).
	 */
980 ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
981 ASSERT(((struct __kern_buflet_ext *)kbft)->kbe_buf_upp_link.sle_next ==
982 NULL);
983 ASSERT(kbft->buf_addr != 0);
984 ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
985 ASSERT(kbft->buf_ctl != NULL);
986
987 KBUF_DTOR(kbft, usecnt);
988 SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u", SK_KVA(pp),
989 SK_KVA(objaddr), usecnt);
990 if (__probable(usecnt == 0)) {
991 skmem_cache_free(large ? PP_BUF_CACHE_LARGE(pp) :
992 PP_BUF_CACHE_DEF(pp), objaddr);
993 }
994}
995
996struct kern_pbufpool *
997pp_create(const char *name, struct skmem_region_params *srp_array,
998 pbuf_seg_ctor_fn_t buf_seg_ctor, pbuf_seg_dtor_fn_t buf_seg_dtor,
999 const void *ctx, pbuf_ctx_retain_fn_t ctx_retain,
1000 pbuf_ctx_release_fn_t ctx_release, uint32_t ppcreatef)
1001{
1002 struct kern_pbufpool *pp = NULL;
1003 uint32_t md_size, def_buf_obj_size;
1004 uint32_t def_buf_size, large_buf_size;
1005 nexus_meta_type_t md_type;
1006 nexus_meta_subtype_t md_subtype;
1007 uint32_t md_cflags;
1008 uint16_t max_frags;
1009 char cname[64];
1010 struct skmem_region_params *kmd_srp;
1011 struct skmem_region_params *buf_srp;
1012 struct skmem_region_params *kbft_srp;
1013 struct skmem_region_params *umd_srp = NULL;
1014 struct skmem_region_params *ubft_srp = NULL;
1015 struct skmem_region_params *lbuf_srp = NULL;
1016
1017 /* buf_seg_{ctor,dtor} pair must be either NULL or non-NULL */
1018 ASSERT(!(!(buf_seg_ctor == NULL && buf_seg_dtor == NULL) &&
1019 ((buf_seg_ctor == NULL) ^ (buf_seg_dtor == NULL))));
1020
1021 /* ctx{,_retain,_release} must be either ALL NULL or ALL non-NULL */
1022 ASSERT((ctx == NULL && ctx_retain == NULL && ctx_release == NULL) ||
1023 (ctx != NULL && ctx_retain != NULL && ctx_release != NULL));
1024
1025 if (srp_array[SKMEM_REGION_KMD].srp_c_obj_cnt != 0) {
1026 kmd_srp = &srp_array[SKMEM_REGION_KMD];
1027 buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
1028 lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
1029 kbft_srp = &srp_array[SKMEM_REGION_KBFT];
1030 } else if (srp_array[SKMEM_REGION_RXKMD].srp_c_obj_cnt != 0) {
1031 kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
1032 buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
1033 lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
1034 kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
1035 } else {
1036 VERIFY(srp_array[SKMEM_REGION_TXKMD].srp_c_obj_cnt != 0);
1037 kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
1038 buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
1039 lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
1040 kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
1041 }
1042
1043 VERIFY(kmd_srp->srp_c_obj_size != 0);
1044 VERIFY(buf_srp->srp_c_obj_cnt != 0);
1045 VERIFY(buf_srp->srp_c_obj_size != 0);
1046
1047 if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
1048 VERIFY(kbft_srp->srp_c_obj_cnt != 0);
1049 VERIFY(kbft_srp->srp_c_obj_size != 0);
1050 } else {
1051 kbft_srp = NULL;
1052 }
1053
1054 if ((ppcreatef & PPCREATEF_KERNEL_ONLY) == 0) {
1055 umd_srp = &srp_array[SKMEM_REGION_UMD];
1056 ASSERT(umd_srp->srp_c_obj_size == kmd_srp->srp_c_obj_size);
1057 ASSERT(umd_srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
1058 ASSERT(umd_srp->srp_c_seg_size == kmd_srp->srp_c_seg_size);
1059 ASSERT(umd_srp->srp_seg_cnt == kmd_srp->srp_seg_cnt);
1060 ASSERT(umd_srp->srp_md_type == kmd_srp->srp_md_type);
1061 ASSERT(umd_srp->srp_md_subtype == kmd_srp->srp_md_subtype);
1062 ASSERT(umd_srp->srp_max_frags == kmd_srp->srp_max_frags);
1063 ASSERT((umd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
1064 (kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
1065 if (kbft_srp != NULL) {
1066 ubft_srp = &srp_array[SKMEM_REGION_UBFT];
1067 ASSERT(ubft_srp->srp_c_obj_size ==
1068 kbft_srp->srp_c_obj_size);
1069 ASSERT(ubft_srp->srp_c_obj_cnt ==
1070 kbft_srp->srp_c_obj_cnt);
1071 ASSERT(ubft_srp->srp_c_seg_size ==
1072 kbft_srp->srp_c_seg_size);
1073 ASSERT(ubft_srp->srp_seg_cnt == kbft_srp->srp_seg_cnt);
1074 }
1075 }
1076
1077 md_size = kmd_srp->srp_r_obj_size;
1078 md_type = kmd_srp->srp_md_type;
1079 md_subtype = kmd_srp->srp_md_subtype;
1080 max_frags = kmd_srp->srp_max_frags;
1081 def_buf_obj_size = buf_srp->srp_c_obj_size;
1082 def_buf_size = def_buf_obj_size;
1083 large_buf_size = lbuf_srp->srp_c_obj_size;
1084
1085#if (DEBUG || DEVELOPMENT)
1086 ASSERT(def_buf_obj_size != 0);
1087 ASSERT(md_type > NEXUS_META_TYPE_INVALID &&
1088 md_type <= NEXUS_META_TYPE_MAX);
1089 if (md_type == NEXUS_META_TYPE_QUANTUM) {
1090 ASSERT(max_frags == 1);
1091 ASSERT(md_size >=
1092 (METADATA_PREAMBLE_SZ + NX_METADATA_QUANTUM_SZ));
1093 } else {
1094 ASSERT(max_frags >= 1);
1095 ASSERT(md_type == NEXUS_META_TYPE_PACKET);
1096 ASSERT(md_size >= (METADATA_PREAMBLE_SZ +
1097 NX_METADATA_PACKET_SZ(max_frags)));
1098 }
1099 ASSERT(md_subtype > NEXUS_META_SUBTYPE_INVALID &&
1100 md_subtype <= NEXUS_META_SUBTYPE_MAX);
1101#endif /* DEBUG || DEVELOPMENT */
1102
	pp = pp_alloc(Z_WAITOK);
1104
	(void) snprintf((char *)pp->pp_name, sizeof(pp->pp_name),
	    "skywalk.pp.%s", name);
1107
1108 pp->pp_ctx = __DECONST(void *, ctx);
1109 pp->pp_ctx_retain = ctx_retain;
1110 pp->pp_ctx_release = ctx_release;
1111 if (pp->pp_ctx != NULL) {
1112 pp->pp_ctx_retain(pp->pp_ctx);
1113 }
1114
1115 pp->pp_pbuf_seg_ctor = buf_seg_ctor;
1116 pp->pp_pbuf_seg_dtor = buf_seg_dtor;
1117 PP_BUF_SIZE_DEF(pp) = def_buf_size;
1118 PP_BUF_OBJ_SIZE_DEF(pp) = def_buf_obj_size;
1119 PP_BUF_SIZE_LARGE(pp) = large_buf_size;
1120 PP_BUF_OBJ_SIZE_LARGE(pp) = lbuf_srp->srp_c_obj_size;
1121 pp->pp_md_type = md_type;
1122 pp->pp_md_subtype = md_subtype;
1123 pp->pp_max_frags = max_frags;
1124 if (ppcreatef & PPCREATEF_EXTERNAL) {
1125 pp->pp_flags |= PPF_EXTERNAL;
1126 }
1127 if (ppcreatef & PPCREATEF_TRUNCATED_BUF) {
1128 pp->pp_flags |= PPF_TRUNCATED_BUF;
1129 }
1130 if (ppcreatef & PPCREATEF_KERNEL_ONLY) {
1131 pp->pp_flags |= PPF_KERNEL;
1132 }
1133 if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
1134 pp->pp_flags |= PPF_BUFFER_ON_DEMAND;
1135 }
1136 if (ppcreatef & PPCREATEF_DYNAMIC) {
1137 pp->pp_flags |= PPF_DYNAMIC;
1138 }
1139 if (lbuf_srp->srp_c_obj_cnt > 0) {
1140 ASSERT(lbuf_srp->srp_c_obj_size != 0);
1141 pp->pp_flags |= PPF_LARGE_BUF;
1142 }
1143
1144 pp_retain(pp);
1145
1146 md_cflags = ((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ?
1147 SKMEM_CR_NOMAGAZINES : 0);
1148 md_cflags |= SKMEM_CR_BATCH;
1149 pp->pp_flags |= PPF_BATCH;
1150
1151 if (pp->pp_flags & PPF_DYNAMIC) {
1152 md_cflags |= SKMEM_CR_DYNAMIC;
1153 }
1154
1155 if (umd_srp != NULL && (pp->pp_umd_region =
1156 skmem_region_create(name, umd_srp, NULL, NULL, NULL)) == NULL) {
1157 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1158 pp->pp_name, SK_KVA(pp), umd_srp->srp_name);
1159 goto failed;
1160 }
1161
1162 if ((pp->pp_kmd_region = skmem_region_create(name, kmd_srp, NULL, NULL,
1163 NULL)) == NULL) {
1164 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1165 pp->pp_name, SK_KVA(pp), kmd_srp->srp_name);
1166 goto failed;
1167 }
1168
1169 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1170 VERIFY((kbft_srp != NULL) && (kbft_srp->srp_c_obj_cnt > 0));
1171 if (!PP_KERNEL_ONLY(pp)) {
1172 VERIFY((ubft_srp != NULL) &&
1173 (ubft_srp->srp_c_obj_cnt > 0));
1174 }
1175 }
	/*
	 * The magazines layer and persistency attributes of the metadata
	 * regions {KMD, KBFT, UBFT} must match.
	 */
1180 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1181 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ==
1182 (kbft_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES));
1183 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
1184 (kbft_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
1185 }
1186
1187 if (PP_HAS_BUFFER_ON_DEMAND(pp) && !PP_KERNEL_ONLY(pp)) {
1188 if ((pp->pp_ubft_region = skmem_region_create(name, ubft_srp,
1189 NULL, NULL, NULL)) == NULL) {
1190 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1191 pp->pp_name, SK_KVA(pp), ubft_srp->srp_name);
1192 goto failed;
1193 }
1194 }
1195
1196 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1197 if ((pp->pp_kbft_region = skmem_region_create(name,
1198 kbft_srp, NULL, NULL, NULL)) == NULL) {
1199 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1200 pp->pp_name, SK_KVA(pp), kbft_srp->srp_name);
1201 goto failed;
1202 }
1203 }
1204
1205 if (!PP_KERNEL_ONLY(pp)) {
1206 skmem_region_mirror(pp->pp_kmd_region, pp->pp_umd_region);
1207 }
1208 if (!PP_KERNEL_ONLY(pp) && pp->pp_ubft_region != NULL) {
1209 ASSERT(pp->pp_kbft_region != NULL);
1210 skmem_region_mirror(pp->pp_kbft_region, pp->pp_ubft_region);
1211 }
1212
1213 /*
1214 * Create the metadata cache; magazines layer is determined by caller.
1215 */
	(void) snprintf(cname, sizeof(cname), "kmd.%s", name);
1217 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1218 pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
1219 pp_metadata_ctor_no_buflet, pp_metadata_dtor, NULL, pp,
1220 pp->pp_kmd_region, md_cflags);
1221 } else {
1222 pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
1223 pp_metadata_ctor_max_buflet, pp_metadata_dtor, NULL, pp,
1224 pp->pp_kmd_region, md_cflags);
1225 }
1226
1227 if (pp->pp_kmd_cache == NULL) {
1228 SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1229 pp->pp_name, SK_KVA(pp), cname);
1230 goto failed;
1231 }
1232
1233 /*
1234 * Create the buflet metadata cache
1235 */
1236 if (pp->pp_kbft_region != NULL) {
		(void) snprintf(cname, sizeof(cname), "kbft_def.%s", name);
1238 PP_KBFT_CACHE_DEF(pp) = skmem_cache_create(cname,
1239 kbft_srp->srp_c_obj_size, 0,
1240 pp_buflet_default_buffer_metadata_ctor,
1241 pp_buflet_metadata_dtor, NULL, pp, pp->pp_kbft_region,
1242 md_cflags);
1243
1244 if (PP_KBFT_CACHE_DEF(pp) == NULL) {
1245 SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1246 pp->pp_name, SK_KVA(pp), cname);
1247 goto failed;
1248 }
1249
1250 if (PP_HAS_LARGE_BUF(pp)) {
			/*
			 * For now, enable aggressive memory reclaim on the
			 * large buflet cache only.
			 */
1252 md_cflags |= SKMEM_CR_RECLAIM;
			(void) snprintf(cname, sizeof(cname), "kbft_large.%s",
			    name);
1255 PP_KBFT_CACHE_LARGE(pp) = skmem_cache_create(cname,
1256 kbft_srp->srp_c_obj_size, 0,
1257 pp_buflet_large_buffer_metadata_ctor,
1258 pp_buflet_metadata_dtor,
1259 NULL, pp, pp->pp_kbft_region, md_cflags);
1260
1261 if (PP_KBFT_CACHE_LARGE(pp) == NULL) {
1262 SK_ERR("\"%s\" (0x%llx) failed to "
1263 "create \"%s\" cache", pp->pp_name,
1264 SK_KVA(pp), cname);
1265 goto failed;
1266 }
1267 }
1268 }
1269
1270 if ((PP_BUF_REGION_DEF(pp) = skmem_region_create(name,
1271 buf_srp, pp_buf_seg_ctor, pp_buf_seg_dtor, pp)) == NULL) {
1272 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1273 pp->pp_name, SK_KVA(pp), buf_srp->srp_name);
1274 goto failed;
1275 }
1276
1277 if (PP_HAS_LARGE_BUF(pp)) {
1278 PP_BUF_REGION_LARGE(pp) = skmem_region_create(name, lbuf_srp,
1279 pp_buf_seg_ctor, pp_buf_seg_dtor, pp);
1280 if (PP_BUF_REGION_LARGE(pp) == NULL) {
1281 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1282 pp->pp_name, SK_KVA(pp), lbuf_srp->srp_name);
1283 goto failed;
1284 }
1285 }
1286
1287 /*
1288 * Create the buffer object cache without the magazines layer.
1289 * We rely on caching the constructed metadata object instead.
1290 */
	(void) snprintf(cname, sizeof(cname), "buf_def.%s", name);
1292 if ((PP_BUF_CACHE_DEF(pp) = skmem_cache_create(cname, def_buf_obj_size,
1293 0, NULL, NULL, NULL, pp, PP_BUF_REGION_DEF(pp),
1294 SKMEM_CR_NOMAGAZINES)) == NULL) {
1295 SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1296 pp->pp_name, SK_KVA(pp), cname);
1297 goto failed;
1298 }
1299
1300 if (PP_BUF_REGION_LARGE(pp) != NULL) {
		(void) snprintf(cname, sizeof(cname), "buf_large.%s", name);
1302 if ((PP_BUF_CACHE_LARGE(pp) = skmem_cache_create(cname,
1303 lbuf_srp->srp_c_obj_size, 0, NULL, NULL, NULL, pp,
1304 PP_BUF_REGION_LARGE(pp), SKMEM_CR_NOMAGAZINES)) == NULL) {
1305 SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1306 pp->pp_name, SK_KVA(pp), cname);
1307 goto failed;
1308 }
1309 }
1310
1311 return pp;
1312
1313failed:
1314 if (pp != NULL) {
1315 if (pp->pp_ctx != NULL) {
1316 pp->pp_ctx_release(pp->pp_ctx);
1317 pp->pp_ctx = NULL;
1318 }
1319 pp_close(pp);
1320 }
1321
1322 return NULL;
1323}
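/*
 * A minimal (hypothetical) lifecycle for a pool created above:
 *
 *	pp = pp_create(name, srp, NULL, NULL, ctx, ctx_retain, ctx_release,
 *	    PPCREATEF_KERNEL_ONLY);
 *	if (pp != NULL) {
 *		...allocate and free packets against pp...
 *		pp_close(pp);	// drops the creator's reference
 *	}
 *
 * Illustrative sketch only; callers needing segment callbacks also pass
 * buf_seg_{ctor,dtor}.
 */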
1324
1325void
1326pp_destroy(struct kern_pbufpool *pp)
1327{
1328 PP_LOCK_ASSERT_HELD(pp);
1329
1330 /* may be called for built-in pp with outstanding reference */
1331 ASSERT(!(pp->pp_flags & PPF_EXTERNAL) || pp->pp_refcnt == 0);
1332
1333 pp_destroy_upp_locked(pp);
1334
1335 pp_destroy_upp_bft_locked(pp);
1336
1337 if (pp->pp_kmd_cache != NULL) {
1338 skmem_cache_destroy(pp->pp_kmd_cache);
1339 pp->pp_kmd_cache = NULL;
1340 }
1341
1342 if (pp->pp_umd_region != NULL) {
1343 skmem_region_release(pp->pp_umd_region);
1344 pp->pp_umd_region = NULL;
1345 }
1346
1347 if (pp->pp_kmd_region != NULL) {
1348 skmem_region_release(pp->pp_kmd_region);
1349 pp->pp_kmd_region = NULL;
1350 }
1351
1352 if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1353 skmem_cache_destroy(PP_KBFT_CACHE_DEF(pp));
1354 PP_KBFT_CACHE_DEF(pp) = NULL;
1355 }
1356
1357 if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1358 skmem_cache_destroy(PP_KBFT_CACHE_LARGE(pp));
1359 PP_KBFT_CACHE_LARGE(pp) = NULL;
1360 }
1361
1362 if (pp->pp_ubft_region != NULL) {
1363 skmem_region_release(pp->pp_ubft_region);
1364 pp->pp_ubft_region = NULL;
1365 }
1366
1367 if (pp->pp_kbft_region != NULL) {
1368 skmem_region_release(pp->pp_kbft_region);
1369 pp->pp_kbft_region = NULL;
1370 }
1371
1372 /*
1373 * The order is important here, since pp_metadata_dtor()
1374 * called by freeing on the pp_kmd_cache will in turn
1375 * free the attached buffer. Therefore destroy the
1376 * buffer cache last.
1377 */
1378 if (PP_BUF_CACHE_DEF(pp) != NULL) {
1379 skmem_cache_destroy(PP_BUF_CACHE_DEF(pp));
1380 PP_BUF_CACHE_DEF(pp) = NULL;
1381 }
1382 if (PP_BUF_REGION_DEF(pp) != NULL) {
1383 skmem_region_release(PP_BUF_REGION_DEF(pp));
1384 PP_BUF_REGION_DEF(pp) = NULL;
1385 }
1386 if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1387 skmem_cache_destroy(PP_BUF_CACHE_LARGE(pp));
1388 PP_BUF_CACHE_LARGE(pp) = NULL;
1389 }
1390 if (PP_BUF_REGION_LARGE(pp) != NULL) {
1391 skmem_region_release(PP_BUF_REGION_LARGE(pp));
1392 PP_BUF_REGION_LARGE(pp) = NULL;
1393 }
1394
1395 if (pp->pp_ctx != NULL) {
1396 pp->pp_ctx_release(pp->pp_ctx);
1397 pp->pp_ctx = NULL;
1398 }
1399}
1400
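/*
 * The "upp" (user packet pool) hash tables below track metadata and buflets
 * that have been handed out to user space, keyed by object index and tagged
 * with the owning pid, so that anything still outstanding can be reclaimed
 * via pp_purge_upp() when the process goes away.
 */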
1401static int
1402pp_init_upp_locked(struct kern_pbufpool *pp, boolean_t can_block)
1403{
1404 int i, err = 0;
1405
1406 if (pp->pp_u_hash_table != NULL) {
1407 goto done;
1408 }
1409
1410 /* allocated-address hash table */
	pp->pp_u_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
	    zalloc_noblock(pp_u_htbl_zone);
1413 if (pp->pp_u_hash_table == NULL) {
1414 SK_ERR("failed to zalloc packet buffer pool upp hash table");
1415 err = ENOMEM;
1416 goto done;
1417 }
1418
1419 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1420 SLIST_INIT(&pp->pp_u_hash_table[i].upp_head);
1421 }
1422done:
1423 return err;
1424}
1425
1426static void
1427pp_destroy_upp_locked(struct kern_pbufpool *pp)
1428{
1429 PP_LOCK_ASSERT_HELD(pp);
1430 if (pp->pp_u_hash_table != NULL) {
1431 /* purge anything that's left */
		pp_purge_upp_locked(pp, -1);
1433
1434#if (DEBUG || DEVELOPMENT)
1435 for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1436 ASSERT(SLIST_EMPTY(&pp->pp_u_hash_table[i].upp_head));
1437 }
1438#endif /* DEBUG || DEVELOPMENT */
1439
1440 zfree(pp_u_htbl_zone, pp->pp_u_hash_table);
1441 pp->pp_u_hash_table = NULL;
1442 }
1443 ASSERT(pp->pp_u_bufinuse == 0);
1444}
1445
1446int
1447pp_init_upp(struct kern_pbufpool *pp, boolean_t can_block)
1448{
1449 int err = 0;
1450
1451 PP_LOCK(pp);
1452 err = pp_init_upp_locked(pp, can_block);
1453 if (err) {
1454 SK_ERR("packet UPP init failed (%d)", err);
1455 goto done;
1456 }
1457 err = pp_init_upp_bft_locked(pp, can_block);
1458 if (err) {
1459 SK_ERR("buflet UPP init failed (%d)", err);
1460 pp_destroy_upp_locked(pp);
1461 goto done;
1462 }
1463 pp_retain_locked(pp);
1464done:
1465 PP_UNLOCK(pp);
1466 return err;
1467}
1468
1469__attribute__((always_inline))
1470static void
1471pp_insert_upp_bft_locked(struct kern_pbufpool *pp,
1472 struct __kern_buflet *kbft, pid_t pid)
1473{
1474 struct kern_pbufpool_u_bft_bkt *bkt;
1475 struct __kern_buflet_ext *kbe = (struct __kern_buflet_ext *)kbft;
1476
1477 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1478 ASSERT(kbe->kbe_buf_pid == (pid_t)-1);
1479 kbe->kbe_buf_pid = pid;
1480 bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, kbft->buf_bft_idx_reg);
1481 SLIST_INSERT_HEAD(&bkt->upp_head, kbe, kbe_buf_upp_link);
1482 pp->pp_u_bftinuse++;
1483}
1484
1485__attribute__((always_inline))
1486static void
1487pp_insert_upp_bft_chain_locked(struct kern_pbufpool *pp,
1488 struct __kern_buflet *kbft, pid_t pid)
1489{
1490 while (kbft != NULL) {
1491 pp_insert_upp_bft_locked(pp, kbft, pid);
1492 kbft = __DECONST(kern_buflet_t, kbft->buf_nbft_addr);
1493 }
1494}
1495
1496/* Also inserts the attached chain of buflets */
static inline void
1498pp_insert_upp_common(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1499 pid_t pid)
1500{
1501 struct kern_pbufpool_u_bkt *bkt;
1502 struct __kern_buflet *kbft;
1503
1504 ASSERT(kqum->qum_pid == (pid_t)-1);
1505 kqum->qum_pid = pid;
1506
1507 bkt = KERN_PBUFPOOL_U_HASH(pp, METADATA_IDX(kqum));
1508 SLIST_INSERT_HEAD(&bkt->upp_head, kqum, qum_upp_link);
1509 pp->pp_u_bufinuse++;
1510
1511 kbft = (kern_buflet_t)kqum->qum_buf[0].buf_nbft_addr;
1512 if (kbft != NULL) {
1513 ASSERT(((kern_buflet_t)kbft)->buf_flag & BUFLET_FLAG_EXTERNAL);
1514 ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
1515 pp_insert_upp_bft_chain_locked(pp, kbft, pid);
1516 }
1517}
1518
1519void
1520pp_insert_upp_locked(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1521 pid_t pid)
1522{
1523 pp_insert_upp_common(pp, kqum, pid);
1524}
1525
1526void
1527pp_insert_upp(struct kern_pbufpool *pp, struct __kern_quantum *kqum, pid_t pid)
1528{
1529 PP_LOCK(pp);
1530 pp_insert_upp_common(pp, kqum, pid);
1531 PP_UNLOCK(pp);
1532}
1533
1534void
1535pp_insert_upp_batch(struct kern_pbufpool *pp, pid_t pid, uint64_t *array,
1536 uint32_t num)
1537{
1538 uint32_t i = 0;
1539
1540 ASSERT(array != NULL && num > 0);
1541 PP_LOCK(pp);
1542 while (num != 0) {
1543 struct __kern_quantum *kqum = SK_PTR_ADDR_KQUM(array[i]);
1544
1545 ASSERT(kqum != NULL);
1546 pp_insert_upp_common(pp, kqum, pid);
1547 --num;
1548 ++i;
1549 }
1550 PP_UNLOCK(pp);
1551}
1552
1553__attribute__((always_inline))
1554static struct __kern_buflet *
1555pp_remove_upp_bft_locked(struct kern_pbufpool *pp, obj_idx_t bft_idx)
1556{
1557 struct __kern_buflet_ext *kbft, *tbft;
1558 struct kern_pbufpool_u_bft_bkt *bkt;
1559
1560 bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, bft_idx);
1561 SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link, tbft) {
1562 if (((kern_buflet_t)kbft)->buf_bft_idx_reg == bft_idx) {
1563 SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
1564 kbe_buf_upp_link);
1565 kbft->kbe_buf_pid = (pid_t)-1;
1566 kbft->kbe_buf_upp_link.sle_next = NULL;
1567 ASSERT(pp->pp_u_bftinuse != 0);
1568 pp->pp_u_bftinuse--;
1569 break;
1570 }
1571 }
1572 return (kern_buflet_t)kbft;
1573}
1574
1575struct __kern_buflet *
1576pp_remove_upp_bft(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1577{
	struct __kern_buflet *kbft = pp_remove_upp_bft_locked(pp, md_idx);
1579
1580 *err = __improbable(kbft != NULL) ? 0 : EINVAL;
1581 return kbft;
1582}
1583
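/*
 * Walk the buflet chain described by the user-visible packet: reclaim each
 * buflet from the upp table, relink it onto the kernel packet, and validate
 * the chain length against the pkt_bufs_cnt reported by user space.
 * Returns ERANGE if the chain is malformed.
 */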
1584__attribute__((always_inline))
1585static int
1586pp_remove_upp_bft_chain_locked(struct kern_pbufpool *pp,
1587 struct __kern_quantum *kqum)
1588{
1589 uint32_t max_frags = pp->pp_max_frags;
1590 struct __kern_buflet *kbft;
1591 uint16_t nbfts, upkt_nbfts;
1592 obj_idx_t bft_idx;
1593
1594 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
1595 bft_idx = kqum->qum_user->qum_buf[0].buf_nbft_idx;
1596 kbft = &kqum->qum_buf[0];
1597 if (bft_idx == OBJ_IDX_NONE) {
1598 return 0;
1599 }
1600
1601 ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_PACKET);
1602 struct __kern_packet *kpkt = __DECONST(struct __kern_packet *, kqum);
1603 struct __user_packet *upkt = __DECONST(struct __user_packet *,
1604 kpkt->pkt_qum.qum_user);
1605
1606 upkt_nbfts = upkt->pkt_bufs_cnt;
1607 if (__improbable(upkt_nbfts > max_frags)) {
1608 SK_ERR("bad bcnt in upkt (%d > %d)", upkt_nbfts, max_frags);
1609 BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1610 BUF_NBFT_ADDR(kbft, 0);
1611 return ERANGE;
1612 }
1613
1614 nbfts = (kbft->buf_addr != 0) ? 1 : 0;
1615
1616 do {
1617 struct __kern_buflet *pbft = kbft;
1618 struct __kern_buflet_ext *kbe;
1619
1620 kbft = pp_remove_upp_bft_locked(pp, bft_idx);
1621 if (__improbable(kbft == NULL)) {
1622 BUF_NBFT_IDX(pbft, OBJ_IDX_NONE);
1623 BUF_NBFT_ADDR(pbft, 0);
1624 SK_ERR("unallocated next buflet (%d), %p", bft_idx,
1625 SK_KVA(pbft));
1626 return ERANGE;
1627 }
1628 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1629 BUF_NBFT_IDX(pbft, bft_idx);
1630 BUF_NBFT_ADDR(pbft, kbft);
1631 kbe = (struct __kern_buflet_ext *)kbft;
1632 bft_idx = kbe->kbe_buf_user->buf_nbft_idx;
1633 ++nbfts;
1634 } while ((bft_idx != OBJ_IDX_NONE) && (nbfts < upkt_nbfts));
1635
1636 ASSERT(kbft != NULL);
1637 BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1638 BUF_NBFT_ADDR(kbft, 0);
1639 *__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = nbfts;
1640
1641 if (__improbable((bft_idx != OBJ_IDX_NONE) || (nbfts != upkt_nbfts))) {
1642 SK_ERR("bad buflet in upkt (%d, %d)", nbfts, upkt_nbfts);
1643 return ERANGE;
1644 }
1645 return 0;
1646}
1647
1648struct __kern_quantum *
1649pp_remove_upp_locked(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1650{
1651 struct __kern_quantum *kqum, *tqum;
1652 struct kern_pbufpool_u_bkt *bkt;
1653
1654 bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1655 SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1656 if (METADATA_IDX(kqum) == md_idx) {
1657 SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
1658 qum_upp_link);
1659 kqum->qum_pid = (pid_t)-1;
1660 ASSERT(pp->pp_u_bufinuse != 0);
1661 pp->pp_u_bufinuse--;
1662 break;
1663 }
1664 }
1665 if (__probable(kqum != NULL)) {
1666 *err = pp_remove_upp_bft_chain_locked(pp, kqum);
1667 } else {
1668 *err = ERANGE;
1669 }
1670 return kqum;
1671}
1672
1673struct __kern_quantum *
1674pp_remove_upp(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1675{
1676 struct __kern_quantum *kqum;
1677
1678 PP_LOCK(pp);
1679 kqum = pp_remove_upp_locked(pp, md_idx, err);
1680 PP_UNLOCK(pp);
1681 return kqum;
1682}
1683
1684struct __kern_quantum *
1685pp_find_upp(struct kern_pbufpool *pp, obj_idx_t md_idx)
1686{
1687 struct __kern_quantum *kqum, *tqum;
1688 struct kern_pbufpool_u_bkt *bkt;
1689
1690 PP_LOCK(pp);
1691 bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1692 SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1693 if (METADATA_IDX(kqum) == md_idx) {
1694 break;
1695 }
1696 }
1697 PP_UNLOCK(pp);
1698
1699 return kqum;
1700}
1701
__attribute__((always_inline))
static void
pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid)
{
	struct __kern_quantum *kqum, *tqum;
	struct kern_pbufpool_u_bkt *bkt;
	int i;

	PP_LOCK_ASSERT_HELD(pp);

	/*
	 * TODO: Build a list of packets and batch-free them.
	 */
	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
		bkt = &pp->pp_u_hash_table[i];
		SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
			ASSERT(kqum->qum_pid != (pid_t)-1);
			if (pid != (pid_t)-1 && kqum->qum_pid != pid) {
				continue;
			}
			SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
			    qum_upp_link);
			pp_remove_upp_bft_chain_locked(pp, kqum);
			kqum->qum_pid = (pid_t)-1;
			kqum->qum_qflags &= ~QUM_F_FINALIZED;
			kqum->qum_ksd = NULL;
			pp_free_packet(__DECONST(struct kern_pbufpool *,
			    kqum->qum_pp), (uint64_t)kqum);
			ASSERT(pp->pp_u_bufinuse != 0);
			pp->pp_u_bufinuse--;
		}
	}
}

__attribute__((always_inline))
static void
pp_purge_upp_bft_locked(struct kern_pbufpool *pp, pid_t pid)
{
	struct __kern_buflet_ext *kbft, *tbft;
	struct kern_pbufpool_u_bft_bkt *bkt;
	int i;

	PP_LOCK_ASSERT_HELD(pp);

	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
		bkt = &pp->pp_u_bft_hash_table[i];
		SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link,
		    tbft) {
			ASSERT(kbft->kbe_buf_pid != (pid_t)-1);
			if (pid != (pid_t)-1 && kbft->kbe_buf_pid != pid) {
				continue;
			}
			SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
			    kbe_buf_upp_link);
			kbft->kbe_buf_pid = (pid_t)-1;
			kbft->kbe_buf_upp_link.sle_next = NULL;
			pp_free_buflet(pp, (kern_buflet_t)kbft);
			ASSERT(pp->pp_u_bftinuse != 0);
			pp->pp_u_bftinuse--;
		}
	}
}

void
pp_purge_upp(struct kern_pbufpool *pp, pid_t pid)
{
	PP_LOCK(pp);
	pp_purge_upp_locked(pp, pid);
	pp_purge_upp_bft_locked(pp, pid);
	PP_UNLOCK(pp);
}

static int
pp_init_upp_bft_locked(struct kern_pbufpool *pp, boolean_t can_block)
{
	int i, err = 0;

	PP_LOCK_ASSERT_HELD(pp);
	if (pp->pp_u_bft_hash_table != NULL) {
		return 0;
	}

	/* allocated-address hash table */
	pp->pp_u_bft_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
	    zalloc_noblock(pp_u_htbl_zone);
	if (pp->pp_u_bft_hash_table == NULL) {
		SK_ERR("failed to zalloc packet buffer pool upp buflet hash table");
		err = ENOMEM;
		goto fail;
	}

	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
		SLIST_INIT(&pp->pp_u_bft_hash_table[i].upp_head);
	}

fail:
	return err;
}

static void
pp_destroy_upp_bft_locked(struct kern_pbufpool *pp)
{
	PP_LOCK_ASSERT_HELD(pp);
	if (pp->pp_u_bft_hash_table != NULL) {
		/* purge anything that's left */
		pp_purge_upp_bft_locked(pp, -1);

#if (DEBUG || DEVELOPMENT)
		for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
			ASSERT(SLIST_EMPTY(&pp->pp_u_bft_hash_table[i].upp_head));
		}
#endif /* DEBUG || DEVELOPMENT */

		zfree(pp_u_htbl_zone, pp->pp_u_bft_hash_table);
		pp->pp_u_bft_hash_table = NULL;
	}
	ASSERT(pp->pp_u_bftinuse == 0);
}

void
pp_insert_upp_bft(struct kern_pbufpool *pp,
    struct __kern_buflet *kbft, pid_t pid)
{
	PP_LOCK(pp);
	pp_insert_upp_bft_locked(pp, kbft, pid);
	PP_UNLOCK(pp);
}

boolean_t
pp_isempty_upp(struct kern_pbufpool *pp)
{
	boolean_t isempty;

	PP_LOCK(pp);
	isempty = (pp->pp_u_bufinuse == 0);
	PP_UNLOCK(pp);

	return isempty;
}
__attribute__((always_inline))
static inline struct __kern_quantum *
pp_metadata_init(struct __metadata_preamble *mdp, struct kern_pbufpool *pp,
    uint16_t bufcnt, uint32_t skmflag, struct skmem_obj **blist)
{
	struct __kern_quantum *kqum;
	struct __user_quantum *uqum;

	kqum = SK_PTR_ADDR_KQUM((uintptr_t)mdp + METADATA_PREAMBLE_SZ);
	ASSERT(kqum->qum_pp == pp);
	if (__probable(!PP_KERNEL_ONLY(pp))) {
		ASSERT(!(kqum->qum_qflags & QUM_F_KERNEL_ONLY));
		uqum = __DECONST(struct __user_quantum *, kqum->qum_user);
		ASSERT(uqum != NULL);
	} else {
		ASSERT(kqum->qum_qflags & QUM_F_KERNEL_ONLY);
		ASSERT(kqum->qum_user == NULL);
		uqum = NULL;
	}

	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 &&
	    pp_metadata_construct(kqum, uqum, METADATA_IDX(kqum), pp,
	    skmflag, bufcnt, FALSE, blist) != 0) {
		return NULL;
	}

	/* (re)construct {user,kernel} metadata */
	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
		struct __kern_buflet *kbuf = &kpkt->pkt_qum_buf;
		uint16_t i;

		/* sanitize flags */
		kpkt->pkt_pflags &= PKT_F_INIT_MASK;

		ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
		    kpkt->pkt_com_opt != NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
		    kpkt->pkt_flow != NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
		    kpkt->pkt_tx_compl != NULL);

		/*
		 * XXX: For now we always set PKT_F_FLOW_DATA;
		 * this is a no-op but done for consistency
		 * with the other PKT_F_*_DATA flags.
		 */
		kpkt->pkt_pflags |= PKT_F_FLOW_DATA;

		/* initialize kernel packet */
		KPKT_INIT(kpkt, QUM_F_INTERNALIZED);

		ASSERT(bufcnt || PP_HAS_BUFFER_ON_DEMAND(pp));
		if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
			ASSERT(kbuf->buf_ctl == NULL);
			ASSERT(kbuf->buf_addr == 0);
			kbuf = __DECONST(struct __kern_buflet *,
			    kbuf->buf_nbft_addr);
		}
		/* initialize kernel buflet */
		for (i = 0; i < bufcnt; i++) {
			ASSERT(kbuf != NULL);
			KBUF_INIT(kbuf);
			kbuf = __DECONST(struct __kern_buflet *,
			    kbuf->buf_nbft_addr);
		}
		ASSERT((kbuf == NULL) || (bufcnt == 0));
		break;
	}
	default:
		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
		/* kernel quantum */
		KQUM_INIT(kqum, QUM_F_INTERNALIZED);
		KBUF_INIT(&kqum->qum_buf[0]);
		break;
	}

	return kqum;
}

/*
 * When the PPF_BUFFER_ON_DEMAND flag is set at packet pool creation time, we
 * create a packet descriptor cache with no buffers attached and a buflet
 * cache with CPU-layer caching enabled.  While operating in this mode,
 * pp_alloc_packet_common() can be called either with `bufcnt == 0` or with
 * `bufcnt == n`, where n <= pp->pp_max_frags.  If `bufcnt == 0`, we allocate
 * a packet descriptor with no attached buffer from the metadata cache.
 * If `bufcnt != 0`, this routine allocates the packet descriptor and the
 * buflets from their respective caches and constructs the packet on behalf
 * of the caller.  A usage sketch follows the function body below.
 */
__attribute__((always_inline))
static inline uint32_t
pp_alloc_packet_common(struct kern_pbufpool *pp, uint16_t bufcnt,
    uint64_t *array, uint32_t num, boolean_t tagged, alloc_cb_func_t cb,
    const void *ctx, uint32_t skmflag)
{
	struct __metadata_preamble *mdp;
	struct __kern_quantum *kqum = NULL;
	uint32_t allocp, need = num;
	struct skmem_obj *plist, *blist = NULL;

	ASSERT(bufcnt <= pp->pp_max_frags);
	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));

	/* allocate (constructed) packet(s) with buffer(s) attached */
	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
	    skmflag);

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
		    (allocp * bufcnt), skmflag);
	}

	while (plist != NULL) {
		struct skmem_obj *plistn;

		plistn = plist->mo_next;
		plist->mo_next = NULL;

		mdp = (struct __metadata_preamble *)(void *)plist;
		kqum = pp_metadata_init(mdp, pp, bufcnt, skmflag, &blist);
		if (kqum == NULL) {
			if (blist != NULL) {
				skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
				    blist);
				blist = NULL;
			}
			plist->mo_next = plistn;
			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
			plist = NULL;
			break;
		}

#if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
		/* Checking to ensure the object address is tagged */
		ASSERT((vm_offset_t)kqum !=
		    vm_memtag_canonicalize_address((vm_offset_t)kqum));
#endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */

		if (tagged) {
			*array = SK_PTR_ENCODE(kqum, METADATA_TYPE(kqum),
			    METADATA_SUBTYPE(kqum));
		} else {
			*array = (uint64_t)kqum;
		}

		if (cb != NULL) {
			(cb)(*array, (num - need), ctx);
		}

		++array;
		plist = plistn;

		ASSERT(need > 0);
		--need;
	}
	ASSERT(blist == NULL);
	ASSERT((num - need) == allocp || kqum == NULL);

	return num - need;
}
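
/*
 * Usage sketch for the buffer-on-demand mode described above.  This is an
 * illustrative, hypothetical caller (not part of this file), assuming `pp`
 * was created with PPF_BUFFER_ON_DEMAND; it exercises the two documented
 * cases, `bufcnt == 0` (bare descriptor) and `bufcnt != 0` (descriptor plus
 * attached buflets), and returns everything through the free path.
 *
 *	uint64_t pkts[4];
 *	uint32_t count = 4;
 *
 *	// bare packet descriptor; buflets can be attached later
 *	uint64_t bare = pp_alloc_packet(pp, 0, SKMEM_NOSLEEP);
 *
 *	// batch of descriptors, each with pp_max_frags buflets, untagged
 *	int err = pp_alloc_packet_batch(pp, (uint16_t)pp->pp_max_frags,
 *	    pkts, &count, FALSE, NULL, NULL, SKMEM_NOSLEEP);
 *	// on EAGAIN, `count` holds how many were actually allocated
 *
 *	if (bare != 0) {
 *		pp_free_packet(pp, bare);
 *	}
 *	if (count != 0) {
 *		pp_free_packet_batch(pp, pkts, count);
 *	}
 */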

uint64_t
pp_alloc_packet(struct kern_pbufpool *pp, uint16_t bufcnt, uint32_t skmflag)
{
	uint64_t kpkt = 0;

	(void) pp_alloc_packet_common(pp, bufcnt, &kpkt, 1, FALSE,
	    NULL, NULL, skmflag);

	return kpkt;
}

int
pp_alloc_packet_batch(struct kern_pbufpool *pp, uint16_t bufcnt,
    uint64_t *array, uint32_t *size, boolean_t tagged, alloc_cb_func_t cb,
    const void *ctx, uint32_t skmflag)
{
	uint32_t i, n;
	int err;

	ASSERT(array != NULL && size != NULL);

	n = *size;
	*size = 0;

	i = pp_alloc_packet_common(pp, bufcnt, array, n, tagged,
	    cb, ctx, skmflag);
	*size = i;

	if (__probable(i == n)) {
		err = 0;
	} else if (i != 0) {
		err = EAGAIN;
	} else {
		err = ENOMEM;
	}

	return err;
}

int
pp_alloc_pktq(struct kern_pbufpool *pp, uint16_t bufcnt,
    struct pktq *pktq, uint32_t num, alloc_cb_func_t cb, const void *ctx,
    uint32_t skmflag)
{
	struct __metadata_preamble *mdp;
	struct __kern_packet *kpkt = NULL;
	uint32_t allocp, need = num;
	struct skmem_obj *plist, *blist = NULL;
	int err;

	ASSERT(pktq != NULL && num > 0);
	ASSERT(pp->pp_md_type == NEXUS_META_TYPE_PACKET);
	ASSERT(bufcnt <= pp->pp_max_frags);
	ASSERT(PP_BATCH_CAPABLE(pp));

	/* allocate (constructed) packet(s) with buffer(s) attached */
	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
	    skmflag);

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
		    (allocp * bufcnt), skmflag);
	}

	while (plist != NULL) {
		struct skmem_obj *plistn;

		plistn = plist->mo_next;
		plist->mo_next = NULL;

		mdp = (struct __metadata_preamble *)(void *)plist;
		kpkt = (struct __kern_packet *)pp_metadata_init(mdp, pp,
		    bufcnt, skmflag, &blist);
		if (kpkt == NULL) {
			if (blist != NULL) {
				skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
				    blist);
				blist = NULL;
			}
			plist->mo_next = plistn;
			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
			plist = NULL;
			break;
		}

#if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
		/* Checking to ensure the object address is tagged */
		ASSERT((vm_offset_t)kpkt !=
		    vm_memtag_canonicalize_address((vm_offset_t)kpkt));
#endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */

		KPKTQ_ENQUEUE(pktq, kpkt);

		if (cb != NULL) {
			(cb)((uint64_t)kpkt, (num - need), ctx);
		}

		plist = plistn;

		ASSERT(need > 0);
		--need;
	}
	ASSERT(blist == NULL);
	ASSERT((num - need) == allocp || kpkt == NULL);

	if (__probable(need == 0)) {
		err = 0;
	} else if (need == num) {
		err = ENOMEM;
	} else {
		err = EAGAIN;
	}

	return err;
}

uint64_t
pp_alloc_packet_by_size(struct kern_pbufpool *pp, uint32_t size,
    uint32_t skmflag)
{
	uint32_t bufcnt = pp->pp_max_frags;
	uint64_t kpkt = 0;

	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
		bufcnt =
		    SK_ROUNDUP(size, PP_BUF_SIZE_DEF(pp)) / PP_BUF_SIZE_DEF(pp);
		ASSERT(bufcnt <= UINT16_MAX);
	}

	(void) pp_alloc_packet_common(pp, (uint16_t)bufcnt, &kpkt, 1, TRUE,
	    NULL, NULL, skmflag);

	return kpkt;
}

__attribute__((always_inline))
static inline struct __metadata_preamble *
pp_metadata_fini(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
    struct mbuf **mp, struct __kern_packet **kpp, struct skmem_obj **blist_def,
    struct skmem_obj **blist_large)
{
	struct __metadata_preamble *mdp = METADATA_PREAMBLE(kqum);

	ASSERT(SK_PTR_TAG(kqum) == 0);

	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_KPKT(kqum);

		if ((kpkt->pkt_pflags & PKT_F_TX_COMPL_TS_REQ) != 0) {
			__packet_perform_tx_completion_callbacks(
				SK_PKT2PH(kpkt), NULL);
		}
		if ((kpkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
			ASSERT((kpkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
			ASSERT(kpkt->pkt_mbuf != NULL);
			ASSERT(kpkt->pkt_mbuf->m_nextpkt == NULL);
			if (mp != NULL) {
				ASSERT(*mp == NULL);
				*mp = kpkt->pkt_mbuf;
			} else {
				m_freem(kpkt->pkt_mbuf);
			}
			KPKT_CLEAR_MBUF_DATA(kpkt);
		} else if ((kpkt->pkt_pflags & PKT_F_PKT_DATA) != 0) {
			ASSERT(kpkt->pkt_pkt != NULL);
			ASSERT(kpkt->pkt_pkt->pkt_nextpkt == NULL);
			if (kpp != NULL) {
				ASSERT(*kpp == NULL);
				*kpp = kpkt->pkt_pkt;
			} else {
				/* can only recurse once */
				ASSERT((kpkt->pkt_pkt->pkt_pflags &
				    PKT_F_PKT_DATA) == 0);
				pp_free_packet_single(kpkt->pkt_pkt);
			}
			KPKT_CLEAR_PKT_DATA(kpkt);
		}
		kpkt->pkt_pflags &= ~PKT_F_TRUNCATED;
		ASSERT(kpkt->pkt_nextpkt == NULL);
		ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_MBUF_MASK) == 0);
		ASSERT((kpkt->pkt_pflags & PKT_F_PKT_MASK) == 0);
		break;
	}
	default:
		break;
	}

	if (__improbable(PP_HAS_BUFFER_ON_DEMAND(pp))) {
		pp_metadata_destruct_common(kqum, pp, FALSE, blist_def,
		    blist_large);
	}
	return mdp;
}

void
pp_free_packet_chain(struct __kern_packet *pkt_chain, int *npkt)
{
	struct __metadata_preamble *mdp;
	struct skmem_obj *top = NULL;
	struct skmem_obj *blist_def = NULL;
	struct skmem_obj *blist_large = NULL;
	struct skmem_obj **list = &top;
	struct mbuf *mtop = NULL;
	struct mbuf **mp = &mtop;
	struct __kern_packet *kptop = NULL;
	struct __kern_packet **kpp = &kptop, *pkt, *next;
	struct kern_pbufpool *pp;
	int c = 0;

	pp = __DECONST(struct kern_pbufpool *, pkt_chain->pkt_qum.qum_pp);
	ASSERT(pp != NULL);
	ASSERT(PP_BATCH_CAPABLE(pp));

	for (pkt = pkt_chain; pkt != NULL; pkt = next) {
		next = pkt->pkt_nextpkt;
		pkt->pkt_nextpkt = NULL;

		ASSERT(SK_PTR_ADDR_KQUM(pkt)->qum_pp == pp);
		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(pkt), pp,
		    mp, kpp, &blist_def, &blist_large);

		*list = (struct skmem_obj *)mdp;
		list = &(*list)->mo_next;
		c++;

		if (*mp != NULL) {
			mp = &(*mp)->m_nextpkt;
			ASSERT(*mp == NULL);
		}
		if (*kpp != NULL) {
			kpp = &(*kpp)->pkt_nextpkt;
			ASSERT(*kpp == NULL);
		}
	}

	ASSERT(top != NULL);
	skmem_cache_batch_free(pp->pp_kmd_cache, top);
	if (blist_def != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
		blist_def = NULL;
	}
	if (blist_large != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
		blist_large = NULL;
	}
	if (mtop != NULL) {
		DTRACE_SKYWALK(free__attached__mbuf);
		if (__probable(mtop->m_nextpkt != NULL)) {
			m_freem_list(mtop);
		} else {
			m_freem(mtop);
		}
	}
	if (kptop != NULL) {
		int cnt = 0;
		pp_free_packet_chain(kptop, &cnt);
		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
	}
	if (npkt != NULL) {
		*npkt = c;
	}
}

void
pp_free_pktq(struct pktq *pktq)
{
	if (__improbable(KPKTQ_EMPTY(pktq))) {
		return;
	}
	struct __kern_packet *pkt = KPKTQ_FIRST(pktq);
	pp_free_packet_chain(pkt, NULL);
	KPKTQ_DISPOSE(pktq);
}

__attribute__((always_inline))
static inline void
pp_free_packet_array(struct kern_pbufpool *pp, uint64_t *array, uint32_t num)
{
	struct __metadata_preamble *mdp;
	struct skmem_obj *top = NULL;
	struct skmem_obj *blist_def = NULL;
	struct skmem_obj *blist_large = NULL;
	struct skmem_obj **list = &top;
	struct mbuf *mtop = NULL;
	struct mbuf **mp = &mtop;
	struct __kern_packet *kptop = NULL;
	struct __kern_packet **kpp = &kptop;
	uint32_t i;

	ASSERT(pp != NULL);
	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));

	for (i = 0; i < num; i++) {
		ASSERT(SK_PTR_ADDR_KQUM(array[i])->qum_pp == pp);
		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(array[i]), pp,
		    mp, kpp, &blist_def, &blist_large);

		*list = (struct skmem_obj *)mdp;
		list = &(*list)->mo_next;
		array[i] = 0;

		if (*mp != NULL) {
			mp = &(*mp)->m_nextpkt;
			ASSERT(*mp == NULL);
		}
		if (*kpp != NULL) {
			kpp = &(*kpp)->pkt_nextpkt;
			ASSERT(*kpp == NULL);
		}
	}

	ASSERT(top != NULL);
	skmem_cache_batch_free(pp->pp_kmd_cache, top);
	if (blist_def != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
		blist_def = NULL;
	}
	if (blist_large != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
		blist_large = NULL;
	}
	if (mtop != NULL) {
		DTRACE_SKYWALK(free__attached__mbuf);
		if (__probable(mtop->m_nextpkt != NULL)) {
			m_freem_list(mtop);
		} else {
			m_freem(mtop);
		}
	}
	if (kptop != NULL) {
		int cnt = 0;
		pp_free_packet_chain(kptop, &cnt);
		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
	}
}

void
pp_free_packet(struct kern_pbufpool *pp, uint64_t kqum)
{
	pp_free_packet_array(pp, &kqum, 1);
}

void
pp_free_packet_batch(const kern_pbufpool_t pp, uint64_t *array, uint32_t size)
{
	pp_free_packet_array(pp, array, size);
}

void
pp_free_packet_single(struct __kern_packet *pkt)
{
	ASSERT(pkt->pkt_nextpkt == NULL);
	pp_free_packet(__DECONST(struct kern_pbufpool *,
	    pkt->pkt_qum.qum_pp), SK_PTR_ADDR(pkt));
}

static mach_vm_address_t
pp_alloc_buffer_common(const kern_pbufpool_t pp, struct skmem_obj_info *oi,
    uint32_t skmflag, bool large)
{
	mach_vm_address_t baddr;
	struct skmem_cache *skm = large ? PP_BUF_CACHE_LARGE(pp) :
	    PP_BUF_CACHE_DEF(pp);

	ASSERT(skm != NULL);
	/* allocate a cached buffer */
	baddr = (mach_vm_address_t)skmem_cache_alloc(skm, skmflag);

#if (DEVELOPMENT || DEBUG)
	uint64_t mtbf = skmem_region_get_mtbf();
	/*
	 * MTBF is applicable only for non-blocking allocations here.
	 */
	if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
	    (skmflag & SKMEM_NOSLEEP))) {
		SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
		net_update_uptime();
		if (baddr != 0) {
			skmem_cache_free(skm, (void *)baddr);
			baddr = 0;
		}
	}
#endif /* (DEVELOPMENT || DEBUG) */

	if (__improbable(baddr == 0)) {
		SK_DF(SK_VERB_MEM, "failed to alloc buffer, pp 0x%llx",
		    SK_KVA(pp));
		return 0;
	}
	skmem_cache_get_obj_info(skm, (void *)baddr, oi, NULL);
	ASSERT(SKMEM_OBJ_BUFCTL(oi) != NULL);
	ASSERT((mach_vm_address_t)SKMEM_OBJ_ADDR(oi) == baddr);
	return baddr;
}

errno_t
pp_alloc_buffer(const kern_pbufpool_t pp, mach_vm_address_t *baddr,
    kern_segment_t *seg, kern_obj_idx_seg_t *idx, uint32_t skmflag)
{
	struct skmem_obj_info oib;

	VERIFY(pp != NULL && baddr != NULL);
	VERIFY((seg != NULL) == (idx != NULL));

	if (__improbable(!PP_HAS_BUFFER_ON_DEMAND(pp))) {
		return ENOTSUP;
	}

	*baddr = pp_alloc_buffer_common(pp, &oib, skmflag, false);
	if (__improbable(*baddr == 0)) {
		return ENOMEM;
	}

	if (seg != NULL) {
		ASSERT(SKMEM_OBJ_SEG(&oib) != NULL);
		*seg = SKMEM_OBJ_SEG(&oib);
		*idx = SKMEM_OBJ_IDX_SEG(&oib);
	}
	return 0;
}

void
pp_free_buffer(const kern_pbufpool_t pp, mach_vm_address_t addr)
{
	ASSERT(pp != NULL && addr != 0);
	skmem_cache_free(PP_BUF_CACHE_DEF(pp), (void *)addr);
}

__attribute__((always_inline))
static inline uint32_t
pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
    uint32_t num, uint32_t skmflag, bool large)
{
	struct __kern_buflet *kbft = NULL;
	uint32_t allocd, need = num;
	struct skmem_obj *list;

	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));
	ASSERT(PP_KBFT_CACHE_DEF(pp) != NULL);
	ASSERT(PP_BUF_SIZE_LARGE(pp) != 0 || !large);

	allocd = skmem_cache_batch_alloc(large ? PP_KBFT_CACHE_LARGE(pp) :
	    PP_KBFT_CACHE_DEF(pp), &list, num, skmflag);

	while (list != NULL) {
		struct skmem_obj *listn;

		listn = list->mo_next;
		list->mo_next = NULL;
		kbft = (kern_buflet_t)(void *)list;

#if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
		/* Checking to ensure the object address is tagged */
		ASSERT((vm_offset_t)kbft !=
		    vm_memtag_canonicalize_address((vm_offset_t)kbft));
#endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */

		KBUF_EXT_INIT(kbft, pp);
		*array = (uint64_t)kbft;
		++array;
		list = listn;
		ASSERT(need > 0);
		--need;
	}
	ASSERT((num - need) == allocd || kbft == NULL);
	return num - need;
}

errno_t
pp_alloc_buflet(struct kern_pbufpool *pp, kern_buflet_t *kbft, uint32_t skmflag,
    bool large)
{
	uint64_t bft;

	if (__improbable(!pp_alloc_buflet_common(pp, &bft, 1, skmflag, large))) {
		return ENOMEM;
	}
	*kbft = (kern_buflet_t)bft;
	return 0;
}

errno_t
pp_alloc_buflet_batch(struct kern_pbufpool *pp, uint64_t *array,
    uint32_t *size, uint32_t skmflag, bool large)
{
	uint32_t i, n;
	int err;

	ASSERT(array != NULL && size != NULL);

	n = *size;
	*size = 0;

	i = pp_alloc_buflet_common(pp, array, n, skmflag, large);
	*size = i;

	if (__probable(i == n)) {
		err = 0;
	} else if (i != 0) {
		err = EAGAIN;
	} else {
		err = ENOMEM;
	}

	return err;
}

__attribute__((always_inline))
static void
pp_free_buflet_common(const kern_pbufpool_t pp, kern_buflet_t kbft)
{
	ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
	ASSERT(kbft->buf_nbft_addr == 0);

	if (kbft->buf_flag & BUFLET_FLAG_EXTERNAL) {
		ASSERT(kbft->buf_addr != 0);
		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
		ASSERT(kbft->buf_bft_idx_reg != OBJ_IDX_NONE);
		ASSERT(kbft->buf_ctl != NULL);
		ASSERT(((struct __kern_buflet_ext *)kbft)->
		    kbe_buf_upp_link.sle_next == NULL);
		/*
		 * An external buflet has its buffer attached at construction
		 * time, so we don't free the buffer here; see the lifecycle
		 * sketch after this function.
		 */
		skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
		    PP_KBFT_CACHE_LARGE(pp) : PP_KBFT_CACHE_DEF(pp),
		    (void *)kbft);
	} else if (__probable(kbft->buf_addr != 0)) {
		void *objaddr = kbft->buf_objaddr;
		uint32_t usecnt = 0;

		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
		ASSERT(kbft->buf_ctl != NULL);
		KBUF_DTOR(kbft, usecnt);
		SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u",
		    SK_KVA(pp), SK_KVA(objaddr), usecnt);
		if (__probable(usecnt == 0)) {
			skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
			    PP_BUF_CACHE_LARGE(pp) : PP_BUF_CACHE_DEF(pp),
			    objaddr);
		}
	}
}
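
/*
 * Lifecycle sketch for external buflets, as referenced above.  This is an
 * illustrative, hypothetical caller (not part of this file); it assumes a
 * pool created with buffer-on-demand support so that PP_KBFT_CACHE_DEF()
 * is populated.  The buflet comes back from pp_alloc_buflet() with its
 * buffer already attached, and pp_free_buflet() returns the buflet (and,
 * transitively, its buffer) to the corresponding cache.
 *
 *	kern_buflet_t bft = NULL;
 *	errno_t err;
 *
 *	err = pp_alloc_buflet(pp, &bft, SKMEM_NOSLEEP, false);
 *	if (err == 0) {
 *		// ... use the buflet, e.g. attach it to a packet ...
 *		pp_free_buflet(pp, bft);
 *	}
 */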

void
pp_free_buflet(const kern_pbufpool_t pp, kern_buflet_t kbft)
{
	ASSERT(pp != NULL && kbft != NULL);
	ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
	pp_free_buflet_common(pp, kbft);
}

void
pp_reap_caches(boolean_t purge)
{
	skmem_cache_reap_now(pp_opt_cache, purge);
	skmem_cache_reap_now(pp_flow_cache, purge);
	skmem_cache_reap_now(pp_compl_cache, purge);
}
