/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _SKYWALK_PACKET_PACKETVAR_H_
#define _SKYWALK_PACKET_PACKETVAR_H_

#ifdef BSD_KERNEL_PRIVATE
#include <skywalk/core/skywalk_var.h>
#include <skywalk/os_packet_private.h>

/*
 * Kernel variant of __user_buflet.
 *
 * The main difference here is the support for shared buffers, where
 * multiple buflets may point to the same buffer object at different
 * data spans within it, each holding a reference to the buffer object,
 * i.e. the "use" count. The buf_addr therefore points to the beginning
 * of the data span; the buf_len describes the length of the span; and
 * the buf_doff describes the offset relative to the beginning of the
 * span as noted by buf_addr. The buffer object is stored in buf_objaddr.
 */
struct __kern_buflet {
	/*
	 * Common area between user and kernel variants.
	 */
	struct __buflet buf_com;
	/*
	 * Kernel specific.
	 */
	/* buffer control of the buffer object */
	const struct skmem_bufctl *buf_ctl;

#define buf_objaddr     buf_ctl->bc_addr
#define buf_objlim      buf_ctl->bc_lim
} __attribute((packed));
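
/*
 * Illustrative sketch (not part of this header): two buflets sharing one
 * buffer object, as described in the comment above.  The values are
 * hypothetical and only show the intended relationship between buf_objaddr,
 * buf_addr and buf_doff; each buflet holds one reference ("use" count) on
 * the underlying skmem_bufctl.
 *
 *	struct __kern_buflet a, b;
 *
 *	// both reference the same 2 KB buffer object:
 *	//   a.buf_objaddr == b.buf_objaddr
 *	// a spans the first 1 KB, b the second 1 KB:
 *	//   a.buf_addr == buf_objaddr,        a.buf_doff == 0
 *	//   b.buf_addr == buf_objaddr + 1024, b.buf_doff == 0
 *	// the bufctl "use" count is 2, one per buflet
 */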

struct __kern_buflet_ext {
	/*
	 * This is an overlay structure on __kern_buflet.
	 */
	struct __kern_buflet kbe_overlay;
	/*
	 * extended variant specific.
	 */
	/* mirrored user buflet */
	struct __user_buflet const *kbe_buf_user;

	/* buflet user packet pool hash bucket linkage */
	SLIST_ENTRY(__kern_buflet_ext) kbe_buf_upp_link;

	/* pid of the process using the buflet */
	pid_t kbe_buf_pid;
} __attribute((packed));

#define KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp, _large) do {	\
	_CASSERT(sizeof ((_kbuf)->buf_addr) == sizeof (mach_vm_address_t)); \
	/* kernel variant (deconst) */					\
	BUF_CTOR(_kbuf, _baddr, _bidxreg, (_large) ? PP_BUF_SIZE_LARGE(_pp) : \
	    PP_BUF_SIZE_DEF(_pp), 0, 0, (_kbuf)->buf_nbft_addr,		\
	    (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag);			\
	*(struct skmem_bufctl **)(uintptr_t)&(_kbuf)->buf_ctl = (_bc);	\
	/* this may be called to initialize unused buflets */		\
	if (__probable((_bc) != NULL)) {				\
		skmem_bufctl_use(_bc);					\
	}								\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)

#define KBUF_EXT_CTOR(_kbuf, _ubuf, _baddr, _bidxreg, _bc,		\
    _bft_idx_reg, _pp, _large) do {					\
	ASSERT(_bft_idx_reg != OBJ_IDX_NONE);				\
	_CASSERT(sizeof((_kbuf)->buf_flag) == sizeof(uint16_t));	\
	/* we don't set buf_nbft_addr here as during construction it */ \
	/* is used by skmem batch alloc logic */			\
	*__DECONST(uint16_t *, &(_kbuf)->buf_flag) = BUFLET_FLAG_EXTERNAL; \
	if (_large) {							\
		*__DECONST(uint16_t *, &(_kbuf)->buf_flag) |=		\
		    BUFLET_FLAG_LARGE_BUF;				\
	}								\
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE);				\
	BUF_BFT_IDX_REG(_kbuf, _bft_idx_reg);				\
	*__DECONST(struct __user_buflet **,				\
	    &((struct __kern_buflet_ext *)(_kbuf))->kbe_buf_user) = (_ubuf); \
	KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp, _large);		\
} while (0)

#define KBUF_INIT(_kbuf) do {						\
	ASSERT((_kbuf)->buf_ctl != NULL);				\
	ASSERT((_kbuf)->buf_addr != 0);					\
	ASSERT((_kbuf)->buf_dlim != 0);					\
	/* kernel variant (deconst) */					\
	BUF_INIT(_kbuf, 0, 0);						\
} while (0)

#define KBUF_EXT_INIT(_kbuf, _pp) do {					\
	ASSERT((_kbuf)->buf_ctl != NULL);				\
	ASSERT((_kbuf)->buf_flag & BUFLET_FLAG_EXTERNAL);		\
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE);		\
	BUF_BADDR(_kbuf, (_kbuf)->buf_ctl->bc_addr);			\
	BUF_NBFT_ADDR(_kbuf, 0);					\
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE);				\
	*__DECONST(uint32_t *, &(_kbuf)->buf_dlim) =			\
	    BUFLET_HAS_LARGE_BUF(_kbuf) ? PP_BUF_SIZE_LARGE((_pp)) :	\
	    PP_BUF_SIZE_DEF((_pp));					\
	(_kbuf)->buf_dlen = 0;						\
	(_kbuf)->buf_doff = 0;						\
	((struct __kern_buflet_ext *__unsafe_indexable)(_kbuf))->kbe_buf_pid = (pid_t)-1; \
	((struct __kern_buflet_ext *__unsafe_indexable)(_kbuf))->kbe_buf_upp_link.sle_next = NULL; \
} while (0)

/* initialize struct __user_buflet from struct __kern_buflet */
#define UBUF_INIT(_kbuf, _ubuf) do {					\
	BUF_CTOR(_ubuf, 0, (_kbuf)->buf_idx, (_kbuf)->buf_dlim,	\
	    (_kbuf)->buf_dlen, (_kbuf)->buf_doff, (_kbuf)->buf_nbft_addr, \
	    (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag);			\
	BUF_BFT_IDX_REG(_ubuf, (_kbuf)->buf_bft_idx_reg);		\
} while (0)

#define KBUF_EXTERNALIZE(_kbuf, _ubuf, _pp) do {			\
	ASSERT((_kbuf)->buf_dlim == (BUFLET_HAS_LARGE_BUF(_kbuf) ?	\
	    PP_BUF_SIZE_LARGE((_pp)) : PP_BUF_SIZE_DEF((_pp))));	\
	ASSERT((_kbuf)->buf_addr != 0);					\
	/* For now, the user-facing pool does not support shared */	\
	/* buffers, since otherwise the ubuf and kbuf buffer */	\
	/* indices would not match.  Assert this is the case. */	\
	ASSERT((_kbuf)->buf_addr == (mach_vm_address_t)(_kbuf)->buf_objaddr); \
	/* Initialize user buflet metadata from kernel buflet */	\
	UBUF_INIT(_kbuf, _ubuf);					\
} while (0)

#define KBUF_LINK(_pkbuf, _kbuf) do {					\
	ASSERT(__DECONST(void *, (_pkbuf)->buf_nbft_addr) == NULL);	\
	ASSERT(__DECONST(obj_idx_t, (_pkbuf)->buf_nbft_idx) == OBJ_IDX_NONE); \
	ASSERT((_kbuf) != NULL);					\
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE);		\
	BUF_NBFT_ADDR(_pkbuf, _kbuf);					\
	BUF_NBFT_IDX(_pkbuf, (_kbuf)->buf_bft_idx_reg);			\
} while (0)

#define KBUF_DTOR(_kbuf, _usecnt) do {					\
	if (__probable((_kbuf)->buf_ctl != NULL)) {			\
		(_usecnt) = skmem_bufctl_unuse(				\
		    __DECONST(struct skmem_bufctl *, (_kbuf)->buf_ctl)); \
		*(struct skmem_bufctl **)				\
		(uintptr_t)&(_kbuf)->buf_ctl = NULL;			\
	}								\
	BUF_BADDR(_kbuf, 0);						\
	BUF_BIDX(_kbuf, OBJ_IDX_NONE);					\
} while (0)

/*
 * Copy kernel buflet (and add reference count to buffer).
 */
#define _KBUF_COPY(_skb, _dkb) do {					\
	ASSERT((_skb)->buf_nbft_addr == 0);				\
	ASSERT((_skb)->buf_nbft_idx == OBJ_IDX_NONE);			\
	ASSERT(!((_dkb)->buf_flag & BUFLET_FLAG_EXTERNAL));		\
	_CASSERT(sizeof(struct __kern_buflet) == 50);			\
	/* copy everything in the kernel buflet */			\
	sk_copy64_40((uint64_t *)(void *)(_skb), (uint64_t *)(void *)(_dkb)); \
	((uint64_t *)(void *)(_dkb))[5] = ((uint64_t *)(void *)(_skb))[5]; \
	((uint16_t *)(void *)(_dkb))[24] = ((uint16_t *)(void *)(_skb))[24]; \
	ASSERT((_dkb)->buf_ctl == (_skb)->buf_ctl);			\
	_CASSERT(sizeof((_dkb)->buf_flag) == sizeof(uint16_t));	\
	*__DECONST(uint16_t *, &(_dkb)->buf_flag) &= ~BUFLET_FLAG_EXTERNAL; \
	if (__probable((_dkb)->buf_ctl != NULL)) {			\
		skmem_bufctl_use(__DECONST(struct skmem_bufctl *,	\
		    (_dkb)->buf_ctl));					\
	}								\
} while (0)
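
/*
 * Illustrative sketch (hypothetical, not a fixed calling sequence): how the
 * buffer "use" count moves across the constructor/copy/destructor macros
 * above, assuming skmem_bufctl_unuse() returns the remaining use count.
 *
 *	KBUF_CTOR(kb1, baddr, bidx, bc, pp, false);	// use count -> 1
 *	_KBUF_COPY(kb1, kb2);				// use count -> 2
 *	KBUF_DTOR(kb2, usecnt);				// usecnt == 1
 *	KBUF_DTOR(kb1, usecnt);				// usecnt == 0, buffer idle
 */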

/*
 * Kernel variant of __user_quantum.
 */
struct __kern_quantum {
	/*
	 * Common area between user and kernel variants.
	 */
	struct __quantum qum_com;

	/*
	 * Kernel specific.
	 */
	SLIST_ENTRY(__kern_quantum) qum_upp_link;
	const struct kern_pbufpool *qum_pp;
	const struct __user_quantum *qum_user;
	const struct __kern_slot_desc *qum_ksd;
	struct __kern_buflet qum_buf[1];        /* 1 buflet */
	pid_t qum_pid;
} __attribute((aligned(sizeof(uint64_t))));

#define KQUM_CTOR(_kqum, _midx, _uqum, _pp, _qflags) do {		\
	ASSERT((uintptr_t)(_kqum) != (uintptr_t)(_uqum));		\
	_CASSERT(sizeof(METADATA_IDX(_kqum)) == sizeof(obj_idx_t));	\
	/* kernel variant (deconst) */					\
	_KQUM_CTOR(_kqum, (PP_KERNEL_ONLY(_pp) ?			\
	    QUM_F_KERNEL_ONLY : 0) | _qflags, 0, 0, OBJ_IDX_NONE,	\
	    PP_BUF_SIZE_DEF((_pp)), _midx);				\
	_CASSERT(NEXUS_META_TYPE_MAX <= UINT16_MAX);			\
	METADATA_TYPE(_kqum) = (uint16_t)(_pp)->pp_md_type;		\
	_CASSERT(NEXUS_META_SUBTYPE_MAX <= UINT16_MAX);		\
	METADATA_SUBTYPE(_kqum) = (uint16_t)(_pp)->pp_md_subtype;	\
	*(struct kern_pbufpool **)(uintptr_t)&(_kqum)->qum_pp = (_pp);	\
	*(struct __user_quantum **)(uintptr_t)&(_kqum)->qum_user = (_uqum); \
	*(obj_idx_t *)(uintptr_t)&METADATA_IDX(_kqum) = (_midx);	\
	(_kqum)->qum_pid = (pid_t)-1;					\
	*(struct __kern_slot_desc **)(uintptr_t)&(_kqum)->qum_ksd = NULL; \
	/* no need to construct user variant as it is done in externalize */ \
} while (0)

#define KQUM_INIT(_kqum, _flags) do {					\
	ASSERT((_kqum)->qum_ksd == NULL);				\
	ASSERT((_kqum)->qum_pid == (pid_t)-1);				\
	/* kernel variant (deconst) */					\
	_KQUM_INIT(_kqum, (PP_KERNEL_ONLY((_kqum)->qum_pp) ?		\
	    QUM_F_KERNEL_ONLY : 0) | _flags, 0, METADATA_IDX(_kqum));	\
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)

__attribute__((always_inline))
static inline boolean_t
_UUID_MATCH(uuid_t u1, uuid_t u2)
{
	uint64_t *a = (uint64_t *)(void *)u1;
	uint64_t *b = (uint64_t *)(void *)u2;
	bool first_same = (a[0] == b[0]);
	bool second_same = (a[1] == b[1]);

	return first_same && second_same;
}

#define _UUID_COPY(_dst, _src) do {					\
	_CASSERT(sizeof (uuid_t) == 16);				\
	sk_copy64_16((uint64_t *)(void *)_src, (uint64_t *)(void *)_dst); \
} while (0)

#define _UUID_CLEAR(_u) do {						\
	uint64_t *__dst = (uint64_t *)(void *)(_u);			\
	_CASSERT(sizeof (uuid_t) == 16);				\
	*(__dst++) = 0;         /* qw[0] */				\
	*(__dst) = 0;           /* qw[1] */				\
} while (0)

/*
 * _QUM_COPY only copies the user metadata portion of the quantum;
 * at the moment this is everything from the beginning down to __q_flags,
 * but no more. It preserves the destination's QUM_F_SAVE_MASK bits.
 *
 * NOTE: this needs to be adjusted if more user-mutable fields are added
 * after __q_flags.
 */
#define _QUM_COPY(_skq, _dkq) do {					\
	volatile uint16_t _sf = ((_dkq)->qum_qflags & QUM_F_SAVE_MASK);	\
	_CASSERT(sizeof (_sf) == sizeof ((_dkq)->qum_qflags));		\
	_CASSERT(offsetof(struct __quantum, __q_flags) == 24);		\
	/* copy everything above (and excluding) __q_flags */		\
	sk_copy64_24((uint64_t *)(void *)&(_skq)->qum_com,		\
	    (uint64_t *)(void *)&(_dkq)->qum_com);			\
	/* copy __q_flags and restore saved bits */			\
	(_dkq)->qum_qflags = ((_skq)->qum_qflags & ~QUM_F_SAVE_MASK) | _sf; \
} while (0)

/*
 * _QUM_INTERNALIZE internalizes a portion of the quantum that includes
 * user visible fields without overwriting the portion that's private to
 * the kernel; see comments on _QUM_COPY().
 */
#define _QUM_INTERNALIZE(_uq, _kq) do {					\
	_QUM_COPY(_uq, _kq);						\
	/* drop all but QUM_F_SAVE_MASK */				\
	(_kq)->qum_qflags &= QUM_F_SAVE_MASK;				\
} while (0)

/*
 * _QUM_EXTERNALIZE externalizes a portion of the quantum that's user
 * visible without including fields that are private to the kernel; at
 * the moment this is everything from the beginning down to __q_flags,
 * but no more. It does NOT preserve the destination's QUM_F_SAVE_MASK
 * bits, but instead copies all bits except QUM_F_KERNEL_FLAGS ones.
 *
 * NOTE: this needs to be adjusted if more user-mutable fields are added
 * after __q_flags. This macro is used only during externalize.
 */
#define _QUM_EXTERNALIZE(_kq, _uq) do {					\
	_CASSERT(offsetof(struct __quantum, __q_flags) == 24);		\
	_CASSERT(sizeof(METADATA_IDX(_uq)) == sizeof(obj_idx_t));	\
	/* copy __quantum excluding qum_qflags */			\
	sk_copy64_24((uint64_t *)(void *)&(_kq)->qum_com,		\
	    (uint64_t *)(void *)&(_uq)->qum_com);			\
	/* copy qum_qflags excluding saved bits */			\
	(_uq)->qum_qflags = ((_kq)->qum_qflags & ~QUM_F_KERNEL_FLAGS);	\
	/* re-initialize user metadata */				\
	*(obj_idx_t *)(uintptr_t)&METADATA_IDX(_uq) = METADATA_IDX(_kq); \
	METADATA_TYPE(_uq) = METADATA_TYPE(_kq);			\
	METADATA_SUBTYPE(_uq) = METADATA_SUBTYPE(_kq);			\
	(_uq)->qum_usecnt = 0;						\
} while (0)
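
/*
 * Summary sketch (derived from the three macros above; the call sites are
 * hypothetical): how the __q_flags bits move around.
 *
 *	_QUM_COPY(skq, dkq);
 *	// dkq takes skq's user metadata and __q_flags, except that dkq's
 *	// own QUM_F_SAVE_MASK bits are preserved.
 *
 *	_QUM_INTERNALIZE(uq, kq);
 *	// same copy as above, after which kq->qum_qflags keeps only its
 *	// QUM_F_SAVE_MASK bits.
 *
 *	_QUM_EXTERNALIZE(kq, uq);
 *	// uq takes all of kq's bits except QUM_F_KERNEL_FLAGS; uq's prior
 *	// QUM_F_SAVE_MASK bits are NOT preserved and qum_usecnt is reset.
 */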

/*
 * Transmit completion.
 */
struct __packet_compl {
	/*
	 * Tx completion data
	 * _arg & _data: context data which are passed as arguments
	 * to the registered Tx completion callback.
	 * _tx_status: Tx status set by the driver.
	 */
	union {
		uint64_t compl_data64[3];
		struct {
			uintptr_t _cb_arg;
			uintptr_t _cb_data;
			uint32_t _tx_status;
			uint32_t _pad;
		} compl_data;
	};
	/* bitmap indicating the requested packet completion callbacks */
	uint32_t compl_callbacks;
	/* Context identifier for a given packet completion */
	uint32_t compl_context;
};
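
/*
 * Layout sketch for the anonymous union above, assuming LP64 (8-byte
 * uintptr_t); offsets are illustrative:
 *
 *	compl_data64[0]  <->  compl_data._cb_arg     (bytes  0..7)
 *	compl_data64[1]  <->  compl_data._cb_data    (bytes  8..15)
 *	compl_data64[2]  <->  compl_data._tx_status  (bytes 16..19)
 *	                      compl_data._pad        (bytes 20..23)
 *
 * This overlap is what lets _KPKT_INIT_TX_COMPL_DATA() below clear the
 * whole completion context by zeroing the three 64-bit words.
 */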

/*
 * Kernel variant of __user_packet.
 */
struct __kern_packet {
	struct __kern_quantum pkt_qum;
#define pkt_user        pkt_qum.qum_user

	/*
	 * Common area between user and kernel variants.
	 */
	struct __packet pkt_com;

	/*
	 * Option common area (PKT_F_OPT_DATA),
	 * non-NULL if PKT_F_OPT_ALLOC is set.
	 */
	struct __packet_opt *pkt_com_opt;

	/* TX: enqueue time, RX: receive timestamp */
	uint64_t pkt_timestamp;

	/* next chain in queue; used while enqueuing to classq or reass */
	struct __kern_packet *pkt_nextpkt;

	/*
	 * Attached mbuf or pkt.
	 * Used by compat netif driver (PKT_F_MBUF_DATA) or interface
	 * filters (PKT_F_PKT_DATA).
	 */
	union {
		struct mbuf *pkt_mbuf;
		struct __kern_packet *pkt_pkt;
	};
	/*
	 * Flow classifier data (PKT_F_FLOW_DATA),
	 * non-NULL if PKT_F_FLOW_ALLOC is set.
	 */
	struct __flow *pkt_flow;        /* classifier info */
#define pkt_flow_ipv4_addrs     pkt_flow->flow_ipv4_addrs
#define pkt_flow_ipv4_src       pkt_flow->flow_ipv4_src
#define pkt_flow_ipv4_dst       pkt_flow->flow_ipv4_dst
#define pkt_flow_ipv6_addrs     pkt_flow->flow_ipv6_addrs
#define pkt_flow_ipv6_src       pkt_flow->flow_ipv6_src
#define pkt_flow_ipv6_dst       pkt_flow->flow_ipv6_dst
#define pkt_flow_ip_ver         pkt_flow->flow_ip_ver
#define pkt_flow_ip_proto       pkt_flow->flow_ip_proto
#define pkt_flow_ip_hdr         pkt_flow->flow_ip_hdr
#define pkt_flow_tcp            pkt_flow->flow_tcp
#define pkt_flow_tcp_src        pkt_flow->flow_tcp_src
#define pkt_flow_tcp_dst        pkt_flow->flow_tcp_dst
#define pkt_flow_tcp_seq        pkt_flow->flow_tcp_seq
#define pkt_flow_tcp_ack        pkt_flow->flow_tcp_ack
#define pkt_flow_tcp_off        pkt_flow->flow_tcp_off
#define pkt_flow_tcp_flags      pkt_flow->flow_tcp_flags
#define pkt_flow_tcp_win        pkt_flow->flow_tcp_win
#define pkt_flow_tcp_hlen       pkt_flow->flow_tcp_hlen
#define pkt_flow_tcp_hdr        pkt_flow->flow_tcp_hdr
#define pkt_flow_tcp_agg_fast   pkt_flow->flow_tcp_agg_fast
#define pkt_flow_udp            pkt_flow->flow_udp
#define pkt_flow_udp_src        pkt_flow->flow_udp_src
#define pkt_flow_udp_dst        pkt_flow->flow_udp_dst
#define pkt_flow_udp_hlen       pkt_flow->flow_udp_hlen
#define pkt_flow_udp_hdr        pkt_flow->flow_udp_hdr
#define pkt_flow_esp_spi        pkt_flow->flow_esp_spi
#define pkt_transport_protocol  pkt_flow->flow_ulp_encap
#define pkt_flow_ip_hlen        pkt_flow->flow_ip_hlen
#define pkt_flow_ulen           pkt_flow->flow_ulen
#define pkt_flow_ip_frag_id     pkt_flow->flow_ip_frag_id
#define pkt_flow_ip_is_frag     pkt_flow->flow_ip_is_frag
#define pkt_flow_ip_is_first_frag       pkt_flow->flow_ip_is_first_frag
#define pkt_flowsrc_token       pkt_flow->flow_src_token
#define pkt_flowsrc_id          pkt_flow->flow_src_id
#define pkt_flowsrc_fidx        pkt_flow->flow_src_fidx
#define pkt_flowsrc_type        pkt_flow->flow_src_type
#define pkt_classq_hash         pkt_flow->flow_classq_hash
#define pkt_classq_flags        pkt_flow->flow_classq_flags
#define pkt_policy_id           pkt_flow->flow_policy_id
#define pkt_skip_policy_id      pkt_flow->flow_skip_policy_id
#define pkt_policy_euuid        pkt_flow->flow_policy_euuid

	/*
	 * Transmit completion data (PKT_TX_COMPL_DATA),
	 * non-NULL if PKT_F_TX_COMPL_ALLOC is set.
	 */
	struct __packet_compl *pkt_tx_compl;    /* TX completion info */
#define pkt_tx_compl_data       pkt_tx_compl->compl_data
#define pkt_tx_compl_data64     pkt_tx_compl->compl_data64
#define pkt_tx_compl_cb_arg     pkt_tx_compl->compl_data._cb_arg
#define pkt_tx_compl_cb_data    pkt_tx_compl->compl_data._cb_data
#define pkt_tx_compl_status     pkt_tx_compl->compl_data._tx_status
#define pkt_tx_compl_callbacks  pkt_tx_compl->compl_callbacks
#define pkt_tx_compl_context    pkt_tx_compl->compl_context

	void *pkt_priv;         /* free to use for every layer */

	uint32_t pkt_fpd_seqnum;        /* @ 0xd0 */
	uint16_t pkt_fpd_metadata;      /* @ 0xd4 */
	/*
	 * Kernel specific.
	 *
	 * pkt_{bufs,max} aren't part of the common area, on purpose,
	 * since we selectively update them on internalize/externalize.
	 */
	const uint16_t pkt_bufs_max;    /* maximum size of buflet chain */
	const uint16_t pkt_bufs_cnt;    /* buflet chain size */
	uint32_t pkt_chain_count;       /* number of packets in chain */
	uint32_t pkt_chain_bytes;       /* number of bytes in chain */

	nexus_port_t pkt_nx_port;       /* user channel port */
	/*
	 * gencnt of pkt_nx_port's corresponding vpna. So that we can tell
	 * whether the port in pkt_nx_port has been defuncted or reused.
	 */
	uint16_t pkt_vpna_gencnt;

	/* Cellular Host Driver generated trace_tag */
	packet_trace_tag_t pkt_trace_tag;
	/* index of the qset that the pkt comes from */
	uint8_t pkt_qset_idx;
	uint8_t _pad[1];
} __attribute((aligned(sizeof(uint64_t))));


/* the size of __kern_packet structure for n total buflets */
#define _KERN_PACKET_SIZE(n) sizeof(struct __kern_packet)

#define _PKT_COM_INIT(_p, _pflags) do {					\
	/* save packet flags since they might be wiped out */		\
	volatile uint64_t __pflags = (_pflags);				\
	/* first wipe it clean */					\
	_CASSERT(sizeof(struct __packet_com) == 32);			\
	_CASSERT(sizeof(struct __packet) == 32);			\
	sk_zero_32(&(_p)->pkt_com.__pkt_data[0]);			\
	/* then initialize */						\
	(_p)->pkt_pflags = (__pflags);					\
	(_p)->pkt_svc_class = KPKT_SC_UNSPEC;				\
} while (0)

#define _PKT_CTOR(_p, _pflags, _bufcnt, _maxfrags) do {			\
	_PKT_COM_INIT(_p, _pflags);					\
	_CASSERT(sizeof ((_p)->pkt_bufs_max) == sizeof (uint16_t));	\
	_CASSERT(sizeof ((_p)->pkt_bufs_cnt) == sizeof (uint16_t));	\
	/* deconst */							\
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_max = (_maxfrags);	\
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_cnt = (_bufcnt);	\
} while (0)

#define KPKT_CLEAR_MBUF_PKT_DATA(_pk) do {				\
	_CASSERT(offsetof(struct __kern_packet, pkt_mbuf) ==		\
	    offsetof(struct __kern_packet, pkt_pkt));			\
	(_pk)->pkt_pflags &= ~(PKT_F_MBUF_MASK|PKT_F_PKT_MASK);	\
	/* the following also clears pkt_pkt */				\
	(_pk)->pkt_mbuf = NULL;						\
} while (0)

#define KPKT_CLEAR_MBUF_DATA(_pk) do {					\
	(_pk)->pkt_pflags &= ~PKT_F_MBUF_MASK;				\
	(_pk)->pkt_mbuf = NULL;						\
} while (0)

#define KPKT_CLEAR_PKT_DATA(_pk) do {					\
	(_pk)->pkt_pflags &= ~PKT_F_PKT_MASK;				\
	(_pk)->pkt_pkt = NULL;						\
} while (0)

#define KPKT_CLEAR_FLOW_INIT(_fl) do {					\
	_CASSERT(sizeof ((_fl)->flow_init_data) == 128);		\
	sk_zero_128(&(_fl)->flow_init_data[0]);				\
} while (0)

#define KPKT_CLEAR_FLOW_ALL(_fl) do {					\
	bzero(_fl, sizeof(struct __flow));				\
} while (0)

#define _KPKT_CTOR_PRIV_VARS(_p, _opt, _flow, _txcomp) do {		\
	(_p)->pkt_com_opt = (_opt);					\
	(_p)->pkt_flow = (_flow);					\
	(_p)->pkt_tx_compl = (_txcomp);					\
} while (0)

#define _KPKT_INIT_FPD_VARS(_p)

#define _KPKT_INIT_PRIV_VARS(_p) do {					\
	struct __flow *__fl = (_p)->pkt_flow;				\
	(_p)->pkt_timestamp = 0;					\
	(_p)->pkt_nextpkt = NULL;					\
	(_p)->pkt_priv = NULL;						\
	_KPKT_INIT_FPD_VARS(_p);					\
	KPKT_CLEAR_MBUF_PKT_DATA(_p);					\
	if (__probable(__fl != NULL)) {					\
		KPKT_CLEAR_FLOW_INIT(__fl);				\
	}								\
	(_p)->pkt_chain_count = (_p)->pkt_chain_bytes = 0;		\
	(_p)->pkt_nx_port = NEXUS_PORT_ANY;				\
	(_p)->pkt_vpna_gencnt = 0;					\
	(_p)->pkt_trace_tag = 0;					\
	(_p)->pkt_qset_idx = 0;						\
} while (0)

#define KPKT_CTOR(_pk, _pflags, _opt, _flow, _txcomp, _midx, _pu, _pp,	\
    _bufcnt, _maxfrags, _qflags) do {					\
	ASSERT((uintptr_t)(_pk) != (uintptr_t)(_pu));			\
	/* ASSERT((_pu) != NULL || PP_KERNEL_ONLY(_pp)); */		\
	/* kernel (and user) quantum */					\
	KQUM_CTOR(&(_pk)->pkt_qum, _midx,				\
	    (((_pu) == NULL) ? NULL : &(_pu)->pkt_qum), _pp, _qflags);	\
	/* kernel packet variant */					\
	_PKT_CTOR(_pk, _pflags, _bufcnt, _maxfrags);			\
	_KPKT_CTOR_PRIV_VARS(_pk, _opt, _flow, _txcomp);		\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)

#define KPKT_INIT(_pk, _flags) do {					\
	KQUM_INIT(&(_pk)->pkt_qum, _flags);				\
	_PKT_COM_INIT(_pk, (_pk)->pkt_pflags);				\
	_KPKT_INIT_PRIV_VARS(_pk);					\
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)

#define _KPKT_INIT_TX_COMPL_DATA(_p) do {				\
	if (((_p)->pkt_pflags & PKT_F_TX_COMPL_DATA) == 0) {		\
		ASSERT((_p)->pkt_pflags & PKT_F_TX_COMPL_ALLOC);	\
		(_p)->pkt_pflags |= PKT_F_TX_COMPL_DATA;		\
		_CASSERT(sizeof((_p)->pkt_tx_compl_data64) == 24);	\
		/* 32-bit compl_data should be in the union */		\
		_CASSERT(sizeof((_p)->pkt_tx_compl_data) <= 24);	\
		(_p)->pkt_tx_compl_data64[0] = 0;			\
		(_p)->pkt_tx_compl_data64[1] = 0;			\
		(_p)->pkt_tx_compl_data64[2] = 0;			\
	}								\
} while (0)

/*
 * Copy optional meta data.
 * Both source and destination must be a kernel packet.
 */
#define _PKT_COPY_OPT_DATA(_skp, _dkp) do {				\
	if (__improbable(((_skp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) {	\
		_CASSERT(sizeof(struct __packet_opt) == 40);		\
		ASSERT((_skp)->pkt_pflags & PKT_F_OPT_ALLOC);		\
		sk_copy64_40((uint64_t *)(void *)(_skp)->pkt_com_opt,	\
		    (uint64_t *)(void *)(_dkp)->pkt_com_opt);		\
	}								\
} while (0)

/*
 * _PKT_COPY only copies the user metadata portion of the packet;
 * at the moment this is everything from the beginning down to __p_flags,
 * but no more. It additionally copies only PKT_F_COPY_MASK bits from
 * the source __p_flags to the destination's.
 *
 * NOTE: this needs to be adjusted if more user-mutable fields are added
 * after __p_flags.
 */
#define _PKT_COPY(_skp, _dkp) do {					\
	_CASSERT(sizeof(struct __packet) == 32);			\
	_CASSERT(sizeof(struct __packet_com) == 32);			\
	_CASSERT(offsetof(struct __packet, __p_flags) == 24);		\
	/* copy __packet excluding pkt_pflags */			\
	sk_copy64_24((uint64_t *)(void *)&(_skp)->pkt_com,		\
	    (uint64_t *)(void *)&(_dkp)->pkt_com);			\
	/* copy relevant pkt_pflags bits */				\
	(_dkp)->pkt_pflags = ((_skp)->pkt_pflags & PKT_F_COPY_MASK);	\
	/* copy __packet_opt if applicable */				\
	_PKT_COPY_OPT_DATA((_skp), (_dkp));				\
} while (0)

/*
 * Copy Transmit completion data.
 */
#define _PKT_COPY_TX_PORT_DATA(_skp, _dkp) do {				\
	(_dkp)->pkt_nx_port = (_skp)->pkt_nx_port;			\
	(_dkp)->pkt_vpna_gencnt = (_skp)->pkt_vpna_gencnt;		\
	(_dkp)->pkt_pflags |= ((_skp)->pkt_pflags & PKT_F_TX_PORT_DATA); \
} while (0)

/*
 * _PKT_INTERNALIZE internalizes a portion of the packet that includes
 * user visible fields without overwriting the portion that's private to
 * the kernel.
 *
 * NOTE: this needs to be adjusted if more user-mutable data is added
 * after __p_flags. This macro is used only during internalize.
 */
#define _PKT_INTERNALIZE(_up, _kp) do {					\
	volatile uint64_t _kf = ((_kp)->pkt_pflags & ~PKT_F_USER_MASK);	\
	_CASSERT(sizeof(struct __packet) == 32);			\
	_CASSERT(sizeof(struct __packet_com) == 32);			\
	_CASSERT(offsetof(struct __packet, __p_flags) == 24);		\
	/* copy __packet excluding pkt_pflags */			\
	sk_copy64_24((uint64_t *)(void *)&(_up)->pkt_com,		\
	    (uint64_t *)(void *)&(_kp)->pkt_com);			\
	/* copy pkt_pflags and restore kernel bits */			\
	(_kp)->pkt_pflags = ((_up)->pkt_pflags & PKT_F_USER_MASK) | _kf; \
	/* copy (internalize) __packet_opt if applicable */		\
	if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) {	\
		_CASSERT(sizeof(struct __packet_opt) == 40);		\
		ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC);		\
		sk_copy64_40((uint64_t *)(void *)&(_up)->pkt_com_opt,	\
		    (uint64_t *)(void *)(_kp)->pkt_com_opt);		\
	}								\
} while (0)

/*
 * _PKT_EXTERNALIZE externalizes a portion of the packet that's user
 * visible without including fields that are private to the kernel; at the
 * moment this is everything from the beginning down to __p_flags,
 * but no more.
 *
 * NOTE: this needs to be adjusted if more user-mutable data is added
 * after __p_flags. This macro is used only during externalize.
 */
#define _PKT_EXTERNALIZE(_kp, _up) do {					\
	_CASSERT(sizeof(struct __packet) == 32);			\
	_CASSERT(sizeof(struct __packet_com) == 32);			\
	_CASSERT(offsetof(struct __packet, __p_flags) == 24);		\
	/* copy __packet excluding pkt_pflags */			\
	sk_copy64_24((uint64_t *)(void *)&(_kp)->pkt_com,		\
	    (uint64_t *)(void *)&(_up)->pkt_com);			\
	/* copy pkt_pflags excluding kernel bits */			\
	(_up)->pkt_pflags = ((_kp)->pkt_pflags & PKT_F_USER_MASK);	\
	/* copy (externalize) __packet_opt if applicable */		\
	if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) {	\
		_CASSERT(sizeof(struct __packet_opt) == 40);		\
		ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC);		\
		sk_copy64_40((uint64_t *)(void *)(_kp)->pkt_com_opt,	\
		    (uint64_t *)(void *)&(_up)->pkt_com_opt);		\
	}								\
} while (0)
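
/*
 * Illustrative flow (the call sites are hypothetical): how the two macros
 * above are typically paired around a channel sync, with "up" the user
 * packet and "kp" the kernel packet.
 *
 *	// TX sync: pick up what user space wrote
 *	_PKT_INTERNALIZE(up, kp);
 *	// kp's user-visible fields now match up; kernel-only bits in
 *	// kp->pkt_pflags (those outside PKT_F_USER_MASK) are preserved.
 *
 *	// RX sync: publish kernel state back to user space
 *	_PKT_EXTERNALIZE(kp, up);
 *	// up sees only PKT_F_USER_MASK bits; __packet_opt is copied in
 *	// either direction only when PKT_F_OPT_DATA is set.
 */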

#define SK_PTR_ADDR_KQUM(_ph)   __unsafe_forge_single(struct __kern_quantum *, \
	((struct __kern_quantum *)SK_PTR_ADDR(_ph)))
#define SK_PTR_ADDR_KPKT(_ph)   __unsafe_forge_single(struct __kern_packet *, \
	((struct __kern_packet *)SK_PTR_ADDR(_ph)))
#define SK_PTR_KPKT(_pa)        ((struct __kern_packet *)(void *)(_pa))
#define SK_PKT2PH(_pkt) \
	(SK_PTR_ENCODE((_pkt), METADATA_TYPE((_pkt)), METADATA_SUBTYPE((_pkt))))

/*
 * Set the length of the data to various places: __user_slot_desc,
 * __kern_quantum, and for a packet, the buflet.
 * !!! This should be used only for dropping the packet as the macro
 * is not functionally correct.
 *
 * TODO: adi@apple.com -- maybe finalize here as well?
 */
#define METADATA_SET_LEN(_md, _len, _doff) do {				\
	struct __kern_quantum *_q =					\
	    (struct __kern_quantum *)(void *)(_md);			\
	_q->qum_len = (_len);						\
	switch (METADATA_TYPE(_q)) {					\
	case NEXUS_META_TYPE_PACKET: {					\
		struct __kern_packet *_p =				\
		    (struct __kern_packet *)(void *)(_md);		\
		struct __kern_buflet *_kbft;				\
		PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft);	\
		_kbft->buf_dlen = (_len);				\
		_kbft->buf_doff = (_doff);				\
		break;							\
	}								\
	default:							\
		ASSERT(METADATA_TYPE(_q) == NEXUS_META_TYPE_QUANTUM);	\
		_q->qum_buf[0].buf_dlen = (_len);			\
		_q->qum_buf[0].buf_doff = (_doff);			\
		break;							\
	}								\
} while (0)

#define METADATA_ADJUST_LEN(_md, _len, _doff) do {			\
	struct __kern_quantum *_q =					\
	    (struct __kern_quantum *)(void *)(_md);			\
	switch (METADATA_TYPE(_q)) {					\
	case NEXUS_META_TYPE_PACKET: {					\
		struct __kern_packet *_p =				\
		    (struct __kern_packet *)(void *)(_md);		\
		struct __kern_buflet *_kbft;				\
		PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft);	\
		_kbft->buf_dlen += (_len);				\
		_kbft->buf_doff = (_doff);				\
		break;							\
	}								\
	default:							\
		ASSERT(METADATA_TYPE(_q) == NEXUS_META_TYPE_QUANTUM);	\
		_q->qum_buf[0].buf_dlen += (_len);			\
		_q->qum_buf[0].buf_doff = (_doff);			\
		break;							\
	}								\
} while (0)
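
/*
 * Usage sketch (hypothetical values): as noted above, METADATA_SET_LEN()
 * is only meant for the drop path, while METADATA_ADJUST_LEN() grows the
 * current data length in place.
 *
 *	// dropping: zero the length/offset of the quantum or of the
 *	// packet's first buflet
 *	METADATA_SET_LEN(kqum, 0, 0);
 *
 *	// account for 4 more bytes of data starting at offset 2 in the
 *	// first buflet (values are illustrative only)
 *	METADATA_ADJUST_LEN(kpkt, 4, 2);
 */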

__attribute__((always_inline))
static inline kern_packet_t
SD_GET_TAGGED_METADATA(const struct __kern_slot_desc *ksd)
{
	return __improbable(ksd->sd_md == NULL) ? 0 :
	       SK_PTR_ENCODE(ksd->sd_md, METADATA_TYPE(ksd->sd_qum),
	           METADATA_SUBTYPE(ksd->sd_qum));
}

__attribute__((always_inline))
static inline errno_t
KR_SLOT_ATTACH_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd, struct __kern_quantum *kqum)
{
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());
	ASSERT(kqum->qum_pp == kring->ckr_pp);
	ASSERT(kqum->qum_ksd == NULL);
	/*
	 * Packets being attached to a slot should always be internalized.
	 * Internalized packet should be in finalized or dropped state.
	 */
	ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
	ASSERT(((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
	    ((kqum->qum_qflags & QUM_F_DROPPED) != 0));

	kqum->qum_ksd = ksd;

	KSD_ATTACH_METADATA(ksd, kqum);
	if (!KR_KERNEL_ONLY(kring)) {
		USD_ATTACH_METADATA(KR_USD(kring, idx), METADATA_IDX(kqum));
	}

	return 0;
}

__attribute__((always_inline))
static inline struct __kern_quantum *
KR_SLOT_DETACH_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd)
{
	struct __kern_quantum *kqum = ksd->sd_qum;
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());
	ASSERT(KSD_VALID_METADATA(ksd));
	ASSERT(kqum->qum_ksd == ksd);
	ASSERT(kqum->qum_pp == kring->ckr_pp);
	/*
	 * Packets being attached to a slot would always be internalized.
	 * We also detach externalized packets on an rx ring on behalf
	 * of the user space if the channel is not in user packet pool mode.
	 * Externalized packet should be in finalized or dropped state.
	 */
	ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED)) ||
	    ((((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
	    ((kqum->qum_qflags & QUM_F_DROPPED) != 0))));

	/* detaching requires the packet to be finalized later */
	kqum->qum_qflags &= ~QUM_F_FINALIZED;
	kqum->qum_ksd = NULL;

	KSD_DETACH_METADATA(ksd);
	if (!KR_KERNEL_ONLY(kring)) {
		USD_DETACH_METADATA(KR_USD(kring, idx));
	}

	return kqum;
}
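
/*
 * Illustrative pairing (hypothetical variables; KR_KSD() is assumed to be
 * the slot-descriptor accessor for the ring): attaching an internalized
 * quantum to a slot during a sync and detaching it again later.
 *
 *	struct __kern_slot_desc *ksd = KR_KSD(kring, idx);
 *
 *	// kqum must be internalized and either finalized or dropped
 *	KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
 *	...
 *	kqum = KR_SLOT_DETACH_METADATA(kring, ksd);
 *	// QUM_F_FINALIZED has been cleared; the packet must be finalized
 *	// again before it can be reused
 */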

__attribute__((always_inline))
static inline errno_t
KR_SLOT_ATTACH_BUF_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd, struct __kern_buflet *kbuf)
{
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());

	KSD_ATTACH_METADATA(ksd, kbuf);
	/*
	 * buflet is attached only to the user packet pool alloc ring.
	 */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(kring->ckr_tx == CR_KIND_ALLOC);
	USD_ATTACH_METADATA(KR_USD(kring, idx), kbuf->buf_bft_idx_reg);
	return 0;
}

#if (DEVELOPMENT || DEBUG)
SYSCTL_DECL(_kern_skywalk_packet);
extern int pkt_trailers;
#endif /* !DEVELOPMENT && !DEBUG */

typedef void (pkt_copy_from_pkt_t)(const enum txrx, kern_packet_t,
    const uint16_t, kern_packet_t, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t, const uint16_t, const boolean_t);

typedef void (pkt_copy_from_mbuf_t)(const enum txrx, kern_packet_t,
    const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t);

typedef void (pkt_copy_to_mbuf_t)(const enum txrx, kern_packet_t,
    const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t);

__BEGIN_DECLS
extern void pkt_subtype_assert_fail(const kern_packet_t, uint64_t, uint64_t);
extern void pkt_type_assert_fail(const kern_packet_t, uint64_t);

extern pkt_copy_from_pkt_t pkt_copy_from_pkt;
extern pkt_copy_from_pkt_t pkt_copy_multi_buflet_from_pkt;
extern pkt_copy_from_mbuf_t pkt_copy_from_mbuf;
extern pkt_copy_from_mbuf_t pkt_copy_multi_buflet_from_mbuf;
extern pkt_copy_to_mbuf_t pkt_copy_to_mbuf;
extern pkt_copy_to_mbuf_t pkt_copy_multi_buflet_to_mbuf;

extern void pkt_copypkt_sum(kern_packet_t, uint16_t, kern_packet_t,
    uint16_t, uint16_t, uint32_t *, boolean_t);
extern uint32_t
pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start);
extern uint32_t pkt_sum(kern_packet_t, uint16_t, uint16_t);
extern uint32_t pkt_mcopypkt_sum(mbuf_t, int, kern_packet_t, uint16_t,
    uint16_t, boolean_t);
extern uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
    boolean_t *odd_start);
extern void pkt_copy(void *src, void *dst, size_t len);

#if (DEVELOPMENT || DEBUG)
extern uint32_t pkt_add_trailers(kern_packet_t, const uint32_t, const uint16_t);
extern uint32_t pkt_add_trailers_mbuf(struct mbuf *, const uint16_t);
#endif /* !DEVELOPMENT && !DEBUG */
__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* !_SKYWALK_PACKET_PACKETVAR_H_ */
883