1/*
2 * Copyright (c) 2016-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#ifndef _SKYWALK_OS_PACKET_PRIVATE_H_
30#define _SKYWALK_OS_PACKET_PRIVATE_H_
31
32#if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
33#include <skywalk/os_packet.h>
34#include <skywalk/os_nexus_private.h>
35#include <skywalk/os_channel_private.h>
36#include <libkern/OSByteOrder.h>
37#include <netinet/in.h>
38#include <net/ethernet.h>
39
40#if defined(BSD_KERNEL_PRIVATE)
41/*
42 * Flow (currently for kernel, potentially for userland one day).
43 *
 * XXX: When we expose this to userland, we need to make sure NOT to
 * expose kernel pointer/address values embedded within.
46 *
47 * Values in flow_{l2,l3,l4} are stored in network byte order. Pointers
48 * are defined using mach_vm_address_t because it's stable across user
49 * and kernel, and therefore keeps the structure size the same.
50 *
 * Because this structure might be initialized on a per-packet allocation
 * basis, it, as well as some of its member sub-structures, is aligned
 * on a 16-byte address boundary to allow 128-bit operations on platforms
 * that support them.
55 *
56 * XXX: when adding new fields, try to leverage __pad ones first.
57 *
58 * TODO: we should consider embedding a flow_key structure here and
59 * use that to store the tuples. That way we can leverage that for
60 * flow lookups without having to copy things back-and-forth.
61 */
struct __flow {
    /*
     * First region: the structured view (flow_init) shares storage
     * with a raw array of 64-bit words (flow_init_data).
     */
    union {
        /*
         * The following is always zeroed out on each alloc.
         */
        struct __flow_init {
            /*
             * Layer 3
             */
            struct __flow_l3 {
                /* IPv4 and IPv6 address pairs share storage */
                union {
                    struct __flow_l3_ipv4_addrs {
                        struct in_addr _src;
                        struct in_addr _dst;
                    } _l3_ipv4;
                    struct __flow_l3_ipv6_addrs {
                        struct in6_addr _src;
                        struct in6_addr _dst;
                    } _l3_ipv6;
                };
                uint8_t _l3_ip_ver;
                uint8_t _l3_proto;
                uint8_t _l3_hlen;
                /* two 1-bit flags followed by 6 reserved bits */
                unsigned _l3_is_frag : 1;
                unsigned _l3_is_first_frag : 1;
                unsigned _l3_reserved_flags : 6;
                uint32_t _l3_frag_id;
                mach_vm_address_t _l3_ptr;  /* see flow_ip_hdr */
            } __l3;
            /*
             * AQM
             */
            struct __flow_classq {
                uint32_t _fcq_hash;     /* classq-specific hash */
                uint32_t _fcq_flags;    /* classq-specific flags */
            } __classq;
            /*
             * Misc.
             */
            uint32_t __ulen;        /* user data length */
            uint8_t __ulp_encap;    /* e.g. IPPROTO_QUIC */
            uint8_t __pad[3];
            uint64_t __pad64[2];
            /*
             * Flow Source.
             */
            struct __flow_source {
                /* 16-byte identifier, viewable as 64-bit/32-bit words or uuid_t */
                union {
                    /* source identifier */
                    uint64_t _fsrc_id_64[2];
                    uint32_t _fsrc_id_32[4];
                    uuid_t _fsrc_id;
                } __attribute__((aligned(sizeof(uint64_t))));
                flowadv_idx_t _fsrc_fidx;   /* flow adv. index */
                uint8_t _fsrc_type;         /* FLOWSRC_* mbuf.h */
                uint8_t _fsrc_pad[3];
            } __source;
            /*
             * Policy.
             */
            struct __flow_policy {
                uint32_t _fpc_id;       /* policy id of pkt sender */
                uint32_t _fpc_skip_id;  /* skip policy id of pkt sender */
                /* 16-byte identifier, viewable as 64-bit/32-bit words or uuid_t */
                union {
                    /* process identifier */
                    uint64_t _fpc_euuid_64[2];
                    uint32_t _fpc_euuid_32[4];
                    uuid_t _fpc_euuid;
                } __attribute__((aligned(sizeof(uint64_t))));
            } __policy;
        } flow_init;
        uint64_t flow_init_data[16];
    } __attribute((aligned(16)));
/* shorthand paths into flow_init */
#define flow_l3                 flow_init.__l3
#define flow_classq             flow_init.__classq
#define flow_ulen               flow_init.__ulen
#define flow_ulp_encap          flow_init.__ulp_encap
#define flow_source             flow_init.__source
#define flow_policy             flow_init.__policy

/* layer 3 accessors */
#define flow_ipv4_addrs         flow_l3._l3_ipv4
#define flow_ipv4_src           flow_l3._l3_ipv4._src
#define flow_ipv4_dst           flow_l3._l3_ipv4._dst
#define flow_ipv6_addrs         flow_l3._l3_ipv6
#define flow_ipv6_src           flow_l3._l3_ipv6._src
#define flow_ipv6_dst           flow_l3._l3_ipv6._dst
#define flow_ip_ver             flow_l3._l3_ip_ver
#define flow_ip_proto           flow_l3._l3_proto
#define flow_ip_hlen            flow_l3._l3_hlen
#define flow_ip_hdr             flow_l3._l3_ptr
#define flow_ip_frag_id         flow_l3._l3_frag_id
#define flow_ip_is_frag         flow_l3._l3_is_frag
#define flow_ip_is_first_frag   flow_l3._l3_is_first_frag

/* classq accessors */
#define flow_classq_hash        flow_classq._fcq_hash
#define flow_classq_flags       flow_classq._fcq_flags

/* flow source accessors; flow_src_token aliases the first 32-bit word of the id */
#define flow_src_token          flow_source._fsrc_id_32[0]
#define flow_src_id             flow_source._fsrc_id
#define flow_src_fidx           flow_source._fsrc_fidx
#define flow_src_type           flow_source._fsrc_type

/* policy accessors */
#define flow_policy_id          flow_policy._fpc_id
#define flow_skip_policy_id     flow_policy._fpc_skip_id
#define flow_policy_euuid       flow_policy._fpc_euuid

    /*
     * Layer 4.  The structured view (flow_l4) shares storage with a
     * raw array of 64-bit words (flow_l4_data).
     */
    union {
        struct __flow_l4 {
            /* TCP, UDP and ESP views share storage */
            union {
                struct __flow_l4_tcp {
                    in_port_t _src;
                    in_port_t _dst;
                    uint32_t _seq;
                    uint32_t _ack;
                    /*
                     * Offset/reserved nibbles, flags and window;
                     * the same 32 bits are also reachable as _ofw.
                     */
                    union {
                        struct {
#if BYTE_ORDER == LITTLE_ENDIAN
                            uint8_t _tcp_res:4;
                            uint8_t _off:4;
#else /* BYTE_ORDER == BIG_ENDIAN */
                            uint8_t _off:4;
                            uint8_t _tcp_res:4;
#endif /* BYTE_ORDER == BIG_ENDIAN */
                            uint8_t _flags;
                            uint16_t _win;
                        };
                        uint32_t _ofw;
                    };
                } _l4_tcp;
                struct __flow_l4_udp {
                    in_port_t _src;
                    in_port_t _dst;
                    uint32_t _ls;
                } _l4_udp;
                struct __flow_l4_esp {
                    uint32_t _spi;
                } _l4_esp;
            };
            uint8_t _l4_hlen;
            uint8_t _l4_agg_fast;
            uint8_t _l4_pad[6];
            mach_vm_address_t _l4_ptr;  /* see flow_{tcp,udp}_hdr */
        } flow_l4;
        uint64_t flow_l4_data[4];
    } __attribute((aligned(sizeof(uint64_t))));
/* layer 4 accessors; TCP and UDP share _l4_hlen and _l4_ptr */
#define flow_tcp                flow_l4._l4_tcp
#define flow_tcp_src            flow_l4._l4_tcp._src
#define flow_tcp_dst            flow_l4._l4_tcp._dst
#define flow_tcp_seq            flow_l4._l4_tcp._seq
#define flow_tcp_ack            flow_l4._l4_tcp._ack
#define flow_tcp_off            flow_l4._l4_tcp._off
#define flow_tcp_flags          flow_l4._l4_tcp._flags
#define flow_tcp_win            flow_l4._l4_tcp._win
#define flow_tcp_hlen           flow_l4._l4_hlen
#define flow_tcp_hdr            flow_l4._l4_ptr
#define flow_tcp_agg_fast       flow_l4._l4_agg_fast
#define flow_udp                flow_l4._l4_udp
#define flow_udp_src            flow_l4._l4_udp._src
#define flow_udp_dst            flow_l4._l4_udp._dst
#define flow_udp_hlen           flow_l4._l4_hlen
#define flow_udp_hdr            flow_l4._l4_ptr
#define flow_esp_spi            flow_l4._l4_esp._spi
} __attribute((aligned(16)));
228#endif /* BSD_KERNEL_PRIVATE */
229
/*
 * Maximum size, in bytes, of L2, L3 & L4 headers combined.
 */
#define PKT_MAX_PROTO_HEADER_SIZE       256

/*
 * Buflet counts per packet, based on 2KB buflet size
 * (e.g. 5 x 2KB covers a 9000-byte MTU).
 */
#define BUFLETS_MIN             1       /* Ethernet MTU (default) */
#define BUFLETS_9K_JUMBO        5       /* 9000 bytes MTU */
#define BUFLETS_GSO             46      /* 64KB GSO, Ethernet MTU */
239
240/*
241 * Common buflet structure shared by {__user,__kern}_buflet.
242 */
struct __buflet {
    /* the two members below share the same 64-bit storage */
    union {
        /* for skmem batch alloc/free */
        uint64_t __buflet_next;
        /* address of next buflet in chain */
        const mach_vm_address_t __nbft_addr;
    };
    /*
     * The const-qualified members below are assigned through the
     * BUF_*() setter macros in this file, which cast the qualifier away.
     */
    /* buffer data address */
    const mach_vm_address_t __baddr;
    /* index of buflet object in the owning buflet region */
    const obj_idx_t __bft_idx;
    /* buffer object index in buffer region */
    const obj_idx_t __bidx;
    /* object index in buflet region of next buflet (for buflet chaining) */
    const obj_idx_t __nbft_idx;
    const uint32_t __dlim;          /* maximum length */
    uint32_t __doff;                /* offset of data in buflet */
    uint32_t __dlen;                /* length of data in buflet */
    const uint16_t __flag;          /* BUFLET_FLAG_* values */
#define BUFLET_FLAG_EXTERNAL    0x0001
#define BUFLET_FLAG_LARGE_BUF   0x0002  /* buflet holds large buffer */
} __attribute((packed));            /* no padding between members */
265
266/*
267 * A buflet represents the smallest buffer fragment representing
268 * part of the packet. The index refers to the position of the buflet
269 * in the pool, and the data length represents the actual payload
270 * size -- not the buflet size itself as it is fixed for all objects
271 * in the pool.
272 */
struct __user_buflet {
    /*
     * Common area between user and kernel variants.
     */
    struct __buflet buf_com;
/* shorthand paths to the members of the common area */
#define buf_addr                buf_com.__baddr
#define buf_nbft_addr           buf_com.__nbft_addr
#define buf_idx                 buf_com.__bidx
#define buf_nbft_idx            buf_com.__nbft_idx
#define buf_dlim                buf_com.__dlim
#define buf_dlen                buf_com.__dlen
#define buf_doff                buf_com.__doff
#define buf_flag                buf_com.__flag
#define buf_bft_idx_reg         buf_com.__bft_idx
};
288
/* Evaluates to 1 if BUFLET_FLAG_LARGE_BUF is set in _buf's flags, else 0. */
#define BUFLET_HAS_LARGE_BUF(_buf)      \
    (!!((_buf)->buf_flag & BUFLET_FLAG_LARGE_BUF))
291
/*
 * Setters for the const-qualified buflet members.  Each one casts away
 * the const qualifier with __DECONST() and stores the value converted
 * to the member's type.  The expansions are fully parenthesized so that
 * they behave as a single expression wherever they are used, and every
 * index setter applies the same obj_idx_t conversion.
 */

/* set the buffer data address */
#define BUF_BADDR(_buf, _addr)                                          \
    (*__DECONST(mach_vm_address_t *, &(_buf)->buf_addr) =               \
    (mach_vm_address_t)(_addr))

/* set the buffer object index */
#define BUF_BIDX(_buf, _idx)                                            \
    (*__DECONST(obj_idx_t *, &(_buf)->buf_idx) = (obj_idx_t)(_idx))

/* set the address of the next buflet in the chain */
#define BUF_NBFT_ADDR(_buf, _addr)                                      \
    (*__DECONST(mach_vm_address_t *, &(_buf)->buf_nbft_addr) =          \
    (mach_vm_address_t)(_addr))

/* set the buflet-region index of the next buflet in the chain */
#define BUF_NBFT_IDX(_buf, _idx)                                        \
    (*__DECONST(obj_idx_t *, &(_buf)->buf_nbft_idx) = (obj_idx_t)(_idx))

/* set this buflet's own index in the buflet region */
#define BUF_BFT_IDX_REG(_buf, _idx)                                     \
    (*__DECONST(obj_idx_t *, &(_buf)->buf_bft_idx_reg) = (obj_idx_t)(_idx))
308
/*
 * Chain _ubft after _pubft: record _ubft's address and its buflet-region
 * index (buf_bft_idx_reg) in _pubft's next-buflet fields.  _ubft must not
 * be NULL (asserted).  Both arguments are evaluated more than once.
 */
#define UBUF_LINK(_pubft, _ubft) do {                                   \
    ASSERT((_ubft) != NULL);                                            \
    BUF_NBFT_ADDR(_pubft, _ubft);                                       \
    BUF_NBFT_IDX(_pubft, (_ubft)->buf_bft_idx_reg);                     \
} while (0)
314
315#ifdef KERNEL
/*
 * Construct a buflet: set the buffer address and index, the next-buflet
 * address and index, the data limit, the data length and offset, and the
 * flags.  The compile-time assertions check that the field widths match
 * the types used by the casts below.  Const-qualified members (dlim and
 * flag included) are written through casts that drop the qualifier.
 * _buf is evaluated multiple times.
 */
#define BUF_CTOR(_buf, _baddr, _bidx, _dlim, _dlen, _doff, _nbaddr, _nbidx, _bflag) do { \
    _CASSERT(sizeof ((_buf)->buf_addr) == sizeof (mach_vm_address_t));  \
    _CASSERT(sizeof ((_buf)->buf_idx) == sizeof (obj_idx_t));           \
    _CASSERT(sizeof ((_buf)->buf_dlim) == sizeof (uint32_t));           \
    BUF_BADDR(_buf, _baddr);                                            \
    BUF_NBFT_ADDR(_buf, _nbaddr);                                       \
    BUF_BIDX(_buf, _bidx);                                              \
    BUF_NBFT_IDX(_buf, _nbidx);                                         \
    *(uint32_t *)(uintptr_t)&(_buf)->buf_dlim = (_dlim);                \
    (_buf)->buf_dlen = (_dlen);                                         \
    (_buf)->buf_doff = (_doff);                                         \
    *(uint16_t *)(uintptr_t)&(_buf)->buf_flag = (_bflag);               \
} while (0)
329
/*
 * Reset only the mutable fields of a buflet (data length and offset);
 * the address, index, limit and flag fields are left untouched.
 */
#define BUF_INIT(_buf, _dlen, _doff) do {                               \
    (_buf)->buf_dlen = (_dlen);                                         \
    (_buf)->buf_doff = (_doff);                                         \
} while (0)
334
335#endif /* KERNEL */
336
/*
 * BUF_IN_RANGE(_buf): non-zero when the buflet's data span is valid,
 * i.e. buf_doff + buf_dlen does not exceed buf_dlim.  The kernel variant
 * additionally checks that [buf_addr, buf_addr + buf_dlim) lies within
 * the backing object [buf_objaddr, buf_objaddr + buf_objlim).
 *
 * buf_doff and buf_dlen are 32-bit and mutable, so their sum is computed
 * in 64 bits; otherwise a large offset could wrap around and make an
 * out-of-range span appear valid.  _buf is evaluated multiple times.
 */
#ifdef KERNEL
#define BUF_IN_RANGE(_buf)                                              \
    ((_buf)->buf_addr >= (mach_vm_address_t)(_buf)->buf_objaddr &&      \
    ((uintptr_t)(_buf)->buf_addr + (_buf)->buf_dlim) <=                 \
    ((uintptr_t)(_buf)->buf_objaddr + (_buf)->buf_objlim) &&            \
    ((uint64_t)(_buf)->buf_doff + (_buf)->buf_dlen) <= (_buf)->buf_dlim)
#else /* !KERNEL */
#define BUF_IN_RANGE(_buf)                                              \
    (((uint64_t)(_buf)->buf_doff + (_buf)->buf_dlen) <= (_buf)->buf_dlim)
#endif /* !KERNEL */
347
348/*
 * Metadata preamble.  This structure is placed at the beginning of each
 * __{user,kern}_{quantum,packet} object.  Each user metadata object has a
 * unique red zone pattern, which is an XOR of the redzone cookie and the
 * offset of the metadata object in the object's region.  Due to the use
 * of tagged pointers, we need the structure size to be a multiple of 16.
 * See SK_PTR_TAG() definition for details.
355 */
struct __metadata_preamble {
    /*
     * The two members below share storage.
     * Legend: (K) kernel, (U) user, (UK) both.
     */
    union {
        uint64_t _mdp_next;     /* for batch alloc/free (K) */
        uint64_t mdp_redzone;   /* red zone cookie (U) */
    };
    const obj_idx_t mdp_idx;    /* index within region (UK) */
    uint16_t mdp_type;          /* nexus_meta_type_t (UK) */
    uint16_t mdp_subtype;       /* nexus_meta_subtype_t (UK) */
};
365
/* size in bytes of the preamble that precedes each metadata object */
#define METADATA_PREAMBLE_SZ    (sizeof (struct __metadata_preamble))

/*
 * Given the address of a metadata object, yield a pointer to its
 * preamble, located METADATA_PREAMBLE_SZ bytes before that address.
 */
#define METADATA_PREAMBLE(_md)                                          \
    ((struct __metadata_preamble *)                                     \
    ((mach_vm_address_t)(_md) - METADATA_PREAMBLE_SZ))

/* index of the metadata object within its region (preamble mdp_idx) */
#define METADATA_IDX(_md)                                               \
    (METADATA_PREAMBLE(_md)->mdp_idx)

/* metadata type (preamble mdp_type) */
#define METADATA_TYPE(_md)                                              \
    (METADATA_PREAMBLE(_md)->mdp_type)

/* metadata subtype (preamble mdp_subtype) */
#define METADATA_SUBTYPE(_md)                                           \
    (METADATA_PREAMBLE(_md)->mdp_subtype)
380
381/*
382 * Common packet structure shared by {__user,__kern}_quantum.
383 */
struct __quantum {
    /* 16-byte flow identifier, viewable as a uuid_t or as 8/16/32/64-bit words */
    union {
        uuid_t __uuid;          /* flow UUID */
        uint8_t __val8[16];
        uint16_t __val16[8];
        uint32_t __val32[4];
        uint64_t __val64[2];
    } __flow_id_u;
#define __q_flow_id             __flow_id_u.__uuid
#define __q_flow_id_val8        __flow_id_u.__val8
#define __q_flow_id_val16       __flow_id_u.__val16
#define __q_flow_id_val32       __flow_id_u.__val32
#define __q_flow_id_val64       __flow_id_u.__val64

    uint32_t __q_len;

    /* QoS service class, see packet_svc_class_t */
    uint32_t __q_svc_class;     /* PKT_SC_* values */

    /*
     * See notes on _QUM_{INTERNALIZE,EXTERNALIZE}() regarding
     * portion of this structure above __flags that gets copied.
     * Adding more user-mutable fields after __flags would also
     * require adjusting those macros as well.
     */
    volatile uint16_t __q_flags;    /* QUMF_* flags */
    uint16_t __q_pad[3];            /* explicit padding */
} __attribute((aligned(sizeof(uint64_t))));
412
413/*
414 * Quantum.
415 *
 * This structure is aligned for efficient copy and accesses.
 * It is the user version of the __kern_quantum structure.
418 *
419 * XXX: Do NOT store kernel pointer/address values here.
420 */
struct __user_quantum {
    /*
     * Common area between user and kernel variants.
     */
    struct __quantum qum_com;
/* shorthand paths to the members of the common area */
#define qum_flow_id             qum_com.__q_flow_id
#define qum_flow_id_val8        qum_com.__q_flow_id_val8
#define qum_flow_id_val16       qum_com.__q_flow_id_val16
#define qum_flow_id_val32       qum_com.__q_flow_id_val32
#define qum_flow_id_val64       qum_com.__q_flow_id_val64
#define qum_len                 qum_com.__q_len
#define qum_qflags              qum_com.__q_flags
#define qum_svc_class           qum_com.__q_svc_class

    /*
     * Userland specific.
     */
    struct __user_buflet qum_buf[1];    /* 1 buflet */
    /*
     * use count for packet.
     */
    uint16_t qum_usecnt;
} __attribute((aligned(sizeof(uint64_t))));
444
445/*
446 * Valid values for (16-bit) qum_qflags.
447 */
#define QUM_F_FINALIZED         0x0001  /* has been finalized */
#define QUM_F_DROPPED           0x0002  /* has been dropped */
#define QUM_F_FLOW_CLASSIFIED   0x0010  /* flow has been classified */
#ifdef KERNEL
#define QUM_F_INTERNALIZED      0x1000  /* has been internalized */
#define QUM_F_KERNEL_ONLY       0x8000  /* kernel only; no user counterpart */

/* invariant flags we want to keep */
#define QUM_F_SAVE_MASK         (QUM_F_KERNEL_ONLY)
/* kernel-only flags that are never externalized */
#define QUM_F_KERNEL_FLAGS      (QUM_F_INTERNALIZED|QUM_F_KERNEL_ONLY)
#endif /* KERNEL */
460
461#ifdef KERNEL
/*
 * Construct a quantum: clear the 16-byte flow ID, set the flags and
 * length, store _qidx as the metadata index in the preamble (written
 * through a cast, since mdp_idx is const), and construct the first
 * buflet with the given buffer address/index/limit, zero data length
 * and offset, no next buflet (address 0, OBJ_IDX_NONE) and no flags.
 * _kqum is evaluated multiple times.
 */
#define _KQUM_CTOR(_kqum, _flags, _len, _baddr, _bidx, _dlim, _qidx) do { \
    (_kqum)->qum_flow_id_val64[0] = 0;                                  \
    (_kqum)->qum_flow_id_val64[1] = 0;                                  \
    (_kqum)->qum_qflags = (_flags);                                     \
    (_kqum)->qum_len = (_len);                                          \
    _CASSERT(sizeof(METADATA_IDX(_kqum)) == sizeof(obj_idx_t));         \
    *(obj_idx_t *)(uintptr_t)&METADATA_IDX(_kqum) = (_qidx);            \
    BUF_CTOR(&(_kqum)->qum_buf[0], (_baddr), (_bidx), (_dlim), 0, 0, 0, \
        OBJ_IDX_NONE, 0);                                               \
} while (0)
472
/*
 * Re-initialize a quantum: clear the 16-byte flow ID, set the flags and
 * length, and reset the first buflet's data length and offset to zero.
 * NOTE(review): _qidx is accepted but not used by this macro; the
 * metadata index is only written by _KQUM_CTOR().
 */
#define _KQUM_INIT(_kqum, _flags, _len, _qidx) do {                     \
    (_kqum)->qum_flow_id_val64[0] = 0;                                  \
    (_kqum)->qum_flow_id_val64[1] = 0;                                  \
    (_kqum)->qum_qflags = (_flags);                                     \
    (_kqum)->qum_len = (_len);                                          \
    BUF_INIT(&(_kqum)->qum_buf[0], 0, 0);                               \
} while (0)
480#endif /* KERNEL */
481
482/*
483 * Common packet structure shared by {__user,__kern}_packet.
484 */
struct __packet_com {
    /* Link layer (offset relevant to first buflet) */
    uint16_t __link_flags;      /* PKT_LINKF_* flags */

    /*
     * Headroom/protocol header length
     *
     * Since the security model of Skywalk nexus is that it does not
     * trust packets either from above (userspace) or below
     * (driver/firmware), the only metadata field that nexus makes use
     * of from external sources is the headroom.  Based on headroom, the
     * flowswitch starts the demux routine on the l2 header, if any.
     * The l2_len is stored in this step.  Then the flow extraction
     * (l3+l4 flow) begins parsing from (headroom + l2_len).
     *
     * __headroom is the empty buffer space before any packet data;
     * it is also equivalent to the first header offset.
     *
     * __l2_len is l2 (link layer) protocol header length, if any.
     */
    uint8_t __headroom;
    uint8_t __l2_len;

    /*
     * Checksum offload.
     *
     * Partial checksum does not require any header parsing and is
     * therefore simpler to implement both in software and hardware.
     *
     * On transmit, PKT_CSUMF_PARTIAL indicates that a partial one's
     * complement checksum is to be computed on the span starting from
     * pkt_csum_tx_start_off to the end of the packet, and have the
     * resulting checksum value written at the location specified by
     * pkt_csum_tx_stuff_off.
     *
     * The PKT_CSUMF_ZERO_INVERT flag is used on transmit to indicate
     * that the value 0xffff (negative 0 in one's complement) must be
     * substituted for the value of 0.  This is set for UDP packets,
     * since otherwise the receiver may not validate the checksum
     * (UDP/IPv4), or drop the packet altogether (UDP/IPv6).
     *
     * On receive, PKT_CSUMF_PARTIAL indicates that a partial one's
     * complement checksum has been computed on the span beginning at
     * pkt_csum_rx_start_off to the end of the packet, and that the
     * computed value is now stored in pkt_csum_rx_value.
     *
     * All offsets are relative to the base of the first buflet.
     */
    uint32_t __csum_flags;      /* PKT_CSUMF_* flags */
    /* RX and TX views share the same 32 bits, also reachable as __csum_data */
    union {
        struct {
            uint16_t __csum_start_off;  /* start offset */
            uint16_t __csum_value;      /* checksum value */
        } __csum_rx;
        struct {
            uint16_t __csum_start_off;  /* start offset */
            uint16_t __csum_stuff_off;  /* stuff offset */
        } __csum_tx;
        uint32_t __csum_data;
    };

    /* Compression generation count */
    uint32_t __comp_gencnt;

    /*
     * Trace ID for each sampled packet.
     * Non-zero ID indicates that the packet is being actively traced.
     */
    packet_trace_id_t __trace_id;

    /* Aggregation type */
    uint8_t __aggr_type;        /* PKT_AGGR_* values */
    uint8_t __seg_cnt;          /* Number of LRO-packets */

    uint16_t __proto_seg_sz;    /* Protocol segment size */

    /*
     * See notes on _PKT_{INTERNALIZE,EXTERNALIZE}() regarding portion
     * of this structure above __p_flags that gets copied.  Adding
     * more user-mutable fields after __p_flags would also require
     * adjusting those macros as well.
     */
    /* 64-bit flags word, also addressable as two 32-bit halves */
    union {
        volatile uint32_t __flags32[2];
        volatile uint64_t __flags;  /* PKT_F_* flags */
    };
} __attribute((aligned(sizeof(uint64_t))));
571
struct __packet {
    /* common packet fields, overlaid with a raw array of 64-bit words */
    union {
        uint64_t __pkt_data[4];
        struct __packet_com __pkt_com;
    };
/* shorthand paths to the members of __pkt_com */
#define __p_link_flags          __pkt_com.__link_flags
#define __p_headroom            __pkt_com.__headroom
#define __p_l2_len              __pkt_com.__l2_len
#define __p_csum_flags          __pkt_com.__csum_flags
#define __p_csum_rx             __pkt_com.__csum_rx
#define __p_csum_tx             __pkt_com.__csum_tx
#define __p_csum_data           __pkt_com.__csum_data
#define __p_comp_gencnt         __pkt_com.__comp_gencnt
#define __p_aggr_type           __pkt_com.__aggr_type
#define __p_seg_cnt             __pkt_com.__seg_cnt
#define __p_proto_seg_sz        __pkt_com.__proto_seg_sz
#define __p_trace_id            __pkt_com.__trace_id
#define __p_flags32             __pkt_com.__flags32
#define __p_flags               __pkt_com.__flags
};
592
/* optional packet token types (values for __token_type) */
#define PKT_OPT_TOKEN_TYPE_OPAQUE       1 /* token has opaque data */
#define PKT_OPT_TOKEN_TYPE_PACKET_ID    2 /* token has packet_id */

/* maximum token size, in bytes (size of the __token[] array) */
#define PKT_OPT_MAX_TOKEN_SIZE          16
599
struct __packet_opt_com {
    /* token storage, viewable as 64-bit words or as bytes */
    union {
        uint64_t __token_data[2];
        uint8_t __token[PKT_OPT_MAX_TOKEN_SIZE];
    };
    uint64_t __expire_ts;
    uint64_t __pkt_tx_time;
    uint16_t __vlan_tag;
    uint16_t __token_len;
    uint8_t __token_type;       /* PKT_OPT_TOKEN_TYPE_* values */
    uint8_t __expiry_action;
    uint8_t __app_type;
    uint8_t __app_metadata;
} __attribute((aligned(sizeof(uint64_t))));
614
struct __packet_opt {
    /* optional packet fields, overlaid with a raw array of 64-bit words */
    union {
        uint64_t __pkt_opt_data[5];
        struct __packet_opt_com __pkt_opt_com;
    };
/* shorthand paths to the members of __pkt_opt_com */
#define __po_token_type         __pkt_opt_com.__token_type
#define __po_token_len          __pkt_opt_com.__token_len
#define __po_vlan_tag           __pkt_opt_com.__vlan_tag
#define __po_token_data         __pkt_opt_com.__token_data
#define __po_token              __pkt_opt_com.__token
#define __po_expire_ts          __pkt_opt_com.__expire_ts
#define __po_expiry_action      __pkt_opt_com.__expiry_action
#define __po_app_type           __pkt_opt_com.__app_type
#define __po_app_metadata       __pkt_opt_com.__app_metadata
#define __po_pkt_tx_time        __pkt_opt_com.__pkt_tx_time
};
631
632/*
633 * Packet.
634 *
635 * This structure is aligned for efficient copy and accesses.
636 * It is the user version of the __kern_packet structure.
637 *
638 * XXX: Do NOT store kernel pointer/address values here.
639 */
struct __user_packet {
    /* embedded quantum; a packet begins with the quantum layout */
    struct __user_quantum pkt_qum;
/*
 * pkt_flow_id is the flow identifier used by the user space stack to
 * identify a flow.  This identifier is passed as metadata on all packets
 * generated by the user space stack.  On RX, the flowswitch fills in this
 * metadata on every packet, and it can be used by the user space stack for
 * flow classification purposes.
 */
#define pkt_flow_id             pkt_qum.qum_flow_id
#define pkt_flow_id_64          pkt_qum.qum_flow_id_val64
#define pkt_qum_qflags          pkt_qum.qum_qflags
#define pkt_length              pkt_qum.qum_len
#define pkt_qum_buf             pkt_qum.qum_buf[0]
#define pkt_svc_class           pkt_qum.qum_svc_class
#ifdef KERNEL
/*
 * pkt_flow_token is a globally unique flow identifier generated by the
 * flowswitch for each flow.  Flowswitch stamps every TX packet with this
 * identifier.  This is the flow identifier which would be visible to the
 * AQM logic and the driver.
 * pkt_flow_token uses the first 4 bytes of pkt_flow_id as the storage
 * space.  This is not a problem as pkt_flow_id is only for flowswitch
 * consumption and is not required by any other module after the
 * flowswitch TX processing stage.
 */
#define pkt_flow_token          pkt_qum.qum_flow_id_val32[0]
#endif /* KERNEL */

    /*
     * Common area between user and kernel variants.
     */
    struct __packet pkt_com;
#define pkt_link_flags          pkt_com.__p_link_flags
#define pkt_headroom            pkt_com.__p_headroom
#define pkt_l2_len              pkt_com.__p_l2_len
#define pkt_csum_flags          pkt_com.__p_csum_flags
#define pkt_csum_rx_start_off   pkt_com.__p_csum_rx.__csum_start_off
#define pkt_csum_rx_value       pkt_com.__p_csum_rx.__csum_value
#define pkt_csum_tx_start_off   pkt_com.__p_csum_tx.__csum_start_off
#define pkt_csum_tx_stuff_off   pkt_com.__p_csum_tx.__csum_stuff_off
#define pkt_csum_data           pkt_com.__p_csum_data
#define pkt_comp_gencnt         pkt_com.__p_comp_gencnt
#define pkt_aggr_type           pkt_com.__p_aggr_type
#define pkt_seg_cnt             pkt_com.__p_seg_cnt
#define pkt_proto_seg_sz        pkt_com.__p_proto_seg_sz
#define pkt_trace_id            pkt_com.__p_trace_id
/* pkt_pflags32 selects the 32-bit half holding the low-order flag bits */
#if BYTE_ORDER == LITTLE_ENDIAN
#define pkt_pflags32            pkt_com.__p_flags32[0]
#else /* BYTE_ORDER != LITTLE_ENDIAN */
#define pkt_pflags32            pkt_com.__p_flags32[1]
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
#define pkt_pflags              pkt_com.__p_flags

    /*
     * Optional common metadata.
     */
    struct __packet_opt pkt_com_opt;

    /*
     * Userland specific.
     */

    /*
     * pkt_{bufs,max} aren't part of the common area, on purpose,
     * since we selectively update them on internalize/externalize.
     */
    const uint16_t pkt_bufs_max;    /* maximum size of buflet chain */
    const uint16_t pkt_bufs_cnt;    /* buflet chain size */
} __attribute((aligned(sizeof(uint64_t))));
709
/*
 * The size of __user_packet structure for n total buflets.
 * NOTE(review): n is currently ignored; the expansion is always
 * sizeof(struct __user_packet) regardless of the buflet count.
 */
#define _USER_PACKET_SIZE(n)    sizeof(struct __user_packet)
712
/*
 * Valid values for pkt_link_flags (a 16-bit bit mask).
 */
#define PKT_LINKF_BCAST         0x0001  /* sent/received as link-level bcast */
#define PKT_LINKF_MCAST         0x0002  /* sent/received as link-level mcast */
#define PKT_LINKF_ETHFCS        0x0004  /* has Ethernet FCS */
719
720/*
721 * XXX IMPORTANT - READ THIS XXX
722 *
723 * Valid values for (64-bit) pkt_pflags.
724 *
 * The lower 32-bit values are equivalent to PKTF_* flags used by mbufs,
 * hence the unused values are reserved.  Do not define any of these
 * values unless they correspond to PKTF_* flags.  Make sure to do the
728 * following when adding a value in the lower 32-bit range:
729 *
730 * a. If the flag is kernel-only, prefix it with 2 underscore characters,
731 * then add a PKT_F_* alias under the KERNEL block conditional. This
732 * will help ensure that the libsyscall code doesn't mistakenly use it.
733 *
734 * b. In pp_init(), add compile-time assertion to ensure that the PKT_F_*
735 * value matches the corresponding PKTF_* as defined in <sys/mbuf.h>.
736 *
737 * c. Add the new flag to PKT_F_USER_MASK depending on whether it's allowed
738 * to be used by userland. Flags not added to this mask will only be
739 * used by the kernel. We only internalize and externalize flags listed
740 * in PKT_F_USER_MASK.
741 *
742 * d. Add the new flag to PKT_F_COMMON_MASK.
743 *
744 * When adding an upper 32-bit value, ensure (a) and (c) above are done.
745 *
746 * Legend:
747 *
748 * (K) - Kernel-only
749 * (U+K) - User and kernel
750 * (reserved) - Only to be used for mapping with mbuf PKTF_* flags
751 */
/* --------------------- lower 32-bit (shared with mbuf PKTF_* flags) */
#define __PKT_F_FLOW_ID         0x0000000000000001ULL /* (K) */
#define __PKT_F_FLOW_ADV        0x0000000000000002ULL /* (K) */
/*                              0x0000000000000004ULL (reserved) */
/*                              0x0000000000000008ULL (reserved) */
/*                              0x0000000000000010ULL (reserved) */
/*                              0x0000000000000020ULL (reserved) */
/*                              0x0000000000000040ULL (reserved) */
/*                              0x0000000000000080ULL (reserved) */
/*                              0x0000000000000100ULL (reserved) */
/*                              0x0000000000000200ULL (reserved) */
#define PKT_F_WAKE_PKT          0x0000000000000400ULL /* (U+K) */
/*                              0x0000000000000800ULL (reserved) */
/*                              0x0000000000001000ULL (reserved) */
/*                              0x0000000000002000ULL (reserved) */
/*                              0x0000000000004000ULL (reserved) */
#define PKT_F_BACKGROUND        0x0000000000008000ULL /* (U+K) */
/*                              0x0000000000010000ULL (reserved) */
/*                              0x0000000000020000ULL (reserved) */
#define PKT_F_KEEPALIVE         0x0000000000040000ULL /* (U+K) */
#define PKT_F_REALTIME          0x0000000000080000ULL /* (U+K) */
/*                              0x0000000000100000ULL (reserved) */
#define PKT_F_REXMT             0x0000000000200000ULL /* (U+K) */
/*                              0x0000000000400000ULL (reserved) */
#define __PKT_F_TX_COMPL_TS_REQ 0x0000000000800000ULL /* (K) */
#define __PKT_F_TS_VALID        0x0000000001000000ULL /* (K) */
/*                              0x0000000002000000ULL (reserved) */
#define __PKT_F_NEW_FLOW        0x0000000004000000ULL /* (K) */
#define __PKT_F_START_SEQ       0x0000000008000000ULL /* (K) */
#define PKT_F_LAST_PKT          0x0000000010000000ULL /* (U+K) */
/*                              0x0000000020000000ULL (reserved) */
/*                              0x0000000040000000ULL (reserved) */
/*                              0x0000000080000000ULL (reserved) */
/* --------------------- upper 32-bit below */
#define PKT_F_OPT_GROUP_START   0x0000000100000000ULL /* (U+K) */
#define PKT_F_OPT_GROUP_END     0x0000000200000000ULL /* (U+K) */
#define PKT_F_OPT_EXPIRE_TS     0x0000000400000000ULL /* (U+K) */
#define PKT_F_OPT_TOKEN         0x0000000800000000ULL /* (U+K) */
#define __PKT_F_FLOW_DATA       0x0000001000000000ULL /* (K) */
#define __PKT_F_TX_COMPL_DATA   0x0000002000000000ULL /* (K) */
#define __PKT_F_MBUF_DATA       0x0000004000000000ULL /* (K) */
#define PKT_F_TRUNCATED         0x0000008000000000ULL /* (U+K) */
#define __PKT_F_PKT_DATA        0x0000010000000000ULL /* (K) */
#define PKT_F_PROMISC           0x0000020000000000ULL /* (U+K) */
#define PKT_F_OPT_VLTAG         0x0000040000000000ULL /* (U+K) */
#define PKT_F_OPT_VLTAG_IN_PKT  0x0000080000000000ULL /* (U+K) */
#define __PKT_F_TX_PORT_DATA    0x0000100000000000ULL /* (K) */
#define PKT_F_OPT_EXP_ACTION    0x0000200000000000ULL /* (U+K) */
#define PKT_F_OPT_APP_METADATA  0x0000400000000000ULL /* (U+K) */
#define PKT_F_L4S               0x0000800000000000ULL /* (U+K) */
#define PKT_F_OPT_TX_TIMESTAMP  0x0001000000000000ULL /* (U+K) */
/*                              0x0002000000000000ULL (unassigned) */
/*                              0x0004000000000000ULL (unassigned) */
/*                              0x0008000000000000ULL (unassigned) */
/*                              0x0010000000000000ULL (unassigned) */
/*                              0x0020000000000000ULL (unassigned) */
/*                              0x0040000000000000ULL (unassigned) */
/*                              0x0080000000000000ULL (unassigned) */
#define __PKT_F_OPT_ALLOC       0x0100000000000000ULL /* (K) */
#define __PKT_F_FLOW_ALLOC      0x0200000000000000ULL /* (K) */
#define __PKT_F_TX_COMPL_ALLOC  0x0400000000000000ULL /* (K) */
/*                              0x0800000000000000ULL (unassigned) */
/*                              0x1000000000000000ULL (unassigned) */
/*                              0x2000000000000000ULL (unassigned) */
/*                              0x4000000000000000ULL (unassigned) */
/*                              0x8000000000000000ULL (unassigned) */
817
/*
 * Union of all PKT_F_OPT_* flags that are visible to both user and
 * kernel, listed in ascending bit order.
 */
#define PKT_F_OPT_DATA                                                  \
    (PKT_F_OPT_GROUP_START | PKT_F_OPT_GROUP_END |                      \
    PKT_F_OPT_EXPIRE_TS | PKT_F_OPT_TOKEN |                             \
    PKT_F_OPT_VLTAG | PKT_F_OPT_VLTAG_IN_PKT |                          \
    PKT_F_OPT_EXP_ACTION | PKT_F_OPT_APP_METADATA |                     \
    PKT_F_OPT_TX_TIMESTAMP)
826
#ifdef KERNEL
/*
 * Flags exposed to user (and kernel).  See notes above.
 * NOTE(review): PKT_F_KEEPALIVE is tagged (U+K) in the flag table but is
 * not part of this mask -- confirm that this is intentional.
 */
#define PKT_F_USER_MASK                                                 \
    (PKT_F_BACKGROUND | PKT_F_REALTIME | PKT_F_REXMT |                  \
    PKT_F_LAST_PKT | PKT_F_OPT_DATA | PKT_F_PROMISC |                   \
    PKT_F_TRUNCATED | PKT_F_WAKE_PKT | PKT_F_L4S)

/*
 * Aliases for kernel-only flags.  See notes above.  The ones marked
 * with (common) have corresponding PKTF_* definitions and are also
 * included in PKT_F_COMMON_MASK below.
 */
#define PKT_F_FLOW_ID           __PKT_F_FLOW_ID         /* (common) */
#define PKT_F_FLOW_ADV          __PKT_F_FLOW_ADV        /* (common) */
#define PKT_F_TX_COMPL_TS_REQ   __PKT_F_TX_COMPL_TS_REQ /* (common) */
#define PKT_F_TS_VALID          __PKT_F_TS_VALID        /* (common) */
#define PKT_F_NEW_FLOW          __PKT_F_NEW_FLOW        /* (common) */
#define PKT_F_START_SEQ         __PKT_F_START_SEQ       /* (common) */
#define PKT_F_FLOW_DATA         __PKT_F_FLOW_DATA
#define PKT_F_TX_COMPL_DATA     __PKT_F_TX_COMPL_DATA
#define PKT_F_MBUF_DATA         __PKT_F_MBUF_DATA
#define PKT_F_PKT_DATA          __PKT_F_PKT_DATA
#define PKT_F_OPT_ALLOC         __PKT_F_OPT_ALLOC
#define PKT_F_FLOW_ALLOC        __PKT_F_FLOW_ALLOC
#define PKT_F_TX_COMPL_ALLOC    __PKT_F_TX_COMPL_ALLOC
#define PKT_F_TX_PORT_DATA      __PKT_F_TX_PORT_DATA

/*
 * Flags related to an mbuf attached to the packet.
 */
#define PKT_F_MBUF_MASK         (PKT_F_MBUF_DATA | PKT_F_TRUNCATED)

/*
 * Flags related to another packet attached to the packet.
 */
#define PKT_F_PKT_MASK          (PKT_F_PKT_DATA | PKT_F_TRUNCATED)

/*
 * Invariant flags kept during _PKT_COPY().  At the moment we keep
 * all except those related to the attached mbuf or attached packet.
 */
#define PKT_F_COPY_MASK         (~(PKT_F_MBUF_MASK | PKT_F_PKT_MASK))

/*
 * Lower 32-bit flags common to mbuf and __kern_packet.  See notes above.
 * DO NOT add flags to this mask unless they have equivalent PKTF_* flags
 * defined in <sys/mbuf.h>
 */
#define PKT_F_COMMON_MASK                                               \
    (PKT_F_BACKGROUND | PKT_F_REALTIME | PKT_F_REXMT |                  \
    PKT_F_LAST_PKT | PKT_F_FLOW_ID | PKT_F_FLOW_ADV |                   \
    PKT_F_TX_COMPL_TS_REQ | PKT_F_TS_VALID | PKT_F_NEW_FLOW |           \
    PKT_F_START_SEQ | PKT_F_KEEPALIVE | PKT_F_WAKE_PKT)

/*
 * Flags retained across alloc/free.
 */
#define PKT_F_INIT_MASK                                                 \
    (PKT_F_OPT_ALLOC | PKT_F_FLOW_ALLOC | PKT_F_TX_COMPL_ALLOC)
#endif /* KERNEL */
889
/*
 * 64-bit tagged pointer.  The tag occupies the four least significant
 * bits: bits 0-1 carry the type and bits 2-3 carry the subtype; the
 * remaining bits are the address.
 */
#define SK_PTR_TYPE_MASK        ((uint64_t)0x3) /* 00 11 */
#define SK_PTR_SUBTYPE_MASK     ((uint64_t)0xc) /* 11 00 */
#define SK_PTR_TAG_MASK         (SK_PTR_TYPE_MASK | SK_PTR_SUBTYPE_MASK) /* 11 11 */
#define SK_PTR_ADDR_MASK        (~SK_PTR_TAG_MASK)

/* extract the whole 4-bit tag */
#define SK_PTR_TAG(_p)          (SK_PTR_TAG_MASK & (uint64_t)(_p))

/* extract / encode the type (bits 0-1) */
#define SK_PTR_TYPE(_p)         (SK_PTR_TYPE_MASK & (uint64_t)(_p))
#define SK_PTR_TYPE_ENC(_t)     (SK_PTR_TYPE_MASK & (uint64_t)(_t))

/* extract / encode the subtype (bits 2-3) */
#define SK_PTR_SUBTYPE(_p)      ((SK_PTR_SUBTYPE_MASK & (uint64_t)(_p)) >> 2)
#define SK_PTR_SUBTYPE_ENC(_s)  (SK_PTR_SUBTYPE_MASK & ((uint64_t)(_s) << 2))

/* extract / encode the address (tag bits cleared) */
#define SK_PTR_ADDR(_p)         (SK_PTR_ADDR_MASK & (uint64_t)(_p))
#define SK_PTR_ADDR_ENC(_p)     SK_PTR_ADDR(_p)

/* combine an address, a type and a subtype into one tagged value */
#define SK_PTR_ENCODE(_p, _t, _s)       \
    (SK_PTR_SUBTYPE_ENC(_s) | SK_PTR_TYPE_ENC(_t) | SK_PTR_ADDR_ENC(_p))
912
/*
 * Strip the tag bits from a tagged handle and forge a pointer to the
 * user quantum / user packet from the remaining address bits.
 */
#define SK_PTR_ADDR_UQUM(_ph)   (__unsafe_forge_single(struct __user_quantum *, SK_PTR_ADDR(_ph)))
#define SK_PTR_ADDR_UPKT(_ph)   (__unsafe_forge_single(struct __user_packet *, SK_PTR_ADDR(_ph)))
915
#ifdef KERNEL
__BEGIN_DECLS
/*
 * Packets.
 */
/* presumably yields the mbuf attached to the packet -- confirm in the implementation */
extern struct mbuf *kern_packet_get_mbuf(const kern_packet_t);
__END_DECLS
#else /* !KERNEL */
#if defined(LIBSYSCALL_INTERFACE)
__BEGIN_DECLS
/*
 * Assertion-failure handlers, declared only for the Libsyscall build.
 * NOTE(review): the meaning of the uint64_t arguments is not visible
 * here; presumably the observed/expected type and subtype -- confirm.
 */
extern void pkt_subtype_assert_fail(const packet_t, uint64_t, uint64_t);
extern void pkt_type_assert_fail(const packet_t, uint64_t);
__END_DECLS
#endif /* LIBSYSCALL_INTERFACE */
#endif /* !KERNEL */
931#if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE)
932#include <skywalk/packet_common.h>
933#endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */
934#endif /* PRIVATE || BSD_KERNEL_PRIVATE */
935#endif /* !_SKYWALK_OS_PACKET_PRIVATE_H_ */
936