skywalk_common.h source code [xnu-build/EXPORT_HDRS/bsd/skywalk/skywalk_common.h]

1	/*
2	* Copyright (c) 2017-2021 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	#ifndef _SKYWALK_COMMON_H_
30	#define _SKYWALK_COMMON_H_
31
#if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
33	/*
34	* Routines common to kernel and userland. This file is intended to
35	* be included by the Skywalk kernel and libsyscall code.
36	*/
37
38	#include <skywalk/os_skywalk_private.h>
39
#ifndef KERNEL
#if defined(LIBSYSCALL_INTERFACE)
__BEGIN_DECLS
extern int fprintf_stderr(const char *format, ...);
__END_DECLS

/* CSTYLED */

/*
 * Userland abort helpers: print a message through fprintf_stderr() and
 * then trap.  The empty asm statement precedes the trap in every variant;
 * NOTE(review): presumably it keeps distinct abort sites from being
 * merged by the optimizer -- confirm.
 */
#define SK_ABORT(msg) do {                                              \
    (void) fprintf_stderr("%s\n", (msg));                               \
    __asm__(""); __builtin_trap();                                      \
} while (0)

/* Same as SK_ABORT(), additionally printing "cause" in hex. */
#define SK_ABORT_WITH_CAUSE(msg, cause) do {                            \
    (void) fprintf_stderr("%s: cause 0x%x\n", (msg), (cause));          \
    __asm__(""); __builtin_trap();                                      \
} while (0)

/* In userland a dynamically built message is handled like a static one. */
#define SK_ABORT_DYNAMIC(msg) SK_ABORT(msg)


/*
 * Always-on assertion: aborts with the stringified expression when EX
 * evaluates to false.
 */
#define VERIFY(EX) do {                                                 \
    if (__improbable(!(EX))) {                                          \
        SK_ABORT("assertion failed: " #EX);                             \
        /* NOTREACHED */                                                \
        __builtin_unreachable();                                        \
    }                                                                   \
} while (0)

/*
 * ASSERT() is VERIFY() on DEBUG/DEVELOPMENT builds and compiles to
 * nothing otherwise (EX is then not evaluated).
 */
#if (DEBUG || DEVELOPMENT)
#define ASSERT(EX) VERIFY(EX)
#else /* !DEBUG && !DEVELOPMENT */
#define ASSERT(EX) ((void)0)
#endif /* !DEBUG && !DEVELOPMENT */
#endif /* LIBSYSCALL_INTERFACE */
#endif /* !KERNEL */
76
/*
 * Given a pointer to "member" embedded in an object of "type", return a
 * pointer to the enclosing object.  "ptr" is parenthesized so that
 * expression arguments (e.g. p + 1) are cast as a whole.
 */
#ifndef container_of
#define container_of(ptr, type, member) \
    ((type *)(((uintptr_t)(ptr)) - offsetof(type, member)))
#endif
81
/*
 * Cache prefetch hints for the address "n" bytes past "a".
 * SK_PREFETCH hints a read (rw = 0), SK_PREFETCHW hints a write (rw = 1);
 * both request the highest temporal locality (3).
 */
#define SK_PREFETCH(_addr, _off) \
    __builtin_prefetch((const void *)((uintptr_t)(_addr) + (_off)), 0, 3)
#define SK_PREFETCHW(_addr, _off) \
    __builtin_prefetch((const void *)((uintptr_t)(_addr) + (_off)), 1, 3)
89
/*
 * Round "x" up to the next multiple of "align" using modulo arithmetic.
 * Works for any non-zero "align"; when "align" is a power of 2 the
 * cheaper P2ROUNDUP should be used instead.  Arguments are evaluated
 * more than once.
 */
#define SK_ROUNDUP(_val, _mult) \
    ((((_val) % (_mult)) != 0) ? \
    ((_val) + ((_mult) - ((_val) % (_mult)))) : (_val))
95
/*
 * Compile-time assertion: the build fails when "x" is false.
 * Only defined here if no earlier header supplied _CASSERT.
 */
#ifndef _CASSERT
#define _CASSERT(x) _Static_assert(x, "compile-time assertion failed")
#endif /* !_CASSERT */
100
/*
 * Power-of-2 address alignment test: true when "v" (pointer or integer)
 * is a multiple of "a".  The mask trick is only valid when "a" is a
 * power of 2.
 */
#ifndef IS_P2ALIGNED
#define IS_P2ALIGNED(v, a) \
    ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
#endif /* IS_P2ALIGNED */
106
/* Shorthands for common compiler attributes. */
#define __sk_aligned(a) __attribute__((__aligned__(a)))  /* align to "a" bytes */
#define __sk_packed __attribute__((__packed__))          /* no padding */
#define __sk_unused __attribute__((__unused__))          /* silence unused warnings */
110
111	#ifdef KERNEL
112	#include <sys/sdt.h>
113
/*
 * Copy 8-bytes total, 64-bit aligned, scalar.
 *
 * Performs a single 64-bit load from *src and store to *dst.
 */
__attribute__((always_inline))
static inline void
__sk_copy64_8(uint64_t *src, uint64_t *dst)
{
    *dst = *src;    /* bytes [0..7] */
}
123
124	/*
125	* Copy 8-bytes total, 32-bit aligned, scalar.
126	*/
127	__attribute__((always_inline))
128	static inline void
129	__sk_copy32_8(uint32_t __counted_by(`2`)src, uint32_t __counted_by(`2`)dst)
130	{
131	#if defined(__x86_64__)
132	/ use unaligned scalar move on x86_64 /
133	__sk_copy64_8((uint64_t )(void* )src, (uint64_t )(void *)dst);
134	#else
135	dst[`0`] = src[`0`]; / dw[0] /
136	dst[`1`] = src[`1`]; / dw[1] /
137	#endif
138	}
139
140	/*
141	* Copy 16-bytes total, 64-bit aligned, scalar.
142	*/
143	static inline void
144	__sk_copy64_16(uint64_t __counted_by(`2`)src, uint64_t __counted_by(`2`)dst)
145	{
146	dst[`0`] = src[`0`]; / [#08] /*
147	dst[`1`] = src[`1`]; / [#18] /*
148	}
149
150	/*
151	* Copy 16-bytes total, 32-bit aligned, scalar.
152	*/
153	__attribute__((always_inline))
154	static inline void
155	__sk_copy32_16(uint32_t __counted_by(`4`)src, uint32_t __counted_by(`4`)dst)
156	{
157	dst[`0`] = src[`0`]; / [#04] /*
158	dst[`1`] = src[`1`]; / [#14] /*
159	dst[`2`] = src[`2`]; / [#24] /*
160	dst[`3`] = src[`3`]; / [#34] /*
161	}
162
163	/*
164	* Copy 20-bytes total, 64-bit aligned, scalar.
165	*/
166	__attribute__((always_inline))
167	static inline void
168	__sk_copy64_20(uint64_t __sized_by(`20`)src, uint64_t __sized_by(`20`)dst)
169	{
170	dst[`0`] = src[`0`]; / [#08] /*
171	dst[`1`] = src[`1`]; / [#18] /*
172	(uint32_t )(dst + `2`) = (uint32_t )(src + `2`); / [#24] /*
173	}
174
175	/*
176	* Copy 24-bytes total, 64-bit aligned, scalar.
177	*/
178	__attribute__((always_inline))
179	static inline void
180	__sk_copy64_24(uint64_t __counted_by(`3`)src, uint64_t __counted_by(`3`)dst)
181	{
182	dst[`0`] = src[`0`]; / [#08] /*
183	dst[`1`] = src[`1`]; / [#18] /*
184	dst[`2`] = src[`2`]; / [#28] /*
185	}
186
187	/*
188	* Copy 32-bytes total, 64-bit aligned, scalar.
189	*/
190	__attribute__((always_inline))
191	static inline void
192	__sk_copy64_32(uint64_t __counted_by(`4`)src, uint64_t __counted_by(`4`)dst)
193	{
194	dst[`0`] = src[`0`]; / [#08] /*
195	dst[`1`] = src[`1`]; / [#18] /*
196	dst[`2`] = src[`2`]; / [#28] /*
197	dst[`3`] = src[`3`]; / [#38] /*
198	}
199
200	/*
201	* Copy 32-bytes total, 32-bit aligned, scalar.
202	*/
203	__attribute__((always_inline))
204	static inline void
205	__sk_copy32_32(uint32_t __counted_by(`8`)src, uint32_t __counted_by(`8`)dst)
206	{
207	dst[`0`] = src[`0`]; / [#04] /*
208	dst[`1`] = src[`1`]; / [#14] /*
209	dst[`2`] = src[`2`]; / [#24] /*
210	dst[`3`] = src[`3`]; / [#34] /*
211	dst[`4`] = src[`4`]; / [#44] /*
212	dst[`5`] = src[`5`]; / [#54] /*
213	dst[`6`] = src[`6`]; / [#64] /*
214	dst[`7`] = src[`7`]; / [#74] /*
215	}
216
217	/*
218	* Copy 40-bytes total, 64-bit aligned, scalar.
219	*/
220	__attribute__((always_inline))
221	static inline void
222	__sk_copy64_40(uint64_t __counted_by(`5`)src, uint64_t __counted_by(`5`)dst)
223	{
224	dst[`0`] = src[`0`]; / [#08] /*
225	dst[`1`] = src[`1`]; / [#18] /*
226	dst[`2`] = src[`2`]; / [#28] /*
227	dst[`3`] = src[`3`]; / [#38] /*
228	dst[`4`] = src[`4`]; / [#48] /*
229	}
230
231	#if defined(__arm64__)
/*
 * Copy 16-bytes total, 64-bit aligned, SIMD (if available).
 *
 * One 128-bit load into q0 followed by one 128-bit store.  v0 and memory
 * are declared clobbered.
 */
__attribute__((always_inline))
static inline void
__sk_vcopy64_16(uint64_t *src, uint64_t *dst)
{
    /* no need to save/restore registers on arm64 (SPILL_REGISTERS) */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "ldr q0, [%[src]] \n\t"
        "str q0, [%[dst]] \n\t"
        :
        : [src] "r" (src), [dst] "r" (dst)
        : "v0", "memory"
    );
    /* END CSTYLED */
}
250
/*
 * Copy 16-bytes total, 32-bit aligned, SIMD (if available).
 *
 * Forwards to __sk_vcopy64_16(); the pointers are only reinterpreted.
 */
__attribute__((always_inline))
static inline void
__sk_vcopy32_16(uint32_t *src, uint32_t *dst)
{
    /* use SIMD unaligned move on arm64 */
    __sk_vcopy64_16((uint64_t *)(void *)src, (uint64_t *)(void *)dst);
}
261
/*
 * Copy 20-bytes total, 64-bit aligned, SIMD (if available).
 *
 * 16 bytes through q0, then the remaining 4 bytes through s0 at
 * byte offset 16.
 */
__attribute__((always_inline))
static inline void
__sk_vcopy64_20(uint64_t *src, uint64_t *dst)
{
    /*
     * Load/store 16 + 4 bytes;
     * no need to save/restore registers on arm64 (SPILL_REGISTERS).
     */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "ldr q0, [%[src]] \n\t"
        "str q0, [%[dst]] \n\t"
        "ldr s0, [%[src], #16] \n\t"
        "str s0, [%[dst], #16] \n\t"
        :
        : [src] "r" (src), [dst] "r" (dst)
        : "v0", "memory"
    );
    /* END CSTYLED */
}
285
/*
 * Copy 24-bytes total, 64-bit aligned, SIMD (if available).
 *
 * 16 bytes through q0, then the remaining 8 bytes through d0 at
 * byte offset 16.
 */
__attribute__((always_inline))
static inline void
__sk_vcopy64_24(uint64_t *src, uint64_t *dst)
{
    /*
     * Use 16-bytes load/store and 8-bytes load/store on arm64;
     * no need to save/restore registers on arm64 (SPILL_REGISTERS).
     */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "ldr q0, [%[src]] \n\t"
        "str q0, [%[dst]] \n\t"
        "ldr d0, [%[src], #16] \n\t"
        "str d0, [%[dst], #16] \n\t"
        :
        : [src] "r" (src), [dst] "r" (dst)
        : "v0", "memory"
    );
    /* END CSTYLED */
}
309
/*
 * Copy 32-bytes total, 64-bit aligned, SIMD (if available).
 *
 * One load-pair into q0/q1 followed by one store-pair.
 */
__attribute__((always_inline))
static inline void
__sk_vcopy64_32(uint64_t *src, uint64_t *dst)
{
    /* no need to save/restore registers on arm64 (SPILL_REGISTERS) */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "ldp q0, q1, [%[src]] \n\t"
        "stp q0, q1, [%[dst]] \n\t"
        :
        : [src] "r" (src), [dst] "r" (dst)
        : "v0", "v1", "memory"
    );
    /* END CSTYLED */
}
328
329	/*
330	* Copy 32-bytes total, 32-bit aligned, SIMD (if available).
331	*/
332	__attribute__((always_inline))
333	static inline void
334	__sk_vcopy32_32(uint32_t __counted_by(`8`)src, uint32_t __counted_by(`8`)dst)
335	{
336	/ use SIMD unaligned move on arm64 /
337	__sk_vcopy64_32(src: (uint64_t )(void* )src, dst: (uint64_t )(void *)dst);
338	}
339
/*
 * Copy 40-bytes total, 64-bit aligned, SIMD (if available).
 *
 * 32 bytes through the q0/q1 pair, then the remaining 8 bytes through
 * d0 at byte offset 32.
 */
__attribute__((always_inline))
static inline void
__sk_vcopy64_40(uint64_t *src, uint64_t *dst)
{
    /*
     * Use 32-bytes load/store pair and 8-bytes load/store on arm64;
     * no need to save/restore registers on arm64 (SPILL_REGISTERS).
     */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "ldp q0, q1, [%[src]] \n\t"
        "stp q0, q1, [%[dst]] \n\t"
        "ldr d0, [%[src], #32] \n\t"
        "str d0, [%[dst], #32] \n\t"
        :
        : [src] "r" (src), [dst] "r" (dst)
        : "v0", "v1", "memory"
    );
    /* END CSTYLED */
}
363
364	/*
365	* On arm64, the following inline assembly fixed-length routines have
366	* fewer clock cycles than bzero(). We can directly use vector registers
367	* without saving/restoring them unlike on x86_64/arm32.
368	*/
369
/*
 * Zero 16-bytes total.
 *
 * Despite living with the SIMD helpers, this variant stores the 64-bit
 * zero register twice (one store-pair) and touches no vector register.
 */
__attribute__((always_inline))
static inline void
__sk_zero_16(void *p)
{
    /*
     * Use 16-bytes store pair using 64-bit zero register on arm64;
     * no need to save/restore registers on arm64 (SPILL_REGISTERS).
     */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "stp xzr, xzr, [%[p]] \n\t"
        :
        : [p] "r" (p)
        : "memory"
    );
    /* END CSTYLED */
}
390
/*
 * Zero 32-bytes total, SIMD.
 *
 * Clears v0 by XOR-ing it with itself, then stores q0 twice with a
 * single store-pair.
 */
__attribute__((always_inline))
static inline void
__sk_zero_32(void *p)
{
    /*
     * Use 32-bytes store pair using zeroed v0 register on arm64;
     * no need to save/restore registers on arm64 (SPILL_REGISTERS).
     */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "eor.16b v0, v0, v0 \n\t"
        "stp q0, q0, [%[p]] \n\t"
        :
        : [p] "r" (p)
        : "v0", "memory", "cc"
    );
    /* END CSTYLED */
}
412
/*
 * Zero 48-bytes total, SIMD.
 *
 * Clears v0, stores 32 bytes with a store-pair and the last 16 bytes
 * with a single store at byte offset 32.
 */
__attribute__((always_inline))
static inline void
__sk_zero_48(void *p)
{
    /*
     * Use 32-bytes store pair and 16-byte store using zeroed v0
     * register on arm64; no need to save/restore registers on
     * arm64 (SPILL_REGISTERS).
     */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "eor.16b v0, v0, v0 \n\t"
        "stp q0, q0, [%[p]] \n\t"
        "str q0, [%[p], #32] \n\t"
        :
        : [p] "r" (p)
        : "v0", "memory", "cc"
    );
    /* END CSTYLED */
}
436
/*
 * Zero 128-bytes total, SIMD.
 *
 * Clears v0 and issues four 32-byte store-pairs at byte offsets
 * 0, 32, 64 and 96.
 */
__attribute__((always_inline))
static inline void
__sk_zero_128(void *p)
{
    /*
     * Use 4x 32-bytes store pairs using zeroed v0 register on arm64;
     * no need to save/restore registers on arm64 (SPILL_REGISTERS).
     *
     * Note that we could optimize this routine by utilizing "dc zva"
     * which zeroes the entire cache line.  However, that requires
     * us to guarantee that the address is cache line aligned which
     * we cannot (at the moment).
     */
    /* BEGIN CSTYLED */
    __asm__ __volatile__ (
        "eor.16b v0, v0, v0 \n\t"
        "stp q0, q0, [%[p]] \n\t"
        "stp q0, q0, [%[p], #32] \n\t"
        "stp q0, q0, [%[p], #64] \n\t"
        "stp q0, q0, [%[p], #96] \n\t"
        :
        : [p] "r" (p)
        : "v0", "memory", "cc"
    );
    /* END CSTYLED */
}
466	#else /* !__arm64__ */
/*
 * Non-arm64 fallback: the fixed-size zeroing helpers simply expand to
 * bzero() with the matching byte count.  On x86_64, the "rep stosb"
 * microcoded implementation already uses wider stores and can go much
 * faster than one byte per clock cycle.  For arm32, bzero() is also
 * good enough.
 */
#define __sk_zero_16(_ptr)  bzero(_ptr, 16)
#define __sk_zero_32(_ptr)  bzero(_ptr, 32)
#define __sk_zero_48(_ptr)  bzero(_ptr, 48)
#define __sk_zero_128(_ptr) bzero(_ptr, 128)
476	#endif /* !__arm64__ */
477
478	/*
479	* The following are optimized routines which rely on the caller
480	* rounding up the source and destination buffers to multiples of
481	* 4, 8 or 64 bytes, and are 64-bit aligned; faster than memcpy().
482	*
483	* Note: they do not support overlapping ranges.
484	*/
485
/*
 * Threshold as to when we use memcpy() rather than unrolled copy:
 * lengths up to and including this many bytes take the unrolled path.
 */
#if defined(__x86_64__)
#define SK_COPY_THRES 2048
#elif defined(__arm64__)
#define SK_COPY_THRES 1024
#else /* !__x86_64__ && !__arm64__ */
#define SK_COPY_THRES 1024
#endif /* !__x86_64__ && !__arm64__ */

/*
 * On DEVELOPMENT/DEBUG builds the sk_copy64_*x() routines compare against
 * this variable instead of the compile-time constant.
 */
#if (DEVELOPMENT || DEBUG)
extern size_t sk_copy_thres;
#endif /* (DEVELOPMENT || DEBUG) */
500
501	/*
502	* Scalar version, 4-bytes multiple.
503	*/
504	__attribute__((always_inline))
505	static inline void
506	sk_copy64_4x(uint32_t __sized_by(l)src, uint32_t __sized_by(l)dst, size_t l)
507	{
508	#if (DEVELOPMENT \|\| DEBUG)
509	if (__probable(l <= sk_copy_thres)) {
510	#else
511	if (__probable(l <= SK_COPY_THRES)) {
512	#endif /* (!DEVELOPMENT && !DEBUG! */
513	int i;
514
515	for (i = `0`; i < l / `4`; i++) {
516	dst[i] = src[i]; / [#i4] /*
517	}
518	} else {
519	(void) memcpy(dst: (void )dst, src: (void* *)src, n: l);
520	}
521	}
522
523	/*
524	* Scalar version, 8-bytes multiple.
525	*/
526	__attribute__((always_inline))
527	static inline void
528	sk_copy64_8x(uint64_t __sized_by(l)src, uint64_t __sized_by(l)dst, size_t l)
529	{
530	#if (DEVELOPMENT \|\| DEBUG)
531	if (__probable(l <= sk_copy_thres)) {
532	#else
533	if (__probable(l <= SK_COPY_THRES)) {
534	#endif /* (!DEVELOPMENT && !DEBUG! */
535	int i;
536
537	for (i = `0`; i < l / `8`; i++) {
538	dst[i] = src[i]; / [#i8] /*
539	}
540	} else {
541	(void) memcpy(dst: (void )dst, src: (void* *)src, n: l);
542	}
543	}
544
545	/*
546	* Scalar version (usually faster than SIMD), 32-bytes multiple.
547	*/
548	__attribute__((always_inline))
549	static inline void
550	sk_copy64_32x(uint64_t __sized_by(l)src, uint64_t __sized_by(l)dst, size_t l)
551	{
552	#if (DEVELOPMENT \|\| DEBUG)
553	if (__probable(l <= sk_copy_thres)) {
554	#else
555	if (__probable(l <= SK_COPY_THRES)) {
556	#endif /* (!DEVELOPMENT && !DEBUG! */
557	int n, i;
558
559	for (n = `0`; n < l / `32`; n++) {
560	i = n * `4`;
561	dst[i] = src[i]; / [#(i+0)8] /*
562	dst[i + `1`] = src[i + `1`]; / [#(i+1)8] /*
563	dst[i + `2`] = src[i + `2`]; / [#(i+2)8] /*
564	dst[i + `3`] = src[i + `3`]; / [#(i+3)8] /*
565	}
566	} else {
567	(void) memcpy(dst: (void )dst, src: (void* *)src, n: l);
568	}
569	}
570
571	/*
572	* Scalar version (usually faster than SIMD), 64-bytes multiple.
573	*/
574	__attribute__((always_inline))
575	static inline void
576	sk_copy64_64x(uint64_t __sized_by(l)src, uint64_t __sized_by(l)dst, size_t l)
577	{
578	#if (DEVELOPMENT \|\| DEBUG)
579	if (__probable(l <= sk_copy_thres)) {
580	#else
581	if (__probable(l <= SK_COPY_THRES)) {
582	#endif /* (!DEVELOPMENT && !DEBUG! */
583	int n, i;
584
585	for (n = `0`; n < l / `64`; n++) {
586	i = n * `8`;
587	dst[i] = src[i]; / [#(i+0)8] /*
588	dst[i + `1`] = src[i + `1`]; / [#(i+1)8] /*
589	dst[i + `2`] = src[i + `2`]; / [#(i+2)8] /*
590	dst[i + `3`] = src[i + `3`]; / [#(i+3)8] /*
591	dst[i + `4`] = src[i + `4`]; / [#(i+4)8] /*
592	dst[i + `5`] = src[i + `5`]; / [#(i+5)8] /*
593	dst[i + `6`] = src[i + `6`]; / [#(i+6)8] /*
594	dst[i + `7`] = src[i + `7`]; / [#(i+7)8] /*
595	}
596	} else {
597	(void) memcpy(dst: (void )dst, src: (void* *)src, n: l);
598	}
599	}
600
/*
 * Public names for the fixed-size copy/zero helpers.
 *
 * The 8-byte copies and the zeroing helpers map to the same
 * implementation names everywhere.  The 16..40 byte copies map to the
 * __sk_vcopy* routines on arm64 and to the scalar __sk_copy* routines on
 * every other platform (x86_64 included).
 */
#define sk_copy64_8     __sk_copy64_8           /* scalar only */
#define sk_copy32_8     __sk_copy32_8           /* scalar only */
#if defined(__arm64__)
#define sk_copy64_16    __sk_vcopy64_16         /* SIMD */
#define sk_copy32_16    __sk_vcopy32_16         /* SIMD */
#define sk_copy64_20    __sk_vcopy64_20         /* SIMD */
#define sk_copy64_24    __sk_vcopy64_24         /* SIMD */
#define sk_copy64_32    __sk_vcopy64_32         /* SIMD */
#define sk_copy32_32    __sk_vcopy32_32         /* SIMD */
#define sk_copy64_40    __sk_vcopy64_40         /* SIMD */
#else /* !__arm64__ */
#define sk_copy64_16    __sk_copy64_16          /* scalar */
#define sk_copy32_16    __sk_copy32_16          /* scalar */
#define sk_copy64_20    __sk_copy64_20          /* scalar */
#define sk_copy64_24    __sk_copy64_24          /* scalar */
#define sk_copy64_32    __sk_copy64_32          /* scalar */
#define sk_copy32_32    __sk_copy32_32          /* scalar */
#define sk_copy64_40    __sk_copy64_40          /* scalar */
#endif /* !__arm64__ */
#define sk_zero_16      __sk_zero_16
#define sk_zero_32      __sk_zero_32
#define sk_zero_48      __sk_zero_48
#define sk_zero_128     __sk_zero_128
647
/*
 * Do not use these directly.
 * Use the skn_ variants if you need custom probe names.
 *
 * Each macro performs a zeroed (Z_ZERO is always OR-ed into "flags")
 * typed allocation tagged with (name)->tag, fires a DTrace probe named
 * "probename" describing the request and its result, and evaluates to
 * the returned pointer.  Arguments such as "flags", "count" and "elem"
 * are expanded more than once, so they must be free of side effects.
 */
#define _sk_alloc_type(probename, type, flags, name)                    \
({                                                                      \
    void *_sk_ret;                                                      \
                                                                        \
    /* XXX Modify this to use KT_PRIV_ACCT later */                     \
    _sk_ret = kalloc_type_tag(type, Z_ZERO | (flags), (name)->tag);     \
    DTRACE_SKYWALK3(probename, char *, #type, int, (flags),             \
        void *, _sk_ret);                                               \
    _sk_ret;                                                            \
})

/* Allocate a zeroed array of "count" elements of "type". */
#define _sk_alloc_type_array(probename, type, count, flags, name)       \
({                                                                      \
    void *_sk_ret;                                                      \
                                                                        \
    _sk_ret = kalloc_type_tag(type, (count), Z_ZERO | (flags),          \
        (name)->tag);                                                   \
    DTRACE_SKYWALK4(probename, char *, #type, size_t, (count),          \
        int, (flags), void *, _sk_ret);                                 \
    _sk_ret;                                                            \
})

/*
 * Allocate "size" zeroed bytes from the variable-size typed heap "heap".
 * The probe reports the heap's kt_name with its first 5 characters
 * skipped; NOTE(review): presumably a fixed name prefix -- confirm.
 */
#define _sk_alloc_type_hash(probename, heap, size, flags, name)         \
({                                                                      \
    void *_sk_ret;                                                      \
                                                                        \
    _sk_ret = kalloc_type_var_impl((heap), (size),                      \
        __zone_flags_mix_tag((flags) | Z_ZERO, (name)->tag), NULL);     \
    DTRACE_SKYWALK4(probename, char *, (heap)->kt_name + 5,             \
        size_t, (size), int, (flags), void *, _sk_ret);                 \
    _sk_ret;                                                            \
})

/* Resize the "type" array "elem" from "oldcount" to "newcount" elements. */
#define _sk_realloc_type_array(probename, type, oldcount, newcount, elem, flags, name) \
({                                                                      \
    void *_sk_ret;                                                      \
                                                                        \
    _sk_ret = krealloc_type_tag(type, (oldcount), (newcount), (elem),   \
        Z_ZERO | (flags), (name)->tag);                                 \
    DTRACE_SKYWALK5(probename, void *, (elem), size_t, (oldcount),      \
        size_t, (newcount), int, (flags), void *, _sk_ret);             \
    _sk_ret;                                                            \
})

/* Allocate a zeroed "htype" header followed by "count" "type" elements. */
#define _sk_alloc_type_header_array(probename, htype, type, count, flags, name) \
({                                                                      \
    void *_sk_ret;                                                      \
                                                                        \
    _sk_ret = kalloc_type_tag(htype, type, (count), Z_ZERO | (flags),   \
        (name)->tag);                                                   \
    DTRACE_SKYWALK5(probename, char *, #htype, char *, #type,           \
        size_t, (count), int, (flags), void *, _sk_ret);                \
    _sk_ret;                                                            \
})
706
/*
 * Typed free helpers (internal; use the sk_ or skn_ wrappers).
 * Each one fires a DTrace probe named "probename" describing the object
 * and then releases it with the matching kfree_type flavor.  The
 * expansion is a bare brace block, so avoid using these as the sole
 * statement of an if/else without braces.
 */
#define _sk_free_type(probename, type, elem)                            \
{                                                                       \
    DTRACE_SKYWALK2(probename, char *, #type, void *, (elem));          \
    kfree_type(type, (elem));                                           \
}

/* Free an array of "count" elements of "type". */
#define _sk_free_type_array(probename, type, count, elem)               \
{                                                                       \
    DTRACE_SKYWALK3(probename, char *, #type, size_t, (count),          \
        void *, (elem));                                                \
    kfree_type(type, (count), (elem));                                  \
}

/* Free "size" bytes obtained from the variable-size typed heap "heap". */
#define _sk_free_type_hash(probename, heap, size, elem)                 \
{                                                                       \
    DTRACE_SKYWALK3(probename, char *, (heap)->kt_name + 5,             \
        size_t, (size), void *, (elem));                                \
    kfree_type_var_impl((heap), (elem), (size));                        \
}

/* Free an "htype" header followed by "count" "type" elements. */
#define _sk_free_type_header_array(probename, htype, type, count, elem) \
{                                                                       \
    DTRACE_SKYWALK4(probename, char *, #htype, char *, #type,           \
        size_t, (count), void *, (elem));                               \
    kfree_type(htype, type, (count), (elem));                           \
}
733
/*
 * Untyped data-buffer helpers (internal; use the sk_ or skn_ wrappers).
 * Allocation and reallocation always OR Z_ZERO into "flags", tag the
 * request with (name)->tag, fire a DTrace probe named "probename" and
 * evaluate to the returned pointer.  Arguments are expanded more than
 * once, so they must be free of side effects.
 */
#define _sk_alloc_data(probename, size, flags, name)                    \
({                                                                      \
    void *_sk_ret;                                                      \
                                                                        \
    _sk_ret = kalloc_data_tag((size), Z_ZERO | (flags), (name)->tag);   \
    DTRACE_SKYWALK3(probename, size_t, (size), int, (flags),            \
        void *, _sk_ret);                                               \
    _sk_ret;                                                            \
})

/* Resize the data buffer "elem" from "oldsize" to "newsize" bytes. */
#define _sk_realloc_data(probename, elem, oldsize, newsize, flags, name) \
({                                                                      \
    void *_sk_ret;                                                      \
                                                                        \
    _sk_ret = krealloc_data_tag((elem), (oldsize), (newsize),           \
        Z_ZERO | (flags), (name)->tag);                                 \
    DTRACE_SKYWALK5(probename, void *, (elem), size_t, (oldsize),       \
        size_t, (newsize), int, (flags), void *, _sk_ret);              \
    _sk_ret;                                                            \
})

/* Fire the probe, then free the "size"-byte data buffer "elem". */
#define _sk_free_data(probename, elem, size)                            \
{                                                                       \
    DTRACE_SKYWALK2(probename, void *, (elem), size_t, (size));         \
    kfree_data((elem), (size));                                         \
}
760
/*
 * Default-named wrappers: each forwards to the internal _sk_* macro of
 * the same name, passing its own name as the DTrace probe name.
 */

/* typed allocation */
#define sk_alloc_type(_t, _fl, _tg) \
    _sk_alloc_type(sk_alloc_type, _t, _fl, _tg)
#define sk_alloc_type_array(_t, _cnt, _fl, _tg) \
    _sk_alloc_type_array(sk_alloc_type_array, _t, _cnt, _fl, _tg)
#define sk_alloc_type_hash(_hp, _sz, _fl, _tg) \
    _sk_alloc_type_hash(sk_alloc_type_hash, _hp, _sz, _fl, _tg)
#define sk_alloc_type_header_array(_ht, _t, _cnt, _fl, _tg) \
    _sk_alloc_type_header_array(sk_alloc_type_header_array, _ht, \
        _t, _cnt, _fl, _tg)
#define sk_realloc_type_array(_t, _osz, _nsz, _el, _fl, _tg) \
    _sk_realloc_type_array(sk_realloc_type_array, _t, \
        _osz, _nsz, _el, _fl, _tg)

/* typed free */
#define sk_free_type(_t, _el) \
    _sk_free_type(sk_free_type, _t, _el)
#define sk_free_type_array(_t, _cnt, _el) \
    _sk_free_type_array(sk_free_type_array, _t, _cnt, _el)
#define sk_free_type_hash(_hp, _sz, _el) \
    _sk_free_type_hash(sk_free_type_hash, _hp, _sz, _el)
#define sk_free_type_header_array(_ht, _t, _cnt, _el) \
    _sk_free_type_header_array(sk_free_type_header_array, _ht, \
        _t, _cnt, _el)

/* untyped data buffers */
#define sk_alloc_data(_sz, _fl, _tg) \
    _sk_alloc_data(sk_alloc_data, _sz, _fl, _tg)
#define sk_realloc_data(_el, _osz, _nsz, _fl, _tg) \
    _sk_realloc_data(sk_realloc_data, _el, _osz, _nsz, \
        _fl, _tg)
#define sk_free_data(_el, _sz) \
    _sk_free_data(sk_free_data, _el, _sz)
800
/*
 * The skn_ variants take an extra leading argument that is pasted onto
 * the default probe name.  Use them when the same call appears two or
 * more times within one function and each callsite needs its own DTrace
 * probe name.
 */

/*
 * NOTE(review): _sk_realloc is not defined in the visible part of this
 * header -- confirm it exists elsewhere before using skn_realloc().
 */
#define skn_realloc(_n, _el, _osz, _nsz, _fl, _tg) \
    _sk_realloc(sk_realloc_ ## _n, _el, _osz, _nsz, _fl, \
        _tg)

/* typed allocation */
#define skn_alloc_type(_n, _t, _fl, _tg) \
    _sk_alloc_type(sk_alloc_type_ ## _n, _t, _fl, _tg)
#define skn_alloc_type_array(_n, _t, _cnt, _fl, _tg) \
    _sk_alloc_type_array(sk_alloc_type_array_ ## _n, _t, _cnt, \
        _fl, _tg)
#define skn_alloc_type_hash(_n, _hp, _sz, _fl, _tg) \
    _sk_alloc_type_hash(sk_alloc_type_hash_ ## _n, _hp, _sz, \
        _fl, _tg)
#define skn_alloc_type_header_array(_n, _ht, _t, _cnt, _fl, _tg) \
    _sk_alloc_type_header_array(sk_alloc_type_header_array_ ## _n, \
        _ht, _t, _cnt, _fl, _tg)

/* typed free */
#define skn_free_type(_n, _t, _el) \
    _sk_free_type(sk_free_type_ ## _n, _t, _el)
#define skn_free_type_array(_n, _t, _cnt, _el) \
    _sk_free_type_array(sk_free_type_array_ ## _n, _t, _cnt, \
        _el)
#define skn_free_type_hash(_n, _hp, _sz, _el) \
    _sk_free_type_hash(sk_free_type_hash_ ## _n, _hp, _sz, _el)
#define skn_free_type_header_array(_n, _ht, _t, _cnt, _el) \
    _sk_free_type_header_array(sk_free_type_header_array_ ## _n, \
        _ht, _t, _cnt, _el)

/* untyped data buffers */
#define skn_alloc_data(_n, _sz, _fl, _tg) \
    _sk_alloc_data(sk_alloc_data_ ## _n, _sz, _fl, _tg)
#define skn_realloc_data(_n, _el, _osz, _nsz, _fl, _tg) \
    _sk_realloc_data(sk_realloc_data_ ## _n, _el, _osz, _nsz,\
        _fl, _tg)
#define skn_free_data(_n, _el, _sz) \
    _sk_free_data(sk_free_data_ ## _n, _el, _sz)
848
/*
 * Describes one allocation tag to be created at startup: where to store
 * the resulting tag and the name to give it.
 */
struct sk_tag_spec {
kern_allocation_name_t *skt_var;    /* variable that receives the tag */
const char *skt_name;               /* name of the tag */
};

/* Startup callback invoked with the spec registered by SKMEM_TAG_DEFINE. */
extern void __sk_tag_make(const struct sk_tag_spec *spec);

/*
 * Define the tag variable "var", a startup-data spec pointing at it with
 * the given "name", and register __sk_tag_make() to run on that spec in
 * the ZALLOC startup phase at the last rank.
 */
#define SKMEM_TAG_DEFINE(var, name) \
SECURITY_READ_ONLY_LATE(kern_allocation_name_t) var; \
__startup_data struct sk_tag_spec __sktag_##var = { \
.skt_var = &var, .skt_name = name, \
}; \
STARTUP_ARG(ZALLOC, STARTUP_RANK_LAST, __sk_tag_make, &__sktag_##var)
862
863	/!*
864	* @abstract Compare byte buffers of n bytes long src1 against src2, applying
865	* the byte masks to input data before comparison. (Scalar version)
866	*
867	* @discussion
868	* Returns zero if the two buffers are identical after applying the byte
869	* masks, otherwise non-zero.
870	* Zero-length buffers are always identical.
871	*
872	* @param src1 first input buffer of n bytes long
873	* @param src2 second input buffer of n bytes long
874	* @param byte_mask byte mask of n bytes long applied before comparision
875	* @param n number of bytes
876	*/
877	static inline int
878	__sk_memcmp_mask_scalar(const uint8_t *__counted_by(n)src1,
879	const uint8_t *__counted_by(n)src2,
880	const uint8_t *__counted_by(n)byte_mask, size_t n)
881	{
882	uint32_t result = `0`;
883	for (size_t i = `0`; i < n; i++) {
884	result \|= (src1[i] ^ src2[i]) & byte_mask[i];
885	}
886	return result;
887	}
888
889	static inline int
890	__sk_memcmp_mask_16B_scalar(const uint8_t *__counted_by(`16`)src1,
891	const uint8_t *__counted_by(`16`)src2,
892	const uint8_t *__counted_by(`16`)byte_mask)
893	{
894	return __sk_memcmp_mask_scalar(src1, src2, byte_mask, n: `16`);
895	}
896
897	static inline int
898	__sk_memcmp_mask_32B_scalar(const uint8_t *__counted_by(`32`)src1,
899	const uint8_t *__counted_by(`32`)src2,
900	const uint8_t *__counted_by(`32`)byte_mask)
901	{
902	return __sk_memcmp_mask_scalar(src1, src2, byte_mask, n: `32`);
903	}
904
905	static inline int
906	__sk_memcmp_mask_48B_scalar(const uint8_t *__counted_by(`48`)src1,
907	const uint8_t *__counted_by(`48`)src2,
908	const uint8_t *__counted_by(`48`)byte_mask)
909	{
910	return __sk_memcmp_mask_scalar(src1, src2, byte_mask, n: `48`);
911	}
912
913	static inline int
914	__sk_memcmp_mask_64B_scalar(const uint8_t *__counted_by(`64`)src1,
915	const uint8_t *__counted_by(`64`)src2,
916	const uint8_t *__counted_by(`64`)byte_mask)
917	{
918	return __sk_memcmp_mask_scalar(src1, src2, byte_mask, n: `64`);
919	}
920
921	static inline int
922	__sk_memcmp_mask_80B_scalar(const uint8_t *__counted_by(`80`)src1,
923	const uint8_t *__counted_by(`80`)src2,
924	const uint8_t *__counted_by(`80`)byte_mask)
925	{
926	return __sk_memcmp_mask_scalar(src1, src2, byte_mask, n: `80`);
927	}
928
929	#if defined(__arm64__) \|\| defined(__arm__) \|\| defined(__x86_64__)
930	extern int os_memcmp_mask_16B(const uint8_t *__counted_by(`16`)src1,
931	const uint8_t *__counted_by(`16`)src2,
932	const uint8_t *__counted_by(`16`)byte_mask);
933	extern int os_memcmp_mask_32B(const uint8_t *__counted_by(`32`)src1,
934	const uint8_t *__counted_by(`32`)src2,
935	const uint8_t *__counted_by(`32`)byte_mask);
936	extern int os_memcmp_mask_48B(const uint8_t *__counted_by(`48`)src1,
937	const uint8_t *__counted_by(`48`)src2,
938	const uint8_t *__counted_by(`48`)byte_mask);
939	extern int os_memcmp_mask_64B(const uint8_t *__counted_by(`64`)src1,
940	const uint8_t *__counted_by(`64`)src2,
941	const uint8_t *__counted_by(`64`)byte_mask);
942	extern int os_memcmp_mask_80B(const uint8_t *__counted_by(`80`)src1,
943	const uint8_t *__counted_by(`80`)src2,
944	const uint8_t *__counted_by(`80`)byte_mask);
945
946	/*
947	* Use SIMD variants based on ARM64 and x86_64.
948	*/
949	#define sk_memcmp_mask __sk_memcmp_mask
950	#define sk_memcmp_mask_16B os_memcmp_mask_16B
951	#define sk_memcmp_mask_32B os_memcmp_mask_32B
952	#define sk_memcmp_mask_48B os_memcmp_mask_48B
953	#define sk_memcmp_mask_64B os_memcmp_mask_64B
954	#define sk_memcmp_mask_80B os_memcmp_mask_80B
955
956	/!*
957	* @abstract Compare byte buffers of n bytes long src1 against src2, applying
958	* the byte masks to input data before comparison. (SIMD version)
959	*
960	* @discussion
961	* Returns zero if the two buffers are identical after applying the byte
962	* masks, otherwise non-zero.
963	* Zero-length buffers are always identical.
964	*
965	* @param src1 first input buffer of n bytes long
966	* @param src2 second input buffer of n bytes long
967	* @param byte_mask byte mask of n bytes long applied before comparision
968	* @param n number of bytes
969	*/
970	static inline int
971	__sk_memcmp_mask(const uint8_t *__counted_by(n)src1,
972	const uint8_t *__counted_by(n)src2,
973	const uint8_t *__counted_by(n)byte_mask, size_t n)
974	{
975	uint32_t result = `0`;
976	size_t i = `0`;
977	for (; i + `64` <= n; i += `64`) {
978	result \|= sk_memcmp_mask_64B(src1: src1 + i, src2: src2 + i,
979	byte_mask: byte_mask + i);
980	}
981	for (; i + `32` <= n; i += `32`) {
982	result \|= sk_memcmp_mask_32B(src1: src1 + i, src2: src2 + i,
983	byte_mask: byte_mask + i);
984	}
985	for (; i + `16` <= n; i += `16`) {
986	result \|= sk_memcmp_mask_16B(src1: src1 + i, src2: src2 + i,
987	byte_mask: byte_mask + i);
988	}
989	if (i < n) {
990	if (n >= `16`) {
991	/ Compare the last 16 bytes with vector code. /
992	result \|= sk_memcmp_mask_16B(src1: src1 + n - `16`,
993	src2: src2 + n - `16`, byte_mask: byte_mask + n - `16`);
994	} else {
995	/ Use scalar code if n < 16. /
996	for (; i < n; i++) {
997	result \|= (src1[i] ^ src2[i]) & byte_mask[i];
998	}
999	}
1000	}
1001	return result;
1002	}
1003	#else /* !(__arm64__ \|\| __arm__ \|\| __x86_64__) */
1004	/*
1005	* Use scalar variants elsewhere.
1006	*/
1007	#define sk_memcmp_mask __sk_memcmp_mask_scalar
1008	#define sk_memcmp_mask_16B __sk_memcmp_mask_16B_scalar
1009	#define sk_memcmp_mask_32B __sk_memcmp_mask_32B_scalar
1010	#define sk_memcmp_mask_48B __sk_memcmp_mask_48B_scalar
1011	#define sk_memcmp_mask_64B __sk_memcmp_mask_64B_scalar
1012	#define sk_memcmp_mask_80B __sk_memcmp_mask_80B_scalar
1013	#endif /* !(__arm64__ \|\| __arm__ \|\| __x86_64__) */
1014
/*
 * Scalar variants are available on all platforms if needed.
 * These names always map to the byte-wise implementations in this
 * header, regardless of which variant sk_memcmp_mask* selects.
 */
#define sk_memcmp_mask_scalar __sk_memcmp_mask_scalar
#define sk_memcmp_mask_16B_scalar __sk_memcmp_mask_16B_scalar
#define sk_memcmp_mask_32B_scalar __sk_memcmp_mask_32B_scalar
#define sk_memcmp_mask_48B_scalar __sk_memcmp_mask_48B_scalar
#define sk_memcmp_mask_64B_scalar __sk_memcmp_mask_64B_scalar
#define sk_memcmp_mask_80B_scalar __sk_memcmp_mask_80B_scalar
1024
1025	#endif /* KERNEL */
#endif /* PRIVATE || BSD_KERNEL_PRIVATE */
1027	#endif /* !_SKYWALK_COMMON_H_ */
1028

Browse the source code of xnu-build/EXPORT_HDRS/bsd/skywalk/skywalk_common.h