strnlen.s source code [xnu/osfmk/arm64/strnlen.s]

1	/*
2	* Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*
28	* This file implements the following function for the arm64 architecture:
29	*
30	* size_t strnlen(const char *string, size_t maxlen);
31	*
32	* The strnlen function returns either strlen(string) or maxlen, whichever
33	* is amller, without reading beyond the first maxlen characters of string.
34	*/
35
36	#include <arm64/asm.h>
37
38	.globl _strlen
39	.globl _strnlen
40
41	/*****************************************************************************
42	* Macros *
43	*****************************************************************************/
44
45	.macro EstablishFrame
46	ARM64_STACK_PROLOG
47	stp fp, lr, [sp, #-`16`]!
48	mov fp, sp
49	.endm
50
51	.macro ClearFrameAndReturn
52	ldp fp, lr, [sp], #`16`
53	ARM64_STACK_EPILOG
54	.endm
55
56	/*****************************************************************************
57	* Constants *
58	*****************************************************************************/
59
60	.text
61	.align `5`
62	L_masks:
63	.quad `0x0706050403020100`, `0x0f0e0d0c0b0a0908`
64	.quad `0x0000000000000000`, `0x0000000000000000`
65
66	/*****************************************************************************
67	* strnlen entrypoint *
68	*****************************************************************************/
69
70	_strnlen:
71	// If n == 0, return NULL without loading any data from s. If n is so large
72	// that it exceeds the size of any buffer that can be allocted, jump into a
73	// simpler implementation that omits all length checks. This is both faster
74	// and lets us avoid some messy edgecases in the mainline.
75	tst x1, x1
76	b.mi _strlen
77	b.eq L_maxlenIsZero
78	EstablishFrame
79	// Load the 16-byte aligned vector containing the start of the string.
80	and x2, x0, #-`16`
81	ldr q0, [x2]
82	// Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
83	// byte once we identify one. We don't use this vector until the very end
84	// of the routine; it simply falls out naturally to load it now.
85	adr x3, L_masks
86	ldr q2, [x3],#`16`
87	// The aligned vector that we loaded to q0 contains the start of the string,
88	// but if the string was not originally aligned, it also contains bytes
89	// which preceed the start of the string, and which may cause false positives
90	// when we search for the terminating NUL. We generate a mask to OR into the
91	// vector using an unaligned load to prevent this. The mask has non-zero
92	// values only in those bytes which correspond to bytes preceeding the start
93	// of the string in the aligned vector load.
94	and x4, x0, #`0xf`
95	sub x3, x3, x4
96	ldr q1, [x3]
97	orr`.16b` v0, v0, v1
98	// Adjust maxlen to account for bytes which preceed the start of the string,
99	// and jump into the main scanning loop.
100	add x1, x1, x4
101	b `1f`
102
103	// Main loop. Identical to strlen, except that we also need to check that we
104	// don't read more than maxlen bytes. To that end, we decrement maxlen by 16
105	// on each iteration, and exit the loop if the result is zero or negative.
106	.align `4`
107	`0`: ldr q0, [x2, #`16`]!
108	`1`: uminv`.16b` b1, v0
109	fmov w3, s1
110	cbz w3, L_foundNUL
111	subs x1, x1, #`16`
112	b.hi `0b`
113
114	// We exhausted maxlen bytes without finding a terminating NUL character, so
115	// we need to return maxlen.
116	sub x0, x2, x0
117	add x1, x1, #`16`
118	add x0, x0, x1
119	ClearFrameAndReturn
120
121	L_maxlenIsZero:
122	mov x0, #`0`
123	ret // No stack frame, so don't clear it.
124
125	L_foundNUL:
126	// Compute the index of the NUL byte, and check if it occurs before maxlen
127	// bytes into the vector. If not, return maxlen. Otherwise, return the
128	// length of the string.
129	eor`.16b` v1, v1, v1
130	cmhi`.16b` v0, v0, v1
131	orr`.16b` v0, v0, v2
132	uminv`.16b` b1, v0
133	fmov w3, s1 // index of NUL byte in vector
134	sub x0, x2, x0 // index of vector in string
135	cmp x1, x3 // if NUL occurs before maxlen bytes
136	csel x1, x1, x3, cc // return strlen, else maxlen
137	add x0, x0, x1
138	ClearFrameAndReturn
139
140	/*****************************************************************************
141	* strlen entrypoint *
142	*****************************************************************************/
143
144	.align `4`
145	_strlen:
146	EstablishFrame
147	// Load the 16-byte aligned vector containing the start of the string.
148	and x1, x0, #-`16`
149	ldr q0, [x1]
150	// Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
151	// byte once we identify one. We don't use this vector until the very end
152	// of the routine; it simply falls out naturally to load it now.
153	adr x3, L_masks
154	ldr q2, [x3],#`16`
155	// The aligned vector that we loaded to q0 contains the start of the string,
156	// but if the string was not originally aligned, it also contains bytes
157	// which preceed the start of the string, and which may cause false positives
158	// when we search for the terminating NUL. We generate a mask to OR into the
159	// vector using an unaligned load to prevent this. The mask has non-zero
160	// values only in those bytes which correspond to bytes preceeding the start
161	// of the string in the aligned vector load.
162	and x2, x0, #`0xf`
163	sub x3, x3, x2
164	ldr q1, [x3]
165	orr`.16b` v0, v0, v1
166	b `1f`
167
168	// Main loop. On each iteration we do the following:
169	//
170	// q0 <-- next 16 aligned bytes of string
171	// b1 <-- unsigned minimum byte in q0
172	// if (b1 != 0) continue
173	//
174	// Thus, we continue the loop until the 16 bytes we load contain a zero byte.
175	.align `4`
176	`0`: ldr q0, [x1, #`16`]!
177	`1`: uminv`.16b` b1, v0
178	fmov w2, s1 // umov.b would be more natural, but requries 2 µops.
179	cbnz w2, `0b`
180
181	// A zero byte has been found. The following registers contain values that
182	// we need to compute the string's length:
183	//
184	// x0 pointer to start of string
185	// x1 pointer to vector containing terminating NUL byte
186	// v0 vector containing terminating NUL byte
187	// v2 {0, 1, 2, ... , 15}
188	//
189	// We compute the index of the terminating NUL byte in the string (which is
190	// precisely the length of the string) as follows:
191	//
192	// vec <-- mask(v0 != 0) \| v2
193	// index <-- x1 - x0 + unsignedMinimum(vec)
194	eor`.16b` v1, v1, v1
195	cmhi`.16b` v0, v0, v1
196	orr`.16b` v0, v0, v2
197	uminv`.16b` b1, v0
198	fmov w2, s1
199	sub x0, x1, x0
200	add x0, x0, x2
201	ClearFrameAndReturn
202

Browse the source code of xnu/osfmk/arm64/strnlen.s