strncmp.s source code [xnu/osfmk/arm64/strncmp.s]

1	/*
2	* Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*
28	* This file implements the following function for the arm64 architecture:
29	*
30	* int strncmp(const char *s1, const char *s2, size_t n);
31	*
32	* Returns 0 if the two strings are equal up to the first n bytes or to the
33	* end of the string, whichever comes first. Otherwise, returns the difference
34	* of the first mismatched characters interpreted as uint8_t.
35	*/
36
37	#include <arm64/asm.h>
38
39	.globl _strncmp
40
41	/*****************************************************************************
42	* Macros *
43	*****************************************************************************/
44
// EstablishFrame -- function prologue.
// Runs ARM64_STACK_PROLOG (supplied by <arm64/asm.h>; its expansion is not
// visible in this file), then pushes the frame pointer (x29) and link
// register (x30) as a 16-byte frame record and points x29 at that record.
.macro EstablishFrame
	ARM64_STACK_PROLOG
	stp		x29, x30, [sp, #-16]!		// push {fp, lr}; sp moves down by 16
	mov		x29, sp						// fp = address of the new frame record
.endm
50
// ClearFrameAndReturn -- function epilogue.
// Pops the {fp, lr} frame record pushed by EstablishFrame, then runs
// ARM64_STACK_EPILOG (supplied by <arm64/asm.h>; presumably that macro
// performs the actual return -- confirm against the header).
.macro ClearFrameAndReturn
	ldp		x29, x30, [sp], #16			// restore fp and lr, release the record
	ARM64_STACK_EPILOG
.endm
55
56	#include "../mach/arm/vm_param.h"
57	#define kVectorSize 16
58
59	/*****************************************************************************
60	* Constants *
61	*****************************************************************************/
62
63	.text
64	.align `5`
65	L_mask:
66	.quad `0x0706050403020100`, `0x0f0e0d0c0b0a0908`
67
68	/*****************************************************************************
69	* Entrypoints *
70	*****************************************************************************/
71
// int strncmp(const char *s1, const char *s2, size_t n)
//
//   In:   x0 = s1, x1 = s2, x2 = n
//   Out:  x0 = 0 if the strings match through n bytes or through the
//              terminating NUL, otherwise (s1 byte - s2 byte) at the first
//              mismatch, both bytes taken as unsigned.
//   Uses: x3-x7 and v0/v1 as scratch across the whole routine.
_strncmp:
	EstablishFrame
	mov		x3, #0						// result so far = 0 (also the answer for n == 0)
	cbz		x2, L_scalarDone			// n == 0: nothing to compare
//	Compare one byte at a time until s1 has vector alignment.
L_alignS1:
	tst		x0, #(kVectorSize-1)
	b.eq	L_s1aligned					// s1 is on a vector boundary; x2 != 0 here
	ldrb	w4, [x0], #1				// load byte from src1
	ldrb	w5, [x1], #1				// load byte from src2
	subs	x3, x4, x5					// x3 = difference; Z clear if they are not equal
	ccmp	w4, #0, #4, eq				// equal: Z = (byte is EOS); not equal: force Z = 1
	b.eq	L_scalarDone				// mismatch or EOS: return the difference
	subs	x2, x2, #1					// decrement length
	b.ne	L_alignS1					// continue loop if non-zero

//	Reached by fall-through when n ran out with every byte equal (x3 == 0), or
//	by branch when a byte loop found a mismatch or EOS. In either case x3
//	already holds the value to return.
L_scalarDone:
	mov		x0, x3
	ClearFrameAndReturn
92
//	On entry: x0 (s1) is vector aligned, x1 = s2, x2 = bytes remaining (non-zero).
L_s1aligned:
//	When s2 happens to share s1's alignment, every vector load from either
//	string is aligned and therefore cannot straddle a page boundary, so the
//	simple vector loop can be used with no risk of a spurious page fault.
	tst		x1, #(kVectorSize-1)
	b.eq	L_naiveVector

/*****************************************************************************
 *  Careful chunk comparison                                                 *
 *****************************************************************************/

//	s2 is misaligned, so a vector load from it could run into the next page
//	even though the matching s1 load cannot. Compute in x7 how many bytes,
//	rounded down to a multiple of the vector size, can be loaded from s2
//	before a load would cross a page boundary. Vector-compare exactly that
//	many bytes, then step over the boundary with byte compares. If x7 comes
//	out as zero we are already at the boundary, so start with byte compares.
	neg		x7, x1
	ands	x7, x7, #(PAGE_MIN_SIZE-kVectorSize)
	b.eq	L_carefulScalar

	.p2align 4
//	If the remaining length does not extend beyond the safe region, no load
//	will ever reach the page crossing, so finish in the naive vector path.
//	Otherwise stash (remaining - safe bytes) in x6; x2 is reloaded from it
//	once the safe region has been consumed.
L_carefulChunk:
	cmp		x2, x7
	b.ls	L_naiveVector
	sub		x6, x2, x7
//	Vector compares: stop on a mismatch or EOS, or when the safe byte count
//	in x7 is exhausted (the next s2 load would then cross a page).
L_carefulVector:
	ldr		q0, [x0], #(kVectorSize)
	ldr		q1, [x1], #(kVectorSize)
	cmeq.16b	v1, v0, v1				// 0xff in lanes where s1 == s2
	and.16b	v0, v0, v1					// lane is zero iff mismatch or EOS
	uminv.16b	b1, v0					// smallest lane value
	fmov	w3, s1						// zero only if the comparison is finished
	cbz		w3, L_vectorDone
	subs	x7, x7, #(kVectorSize)
	b.ne	L_carefulVector
//	Safe region consumed: x2 = length still to compare.
	mov		x2, x6
//	The next vector load from s2 would cross a page boundary. Compare single
//	bytes until s1 is vector aligned again; since s1 was aligned when this
//	loop was entered, that is exactly one vector's worth (16 bytes).
L_carefulScalar:
	ldrb	w4, [x0], #1				// load byte from src1
	ldrb	w5, [x1], #1				// load byte from src2
	subs	x3, x4, x5					// x3 = difference; Z clear if they are not equal
	ccmp	w4, #0, #4, eq				// equal: Z = (byte is EOS); not equal: force Z = 1
	b.eq	L_scalarDone				// mismatch or EOS: return the difference
	subs	x2, x2, #1					// decrement length
	b.eq	L_scalarDone				// length exhausted (x3 == 0 here)
	tst		x0, #(kVectorSize-1)
	b.ne	L_carefulScalar
//	One vector's worth of bytes has been compared bytewise, so s2 is now past
//	the page boundary. Reset the safe byte count and resume vector compares.
	mov		x7, #(PAGE_MIN_SIZE-kVectorSize)
	b		L_carefulChunk
152
153	/*****************************************************************************
154	* Naive vector comparison *
155	*****************************************************************************/
156
//	Naive vector comparison.
//	On entry: x0 = s1 (vector aligned), x1 = s2, x2 = bytes remaining (non-zero).
//	Lengths under one vector go to the byte loop at L_scalar. Otherwise whole
//	vectors are compared from the current position, and one final vector
//	covering the last kVectorSize bytes of the range [.., n) is then compared.
//	That final vector may overlap bytes already checked, so no byte-wise tail
//	loop is needed and no byte at or beyond n is ever read.
L_naiveVector:
	subs	x3, x2, #(kVectorSize)		// x3 = n - kVectorSize
	b.lo	L_scalar					// n < kVectorSize: compare bytewise
	add		x4, x0, x3					// save the addresses of the last vectors
	add		x5, x1, x3
	mov		x2, x3						// length -= kVectorSize
	.p2align 4
L_naiveLoop:
	ldr		q0, [x0], #(kVectorSize)
	ldr		q1, [x1], #(kVectorSize)
	cmeq.16b	v1, v0, v1				// 0xff in lanes where s1 == s2
	and.16b	v0, v0, v1					// lane is zero iff mismatch or EOS
	uminv.16b	b1, v0					// smallest lane value
	fmov	w3, s1						// zero only if the comparison is finished
	cbz		w3, L_vectorDone
	subs	x2, x2, #(kVectorSize)
	b.hi	L_naiveLoop					// unsigned: loop while more than a vector was left

//	Compare the last vector. x0/x1 are repositioned (and post-incremented by
//	the loads) so that L_vectorDone can index backwards from them as usual.
	mov		x0, x4
	mov		x1, x5
	ldr		q0, [x0], #(kVectorSize)
	ldr		q1, [x1], #(kVectorSize)
	cmeq.16b	v1, v0, v1
	and.16b	v0, v0, v1					// lane is zero iff mismatch or EOS
	uminv.16b	b1, v0
	fmov	w3, s1						// zero only if the comparison is finished
	cbz		w3, L_vectorDone

//	All n bytes compared equal with no EOS: the strings match, return 0.
L_readNBytes:
	mov		x0, #0
	ClearFrameAndReturn
189
//	On entry: v0 holds the last compared vector with a zero byte in every lane
//	that was a mismatch or EOS (at least one such lane exists), and x0/x1
//	point kVectorSize bytes past the start of that vector in s1/s2.
//	Finds the first such lane, reloads the two bytes at that position and
//	returns their difference.
L_vectorDone:
	cmtst.16b	v0, v0, v0				// 0xff in non-zero lanes, 0x00 in mismatch/EOS lanes
	ldr		q1, L_mask					// v1 = lane indices 0 .. 15
	orr.16b	v0, v0, v1					// lane index in lanes containing mismatch or EOS, 0xff elsewhere
	uminv.16b	b1, v0					// index of the first mismatch/EOS lane
	fmov	w3, s1
	sub		x3, x3, #(kVectorSize)		// offset relative to the post-incremented pointers
	ldrb	w4, [x0, x3]
	ldrb	w5, [x1, x3]
	sub		x0, x4, x5					// difference of the bytes, taken as unsigned
	ClearFrameAndReturn
204
//	Byte-at-a-time comparison for a remaining length shorter than one vector.
//	On entry: x0 = s1, x1 = s2, x2 = bytes remaining (non-zero on every path
//	into this label that is visible in this file).
L_scalar:
	ldrb	w4, [x0], #1				// load byte from src1
	ldrb	w5, [x1], #1				// load byte from src2
	subs	x3, x4, x5					// x3 = difference; Z clear if they are not equal
	ccmp	w4, #0, #4, eq				// equal: Z = (byte is EOS); not equal: force Z = 1
	b.eq	L_scalarReturn				// mismatch or EOS: return the difference
	subs	x2, x2, #1					// decrement length
	b.ne	L_scalar					// continue loop if non-zero
//	Falling through means the length ran out with all bytes equal, so x3 == 0.
L_scalarReturn:
	mov		x0, x3
	ClearFrameAndReturn
216
217

Browse the source code of xnu/osfmk/arm64/strncmp.s