1/*
2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 * This file implements the following function for the arm64 architecture:
29 *
30 * int strncmp(const char *s1, const char *s2, size_t n);
31 *
32 * Returns 0 if the two strings are equal up to the first n bytes or to the
33 * end of the string, whichever comes first. Otherwise, returns the difference
34 * of the first mismatched characters interpreted as uint8_t.
35 */
36
37#include <arm64/asm.h>
38
39.globl _strncmp
40
41/*****************************************************************************
42 * Macros *
43 *****************************************************************************/
44
45.macro EstablishFrame
46 ARM64_STACK_PROLOG
47 stp fp, lr, [sp, #-16]!
48 mov fp, sp
49.endm
50
51.macro ClearFrameAndReturn
52 ldp fp, lr, [sp], #16
53 ARM64_STACK_EPILOG
54.endm
55
56#include "../mach/arm/vm_param.h"
57#define kVectorSize 16
58
59/*****************************************************************************
60 * Constants *
61 *****************************************************************************/
62
63.text
64.align 5
65L_mask:
66.quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
67
68/*****************************************************************************
69 * Entrypoints *
70 *****************************************************************************/
71
72_strncmp:
73 EstablishFrame
74 eor x3, x3, x3
75 cbz x2, L_scalarDone
76// Compare one byte at a time until s1 has vector alignment.
770: tst x0, #(kVectorSize-1)
78 b.eq L_s1aligned
79 ldrb w4, [x0],#1 // load byte from src1
80 ldrb w5, [x1],#1 // load byte from src2
81 subs x3, x4, x5 // if the are not equal
82 ccmp w4, #0, #4, eq // or we find an EOS
83 b.eq L_scalarDone // return the difference
84 subs x2, x2, #1 // decrement length
85 b.ne 0b // continue loop if non-zero
86
87// We found a mismatch or EOS before s1 became aligned. Simply return the
88// difference between the last bytes that we loaded.
89L_scalarDone:
90 mov x0, x3
91 ClearFrameAndReturn
92
93L_s1aligned:
94// If s2 is similarly aligned to s1, then we can use a naive vector comparison
95// from this point on without worrying about spurious page faults; none of our
96// loads will ever cross a page boundary, because they are all aligned.
97 tst x1, #(kVectorSize-1)
98 b.eq L_naiveVector
99
100/*****************************************************************************
101 * Careful chunk comparison *
102 *****************************************************************************/
103
104// Otherwise, we need to be careful; although vector loads from s1 cannot
105// cross a page boundary because they are aligned, s2 is not aligned. We
106// compute the multiple of vector size that we can safely load before reaching
107// a page boundary, and compare only that far before switching over to scalar
108// comparisons to step across the page boundary. If this number happens to
109// be zero, we jump directly to the scalar comparison.
110 neg x7, x1
111 ands x7, x7, #(PAGE_MIN_SIZE-kVectorSize)
112 b.eq 2f
113
114.align 4
115// If n is less than the number of bytes before a page-crossing load, jump
116// into the naive vector path instead, since we will not even reach a page
117// crossing. Otherwise, decrement n by that number before we monkey with it,
118// and set the decremented value aside.
1190: cmp x2, x7
120 b.ls L_naiveVector
121 sub x6, x2, x7
122// Use vector comparisons until a mismatch or EOS is encountered, or the next
123// vector load from s2 would be page-crossing.
1241: ldr q0, [x0],#(kVectorSize)
125 ldr q1, [x1],#(kVectorSize)
126 cmeq.16b v1, v0, v1
127 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS
128 uminv.16b b1, v0
129 fmov w3, s1 // zero only iff comparison is finished
130 cbz w3, L_vectorDone
131 subs x7, x7, #(kVectorSize)
132 b.ne 1b
133// Restore the updated n to x2
134 mov x2, x6
135// The next vector load will cross a page boundary. Instead, compare one byte
136// at a time until s1 again has vector alignment, at which point we will have
137// compared exactly 16 bytes.
1382: ldrb w4, [x0],#1 // load byte from src1
139 ldrb w5, [x1],#1 // load byte from src2
140 subs x3, x4, x5 // if the are not equal
141 ccmp w4, #0, #4, eq // or we find an EOS
142 b.eq L_scalarDone // return the difference
143 subs x2, x2, #1 // decrement length
144 b.eq L_scalarDone // exit loop if zero.
145 tst x0, #(kVectorSize-1)
146 b.ne 2b
147// Having compared one vector's worth of bytes using a scalar comparison, we
148// know that we are safely across the page boundary. Initialize x7 and jump
149// back into the vector comparison part of the loop.
150 mov x7, #(PAGE_MIN_SIZE-kVectorSize)
151 b 0b
152
153/*****************************************************************************
154 * Naive vector comparison *
155 *****************************************************************************/
156
157L_naiveVector:
158 subs x3, x2, #(kVectorSize)
159 b.lo L_scalar
160 add x4, x0, x3 // save the addresses of the last vectors
161 add x5, x1, x3
162 mov x2, x3 // length -= kVectorSize
163.align 4
1640:
165 ldr q0, [x0],#(kVectorSize)
166 ldr q1, [x1],#(kVectorSize)
167 cmeq.16b v1, v0, v1
168 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS
169 uminv.16b b1, v0
170 fmov w3, s1 // zero only iff comparison is finished
171 cbz w3, L_vectorDone
172 subs x2, x2, #(kVectorSize)
173 b.hi 0b
174
175 // compare the last vector
176 mov x0, x4
177 mov x1, x5
178 ldr q0, [x0],#(kVectorSize)
179 ldr q1, [x1],#(kVectorSize)
180 cmeq.16b v1, v0, v1
181 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS
182 uminv.16b b1, v0
183 fmov w3, s1 // zero only iff comparison is finished
184 cbz w3, L_vectorDone
185
186L_readNBytes:
187 eor x0, x0, x0
188 ClearFrameAndReturn
189
190L_vectorDone:
191// Load the bytes corresponding to the first mismatch or EOS and return
192// their difference.
193 eor.16b v1, v1, v1
194 cmhi.16b v0, v0, v1 // force non-zero lanes to 0xff
195 ldr q1, L_mask
196 orr.16b v0, v0, v1 // lane index in lanes containing mismatch or EOS
197 uminv.16b b1, v0
198 fmov w3, s1
199 sub x3, x3, #(kVectorSize)
200 ldrb w4, [x0, x3]
201 ldrb w5, [x1, x3]
202 sub x0, x4, x5
203 ClearFrameAndReturn
204
205L_scalar:
206 ldrb w4, [x0],#1 // load byte from src1
207 ldrb w5, [x1],#1 // load byte from src2
208 subs x3, x4, x5 // if the are not equal
209 ccmp w4, #0, #4, eq // or we find an EOS
210 b.eq 1f // return the difference
211 subs x2, x2, #1 // decrement length
212 b.ne L_scalar // continue loop if non-zero
2131:
214 mov x0, x3
215 ClearFrameAndReturn
216
217