| 1 | /* |
| 2 | * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | * |
| 28 | * This file implements the following function for the arm64 architecture: |
| 29 | * |
| 30 | * size_t strnlen(const char *string, size_t maxlen); |
| 31 | * |
| 32 | * The strnlen function returns either strlen(string) or maxlen, whichever |
| 33 | * is amller, without reading beyond the first maxlen characters of string. |
| 34 | */ |
| 35 | |
| 36 | #include <arm64/asm.h> |
| 37 | |
| 38 | .globl _strlen |
| 39 | .globl _strnlen |
| 40 | |
| 41 | /***************************************************************************** |
| 42 | * Macros * |
| 43 | *****************************************************************************/ |
| 44 | |
| 45 | .macro EstablishFrame |
| 46 | ARM64_STACK_PROLOG |
| 47 | stp fp, lr, [sp, #-16]! |
| 48 | mov fp, sp |
| 49 | .endm |
| 50 | |
| 51 | .macro ClearFrameAndReturn |
| 52 | ldp fp, lr, [sp], #16 |
| 53 | ARM64_STACK_EPILOG |
| 54 | .endm |
| 55 | |
| 56 | /***************************************************************************** |
| 57 | * Constants * |
| 58 | *****************************************************************************/ |
| 59 | |
| 60 | .text |
| 61 | .align 5 |
| 62 | L_masks: |
| 63 | .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908 |
| 64 | .quad 0x0000000000000000, 0x0000000000000000 |
| 65 | |
| 66 | /***************************************************************************** |
| 67 | * strnlen entrypoint * |
| 68 | *****************************************************************************/ |
| 69 | |
| 70 | _strnlen: |
| 71 | // If n == 0, return NULL without loading any data from s. If n is so large |
| 72 | // that it exceeds the size of any buffer that can be allocted, jump into a |
| 73 | // simpler implementation that omits all length checks. This is both faster |
| 74 | // and lets us avoid some messy edgecases in the mainline. |
| 75 | tst x1, x1 |
| 76 | b.mi _strlen |
| 77 | b.eq L_maxlenIsZero |
| 78 | EstablishFrame |
| 79 | // Load the 16-byte aligned vector containing the start of the string. |
| 80 | and x2, x0, #-16 |
| 81 | ldr q0, [x2] |
| 82 | // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL |
| 83 | // byte once we identify one. We don't use this vector until the very end |
| 84 | // of the routine; it simply falls out naturally to load it now. |
| 85 | adr x3, L_masks |
| 86 | ldr q2, [x3],#16 |
| 87 | // The aligned vector that we loaded to q0 contains the start of the string, |
| 88 | // but if the string was not originally aligned, it also contains bytes |
| 89 | // which preceed the start of the string, and which may cause false positives |
| 90 | // when we search for the terminating NUL. We generate a mask to OR into the |
| 91 | // vector using an unaligned load to prevent this. The mask has non-zero |
| 92 | // values only in those bytes which correspond to bytes preceeding the start |
| 93 | // of the string in the aligned vector load. |
| 94 | and x4, x0, #0xf |
| 95 | sub x3, x3, x4 |
| 96 | ldr q1, [x3] |
| 97 | orr.16b v0, v0, v1 |
| 98 | // Adjust maxlen to account for bytes which preceed the start of the string, |
| 99 | // and jump into the main scanning loop. |
| 100 | add x1, x1, x4 |
| 101 | b 1f |
| 102 | |
| 103 | // Main loop. Identical to strlen, except that we also need to check that we |
| 104 | // don't read more than maxlen bytes. To that end, we decrement maxlen by 16 |
| 105 | // on each iteration, and exit the loop if the result is zero or negative. |
| 106 | .align 4 |
| 107 | 0: ldr q0, [x2, #16]! |
| 108 | 1: uminv.16b b1, v0 |
| 109 | fmov w3, s1 |
| 110 | cbz w3, L_foundNUL |
| 111 | subs x1, x1, #16 |
| 112 | b.hi 0b |
| 113 | |
| 114 | // We exhausted maxlen bytes without finding a terminating NUL character, so |
| 115 | // we need to return maxlen. |
| 116 | sub x0, x2, x0 |
| 117 | add x1, x1, #16 |
| 118 | add x0, x0, x1 |
| 119 | ClearFrameAndReturn |
| 120 | |
| 121 | L_maxlenIsZero: |
| 122 | mov x0, #0 |
| 123 | ret // No stack frame, so don't clear it. |
| 124 | |
| 125 | L_foundNUL: |
| 126 | // Compute the index of the NUL byte, and check if it occurs before maxlen |
| 127 | // bytes into the vector. If not, return maxlen. Otherwise, return the |
| 128 | // length of the string. |
| 129 | eor.16b v1, v1, v1 |
| 130 | cmhi.16b v0, v0, v1 |
| 131 | orr.16b v0, v0, v2 |
| 132 | uminv.16b b1, v0 |
| 133 | fmov w3, s1 // index of NUL byte in vector |
| 134 | sub x0, x2, x0 // index of vector in string |
| 135 | cmp x1, x3 // if NUL occurs before maxlen bytes |
| 136 | csel x1, x1, x3, cc // return strlen, else maxlen |
| 137 | add x0, x0, x1 |
| 138 | ClearFrameAndReturn |
| 139 | |
| 140 | /***************************************************************************** |
| 141 | * strlen entrypoint * |
| 142 | *****************************************************************************/ |
| 143 | |
| 144 | .align 4 |
| 145 | _strlen: |
| 146 | EstablishFrame |
| 147 | // Load the 16-byte aligned vector containing the start of the string. |
| 148 | and x1, x0, #-16 |
| 149 | ldr q0, [x1] |
| 150 | // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL |
| 151 | // byte once we identify one. We don't use this vector until the very end |
| 152 | // of the routine; it simply falls out naturally to load it now. |
| 153 | adr x3, L_masks |
| 154 | ldr q2, [x3],#16 |
| 155 | // The aligned vector that we loaded to q0 contains the start of the string, |
| 156 | // but if the string was not originally aligned, it also contains bytes |
| 157 | // which preceed the start of the string, and which may cause false positives |
| 158 | // when we search for the terminating NUL. We generate a mask to OR into the |
| 159 | // vector using an unaligned load to prevent this. The mask has non-zero |
| 160 | // values only in those bytes which correspond to bytes preceeding the start |
| 161 | // of the string in the aligned vector load. |
| 162 | and x2, x0, #0xf |
| 163 | sub x3, x3, x2 |
| 164 | ldr q1, [x3] |
| 165 | orr.16b v0, v0, v1 |
| 166 | b 1f |
| 167 | |
| 168 | // Main loop. On each iteration we do the following: |
| 169 | // |
| 170 | // q0 <-- next 16 aligned bytes of string |
| 171 | // b1 <-- unsigned minimum byte in q0 |
| 172 | // if (b1 != 0) continue |
| 173 | // |
| 174 | // Thus, we continue the loop until the 16 bytes we load contain a zero byte. |
| 175 | .align 4 |
| 176 | 0: ldr q0, [x1, #16]! |
| 177 | 1: uminv.16b b1, v0 |
| 178 | fmov w2, s1 // umov.b would be more natural, but requries 2 µops. |
| 179 | cbnz w2, 0b |
| 180 | |
| 181 | // A zero byte has been found. The following registers contain values that |
| 182 | // we need to compute the string's length: |
| 183 | // |
| 184 | // x0 pointer to start of string |
| 185 | // x1 pointer to vector containing terminating NUL byte |
| 186 | // v0 vector containing terminating NUL byte |
| 187 | // v2 {0, 1, 2, ... , 15} |
| 188 | // |
| 189 | // We compute the index of the terminating NUL byte in the string (which is |
| 190 | // precisely the length of the string) as follows: |
| 191 | // |
| 192 | // vec <-- mask(v0 != 0) | v2 |
| 193 | // index <-- x1 - x0 + unsignedMinimum(vec) |
| 194 | eor.16b v1, v1, v1 |
| 195 | cmhi.16b v0, v0, v1 |
| 196 | orr.16b v0, v0, v2 |
| 197 | uminv.16b b1, v0 |
| 198 | fmov w2, s1 |
| 199 | sub x0, x1, x0 |
| 200 | add x0, x0, x2 |
| 201 | ClearFrameAndReturn |
| 202 | |