1 | /* |
2 | * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | * |
28 | * This file implements the following functions for the arm64 architecture: |
29 | * |
30 | * void bzero(void *buffer, size_t length); |
31 | * void __bzero(void *buffer, size_t length); |
32 | * void *memset(void *buffer, int value, size_t length); |
33 | * |
34 | * The first two zero-fill a buffer. The third fills the buffer with the low |
35 | * byte of its second argument. |
36 | */ |
37 | |
38 | #include "asm.h" |
39 | |
40 | .globl _bzero |
41 | .globl ___bzero |
42 | .globl _memset |
43 | .globl _secure_memset |
44 | |
45 | /***************************************************************************** |
46 | * bzero entrypoint * |
47 | *****************************************************************************/ |
48 | |
	.text
	.align 4
_bzero:
___bzero:
	ARM64_STACK_PROLOG
	PUSH_FRAME
	// Incoming: x0 = buffer, x1 = length.
	// Rearrange registers to match memset's internal convention
	// (x0 = buffer/return value, x1 = fill pattern, x2 = length,
	// x3 = working pointer) so the small-buffer path can be shared.
	mov	x2, x1			// x2 = length
	eor	x1, x1, x1		// x1 = fill pattern (all zeroes)
	mov	x3, x0			// x3 = working pointer; x0 preserved as return value
	cmp	x2, #128		// large-buffer threshold for the dc-zva engine
	b.cc	L_memsetSmall		// under 128 bytes: use the shared small-buffer loop
60 | |
61 | /***************************************************************************** |
62 | * Large buffer zero engine * |
63 | *****************************************************************************/ |
64 | |
L_bzeroLarge:
	// Large-buffer zero engine (length >= 128, so at least one full
	// cacheline lies inside the buffer).
	//
	// Write the first 64 bytes of the buffer without regard to alignment,
	// then advance x3 to point to a cacheline-aligned location within the
	// buffer, and decrement the length accordingly.  The unaligned head
	// and tail stores deliberately overlap the dc-zva region; since
	// everything is being zeroed, overlapping stores are harmless.
	//
	// NOTE(review): the engine assumes the DC ZVA block size (DCZID_EL0)
	// is 64 bytes and that ZVA is not prohibited at this EL — true on the
	// cores this file targets, but worth confirming for new hardware.
	stp x1, x1, [x0]
	stp x1, x1, [x0, #16]
	stp x1, x1, [x0, #32]
	stp x1, x1, [x0, #48]
	add x3, x0, #64
	and x3, x3, #-64		// x3 = first cacheline-aligned address in buffer
	add x2, x2, x0			// x2 = end of buffer
	add x4, x3, #64			// x4 = end of first cacheline to zero
	subs x2, x2, x4			// x2 = bytes remaining after that cacheline;
	b.ls 1f				// if none, jump directly to the cleanup pass
0:	dc zva, x3			// zero one full cacheline
	add x3, x3, #64			// advance pointer
	subs x2, x2, #64		// decrement remaining length
	b.hi 0b
1:	add x3, x3, x2			// back up pointer to (end of buffer) - 64
	stp x1, x1, [x3]		// and store 64 bytes (possibly overlapping the
	stp x1, x1, [x3, #16]		// zeroed region) to reach the end of the buffer
	stp x1, x1, [x3, #32]
	stp x1, x1, [x3, #48]
	POP_FRAME			// x0 still holds the original buffer pointer
	ARM64_STACK_EPILOG
90 | |
91 | /***************************************************************************** |
92 | * memset entrypoint * |
93 | *****************************************************************************/ |
94 | |
	.align 4
/*
 * It is important that secure_memset remains defined in assembly to avoid
 * compiler optimizations: a compiler may elide a memset whose buffer is not
 * read afterwards (dead-store elimination), which would defeat a memset used
 * to scrub sensitive data.  An opaque assembly symbol cannot be elided.
 */
_secure_memset:
_memset:
	ARM64_STACK_PROLOG
	PUSH_FRAME
	// Incoming: x0 = buffer, x1 = fill value, x2 = length.
	and x1, x1, #0xff		// keep only the low byte of the fill value
	orr x3, xzr,#0x0101010101010101	// bitmask-immediate constant; multiplying by it
	mul x1, x1, x3			// replicates the byte into all 8 byte lanes of x1
	mov x3, x0			// x3 = working pointer; x0 preserved as return value
	cmp x2, #64
	b.cc L_memsetSmall		// under 64 bytes: use the shared small-buffer loop
110 | |
111 | /***************************************************************************** |
112 | * Large buffer store engine * |
113 | *****************************************************************************/ |
114 | |
L_memsetLarge:
	// Large-buffer store engine (length >= 64; x1 holds the splatted
	// fill pattern).
	//
	// Write the first 16 bytes of the buffer without regard to alignment,
	// then advance x3 to the first 16-byte-aligned location within the
	// buffer, and decrement the length accordingly.  The unaligned head
	// and tail stores overlap the aligned stores; since every store writes
	// the same pattern, the overlap is harmless.
	stp x1, x1, [x0]
	add x3, x0, #16
	and x3, x3, #-16		// x3 = first 16-byte-aligned address in buffer
	add x2, x2, x0			// x2 = end of buffer
	add x4, x3, #64			// x4 = end of first aligned 64-byte store group
	subs x2, x2, x4			// x2 = bytes remaining after that group;
	b.ls 1f				// if none, jump directly to the cleanup store
	// Main loop: 64 bytes per iteration using non-temporal pair stores
	// (stnp) — large fills are unlikely to be re-read soon, so hint the
	// caches accordingly.
0:	stnp x1, x1, [x3]
	stnp x1, x1, [x3, #16]
	stnp x1, x1, [x3, #32]
	stnp x1, x1, [x3, #48]
	add x3, x3, #64
	subs x2, x2, #64
	b.hi 0b
1:	add x3, x3, x2			// back up pointer to (end of buffer) - 64
	stp x1, x1, [x3]		// and store 64 bytes (possibly overlapping the
	stp x1, x1, [x3, #16]		// loop's stores) to reach the end of the buffer
	stp x1, x1, [x3, #32]
	stp x1, x1, [x3, #48]
	POP_FRAME			// x0 still holds the original buffer pointer
	ARM64_STACK_EPILOG
140 | |
141 | /***************************************************************************** |
142 | * Small buffer store engine * |
143 | *****************************************************************************/ |
144 | |
	// Small-buffer store engine, shared by bzero (< 128 bytes) and
	// memset (< 64 bytes).  Entry contract: x1 = splatted fill pattern,
	// x2 = length, x3 = destination pointer; x0 (the return value) is
	// not touched.  Control enters at L_memsetSmall, in the middle of
	// the rotated 8-byte loop, so the first subs tests the length
	// before any store is made (handles length 0..7 correctly).
0:	str x1, [x3],#8			// store 8 bytes, post-increment pointer
L_memsetSmall:
	subs x2, x2, #8			// at least 8 bytes left?
	b.cs 0b
	adds x2, x2, #8			// x2 = residual byte count (0..7)
	b.eq 2f				// none left: done
1:	strb w1, [x3],#1		// store the trailing bytes one at a time
	subs x2, x2, #1
	b.ne 1b
2:	POP_FRAME
	ARM64_STACK_EPILOG
156 | |
157 | |