/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>
#include <kern/locks_internal.h>
#include <kern/lock_stat.h>
#include <kern/lock_ptr.h>

#include <mach/mach_time.h>
#include <mach/machine/sdt.h>
#include <mach/vm_param.h>

#include <machine/cpu_data.h>
#include <machine/machine_cpu.h>


#pragma mark hw_lck_ptr_t: helpers

static_assert(VM_KERNEL_POINTER_SIGNIFICANT_BITS < HW_LCK_PTR_BITS,
    "sign extension of lck_ptr_bits does the right thing");

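/*
 * Store a pointer into the lock word's bitfield.
 *
 * The pointer is kept in a signed bitfield so that reading it back
 * sign-extends to a canonical kernel address (see the static_assert
 * above); with CONFIG_KERNEL_TAGGING, the memory tag is stashed in a
 * separate field so it survives the truncation.
 */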
static inline void
__hw_lck_ptr_encode(hw_lck_ptr_t *lck, const void *ptr)
{
	lck->lck_ptr_bits = (intptr_t)ptr;
#if CONFIG_KERNEL_TAGGING
	lck->lck_ptr_tag = vm_memtag_extract_tag((vm_offset_t)ptr);
#endif /* CONFIG_KERNEL_TAGGING */
}

__abortlike
static void
__hw_lck_ptr_invalid_panic(hw_lck_ptr_t *lck)
{
	hw_lck_ptr_t tmp = os_atomic_load(lck, relaxed);

	panic("Invalid/destroyed ptr spinlock %p: <%p %d 0x%04x>",
	    lck, __hw_lck_ptr_value(tmp), tmp.lck_ptr_locked,
	    tmp.lck_ptr_mcs_tail);
}

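/*
 * Decide whether a lock/unlock fast path must divert to its slow
 * (statistics) path: either per-group stats were requested at init
 * time (lck_ptr_stats), or a DTrace lockstat probe is currently
 * enabled.
 */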
__attribute__((always_inline, overloadable))
static inline bool
hw_lck_ptr_take_slowpath(hw_lck_ptr_t tmp)
{
	hw_lck_ptr_t check_bits = {
#if CONFIG_DTRACE
		.lck_ptr_stats = true,
#endif /* CONFIG_DTRACE */
	};
	unsigned long take_slowpath = 0;

	take_slowpath = tmp.lck_ptr_value & check_bits.lck_ptr_value;
#if CONFIG_DTRACE
	take_slowpath |= lockstat_enabled();
#endif /* CONFIG_DTRACE */
	return take_slowpath;
}


#pragma mark hw_lck_ptr_t: init/destroy

void
hw_lck_ptr_init(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
	hw_lck_ptr_t init = { };

#if LCK_GRP_USE_ARG
	if (grp) {
#if CONFIG_DTRACE
		if (grp->lck_grp_attr_id & LCK_GRP_ATTR_STAT) {
			init.lck_ptr_stats = true;
		}
#endif /* CONFIG_DTRACE */
		lck_grp_reference(grp, &grp->lck_grp_spincnt);
	}
#endif /* LCK_GRP_USE_ARG */

	__hw_lck_ptr_encode(&init, val);
	os_atomic_init(lck, init);
}

void
hw_lck_ptr_destroy(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
	hw_lck_ptr_t tmp = os_atomic_load(lck, relaxed);

	if (tmp.lck_ptr_locked || tmp.lck_ptr_mcs_tail) {
		__hw_lck_ptr_invalid_panic(lck);
	}
#if LCK_GRP_USE_ARG
	if (grp) {
		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
	}
#endif /* LCK_GRP_USE_ARG */

	/* make clients spin forever, and use an invalid MCS ID */
	tmp.lck_ptr_locked = true;
	tmp.lck_ptr_stats = false;
	tmp.lck_ptr_mcs_tail = 0xffff;
	os_atomic_store(lck, tmp, relaxed);
}

bool
hw_lck_ptr_held(hw_lck_ptr_t *lck)
{
	return os_atomic_load(lck, relaxed).lck_ptr_locked;
}


#pragma mark hw_lck_ptr_t: hw_lck_ptr_lock

__abortlike
static hw_spin_timeout_status_t
hw_lck_ptr_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lck_ptr_t *lck = _lock;
	hw_lck_ptr_t tmp;

	tmp = os_atomic_load(lck, relaxed);
	panic("Ptr spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "ptr_value: %p, mcs_tail: 0x%04x, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lck, HW_SPIN_TIMEOUT_ARG(to, st),
	    __hw_lck_ptr_value(tmp), tmp.lck_ptr_mcs_tail,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_lck_ptr_spin_policy = {
	.hwsp_name = "hw_lck_ptr_lock",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_lck_ptr_timeout_panic,
};


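/*
 * Contended acquisition.
 *
 * This is an MCS-style queue lock: each contending CPU publishes a
 * per-CPU MCS node, swaps itself in as the queue tail, and then spins
 * on its own node's lsm_ready flag rather than on the lock word
 * itself.  Once at the head of the queue, it only has to wait for the
 * lock bit to clear, which keeps cache line traffic on the lock word
 * bounded under contention.
 */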
static void * __attribute__((noinline))
hw_lck_ptr_contended(hw_lck_ptr_t *lck LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_policy_t pol = &hw_lck_ptr_spin_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };

	hw_lck_ptr_t value, nvalue;
	lck_mcs_id_t pidx;
	lck_spin_txn_t txn;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t spin_start = 0;

	lck_grp_spin_update_miss(lck LCK_GRP_ARG(grp));
	if (__improbable(lck_grp_spin_spin_enabled(lck LCK_GRP_ARG(grp)))) {
		spin_start = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	/*
	 * Take a spot in the MCS queue,
	 * and then spin until we're at the head of it.
	 */

	txn = lck_spin_txn_begin(lck);

	pidx = os_atomic_xchg(&lck->lck_ptr_mcs_tail, txn.txn_mcs_id, release);
	if (pidx) {
		lck_spin_mcs_t pnode;
		unsigned long ready;

		pnode = lck_spin_mcs_decode(pidx);
		os_atomic_store(&pnode->lsm_next, txn.txn_slot, relaxed);

		while (!hw_spin_wait_until(&txn.txn_slot->lsm_ready, ready, ready)) {
			hw_spin_should_keep_spinning(lck, pol, to, &ss);
		}
	}

	/*
	 * We're now the first in line, wait for the lock bit
	 * to look ready and take it.
	 */
	do {
		while (!hw_spin_wait_until(&lck->lck_ptr_value,
		    value.lck_ptr_value, value.lck_ptr_locked == 0)) {
			hw_spin_should_keep_spinning(lck, pol, to, &ss);
		}

		nvalue = value;
		nvalue.lck_ptr_locked = true;
		if (nvalue.lck_ptr_mcs_tail == txn.txn_mcs_id) {
			nvalue.lck_ptr_mcs_tail = 0;
		}
	} while (!os_atomic_cmpxchg(lck, value, nvalue, acquire));

	/*
	 * We now have the lock, let's clean up the MCS state.
	 *
	 * If there is a node after us, notify that it
	 * is at the head of the interlock queue.
	 *
	 * Then, clear the MCS node.
	 */
	if (value.lck_ptr_mcs_tail != txn.txn_mcs_id) {
		lck_spin_mcs_t nnode;

		while (!hw_spin_wait_until(&txn.txn_slot->lsm_next, nnode, nnode)) {
			hw_spin_should_keep_spinning(lck, pol, to, &ss);
		}

		os_atomic_store(&nnode->lsm_ready, 1, relaxed);
	}

	lck_spin_txn_end(&txn);

#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(spin_start)) {
		lck_grp_spin_update_spin(lck LCK_GRP_ARG(grp),
		    mach_absolute_time() - spin_start);
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	return __hw_lck_ptr_value(value);
}

#if CONFIG_DTRACE
__attribute__((noinline))
#else /* !CONFIG_DTRACE */
__attribute__((always_inline))
#endif /* !CONFIG_DTRACE */
static void *
hw_lck_ptr_lock_slow(
	hw_lck_ptr_t *lck,
	hw_lck_ptr_t tmp
	LCK_GRP_ARG(lck_grp_t *grp))
{
	lck_grp_spin_update_held(lck LCK_GRP_ARG(grp));
	return __hw_lck_ptr_value(tmp);
}

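/*
 * Fast path: a single atomic OR of the lock bit.
 *
 * If the lock looks uncontended (not locked, empty MCS queue), try to
 * set the lock bit with an acquire-ordered fetch-or.  The returned
 * original value tells us whether we actually won the race; any other
 * outcome falls back to the MCS queue in hw_lck_ptr_contended().
 */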
static inline void *
hw_lck_ptr_lock_fastpath(hw_lck_ptr_t *lck LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_lck_ptr_t lock_bit = { .lck_ptr_locked = 1 };
	hw_lck_ptr_t tmp;

	tmp = os_atomic_load(lck, relaxed);
	if (__probable(tmp.lck_ptr_locked == 0 && tmp.lck_ptr_mcs_tail == 0)) {
		tmp.lck_ptr_value = os_atomic_or_orig(&lck->lck_ptr_value,
		    lock_bit.lck_ptr_value, acquire);
		if (__probable(tmp.lck_ptr_locked == 0)) {
			if (__probable(!hw_lck_ptr_take_slowpath(tmp))) {
				return __hw_lck_ptr_value(tmp);
			}
			return hw_lck_ptr_lock_slow(lck, tmp LCK_GRP_ARG(grp));
		}
	}

	return hw_lck_ptr_contended(lck LCK_GRP_ARG(grp));
}

void *
hw_lck_ptr_lock_nopreempt(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
	return hw_lck_ptr_lock_fastpath(lck LCK_GRP_ARG(grp));
}

void *
hw_lck_ptr_lock(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
	lock_disable_preemption_for_thread(current_thread());
	return hw_lck_ptr_lock_fastpath(lck LCK_GRP_ARG(grp));
}

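/*
 * Typical usage (illustrative sketch only; the `node` type, `head`
 * lock and `my_grp` lock group below are hypothetical, not part of
 * this file).  The lock protects a pointer: the locker receives the
 * current pointer value and publishes a (possibly new) value when it
 * unlocks.
 *
 *	struct node *head_ptr;
 *
 *	head_ptr = hw_lck_ptr_lock(&head, &my_grp);
 *	// ... read/modify the structure head_ptr points to ...
 *	hw_lck_ptr_unlock(&head, head_ptr, &my_grp);        // keep the value
 *	// or: hw_lck_ptr_unlock(&head, new_head, &my_grp); // swap the value
 */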


#pragma mark hw_lck_ptr_t: hw_lck_ptr_unlock

#if CONFIG_DTRACE
__attribute__((noinline))
static void
hw_lck_ptr_unlock_slow(
	hw_lck_ptr_t *lck,
	bool do_preempt
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (do_preempt) {
		lock_enable_preemption();
	}
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lck,
	    (uintptr_t)LCK_GRP_PROBEARG(grp));
}
#endif /* CONFIG_DTRACE */

static inline void
hw_lck_ptr_unlock_fastpath(
	hw_lck_ptr_t *lck,
	void *val,
	bool do_preempt
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_lck_ptr_t curv;
	hw_lck_ptr_t xorv = { };

	/*
	 * Compute the value to xor in order to unlock + change the pointer
	 * value, while leaving lck_ptr_stats and lck_ptr_mcs_tail unmodified
	 * (the latter might change while we unlock, and this avoids a CAS loop).
	 */
	curv = atomic_load_explicit((hw_lck_ptr_t _Atomic *)lck,
	    memory_order_relaxed);

	curv.lck_ptr_stats = false;
	curv.lck_ptr_mcs_tail = 0;

	__hw_lck_ptr_encode(&xorv, val);
	xorv.lck_ptr_value ^= curv.lck_ptr_value;

	curv.lck_ptr_value =
	    os_atomic_xor(&lck->lck_ptr_value, xorv.lck_ptr_value, release);

#if CONFIG_DTRACE
	if (__improbable(hw_lck_ptr_take_slowpath(curv))) {
		return hw_lck_ptr_unlock_slow(lck, do_preempt LCK_GRP_ARG(grp));
	}
#endif /* CONFIG_DTRACE */

	if (do_preempt) {
		lock_enable_preemption();
	}
}

void
hw_lck_ptr_unlock_nopreempt(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
	hw_lck_ptr_unlock_fastpath(lck, val, false LCK_GRP_ARG(grp));
}

void
hw_lck_ptr_unlock(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
	hw_lck_ptr_unlock_fastpath(lck, val, true LCK_GRP_ARG(grp));
}


#pragma mark hw_lck_ptr_t: hw_lck_ptr_wait_for_value

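/*
 * Spin, without taking the lock, until the pointer stored in the lock
 * reaches the expected value, then synchronize with the releasing
 * store through an acquire fence.  This is the contended slow path of
 * hw_lck_ptr_wait_for_value() below.
 */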
static void __attribute__((noinline))
hw_lck_ptr_wait_for_value_contended(
	hw_lck_ptr_t *lck,
	void *val
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_policy_t pol = &hw_lck_ptr_spin_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };
	hw_lck_ptr_t tmp;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t spin_start = 0;

	if (__improbable(lck_grp_spin_spin_enabled(lck LCK_GRP_ARG(grp)))) {
		spin_start = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	while (__improbable(!hw_spin_wait_until(&lck->lck_ptr_value,
	    tmp.lck_ptr_value, __hw_lck_ptr_value(tmp) == val))) {
		hw_spin_should_keep_spinning(lck, pol, to, &ss);
	}

#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(spin_start)) {
		lck_grp_spin_update_spin(lck LCK_GRP_ARG(grp),
		    mach_absolute_time() - spin_start);
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	os_atomic_thread_fence(acquire);
}

void
hw_lck_ptr_wait_for_value(
	hw_lck_ptr_t *lck,
	void *val,
	lck_grp_t *grp)
{
	hw_lck_ptr_t tmp = os_atomic_load(lck, acquire);

	if (__probable(__hw_lck_ptr_value(tmp) == val)) {
		return;
	}

	hw_lck_ptr_wait_for_value_contended(lck, val LCK_GRP_ARG(grp));
}
421 | |