| 1 | /* |
| 2 | * Copyright (c) 2013 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #include <mach/host_priv.h> |
| 30 | #include <mach/host_special_ports.h> |
| 31 | #include <mach/memory_error_notification.h> |
| 32 | |
| 33 | #include <mach/mach_types.h> |
| 34 | #include <mach/host_info.h> |
| 35 | #include <kern/host.h> |
| 36 | #include <kern/locks.h> |
| 37 | #include <kern/ecc.h> |
| 38 | #include <kern/spl.h> |
| 39 | #include <kern/mpsc_queue.h> |
| 40 | #include <kern/thread.h> |
| 41 | #include <kern/thread_call.h> |
| 42 | #include <kern/startup.h> |
| 43 | #include <os/log.h> |
| 44 | #include <pexpert/pexpert.h> |
| 45 | #include <pexpert/device_tree.h> |
| 46 | #include <libkern/OSAtomic.h> |
| 47 | #include <arm/pmap_public.h> |
| 48 | #include <vm/vm_protos.h> |
| 49 | |
| 50 | /* New CoreAnalytics ECC logging mechanism */ |
| 51 | |
| 52 | /** |
| 53 | * Stubs for targets which do not support ECC. |
| 54 | */ |
| 55 | |
| 56 | kern_return_t |
| 57 | ecc_log_memory_error( |
| 58 | __unused pmap_paddr_t physical_address, |
| 59 | __unused uint32_t ecc_flags) |
| 60 | { |
| 61 | return KERN_NOT_SUPPORTED; |
| 62 | } |
| 63 | |
| 64 | kern_return_t |
| 65 | ecc_log_memory_error_internal( |
| 66 | __unused pmap_paddr_t physical_address, |
| 67 | __unused uint32_t ecc_flags) |
| 68 | { |
| 69 | return KERN_NOT_SUPPORTED; |
| 70 | } |
| 71 | |
| 72 | kern_return_t |
| 73 | ecc_log_memory_error_ce( |
| 74 | __unused pmap_paddr_t physical_address, |
| 75 | __unused uint32_t ecc_flags, |
| 76 | __unused uint32_t ce_count) |
| 77 | { |
| 78 | return KERN_NOT_SUPPORTED; |
| 79 | } |
| 80 | |
| 81 | |
| 82 | kern_return_t |
| 83 | kern_ecc_poll_register( |
| 84 | __unused platform_error_handler_ecc_poll_t poll_func, |
| 85 | __unused uint32_t max_errors) |
| 86 | { |
| 87 | return KERN_NOT_SUPPORTED; |
| 88 | } |
| 89 | |
| 90 | /* |
| 91 | * Used to report earlier errors that were found after ECC gets enabled. |
| 92 | * We don't want the VM to panic for these. |
| 93 | */ |
| 94 | kern_return_t |
| 95 | ecc_log_memory_error_delayed( |
| 96 | __unused pmap_paddr_t physical_address, |
| 97 | __unused uint32_t ecc_flags) |
| 98 | { |
| 99 | return KERN_FAILURE; |
| 100 | } |
| 101 | |
| 102 | /** |
| 103 | * MCC Logging |
| 104 | */ |
| 105 | |
| 106 | /** |
| 107 | * TODO: rdar://97394997 (Clean up ECC / MCC logging) |
| 108 | * We can probably clean some of this up and share some of the code with ECC. |
| 109 | */ |
| 110 | #if XNU_HANDLE_MCC |
| 111 | |
| 112 | static struct mpsc_daemon_queue mcc_memory_error_event_queue; |
| 113 | struct _mcc_mem_err_event { |
| 114 | struct mpsc_queue_chain link; |
| 115 | mcc_ecc_event_t event; |
| 116 | }; |
| 117 | typedef struct _mcc_mem_err_event* mcc_mem_err_event_t; |
| 118 | |
| 119 | #define MCC_ECC_NUM_ERRORS (1024) |
| 120 | #define MCC_ERROR_EVENT_QUEUE_PRIORITY MAXPRI_USER |
| 121 | static struct _mcc_mem_err_event mcc_events[MCC_ECC_NUM_ERRORS]; |
| 122 | static atomic_int mcc_events_producer_idx = 0; |
| 123 | static atomic_int mcc_events_consumer_idx = 0; |
| 124 | SCALABLE_COUNTER_DEFINE(mcc_dropped_events); |
| 125 | LCK_GRP_DECLARE(mcc_lock_grp, "mcc" ); |
| 126 | LCK_SPIN_DECLARE(mcc_lock, &mcc_lock_grp); |
| 127 | |
| 128 | static inline int |
| 129 | mcc_events_next(int idx) |
| 130 | { |
| 131 | assert(idx < MCC_ECC_NUM_ERRORS); |
| 132 | return (idx + 1) % MCC_ECC_NUM_ERRORS; |
| 133 | } |
| 134 | |
| 135 | /* MCC ECC CoreAnalytics Error Logging */ |
| 136 | static void |
| 137 | mcc_error_notify_user(mcc_ecc_event_t event) |
| 138 | { |
| 139 | mach_port_t user_port = MACH_PORT_NULL; |
| 140 | kern_return_t kr; |
| 141 | |
| 142 | kr = host_get_memory_error_port(host_priv_self(), &user_port); |
| 143 | assert(kr == KERN_SUCCESS); |
| 144 | if (!IPC_PORT_VALID(user_port)) { |
| 145 | os_log_error(OS_LOG_DEFAULT, "Failed to get memory error port - mcc" ); |
| 146 | return; |
| 147 | } |
| 148 | |
| 149 | mcc_memory_error_notification(user_port, event); |
| 150 | |
| 151 | ipc_port_release_send(user_port); |
| 152 | } |
| 153 | |
| 154 | static void |
| 155 | mcc_memory_error_event_queue_invoke(mpsc_queue_chain_t e, mpsc_daemon_queue_t queue __unused) |
| 156 | { |
| 157 | mcc_mem_err_event_t event; |
| 158 | |
| 159 | /* The consumer should never be invoked if there is nothing to consume. */ |
| 160 | int mcc_events_consumer_curr_idx = atomic_load(&mcc_events_consumer_idx); |
| 161 | assert(mcc_events_consumer_curr_idx != atomic_load(&mcc_events_producer_idx)); |
| 162 | |
| 163 | event = mpsc_queue_element(e, struct _mcc_mem_err_event, link); |
| 164 | mcc_error_notify_user(event->event); |
| 165 | int mcc_events_consumer_next_idx = mcc_events_next(mcc_events_consumer_curr_idx); |
| 166 | atomic_store(&mcc_events_consumer_idx, mcc_events_consumer_next_idx); |
| 167 | } |
| 168 | |
| 169 | static mcc_mem_err_event_t |
| 170 | mcc_memory_error_create_event(mcc_ecc_event_t mcc_event) |
| 171 | { |
| 172 | mcc_mem_err_event_t ret = NULL; |
| 173 | |
| 174 | /** |
| 175 | * @note We are unable to dynamically allocate events, because this function can be called from |
| 176 | * the primary interrupt context. Instead, we allocate from a statically sized ring buffer. |
| 177 | */ |
| 178 | const boolean_t interrupts_enabled = ml_set_interrupts_enabled(FALSE); |
| 179 | lck_spin_lock(&mcc_lock); |
| 180 | int mcc_events_producer_curr_idx = atomic_load(&mcc_events_producer_idx); |
| 181 | int mcc_events_producer_next_idx = mcc_events_next(mcc_events_producer_curr_idx); |
| 182 | if (mcc_events_producer_next_idx == atomic_load(&mcc_events_consumer_idx)) { |
| 183 | /** |
| 184 | * The consumer is running behind the producer, and we're in the primary interrupt context. |
| 185 | * Drop this event and return NULL to the caller. |
| 186 | */ |
| 187 | counter_inc(&mcc_dropped_events); |
| 188 | ret = NULL; |
| 189 | goto done; |
| 190 | } |
| 191 | |
| 192 | mcc_mem_err_event_t event = &mcc_events[mcc_events_producer_curr_idx]; |
| 193 | event->event = mcc_event; |
| 194 | atomic_store(&mcc_events_producer_idx, mcc_events_producer_next_idx); |
| 195 | ret = event; |
| 196 | |
| 197 | done: |
| 198 | lck_spin_unlock(&mcc_lock); |
| 199 | ml_set_interrupts_enabled(interrupts_enabled); |
| 200 | return ret; |
| 201 | } |
| 202 | |
| 203 | __startup_func |
| 204 | static void |
| 205 | mcc_logging_init(void) |
| 206 | { |
| 207 | mpsc_daemon_queue_init_with_thread(&mcc_memory_error_event_queue, |
| 208 | mcc_memory_error_event_queue_invoke, MCC_ERROR_EVENT_QUEUE_PRIORITY, |
| 209 | "daemon.mcc_error-events" , MPSC_DAEMON_INIT_INACTIVE); |
| 210 | |
| 211 | mpsc_daemon_queue_activate(&mcc_memory_error_event_queue); |
| 212 | } |
| 213 | STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, mcc_logging_init); |
| 214 | |
| 215 | #endif /* XNU_HANDLE_MCC */ |
| 216 | |
| 217 | kern_return_t |
| 218 | mcc_log_memory_error(mcc_ecc_event_t mcc_event __unused) |
| 219 | { |
| 220 | #if XNU_HANDLE_MCC |
| 221 | mcc_mem_err_event_t event = mcc_memory_error_create_event(mcc_event); |
| 222 | if (event == NULL) { |
| 223 | return KERN_RESOURCE_SHORTAGE; |
| 224 | } |
| 225 | assert(mcc_memory_error_event_queue.mpd_thread != NULL); |
| 226 | mpsc_daemon_enqueue(&mcc_memory_error_event_queue, |
| 227 | &event->link, MPSC_QUEUE_DISABLE_PREEMPTION); |
| 228 | return KERN_SUCCESS; |
| 229 | #else |
| 230 | return KERN_FAILURE; |
| 231 | #endif |
| 232 | } |
| 233 | |
#if (DEBUG || DEVELOPMENT)
/**
 * Sysctl test: log a run of synthetic MCC events.
 *
 * Each iteration builds an MCC_ECC_V1 event whose status is the iteration
 * number (truncated to 32 bits) and passes it to mcc_log_memory_error().
 * The return value of mcc_log_memory_error() is not checked.
 *
 * @param in   number of events to log; zero or negative values log nothing
 * @param out  set to 1 on completion
 * @return 0, always.
 */
static int
mcc_memory_error_notify_test_run(int64_t in, int64_t *out)
{
	/*
	 * Clamp before converting to unsigned. Comparing a uint64_t loop counter
	 * directly against a negative int64_t would convert the bound to a huge
	 * unsigned value and the loop would effectively never terminate.
	 */
	const uint64_t iterations = (in > 0) ? (uint64_t)in : 0;

	printf("Running mcc_memory_error_notify_test for %llu iterations\n" , (unsigned long long)iterations);
	for (uint64_t i = 0; i < iterations; i++) {
		mcc_ecc_event_t event = {.version = MCC_ECC_V1, .status = (uint32_t)i};
		/**
		 * To accurately test mcc_log_memory_error, we must disable preemption, because it is called
		 * from the primary interrupt context.
		 */
		disable_preemption();
		mcc_log_memory_error(event);
		enable_preemption();
	}

	*out = 1;
	return 0;
}

SYSCTL_TEST_REGISTER(mcc_memory_error_notify_test, mcc_memory_error_notify_test_run);
#endif /* (DEBUG || DEVELOPMENT) */
| 256 | |
| 257 | |
| 258 | /* Legacy ECC logging mechanism */ |
| 259 | |
| 260 | /* |
| 261 | * ECC data. Not really KPCs, but this still seems like the |
| 262 | * best home for this code. |
| 263 | * |
| 264 | * Circular buffer of events. When we fill up, drop data. |
| 265 | */ |
| 266 | #define ECC_EVENT_BUFFER_COUNT (256) |
| 267 | |
| 268 | struct ecc_event ecc_data[ECC_EVENT_BUFFER_COUNT]; |
| 269 | static uint32_t ecc_data_next_read; |
| 270 | static uint32_t ecc_data_next_write; |
| 271 | static boolean_t ecc_data_empty = TRUE; // next read == next write : empty or full? |
| 272 | static LCK_GRP_DECLARE(ecc_data_lock_group, "ecc-data" ); |
| 273 | static LCK_SPIN_DECLARE(ecc_data_lock, &ecc_data_lock_group); |
| 274 | static uint32_t ecc_correction_count; |
| 275 | |
| 276 | |
| 277 | uint32_t |
| 278 | ecc_log_get_correction_count() |
| 279 | { |
| 280 | return ecc_correction_count; |
| 281 | } |
| 282 | |
| 283 | kern_return_t |
| 284 | ecc_log_record_event(const struct ecc_event *ev) |
| 285 | { |
| 286 | spl_t x; |
| 287 | |
| 288 | if (ev->count > ECC_EVENT_INFO_DATA_ENTRIES) { |
| 289 | panic("Count of %u on ecc event is too large." , (unsigned)ev->count); |
| 290 | } |
| 291 | |
| 292 | x = splhigh(); |
| 293 | lck_spin_lock(lck: &ecc_data_lock); |
| 294 | |
| 295 | ecc_correction_count++; |
| 296 | |
| 297 | if (ecc_data_next_read == ecc_data_next_write && !ecc_data_empty) { |
| 298 | lck_spin_unlock(lck: &ecc_data_lock); |
| 299 | splx(x); |
| 300 | return KERN_FAILURE; |
| 301 | } |
| 302 | |
| 303 | bcopy(src: ev, dst: &ecc_data[ecc_data_next_write], n: sizeof(*ev)); |
| 304 | ecc_data_next_write++; |
| 305 | ecc_data_next_write %= ECC_EVENT_BUFFER_COUNT; |
| 306 | ecc_data_empty = FALSE; |
| 307 | |
| 308 | lck_spin_unlock(lck: &ecc_data_lock); |
| 309 | splx(x); |
| 310 | |
| 311 | return KERN_SUCCESS; |
| 312 | } |
| 313 | |
| 314 | |
| 315 | kern_return_t |
| 316 | ecc_log_get_next_event(struct ecc_event *ev) |
| 317 | { |
| 318 | spl_t x; |
| 319 | |
| 320 | x = splhigh(); |
| 321 | lck_spin_lock(lck: &ecc_data_lock); |
| 322 | |
| 323 | if (ecc_data_empty) { |
| 324 | assert(ecc_data_next_write == ecc_data_next_read); |
| 325 | |
| 326 | lck_spin_unlock(lck: &ecc_data_lock); |
| 327 | splx(x); |
| 328 | return KERN_FAILURE; |
| 329 | } |
| 330 | |
| 331 | bcopy(src: &ecc_data[ecc_data_next_read], dst: ev, n: sizeof(*ev)); |
| 332 | ecc_data_next_read++; |
| 333 | ecc_data_next_read %= ECC_EVENT_BUFFER_COUNT; |
| 334 | |
| 335 | if (ecc_data_next_read == ecc_data_next_write) { |
| 336 | ecc_data_empty = TRUE; |
| 337 | } |
| 338 | |
| 339 | lck_spin_unlock(lck: &ecc_data_lock); |
| 340 | splx(x); |
| 341 | |
| 342 | return KERN_SUCCESS; |
| 343 | } |
| 344 | |