/*
 * Copyright (c) 2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/host_priv.h>
#include <mach/host_special_ports.h>
#include <mach/memory_error_notification.h>

#include <mach/mach_types.h>
#include <mach/host_info.h>
#include <kern/host.h>
#include <kern/locks.h>
#include <kern/ecc.h>
#include <kern/spl.h>
#include <kern/mpsc_queue.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <kern/startup.h>
#include <os/log.h>
#include <pexpert/pexpert.h>
#include <pexpert/device_tree.h>
#include <libkern/OSAtomic.h>
#include <arm/pmap_public.h>
#include <vm/vm_protos.h>

/* New CoreAnalytics ECC logging mechanism */

/**
 * Stubs for targets which do not support ECC.
 */

kern_return_t
ecc_log_memory_error(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags)
{
	return KERN_NOT_SUPPORTED;
}

kern_return_t
ecc_log_memory_error_internal(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags)
{
	return KERN_NOT_SUPPORTED;
}

kern_return_t
ecc_log_memory_error_ce(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags,
	__unused uint32_t ce_count)
{
	return KERN_NOT_SUPPORTED;
}


kern_return_t
kern_ecc_poll_register(
	__unused platform_error_handler_ecc_poll_t poll_func,
	__unused uint32_t max_errors)
{
	return KERN_NOT_SUPPORTED;
}

/*
 * Used to report earlier errors that were found after ECC gets enabled.
 * We don't want the VM to panic for these.
 */
kern_return_t
ecc_log_memory_error_delayed(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags)
{
	return KERN_FAILURE;
}

/**
 * MCC Logging
 */

/**
 * TODO: rdar://97394997 (Clean up ECC / MCC logging)
 * We can probably clean some of this up and share some of the code with ECC.
 */
#if XNU_HANDLE_MCC

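/*
 * MCC memory error events may be produced from the primary interrupt context, so
 * they are staged in a statically sized ring buffer and drained by an MPSC daemon
 * queue thread, which forwards each event to the host memory-error port.
 */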
static struct mpsc_daemon_queue mcc_memory_error_event_queue;
struct _mcc_mem_err_event {
	struct mpsc_queue_chain link;
	mcc_ecc_event_t event;
};
typedef struct _mcc_mem_err_event* mcc_mem_err_event_t;

#define MCC_ECC_NUM_ERRORS (1024)
#define MCC_ERROR_EVENT_QUEUE_PRIORITY MAXPRI_USER
static struct _mcc_mem_err_event mcc_events[MCC_ECC_NUM_ERRORS];
static atomic_int mcc_events_producer_idx = 0;
static atomic_int mcc_events_consumer_idx = 0;
SCALABLE_COUNTER_DEFINE(mcc_dropped_events);
LCK_GRP_DECLARE(mcc_lock_grp, "mcc");
LCK_SPIN_DECLARE(mcc_lock, &mcc_lock_grp);

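/*
 * Advance a ring buffer index, wrapping around at MCC_ECC_NUM_ERRORS.
 */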
static inline int
mcc_events_next(int idx)
{
	assert(idx < MCC_ECC_NUM_ERRORS);
	return (idx + 1) % MCC_ECC_NUM_ERRORS;
}

/* MCC ECC CoreAnalytics Error Logging */
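/*
 * Deliver a single MCC event to the registered host memory-error port.
 * Runs on the daemon queue thread, not from interrupt context.
 */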
static void
mcc_error_notify_user(mcc_ecc_event_t event)
{
	mach_port_t user_port = MACH_PORT_NULL;
	kern_return_t kr;

	kr = host_get_memory_error_port(host_priv_self(), &user_port);
	assert(kr == KERN_SUCCESS);
	if (!IPC_PORT_VALID(user_port)) {
		os_log_error(OS_LOG_DEFAULT, "Failed to get memory error port - mcc");
		return;
	}

	mcc_memory_error_notification(user_port, event);

	ipc_port_release_send(user_port);
}

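/*
 * MPSC daemon queue callback: consume one event from the ring buffer, notify
 * userspace, then advance the consumer index so the producer can reuse the slot.
 */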
static void
mcc_memory_error_event_queue_invoke(mpsc_queue_chain_t e, mpsc_daemon_queue_t queue __unused)
{
	mcc_mem_err_event_t event;

	/* The consumer should never be invoked if there is nothing to consume. */
	int mcc_events_consumer_curr_idx = atomic_load(&mcc_events_consumer_idx);
	assert(mcc_events_consumer_curr_idx != atomic_load(&mcc_events_producer_idx));

	event = mpsc_queue_element(e, struct _mcc_mem_err_event, link);
	mcc_error_notify_user(event->event);
	int mcc_events_consumer_next_idx = mcc_events_next(mcc_events_consumer_curr_idx);
	atomic_store(&mcc_events_consumer_idx, mcc_events_consumer_next_idx);
}

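/*
 * Reserve the next ring buffer slot and record the event in it. Returns NULL
 * (and counts a dropped event) if the buffer is full. Takes a spin lock with
 * interrupts disabled to serialize producers.
 */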
static mcc_mem_err_event_t
mcc_memory_error_create_event(mcc_ecc_event_t mcc_event)
{
	mcc_mem_err_event_t ret = NULL;

	/**
	 * @note We are unable to dynamically allocate events, because this function can be called from
	 * the primary interrupt context. Instead, we allocate from a statically sized ring buffer.
	 */
	const boolean_t interrupts_enabled = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock(&mcc_lock);
	int mcc_events_producer_curr_idx = atomic_load(&mcc_events_producer_idx);
	int mcc_events_producer_next_idx = mcc_events_next(mcc_events_producer_curr_idx);
	if (mcc_events_producer_next_idx == atomic_load(&mcc_events_consumer_idx)) {
		/**
		 * The consumer is running behind the producer, and we're in the primary interrupt context.
		 * Drop this event and return NULL to the caller.
		 */
		counter_inc(&mcc_dropped_events);
		ret = NULL;
		goto done;
	}

	mcc_mem_err_event_t event = &mcc_events[mcc_events_producer_curr_idx];
	event->event = mcc_event;
	atomic_store(&mcc_events_producer_idx, mcc_events_producer_next_idx);
	ret = event;

done:
	lck_spin_unlock(&mcc_lock);
	ml_set_interrupts_enabled(interrupts_enabled);
	return ret;
}

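/*
 * Startup hook: bring up and activate the daemon queue thread that drains
 * MCC events.
 */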
__startup_func
static void
mcc_logging_init(void)
{
	mpsc_daemon_queue_init_with_thread(&mcc_memory_error_event_queue,
	    mcc_memory_error_event_queue_invoke, MCC_ERROR_EVENT_QUEUE_PRIORITY,
	    "daemon.mcc_error-events", MPSC_DAEMON_INIT_INACTIVE);

	mpsc_daemon_queue_activate(&mcc_memory_error_event_queue);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, mcc_logging_init);

#endif /* XNU_HANDLE_MCC */

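/*
 * Record an MCC memory error. May be called from the primary interrupt context:
 * the event is copied into the static ring buffer and handed to the daemon queue,
 * which notifies userspace later. Returns KERN_RESOURCE_SHORTAGE if the ring
 * buffer is full, and KERN_FAILURE on targets built without XNU_HANDLE_MCC.
 */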
kern_return_t
mcc_log_memory_error(mcc_ecc_event_t mcc_event __unused)
{
#if XNU_HANDLE_MCC
	mcc_mem_err_event_t event = mcc_memory_error_create_event(mcc_event);
	if (event == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	assert(mcc_memory_error_event_queue.mpd_thread != NULL);
	mpsc_daemon_enqueue(&mcc_memory_error_event_queue,
	    &event->link, MPSC_QUEUE_DISABLE_PREEMPTION);
	return KERN_SUCCESS;
#else
	return KERN_FAILURE;
#endif
}

#if (DEBUG || DEVELOPMENT)
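/*
 * Development-only sysctl test: inject the requested number of synthetic MCC
 * events with preemption disabled, to mimic calls from the primary interrupt
 * context.
 */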
static int
mcc_memory_error_notify_test_run(int64_t in, int64_t *out)
{
	printf("Running mcc_memory_error_notify_test for %lld iterations\n", in);
	for (uint64_t i = 0; i < (uint64_t)in; i++) {
		mcc_ecc_event_t event = {.version = MCC_ECC_V1, .status = (uint32_t)i};
		/**
		 * To accurately test mcc_log_memory_error, we must disable preemption, because it is called
		 * from the primary interrupt context.
		 */
		disable_preemption();
		mcc_log_memory_error(event);
		enable_preemption();
	}

	*out = 1;
	return 0;
}

SYSCTL_TEST_REGISTER(mcc_memory_error_notify_test, mcc_memory_error_notify_test_run);
#endif /* (DEBUG || DEVELOPMENT) */


/* Legacy ECC logging mechanism */

/*
 * ECC data. Not really KPCs, but this still seems like the
 * best home for this code.
 *
 * Circular buffer of events. When we fill up, drop data.
 */
#define ECC_EVENT_BUFFER_COUNT (256)

struct ecc_event ecc_data[ECC_EVENT_BUFFER_COUNT];
static uint32_t ecc_data_next_read;
static uint32_t ecc_data_next_write;
static boolean_t ecc_data_empty = TRUE; // next read == next write : empty or full?
static LCK_GRP_DECLARE(ecc_data_lock_group, "ecc-data");
static LCK_SPIN_DECLARE(ecc_data_lock, &ecc_data_lock_group);
static uint32_t ecc_correction_count;

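/*
 * Return the running count of ECC corrections reported via ecc_log_record_event().
 */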
uint32_t
ecc_log_get_correction_count(void)
{
	return ecc_correction_count;
}

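/*
 * Append an ECC event to the circular buffer. Runs under splhigh and a spin
 * lock so it is safe to call at error time. Returns KERN_FAILURE and drops the
 * event if the buffer is full.
 */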
kern_return_t
ecc_log_record_event(const struct ecc_event *ev)
{
	spl_t x;

	if (ev->count > ECC_EVENT_INFO_DATA_ENTRIES) {
		panic("Count of %u on ecc event is too large.", (unsigned)ev->count);
	}

	x = splhigh();
	lck_spin_lock(&ecc_data_lock);

	ecc_correction_count++;

	if (ecc_data_next_read == ecc_data_next_write && !ecc_data_empty) {
		lck_spin_unlock(&ecc_data_lock);
		splx(x);
		return KERN_FAILURE;
	}

	bcopy(ev, &ecc_data[ecc_data_next_write], sizeof(*ev));
	ecc_data_next_write++;
	ecc_data_next_write %= ECC_EVENT_BUFFER_COUNT;
	ecc_data_empty = FALSE;

	lck_spin_unlock(&ecc_data_lock);
	splx(x);

	return KERN_SUCCESS;
}


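/*
 * Pop the oldest ECC event from the circular buffer into *ev. Returns
 * KERN_FAILURE when the buffer is empty.
 */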
kern_return_t
ecc_log_get_next_event(struct ecc_event *ev)
{
	spl_t x;

	x = splhigh();
	lck_spin_lock(&ecc_data_lock);

	if (ecc_data_empty) {
		assert(ecc_data_next_write == ecc_data_next_read);

		lck_spin_unlock(&ecc_data_lock);
		splx(x);
		return KERN_FAILURE;
	}

	bcopy(&ecc_data[ecc_data_next_read], ev, sizeof(*ev));
	ecc_data_next_read++;
	ecc_data_next_read %= ECC_EVENT_BUFFER_COUNT;

	if (ecc_data_next_read == ecc_data_next_write) {
		ecc_data_empty = TRUE;
	}

	lck_spin_unlock(&ecc_data_lock);
	splx(x);

	return KERN_SUCCESS;
}