/*
 * Copyright (c) 2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/host_priv.h>
#include <mach/host_special_ports.h>
#include <mach/memory_error_notification.h>

#include <mach/mach_types.h>
#include <mach/host_info.h>
#include <kern/host.h>
#include <kern/locks.h>
#include <kern/ecc.h>
#include <kern/spl.h>
#include <kern/mpsc_queue.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <kern/startup.h>
#include <os/log.h>
#include <pexpert/pexpert.h>
#include <pexpert/device_tree.h>
#include <libkern/OSAtomic.h>
#include <arm/pmap_public.h>
#include <vm/vm_protos.h>

/* New CoreAnalytics ECC logging mechanism */

/**
 * Stubs for targets which do not support ECC.
 */

kern_return_t
ecc_log_memory_error(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags)
{
	return KERN_NOT_SUPPORTED;
}

kern_return_t
ecc_log_memory_error_internal(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags)
{
	return KERN_NOT_SUPPORTED;
}

kern_return_t
ecc_log_memory_error_ce(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags,
	__unused uint32_t ce_count)
{
	return KERN_NOT_SUPPORTED;
}


kern_return_t
kern_ecc_poll_register(
	__unused platform_error_handler_ecc_poll_t poll_func,
	__unused uint32_t max_errors)
{
	return KERN_NOT_SUPPORTED;
}

/*
 * Used to report errors that happened earlier but were only discovered
 * after ECC was enabled. We don't want the VM to panic for these.
 */
kern_return_t
ecc_log_memory_error_delayed(
	__unused pmap_paddr_t physical_address,
	__unused uint32_t ecc_flags)
{
	return KERN_FAILURE;
}

/**
 * MCC Logging
 */

/**
 * TODO: rdar://97394997 (Clean up ECC / MCC logging)
 * We can probably clean some of this up and share some of the code with ECC.
 */
#if XNU_HANDLE_MCC

static struct mpsc_daemon_queue mcc_memory_error_event_queue;
struct _mcc_mem_err_event {
	struct mpsc_queue_chain link;
	mcc_ecc_event_t event;
};
typedef struct _mcc_mem_err_event *mcc_mem_err_event_t;

#define MCC_ECC_NUM_ERRORS (1024)
#define MCC_ERROR_EVENT_QUEUE_PRIORITY MAXPRI_USER
static struct _mcc_mem_err_event mcc_events[MCC_ECC_NUM_ERRORS];
static atomic_int mcc_events_producer_idx = 0;
static atomic_int mcc_events_consumer_idx = 0;
SCALABLE_COUNTER_DEFINE(mcc_dropped_events);
LCK_GRP_DECLARE(mcc_lock_grp, "mcc");
LCK_SPIN_DECLARE(mcc_lock, &mcc_lock_grp);

static inline int
mcc_events_next(int idx)
{
	assert(idx < MCC_ECC_NUM_ERRORS);
	return (idx + 1) % MCC_ECC_NUM_ERRORS;
}
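
/*
 * Ring-index convention, illustrated (a sketch, not compiled code): one slot
 * is deliberately left unused so that "full" and "empty" can be told apart
 * from the indices alone:
 *
 *	empty: producer_idx == consumer_idx
 *	full:  mcc_events_next(producer_idx) == consumer_idx
 */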

/* MCC ECC CoreAnalytics Error Logging */
static void
mcc_error_notify_user(mcc_ecc_event_t event)
{
	mach_port_t user_port = MACH_PORT_NULL;
	kern_return_t kr;

	kr = host_get_memory_error_port(host_priv_self(), &user_port);
	assert(kr == KERN_SUCCESS);
	if (!IPC_PORT_VALID(user_port)) {
		os_log_error(OS_LOG_DEFAULT, "Failed to get memory error port - mcc");
		return;
	}

	mcc_memory_error_notification(user_port, event);

	ipc_port_release_send(user_port);
}

static void
mcc_memory_error_event_queue_invoke(mpsc_queue_chain_t e, mpsc_daemon_queue_t queue __unused)
{
	mcc_mem_err_event_t event;

	/* The consumer should never be invoked if there is nothing to consume. */
	int mcc_events_consumer_curr_idx = atomic_load(&mcc_events_consumer_idx);
	assert(mcc_events_consumer_curr_idx != atomic_load(&mcc_events_producer_idx));

	event = mpsc_queue_element(e, struct _mcc_mem_err_event, link);
	mcc_error_notify_user(event->event);
	int mcc_events_consumer_next_idx = mcc_events_next(mcc_events_consumer_curr_idx);
	atomic_store(&mcc_events_consumer_idx, mcc_events_consumer_next_idx);
}

static mcc_mem_err_event_t
mcc_memory_error_create_event(mcc_ecc_event_t mcc_event)
{
	mcc_mem_err_event_t ret = NULL;

	/**
	 * @note We are unable to dynamically allocate events, because this
	 * function can be called from the primary interrupt context. Instead,
	 * we allocate from a statically sized ring buffer.
	 */
	const boolean_t interrupts_enabled = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock(&mcc_lock);
	int mcc_events_producer_curr_idx = atomic_load(&mcc_events_producer_idx);
	int mcc_events_producer_next_idx = mcc_events_next(mcc_events_producer_curr_idx);
	if (mcc_events_producer_next_idx == atomic_load(&mcc_events_consumer_idx)) {
		/**
		 * The ring buffer is full because the consumer has fallen behind
		 * the producer, and we cannot block in the primary interrupt
		 * context. Drop this event and return NULL to the caller.
		 */
		counter_inc(&mcc_dropped_events);
		ret = NULL;
		goto done;
	}

	mcc_mem_err_event_t event = &mcc_events[mcc_events_producer_curr_idx];
	event->event = mcc_event;
	atomic_store(&mcc_events_producer_idx, mcc_events_producer_next_idx);
	ret = event;

done:
	lck_spin_unlock(&mcc_lock);
	ml_set_interrupts_enabled(interrupts_enabled);
	return ret;
}

__startup_func
static void
mcc_logging_init(void)
{
	mpsc_daemon_queue_init_with_thread(&mcc_memory_error_event_queue,
	    mcc_memory_error_event_queue_invoke, MCC_ERROR_EVENT_QUEUE_PRIORITY,
	    "daemon.mcc_error-events", MPSC_DAEMON_INIT_INACTIVE);

	mpsc_daemon_queue_activate(&mcc_memory_error_event_queue);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, mcc_logging_init);

#endif /* XNU_HANDLE_MCC */

kern_return_t
mcc_log_memory_error(mcc_ecc_event_t mcc_event __unused)
{
#if XNU_HANDLE_MCC
	mcc_mem_err_event_t event = mcc_memory_error_create_event(mcc_event);
	if (event == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	assert(mcc_memory_error_event_queue.mpd_thread != NULL);
	mpsc_daemon_enqueue(&mcc_memory_error_event_queue,
	    &event->link, MPSC_QUEUE_DISABLE_PREEMPTION);
	return KERN_SUCCESS;
#else
	return KERN_FAILURE;
#endif
}
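
/*
 * Illustrative caller sketch (hypothetical; the handler name and status
 * source are assumptions, not part of this file). A platform error handler
 * running at primary interrupt context could report an event like this:
 *
 *	static void
 *	example_mcc_error_handler(uint32_t status)
 *	{
 *		mcc_ecc_event_t ev = {.version = MCC_ECC_V1, .status = status};
 *		if (mcc_log_memory_error(ev) != KERN_SUCCESS) {
 *			// Ring full (or MCC unsupported): the event was
 *			// dropped and counted in mcc_dropped_events.
 *		}
 *	}
 */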

#if (DEBUG || DEVELOPMENT)
static int
mcc_memory_error_notify_test_run(int64_t in, int64_t *out)
{
238 | printf("Running mcc_memory_error_notify_test for %llu iterations\n" , in); |
239 | for (uint64_t i = 0; i < in; i++) { |
		mcc_ecc_event_t event = {.version = MCC_ECC_V1, .status = (uint32_t)i};
		/**
		 * To accurately test mcc_log_memory_error, we must disable
		 * preemption, because it is called from the primary interrupt
		 * context.
		 */
		disable_preemption();
		mcc_log_memory_error(event);
		enable_preemption();
	}

	*out = 1;
	return 0;
}

SYSCTL_TEST_REGISTER(mcc_memory_error_notify_test, mcc_memory_error_notify_test_run);
#endif /* (DEBUG || DEVELOPMENT) */

/* Legacy ECC logging mechanism */

/*
 * ECC data. Not really KPCs, but this still seems like the
 * best home for this code.
 *
 * Circular buffer of events. When we fill up, drop data.
 */
#define ECC_EVENT_BUFFER_COUNT (256)

struct ecc_event ecc_data[ECC_EVENT_BUFFER_COUNT];
static uint32_t ecc_data_next_read;
static uint32_t ecc_data_next_write;
static boolean_t ecc_data_empty = TRUE; // disambiguates next_read == next_write (buffer empty vs. full)
static LCK_GRP_DECLARE(ecc_data_lock_group, "ecc-data");
static LCK_SPIN_DECLARE(ecc_data_lock, &ecc_data_lock_group);
static uint32_t ecc_correction_count;


uint32_t
ecc_log_get_correction_count(void)
{
	return ecc_correction_count;
}

kern_return_t
ecc_log_record_event(const struct ecc_event *ev)
{
	spl_t x;

	if (ev->count > ECC_EVENT_INFO_DATA_ENTRIES) {
		panic("Count of %u on ecc event is too large.", (unsigned)ev->count);
	}

	x = splhigh();
	lck_spin_lock(&ecc_data_lock);

	ecc_correction_count++;

	if (ecc_data_next_read == ecc_data_next_write && !ecc_data_empty) {
		lck_spin_unlock(&ecc_data_lock);
		splx(x);
		return KERN_FAILURE;
	}

	bcopy(ev, &ecc_data[ecc_data_next_write], sizeof(*ev));
	ecc_data_next_write++;
	ecc_data_next_write %= ECC_EVENT_BUFFER_COUNT;
	ecc_data_empty = FALSE;

	lck_spin_unlock(&ecc_data_lock);
	splx(x);

	return KERN_SUCCESS;
}
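
/*
 * Illustrative producer sketch (a sketch only; the field values are assumed,
 * not taken from a real error path). A platform handler records a correction
 * and simply loses the event if the buffer is full:
 *
 *	struct ecc_event ev = { .count = 0 };
 *	(void) ecc_log_record_event(&ev);
 */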

kern_return_t
ecc_log_get_next_event(struct ecc_event *ev)
{
	spl_t x;

	x = splhigh();
	lck_spin_lock(&ecc_data_lock);

	if (ecc_data_empty) {
		assert(ecc_data_next_write == ecc_data_next_read);

		lck_spin_unlock(&ecc_data_lock);
		splx(x);
		return KERN_FAILURE;
	}

	bcopy(&ecc_data[ecc_data_next_read], ev, sizeof(*ev));
	ecc_data_next_read++;
	ecc_data_next_read %= ECC_EVENT_BUFFER_COUNT;

	if (ecc_data_next_read == ecc_data_next_write) {
		ecc_data_empty = TRUE;
	}

	lck_spin_unlock(&ecc_data_lock);
	splx(x);

	return KERN_SUCCESS;
}
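
/*
 * Illustrative consumer sketch (a sketch, not compiled code): a reader
 * drains the buffer until it reports empty:
 *
 *	struct ecc_event ev;
 *	while (ecc_log_get_next_event(&ev) == KERN_SUCCESS) {
 *		// ...forward ev to the interested client...
 *	}
 */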