1/*
2 * Copyright (c) 2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#pragma once
30
31#include <mach/kern_return.h>
32#include <stdint.h>
33#include <sys/cdefs.h>
34#include <mach/vm_types.h>
35
36__BEGIN_DECLS
37
#ifdef XNU_KERNEL_PRIVATE
/*
 * ECC bad-page array and (presumably) its entry count.
 * Both are only declared here; the definitions live outside this header.
 */
extern ppnum_t  *ecc_bad_pages;
extern uint32_t ecc_bad_pages_count;

/* Counters exposed through sysctls. */
extern uint32_t vm_ecc_db_pages_count;
extern uint32_t vm_ecc_zero_pages_count;
extern uint32_t vm_ecc_panic_pages_count;
extern uint32_t vm_ecc_max_db_pages;
#endif /* XNU_KERNEL_PRIVATE */
48
49/* Old ECC logging mechanism */
50
/* Capacity, in uint64_t entries, of the data[] array of struct ecc_event. */
#define ECC_EVENT_INFO_DATA_ENTRIES 8

/*
 * Event record carrying a small, platform-specific payload.
 */
struct ecc_event {
	/* ID of memory (e.g. L2C), platform-specific. */
	uint8_t         id;
	/* Number of uint64_t entries of data[] in use, starting at index 0. */
	uint8_t         count;
	/* Event-specific data; the array is explicitly aligned to 8 bytes. */
	uint64_t        data[ECC_EVENT_INFO_DATA_ENTRIES]
	__attribute__((aligned(8)));
};
57
#ifdef KERNEL_PRIVATE
/*
 * Record an ECC event described by a struct ecc_event.
 *
 * @param ev    event to record; passed by const pointer, so the callee does
 *              not modify it through this parameter
 *
 * @returns a kern_return_t status code
 */
extern kern_return_t
ecc_log_record_event(const struct ecc_event *ev);
#endif /* KERNEL_PRIVATE */
61
#ifdef XNU_KERNEL_PRIVATE
#include <mach/vm_param.h>

/* Magic value that forms the low bits of the panic-page signature. */
#define ECC_PANIC_PAGE_MAGIC    0xEC
/* Signature value: bit 63 set, OR'd with ECC_PANIC_PAGE_MAGIC. */
#define ECC_PANIC_PAGE_SIGN     ((1ULL << 63) | (ECC_PANIC_PAGE_MAGIC))
/* Mask value: bit 63 OR'd with PAGE_MASK (presumably from <mach/vm_param.h>). */
#define ECC_PANIC_PAGE_MASK     ((1ULL << 63) | (PAGE_MASK))

/* Accessors for the ECC event log; both are only declared in this header. */
extern kern_return_t
ecc_log_get_next_event(struct ecc_event *ev);

extern uint32_t
ecc_log_get_correction_count(void);
#endif /* XNU_KERNEL_PRIVATE */
71
/* Evaluates true when either DEVELOPMENT or DEBUG is nonzero. */
#define ECC_TESTING     (DEVELOPMENT || DEBUG)

/* New CoreAnalytics ECC logging mechanism */

/* Shift count from which the page-poison granule is derived. */
#define VM_ECC_PAGE_POISON_GRANULE_SHIFT        (7)
/* Page-poison granule: 1 << 7, i.e. 128. */
#define VM_ECC_PAGE_POISON_GRANULE              (1 << VM_ECC_PAGE_POISON_GRANULE_SHIFT)
78
79/* Flags to describe ECC memory errors */
80__options_decl(ecc_flags_t, uint32_t, {
81 ECC_NONE = 0x00000000,
82 /* An error is correctable (1) or uncorrectable (0). */
83 ECC_IS_CORRECTABLE = 0x00000001,
84 /* The database is corrupt. */
85 ECC_DB_CORRUPTED = 0x00000002,
86 /* The error was injected for testing purposes. */
87 ECC_IS_TEST_ERROR = 0x00000004,
88 /* Do not trigger a CA report, just record to the DB (for testing purposes) */
89 ECC_DB_ONLY = 0x00000008,
90});
91
92/**
93 * ECC versions.
94 */
95__options_decl(ecc_version_t, uint32_t, {
96 ECC_V1,
97
98 // Metadata
99 ECC_NUM_VERSIONS
100});
101
102/**
103 * ECC event descriptor.
104 *
105 * @note If a new ECC version has been added (e.g. future hardware must
106 * log new or different data) new fields should be appended to this struct to
107 * represent the new data. No fields should be deleted from this struct unless
108 * the field corresponds only to hardware that has been deprecated.
109 */
110typedef struct {
111 /* Version of this struct. */
112 ecc_version_t version;
113 /* Flags describing the reported error. */
114 ecc_flags_t flags;
115 /* Physical address of failure */
116 uint64_t physaddr;
117 /* Number of CEs reported at physaddr */
118 uint32_t ce_count;
119 /* Vendor ID */
120 uint32_t vendor;
121 /* Reserved for future extension to report row, column, bank, etc. */
122 uint32_t reserved[4];
123} ecc_event_t;
124_Static_assert(sizeof(ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs");
125
126/**
127 * platform_error_handler_ecc_poll_t is the type of callback registered by the
128 * platform error handler that xnu can use to poll for ECC data.
129 */
130typedef int (*platform_error_handler_ecc_poll_t)(uint64_t *addrs, uint32_t *error_count);
131kern_return_t kern_ecc_poll_register(platform_error_handler_ecc_poll_t poll_func, uint32_t max_errors);
132
133/* Flags to describe MCC memory errors */
134__options_decl(mcc_flags_t, uint32_t, {
135 MCC_NONE = 0x00000000,
136 MCC_IS_SINGLE_BIT = 0x00000001,
137 MCC_IS_MULTI_BIT = 0x00000002,
138});
139
/**
 * Versions of the MCC ECC event descriptor.
 */
typedef enum {
	MCC_ECC_V1 = 0,

	/* Metadata: one past the last defined version. */
	MCC_ECC_NUM_VERSIONS
} mcc_ecc_version_t;
149
150/**
151 * MCC ECC event descriptor.
152 *
153 * @note If a new MCC ECC version has been added, because i.e. future hardware must log new or different data,
154 * new fields should be appended to this struct to represent the new data. No fields should be
155 * deleted from this struct unless the field corresponds only to hardware that has been deprecated.
156 */
157typedef struct {
158 /* Version of this struct. */
159 mcc_ecc_version_t version;
160 /* Flags used to describe the error. */
161 mcc_flags_t flags;
162 /* Interrupt status at the time of the MCC error. */
163 uint32_t status;
164 /* AMCC on which the error occurred. */
165 uint32_t amcc;
166 /* Plane of the AMCC on which the error occurred. */
167 uint32_t plane;
168 /* MemCache error Bank of first one bit error. */
169 uint32_t bank;
170 /* MemCache error Way of first one bit error. */
171 uint32_t way;
172 /* MemCache error Index of first one bit error. */
173 uint32_t index;
174 /* Indicates whether the error is in upper half cache line or lower half cache line. */
175 uint32_t bit_off_cl;
176 /* MemCache one bit error bit offset of first one bit error with in half cache line. */
177 uint32_t bit_off_within_hcl;
178} mcc_ecc_event_t;
179_Static_assert(sizeof(mcc_ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs");
180
181#if KERNEL_PRIVATE
182
183/**
184 * Logs any memory error.
185 *
186 * This will notify mmaintenanced of the error. The error
187 * will get added to a database of errors and sent to
188 * CoreAnalytics. If ECC_IS_CORRECTABLE == 0,
189 * the address will be added to dramecc.db and will
190 * be retired for the lifetime of the device.
191 *
192 * If it is too early in boot to send a notification directly
193 * to the deamon, the error will be added to an array to be serviced
194 * later by an mpsc_daemon_queue.
195 *
196 * If ECC_IS_CORRECTABLE flag is set with this function, it
197 * assumes one error. If caller wishes to report the CE count
198 * reported by hardware, use ecc_log_memory_error_ce().
199 *
200 * @param physical_address address that the error occured on
201 * @param ecc_flags flags used to describe the error
202 *
203 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not
204 */
205extern kern_return_t ecc_log_memory_error(uint64_t physical_address, ecc_flags_t ecc_flags);
206extern kern_return_t ecc_log_memory_error_internal(uint64_t physical_address, ecc_flags_t ecc_flags);
207
208/*
209 * Used to report delayed errors, scraped after ECC is enabled.
210 */
211extern kern_return_t ecc_log_memory_error_delayed(uint64_t physical_address, ecc_flags_t ecc_flags);
212
213/**
214 * Logs a correctable memory error.
215 *
216 * ECC_IS_CORRECTABLE is implied. Including this flag or not
217 * makes no difference for this function.
218 *
219 * @param physical_address address that the error occured on
220 * @param ecc_flags flags used to describe the error
221 * @param ce_count number of CEs occured on this page reported by HW
222 *
223 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not
224 */
225kern_return_t ecc_log_memory_error_ce(uint64_t physical_address, ecc_flags_t ecc_flags, uint32_t ce_count);
226
227/**
228 * Logs an MCC error.
229 *
230 * @param event Event to be logged
231 * @returns KERN_SUCCESS on success, KERN_FAILURE otherwise
232 */
233kern_return_t
234mcc_log_memory_error(mcc_ecc_event_t event);
235
236#endif /* KERNEL_PRIVATE */
237
238__END_DECLS
239