| 1 | /* | 
| 2 |  * Copyright (c) 2013 Apple Inc. All rights reserved. | 
| 3 |  * | 
| 4 |  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | 
| 5 |  * | 
| 6 |  * This file contains Original Code and/or Modifications of Original Code | 
| 7 |  * as defined in and that are subject to the Apple Public Source License | 
| 8 |  * Version 2.0 (the 'License'). You may not use this file except in | 
| 9 |  * compliance with the License. The rights granted to you under the License | 
| 10 |  * may not be used to create, or enable the creation or redistribution of, | 
| 11 |  * unlawful or unlicensed copies of an Apple operating system, or to | 
| 12 |  * circumvent, violate, or enable the circumvention or violation of, any | 
| 13 |  * terms of an Apple operating system software license agreement. | 
| 14 |  * | 
| 15 |  * Please obtain a copy of the License at | 
| 16 |  * http://www.opensource.apple.com/apsl/ and read it before using this file. | 
| 17 |  * | 
| 18 |  * The Original Code and all software distributed under the License are | 
| 19 |  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | 
| 20 |  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | 
| 21 |  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | 
| 22 |  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | 
| 23 |  * Please see the License for the specific language governing rights and | 
| 24 |  * limitations under the License. | 
| 25 |  * | 
| 26 |  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | 
| 27 |  */ | 
| 28 |  | 
| 29 | #pragma once | 
| 30 |  | 
| 31 | #include <mach/kern_return.h> | 
| 32 | #include <stdint.h> | 
| 33 | #include <sys/cdefs.h> | 
| 34 | #include <mach/vm_types.h> | 
| 35 |  | 
| 36 | __BEGIN_DECLS | 
| 37 |  | 
| 38 | #ifdef XNU_KERNEL_PRIVATE | 
| 39 | extern ppnum_t *ecc_bad_pages; | 
| 40 | extern uint32_t ecc_bad_pages_count; | 
| 41 |  | 
| 42 | /* Counts for sysctls*/ | 
| 43 | extern uint32_t vm_ecc_db_pages_count; | 
| 44 | extern uint32_t vm_ecc_zero_pages_count; | 
| 45 | extern uint32_t vm_ecc_panic_pages_count; | 
| 46 | extern uint32_t vm_ecc_max_db_pages; | 
| 47 | #endif | 
| 48 |  | 
| 49 | /* Old ECC logging mechanism */ | 
| 50 |  | 
/* Capacity, in 64-bit words, of the payload carried by one struct ecc_event. */
#define ECC_EVENT_INFO_DATA_ENTRIES     8

/*
 * One ECC event: a small header (id, count) followed by an 8-byte aligned
 * array of event-specific 64-bit words.
 */
struct ecc_event {
	/* Platform-specific ID of the memory involved (e.g. L2C). */
	uint8_t         id;
	/* Number of data[] entries in use, counted from index 0. */
	uint8_t         count;
	/* Event-specific payload, forced onto an 8-byte boundary. */
	uint64_t        data[ECC_EVENT_INFO_DATA_ENTRIES] __attribute__((aligned(8)));
};
| 57 |  | 
| 58 | #ifdef KERNEL_PRIVATE | 
| 59 | extern kern_return_t    ecc_log_record_event(const struct ecc_event *ev); | 
| 60 | #endif | 
| 61 |  | 
| 62 | #ifdef XNU_KERNEL_PRIVATE | 
| 63 | #include <mach/vm_param.h> | 
| 64 |  | 
/* Magic value OR-ed into the low bits of ECC_PANIC_PAGE_SIGN. */
#define ECC_PANIC_PAGE_MAGIC    0xEC
/* Bit 63 combined with ECC_PANIC_PAGE_MAGIC (a 64-bit unsigned value). */
#define ECC_PANIC_PAGE_SIGN     ((ECC_PANIC_PAGE_MAGIC) | (1ULL << 63))
/* Bit 63 combined with PAGE_MASK. */
#define ECC_PANIC_PAGE_MASK     ((PAGE_MASK) | (1ULL << 63))
| 68 | extern kern_return_t    ecc_log_get_next_event(struct ecc_event *ev); | 
| 69 | extern uint32_t         ecc_log_get_correction_count(void); | 
| 70 | #endif | 
| 71 |  | 
/* Non-zero when either DEVELOPMENT or DEBUG is non-zero. */
#define ECC_TESTING (DEBUG || DEVELOPMENT)
| 73 |  | 
| 74 | /* New CoreAnalytics ECC logging mechanism */ | 
| 75 |  | 
/* log2 of the page-poison granule. */
#define VM_ECC_PAGE_POISON_GRANULE_SHIFT (7)
/* Page-poison granule: 1 << 7 == 128 (units presumably bytes -- confirm). */
#define VM_ECC_PAGE_POISON_GRANULE       (1 << (VM_ECC_PAGE_POISON_GRANULE_SHIFT))
| 78 |  | 
| 79 | /* Flags to describe ECC memory errors */ | 
| 80 | __options_decl(ecc_flags_t, uint32_t, { | 
| 81 | 	ECC_NONE                        = 0x00000000, | 
| 82 | 	/* An error is correctable (1) or uncorrectable (0). */ | 
| 83 | 	ECC_IS_CORRECTABLE              = 0x00000001, | 
| 84 | 	/* The database is corrupt. */ | 
| 85 | 	ECC_DB_CORRUPTED                = 0x00000002, | 
| 86 | 	/* The error was injected for testing purposes. */ | 
| 87 | 	ECC_IS_TEST_ERROR               = 0x00000004, | 
| 88 | 	/* Do not trigger a CA report, just record to the DB (for testing purposes) */ | 
| 89 | 	ECC_DB_ONLY                     = 0x00000008, | 
| 90 | }); | 
| 91 |  | 
| 92 | /** | 
| 93 |  * ECC versions. | 
| 94 |  */ | 
| 95 | __options_decl(ecc_version_t, uint32_t, { | 
| 96 | 	ECC_V1, | 
| 97 |  | 
| 98 | 	// Metadata | 
| 99 | 	ECC_NUM_VERSIONS | 
| 100 | }); | 
| 101 |  | 
| 102 | /** | 
| 103 |  * ECC event descriptor. | 
| 104 |  * | 
| 105 |  * @note If a new ECC version has been added (e.g. future hardware must | 
| 106 |  * log new or different data) new fields should be appended to this struct to | 
| 107 |  * represent the new data.  No fields should be deleted from this struct unless | 
| 108 |  * the field corresponds only to hardware that has been deprecated. | 
| 109 |  */ | 
| 110 | typedef struct { | 
| 111 | 	/* Version of this struct. */ | 
| 112 | 	ecc_version_t version; | 
| 113 | 	/* Flags describing the reported error. */ | 
| 114 | 	ecc_flags_t flags; | 
| 115 | 	/* Physical address of failure */ | 
| 116 | 	uint64_t physaddr; | 
| 117 | 	/* Number of CEs reported at physaddr */ | 
| 118 | 	uint32_t ce_count; | 
| 119 | 	/* Vendor ID */ | 
| 120 | 	uint32_t vendor; | 
| 121 | 	/* Reserved for future extension to report row, column, bank, etc. */ | 
| 122 | 	uint32_t reserved[4]; | 
| 123 | } ecc_event_t; | 
| 124 | _Static_assert(sizeof(ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs" ); | 
| 125 |  | 
| 126 | /** | 
| 127 |  * platform_error_handler_ecc_poll_t is the type of callback registered by the | 
| 128 |  * platform error handler that xnu can use to poll for ECC data. | 
| 129 |  */ | 
| 130 | typedef int (*platform_error_handler_ecc_poll_t)(uint64_t *addrs, uint32_t *error_count); | 
| 131 | kern_return_t kern_ecc_poll_register(platform_error_handler_ecc_poll_t poll_func, uint32_t max_errors); | 
| 132 |  | 
| 133 | /* Flags to describe MCC memory errors */ | 
| 134 | __options_decl(mcc_flags_t, uint32_t, { | 
| 135 | 	MCC_NONE                        = 0x00000000, | 
| 136 | 	MCC_IS_SINGLE_BIT               = 0x00000001, | 
| 137 | 	MCC_IS_MULTI_BIT                = 0x00000002, | 
| 138 | }); | 
| 139 |  | 
/**
 * MCC ECC versions.
 *
 * Enumerators are numbered implicitly from zero, so MCC_ECC_NUM_VERSIONS must
 * stay last in order to equal the number of versions listed before it.
 */
typedef enum {
	MCC_ECC_V1,

	// Metadata: number of versions above; not itself a version.
	MCC_ECC_NUM_VERSIONS
} mcc_ecc_version_t;
/*
 * Unlike ecc_version_t, this enum has no explicit underlying type, yet it is
 * the first member of mcc_ecc_event_t, whose size is pinned to ten 32-bit
 * words.  A narrower enum would be hidden by padding and would not change the
 * struct size, so pin the width here explicitly.
 */
_Static_assert(sizeof(mcc_ecc_version_t) == sizeof(uint32_t),
    "mcc_ecc_version_t must be 32 bits wide; mcc_ecc_event_t layout depends on it");
| 149 |  | 
| 150 | /** | 
| 151 |  * MCC ECC event descriptor. | 
| 152 |  * | 
| 153 |  * @note If a new MCC ECC version has been added, because i.e. future hardware must log new or different data, | 
| 154 |  * new fields should be appended to this struct to represent the new data.  No fields should be | 
| 155 |  * deleted from this struct unless the field corresponds only to hardware that has been deprecated. | 
| 156 |  */ | 
| 157 | typedef struct { | 
| 158 | 	/* Version of this struct. */ | 
| 159 | 	mcc_ecc_version_t version; | 
| 160 | 	/* Flags used to describe the error. */ | 
| 161 | 	mcc_flags_t flags; | 
| 162 | 	/* Interrupt status at the time of the MCC error. */ | 
| 163 | 	uint32_t status; | 
| 164 | 	/* AMCC on which the error occurred. */ | 
| 165 | 	uint32_t amcc; | 
| 166 | 	/* Plane of the AMCC on which the error occurred. */ | 
| 167 | 	uint32_t plane; | 
| 168 | 	/* MemCache error Bank of first one bit error. */ | 
| 169 | 	uint32_t bank; | 
| 170 | 	/* MemCache error Way of first one bit error. */ | 
| 171 | 	uint32_t way; | 
| 172 | 	/* MemCache error Index of first one bit error. */ | 
| 173 | 	uint32_t index; | 
| 174 | 	/* Indicates whether the error is in upper half cache line or lower half cache line. */ | 
| 175 | 	uint32_t bit_off_cl; | 
| 176 | 	/* MemCache one bit error bit offset of first one bit error with in half cache line. */ | 
| 177 | 	uint32_t bit_off_within_hcl; | 
| 178 | } mcc_ecc_event_t; | 
| 179 | _Static_assert(sizeof(mcc_ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs" ); | 
| 180 |  | 
| 181 | #if KERNEL_PRIVATE | 
| 182 |  | 
| 183 | /** | 
| 184 |  * Logs any memory error. | 
| 185 |  * | 
| 186 |  * This will notify mmaintenanced of the error. The error | 
| 187 |  * will get added to a database of errors and sent to | 
| 188 |  * CoreAnalytics. If ECC_IS_CORRECTABLE == 0, | 
| 189 |  * the address will be added to dramecc.db and will | 
| 190 |  * be retired for the lifetime of the device. | 
| 191 |  * | 
| 192 |  * If it is too early in boot to send a notification directly | 
| 193 |  * to the deamon, the error will be added to an array to be serviced | 
| 194 |  * later by an mpsc_daemon_queue. | 
| 195 |  * | 
| 196 |  * If ECC_IS_CORRECTABLE flag is set with this function, it | 
| 197 |  * assumes one error. If caller wishes to report the CE count | 
| 198 |  * reported by hardware, use ecc_log_memory_error_ce(). | 
| 199 |  * | 
| 200 |  * @param physical_address address that the error occured on | 
| 201 |  * @param ecc_flags flags used to describe the error | 
| 202 |  * | 
| 203 |  * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not | 
| 204 |  */ | 
| 205 | extern kern_return_t ecc_log_memory_error(uint64_t physical_address, ecc_flags_t ecc_flags); | 
| 206 | extern kern_return_t ecc_log_memory_error_internal(uint64_t physical_address, ecc_flags_t ecc_flags); | 
| 207 |  | 
| 208 | /* | 
| 209 |  * Used to report delayed errors, scraped after ECC is enabled. | 
| 210 |  */ | 
| 211 | extern kern_return_t ecc_log_memory_error_delayed(uint64_t physical_address, ecc_flags_t ecc_flags); | 
| 212 |  | 
| 213 | /** | 
| 214 |  * Logs a correctable memory error. | 
| 215 |  * | 
| 216 |  * ECC_IS_CORRECTABLE is implied. Including this flag or not | 
| 217 |  * makes no difference for this function. | 
| 218 |  * | 
| 219 |  * @param physical_address address that the error occured on | 
| 220 |  * @param ecc_flags flags used to describe the error | 
| 221 |  * @param ce_count number of CEs occured on this page reported by HW | 
| 222 |  * | 
| 223 |  * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not | 
| 224 |  */ | 
| 225 | kern_return_t ecc_log_memory_error_ce(uint64_t physical_address, ecc_flags_t ecc_flags, uint32_t ce_count); | 
| 226 |  | 
| 227 | /** | 
| 228 |  * Logs an MCC error. | 
| 229 |  * | 
| 230 |  * @param event Event to be logged | 
| 231 |  * @returns KERN_SUCCESS on success, KERN_FAILURE otherwise | 
| 232 |  */ | 
| 233 | kern_return_t | 
| 234 | mcc_log_memory_error(mcc_ecc_event_t event); | 
| 235 |  | 
| 236 | #endif /* KERNEL_PRIVATE */ | 
| 237 |  | 
| 238 | __END_DECLS | 
| 239 |  |