| 1 | /* |
| 2 | * Copyright (c) 2013 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #pragma once |
| 30 | |
| 31 | #include <mach/kern_return.h> |
| 32 | #include <stdint.h> |
| 33 | #include <sys/cdefs.h> |
| 34 | #include <mach/vm_types.h> |
| 35 | |
| 36 | __BEGIN_DECLS |
| 37 | |
#ifdef XNU_KERNEL_PRIVATE
/*
 * Bad-page bookkeeping, defined elsewhere in the kernel.
 * NOTE(review): presumably ecc_bad_pages refers to ecc_bad_pages_count
 * page numbers -- confirm against the defining translation unit.
 */
extern ppnum_t  *ecc_bad_pages;
extern uint32_t  ecc_bad_pages_count;

/* Counts for sysctls. */
extern uint32_t  vm_ecc_db_pages_count;
extern uint32_t  vm_ecc_zero_pages_count;
extern uint32_t  vm_ecc_panic_pages_count;
extern uint32_t  vm_ecc_max_db_pages;
#endif /* XNU_KERNEL_PRIVATE */
| 48 | |
/* Old ECC logging mechanism */

/* Number of uint64_t slots available in struct ecc_event's data[] array. */
#define ECC_EVENT_INFO_DATA_ENTRIES 8

/*
 * Event record used by the old ECC logging mechanism.
 */
struct ecc_event {
	/* ID of memory (e.g. L2C), platform-specific. */
	uint8_t         id;
	/* Count of uint64_t's used in data[], starting at index 0. */
	uint8_t         count;
	/* Event-specific data; the array is explicitly aligned to 8 bytes. */
	uint64_t        data[ECC_EVENT_INFO_DATA_ENTRIES] __attribute__((aligned(8)));
};
| 57 | |
#ifdef KERNEL_PRIVATE
/*
 * Old ECC logging mechanism: hands an event record to the ECC log.
 *
 * The record is taken through a const pointer.
 * NOTE(review): the specific kern_return_t codes returned are not visible
 * from this header -- confirm against the implementation.
 */
extern kern_return_t
ecc_log_record_event(const struct ecc_event *ev);
#endif /* KERNEL_PRIVATE */
| 61 | |
#ifdef XNU_KERNEL_PRIVATE
#include <mach/vm_param.h>

/* Magic byte used when building ECC_PANIC_PAGE_SIGN. */
#define ECC_PANIC_PAGE_MAGIC 0xEC
/* Bit 63 combined with the magic byte. */
#define ECC_PANIC_PAGE_SIGN  ((1ULL << 63) | (ECC_PANIC_PAGE_MAGIC))
/* Bit 63 combined with PAGE_MASK. */
#define ECC_PANIC_PAGE_MASK  ((1ULL << 63) | (PAGE_MASK))

/*
 * Accessors for the old ECC log.
 * NOTE(review): presumably ecc_log_get_next_event() fills *ev with the next
 * recorded event and ecc_log_get_correction_count() reports how many
 * corrections have been logged -- confirm against the implementation.
 */
extern kern_return_t
ecc_log_get_next_event(struct ecc_event *ev);
extern uint32_t
ecc_log_get_correction_count(void);
#endif /* XNU_KERNEL_PRIVATE */
| 71 | |
/* Evaluates to true when either DEVELOPMENT or DEBUG is nonzero. */
#define ECC_TESTING (DEVELOPMENT || DEBUG)
| 73 | |
/* New CoreAnalytics ECC logging mechanism */

/* log2 of the page-poison granule. */
#define VM_ECC_PAGE_POISON_GRANULE_SHIFT (7)
/*
 * Page-poison granule: 1 << 7 == 128.
 * NOTE(review): presumably expressed in bytes -- confirm with users.
 */
#define VM_ECC_PAGE_POISON_GRANULE       (1 << VM_ECC_PAGE_POISON_GRANULE_SHIFT)
| 78 | |
| 79 | /* Flags to describe ECC memory errors */ |
| 80 | __options_decl(ecc_flags_t, uint32_t, { |
| 81 | ECC_NONE = 0x00000000, |
| 82 | /* An error is correctable (1) or uncorrectable (0). */ |
| 83 | ECC_IS_CORRECTABLE = 0x00000001, |
| 84 | /* The database is corrupt. */ |
| 85 | ECC_DB_CORRUPTED = 0x00000002, |
| 86 | /* The error was injected for testing purposes. */ |
| 87 | ECC_IS_TEST_ERROR = 0x00000004, |
| 88 | /* Do not trigger a CA report, just record to the DB (for testing purposes) */ |
| 89 | ECC_DB_ONLY = 0x00000008, |
| 90 | }); |
| 91 | |
| 92 | /** |
| 93 | * ECC versions. |
| 94 | */ |
| 95 | __options_decl(ecc_version_t, uint32_t, { |
| 96 | ECC_V1, |
| 97 | |
| 98 | // Metadata |
| 99 | ECC_NUM_VERSIONS |
| 100 | }); |
| 101 | |
| 102 | /** |
| 103 | * ECC event descriptor. |
| 104 | * |
| 105 | * @note If a new ECC version has been added (e.g. future hardware must |
| 106 | * log new or different data) new fields should be appended to this struct to |
| 107 | * represent the new data. No fields should be deleted from this struct unless |
| 108 | * the field corresponds only to hardware that has been deprecated. |
| 109 | */ |
| 110 | typedef struct { |
| 111 | /* Version of this struct. */ |
| 112 | ecc_version_t version; |
| 113 | /* Flags describing the reported error. */ |
| 114 | ecc_flags_t flags; |
| 115 | /* Physical address of failure */ |
| 116 | uint64_t physaddr; |
| 117 | /* Number of CEs reported at physaddr */ |
| 118 | uint32_t ce_count; |
| 119 | /* Vendor ID */ |
| 120 | uint32_t vendor; |
| 121 | /* Reserved for future extension to report row, column, bank, etc. */ |
| 122 | uint32_t reserved[4]; |
| 123 | } ecc_event_t; |
| 124 | _Static_assert(sizeof(ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs" ); |
| 125 | |
| 126 | /** |
| 127 | * platform_error_handler_ecc_poll_t is the type of callback registered by the |
| 128 | * platform error handler that xnu can use to poll for ECC data. |
| 129 | */ |
| 130 | typedef int (*platform_error_handler_ecc_poll_t)(uint64_t *addrs, uint32_t *error_count); |
| 131 | kern_return_t kern_ecc_poll_register(platform_error_handler_ecc_poll_t poll_func, uint32_t max_errors); |
| 132 | |
| 133 | /* Flags to describe MCC memory errors */ |
| 134 | __options_decl(mcc_flags_t, uint32_t, { |
| 135 | MCC_NONE = 0x00000000, |
| 136 | MCC_IS_SINGLE_BIT = 0x00000001, |
| 137 | MCC_IS_MULTI_BIT = 0x00000002, |
| 138 | }); |
| 139 | |
/**
 * MCC ECC versions.
 */
typedef enum {
	MCC_ECC_V1 = 0,

	/* Metadata: number of versions defined above. */
	MCC_ECC_NUM_VERSIONS = 1
} mcc_ecc_version_t;
| 149 | |
| 150 | /** |
| 151 | * MCC ECC event descriptor. |
| 152 | * |
| 153 | * @note If a new MCC ECC version has been added, because i.e. future hardware must log new or different data, |
| 154 | * new fields should be appended to this struct to represent the new data. No fields should be |
| 155 | * deleted from this struct unless the field corresponds only to hardware that has been deprecated. |
| 156 | */ |
| 157 | typedef struct { |
| 158 | /* Version of this struct. */ |
| 159 | mcc_ecc_version_t version; |
| 160 | /* Flags used to describe the error. */ |
| 161 | mcc_flags_t flags; |
| 162 | /* Interrupt status at the time of the MCC error. */ |
| 163 | uint32_t status; |
| 164 | /* AMCC on which the error occurred. */ |
| 165 | uint32_t amcc; |
| 166 | /* Plane of the AMCC on which the error occurred. */ |
| 167 | uint32_t plane; |
| 168 | /* MemCache error Bank of first one bit error. */ |
| 169 | uint32_t bank; |
| 170 | /* MemCache error Way of first one bit error. */ |
| 171 | uint32_t way; |
| 172 | /* MemCache error Index of first one bit error. */ |
| 173 | uint32_t index; |
| 174 | /* Indicates whether the error is in upper half cache line or lower half cache line. */ |
| 175 | uint32_t bit_off_cl; |
| 176 | /* MemCache one bit error bit offset of first one bit error with in half cache line. */ |
| 177 | uint32_t bit_off_within_hcl; |
| 178 | } mcc_ecc_event_t; |
| 179 | _Static_assert(sizeof(mcc_ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs" ); |
| 180 | |
| 181 | #if KERNEL_PRIVATE |
| 182 | |
| 183 | /** |
| 184 | * Logs any memory error. |
| 185 | * |
| 186 | * This will notify mmaintenanced of the error. The error |
| 187 | * will get added to a database of errors and sent to |
| 188 | * CoreAnalytics. If ECC_IS_CORRECTABLE == 0, |
| 189 | * the address will be added to dramecc.db and will |
| 190 | * be retired for the lifetime of the device. |
| 191 | * |
| 192 | * If it is too early in boot to send a notification directly |
| 193 | * to the deamon, the error will be added to an array to be serviced |
| 194 | * later by an mpsc_daemon_queue. |
| 195 | * |
| 196 | * If ECC_IS_CORRECTABLE flag is set with this function, it |
| 197 | * assumes one error. If caller wishes to report the CE count |
| 198 | * reported by hardware, use ecc_log_memory_error_ce(). |
| 199 | * |
| 200 | * @param physical_address address that the error occured on |
| 201 | * @param ecc_flags flags used to describe the error |
| 202 | * |
| 203 | * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not |
| 204 | */ |
| 205 | extern kern_return_t ecc_log_memory_error(uint64_t physical_address, ecc_flags_t ecc_flags); |
| 206 | extern kern_return_t ecc_log_memory_error_internal(uint64_t physical_address, ecc_flags_t ecc_flags); |
| 207 | |
| 208 | /* |
| 209 | * Used to report delayed errors, scraped after ECC is enabled. |
| 210 | */ |
| 211 | extern kern_return_t ecc_log_memory_error_delayed(uint64_t physical_address, ecc_flags_t ecc_flags); |
| 212 | |
| 213 | /** |
| 214 | * Logs a correctable memory error. |
| 215 | * |
| 216 | * ECC_IS_CORRECTABLE is implied. Including this flag or not |
| 217 | * makes no difference for this function. |
| 218 | * |
| 219 | * @param physical_address address that the error occured on |
| 220 | * @param ecc_flags flags used to describe the error |
| 221 | * @param ce_count number of CEs occured on this page reported by HW |
| 222 | * |
| 223 | * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not |
| 224 | */ |
| 225 | kern_return_t ecc_log_memory_error_ce(uint64_t physical_address, ecc_flags_t ecc_flags, uint32_t ce_count); |
| 226 | |
| 227 | /** |
| 228 | * Logs an MCC error. |
| 229 | * |
| 230 | * @param event Event to be logged |
| 231 | * @returns KERN_SUCCESS on success, KERN_FAILURE otherwise |
| 232 | */ |
| 233 | kern_return_t |
| 234 | mcc_log_memory_error(mcc_ecc_event_t event); |
| 235 | |
| 236 | #endif /* KERNEL_PRIVATE */ |
| 237 | |
| 238 | __END_DECLS |
| 239 | |