/* Copyright (c) (2011,2014,2015,2018,2019,2021,2023) Apple Inc. All rights reserved.
 *
 * corecrypto is licensed under Apple Inc.’s Internal Use License Agreement (which
 * is contained in the License.txt file distributed with corecrypto) and only to
 * people who accept that license. IMPORTANT: Any license rights granted to you by
 * Apple Inc. (if any) are limited to internal use within your organization only on
 * devices and computers you own or control, for the sole purpose of verifying the
 * security characteristics and correct functioning of the Apple Software. You may
 * not, directly or indirectly, redistribute the Apple Software or any portions thereof.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <corecrypto/cc_config.h>
#include "ccmode_internal.h"
#include "ccn_internal.h"

#if (CCN_UNIT_SIZE == 8) && CC_DUNIT_SUPPORTED

// Constant-time binary polynomial (carry-less) multiplication,
// x * y = (r_hi << 64) | r_lo over GF(2).
static void
bmul64(uint64_t x, uint64_t y, uint64_t *r_hi, uint64_t *r_lo)
{
    cc_dunit x1, x2, x3, x4, x5;
    cc_dunit y1, y2, y3, y4, y5;
    cc_dunit r, z;

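    // Masks selecting every fifth bit, at offsets 0 through 4. With set
    // bits at least five apart in each factor, a column of the integer
    // product sums at most ceil(64/5) = 13 partial products, so carries
    // can never reach the next useful bit position.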
    const cc_unit m1 = 0x1084210842108421;
    const cc_unit m2 = 0x2108421084210842;
    const cc_unit m3 = 0x4210842108421084;
    const cc_unit m4 = 0x8421084210842108;
    const cc_unit m5 = 0x0842108421084210;

    x1 = x & m1;
    y1 = y & m1;
    x2 = x & m2;
    y2 = y & m2;
    x3 = x & m3;
    y3 = y & m3;
    x4 = x & m4;
    y4 = y & m4;
    x5 = x & m5;
    y5 = y & m5;

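    // Each product x_i * y_j has its valid bits at positions congruent to
    // (i + j - 2) mod 5; the matching 128-bit mask strips the carry spill
    // at the other positions before the groups are XORed together.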
    z = (x1 * y1) ^ (x2 * y5) ^ (x3 * y4) ^ (x4 * y3) ^ (x5 * y2);
    r = z & (((cc_dunit)m2 << 64) | m1);
    z = (x1 * y2) ^ (x2 * y1) ^ (x3 * y5) ^ (x4 * y4) ^ (x5 * y3);
    r |= z & (((cc_dunit)m3 << 64) | m2);
    z = (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y5) ^ (x5 * y4);
    r |= z & (((cc_dunit)m4 << 64) | m3);
    z = (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1) ^ (x5 * y5);
    r |= z & (((cc_dunit)m5 << 64) | m4);
    z = (x1 * y5) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2) ^ (x5 * y1);
    r |= z & (((cc_dunit)m1 << 64) | m5);

    *r_hi = (uint64_t)(r >> 64);
    *r_lo = (uint64_t)r;
}
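
// Example: bmul64(0x3, 0x3, &hi, &lo) gives hi == 0 and lo == 0x5,
// since (X + 1)^2 = X^2 + 1 over GF(2).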

void
ccmode_gcm_gf_mult_64(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
    cc_unit a_lo, a_hi, b_lo, b_hi;
    cc_unit z0_lo, z0_hi, z1_lo, z1_hi, z2_lo, z2_hi;
    cc_dunit z_hi, z_lo;

    a_lo = cc_load64_be(a + 8);
    a_hi = cc_load64_be(a);

    b_lo = cc_load64_be(b + 8);
    b_hi = cc_load64_be(b);

    // Binary Karatsuba multiplication z = a * b.
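    // With z0 = a_lo * b_lo, z2 = a_hi * b_hi, and
    // z1 = (a_hi ^ a_lo) * (b_hi ^ b_lo) ^ z2 ^ z0, the full product is
    // z = (z2 << 128) ^ (z1 << 64) ^ z0 -- three multiplies instead of four.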
    bmul64(a_lo, b_lo, &z0_hi, &z0_lo);
    bmul64(a_hi, b_hi, &z2_hi, &z2_lo);
    bmul64(a_hi ^ a_lo, b_hi ^ b_lo, &z1_hi, &z1_lo);
    z1_hi ^= z2_hi ^ z0_hi;
    z1_lo ^= z2_lo ^ z0_lo;
    z_hi = ((cc_dunit)z2_hi << 64) | (z2_lo ^ z1_hi);
    z_lo = (((cc_dunit)z0_hi << 64) | z0_lo) ^ (((cc_dunit)z1_lo) << 64);

    // Shift left by one to get reflected(a * b).
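    // (For bit-reversed 64-bit operands, rev(a) * rev(b) is the 127-bit
    // reversal of a * b, i.e. reflected(a * b) >> 1; the shift corrects that.)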
    z_hi = (z_hi << 1) | (z_lo >> 127);
    z_lo <<= 1;

    // Reduce.
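    // Reduction is modulo the GHASH polynomial X^128 + X^7 + X^2 + X + 1,
    // applied here in the bit-reflected representation.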
    z_lo ^= (z_lo << 126) ^ (z_lo << 121);
    z_hi ^= z_lo ^ (z_lo >> 1) ^ (z_lo >> 2) ^ (z_lo >> 7);

    cc_store64_be((cc_unit)z_hi, c + 8);
    cc_store64_be((cc_unit)(z_hi >> 64), c);
}

#endif

// Constant-time binary polynomial (carry-less) multiplication,
// x * y = (r_hi << 32) | r_lo over GF(2).
static void
bmul32(uint32_t x, uint32_t y, uint32_t *r_hi, uint32_t *r_lo)
{
    uint32_t x0, x1, x2, x3;
    uint32_t y0, y1, y2, y3;
    uint64_t z, z0, z1, z2, z3;

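    // Masks selecting every fourth bit. Each group of a 32-bit factor has
    // at most ceil(32/4) = 8 set bits, so no column of the 64-bit integer
    // product can carry into the next useful bit position.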
    const uint32_t m1 = 0x11111111;
    const uint32_t m2 = 0x22222222;
    const uint32_t m4 = 0x44444444;
    const uint32_t m8 = 0x88888888;

    x0 = x & m1;
    x1 = x & m2;
    x2 = x & m4;
    x3 = x & m8;
    y0 = y & m1;
    y1 = y & m2;
    y2 = y & m4;
    y3 = y & m8;

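    // z_k collects the products x_i * y_j with i + j congruent to k mod 4,
    // so its valid bits sit at positions congruent to k mod 4.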
    z0 = ((uint64_t)x0 * y0) ^ ((uint64_t)x1 * y3) ^ ((uint64_t)x2 * y2) ^ ((uint64_t)x3 * y1);
    z1 = ((uint64_t)x0 * y1) ^ ((uint64_t)x1 * y0) ^ ((uint64_t)x2 * y3) ^ ((uint64_t)x3 * y2);
    z2 = ((uint64_t)x0 * y2) ^ ((uint64_t)x1 * y1) ^ ((uint64_t)x2 * y0) ^ ((uint64_t)x3 * y3);
    z3 = ((uint64_t)x0 * y3) ^ ((uint64_t)x1 * y2) ^ ((uint64_t)x2 * y1) ^ ((uint64_t)x3 * y0);

    z0 &= ((uint64_t)m1 << 32) | m1;
    z1 &= ((uint64_t)m2 << 32) | m2;
    z2 &= ((uint64_t)m4 << 32) | m4;
    z3 &= ((uint64_t)m8 << 32) | m8;
    z = z0 | z1 | z2 | z3;

    *r_hi = (uint32_t)(z >> 32);
    *r_lo = (uint32_t)z;
}

void
ccmode_gcm_gf_mult_32(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
    uint32_t a_hi_h, a_hi_l, a_lo_h, a_lo_l;
    uint32_t b_hi_h, b_hi_l, b_lo_h, b_lo_l;

    uint64_t z_hi_h, z_hi_l, z_lo_h, z_lo_l;
    uint32_t z0_a_h, z0_a_l, z0_b_h, z0_b_l;
    uint32_t z1_a_h, z1_a_l, z1_b_h, z1_b_l;
    uint32_t z2_a_h, z2_a_l, z2_b_h, z2_b_l;

    uint32_t t_hi, t_lo;

    a_lo_l = cc_load32_be(a + 12);
    a_lo_h = cc_load32_be(a + 8);
    a_hi_l = cc_load32_be(a + 4);
    a_hi_h = cc_load32_be(a);

    uint32_t a_hiXlo_h = a_hi_h ^ a_lo_h;
    uint32_t a_hiXlo_l = a_hi_l ^ a_lo_l;

    b_lo_l = cc_load32_be(b + 12);
    b_lo_h = cc_load32_be(b + 8);
    b_hi_l = cc_load32_be(b + 4);
    b_hi_h = cc_load32_be(b);

    uint32_t b_hiXlo_h = b_hi_h ^ b_lo_h;
    uint32_t b_hiXlo_l = b_hi_l ^ b_lo_l;

    // Binary Karatsuba multiplication z = a * b.
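    // Karatsuba is applied twice: the 128-bit product needs three 64-bit
    // carry-less multiplies, and each of those is built from three
    // bmul32() calls -- nine 32x32 multiplies instead of sixteen.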

    // a_lo * b_lo (64 bits)
    bmul32(a_lo_h, b_lo_h, &z0_a_h, &z0_a_l);
    bmul32(a_lo_l, b_lo_l, &z0_b_h, &z0_b_l);
    bmul32(a_lo_h ^ a_lo_l, b_lo_h ^ b_lo_l, &t_hi, &t_lo);
    t_hi ^= z0_a_h ^ z0_b_h;
    t_lo ^= z0_a_l ^ z0_b_l;
    z0_a_l ^= t_hi;
    z0_b_h ^= t_lo;

    // a_hi * b_hi (64 bits)
    bmul32(a_hi_h, b_hi_h, &z2_a_h, &z2_a_l);
    bmul32(a_hi_l, b_hi_l, &z2_b_h, &z2_b_l);
    bmul32(a_hi_h ^ a_hi_l, b_hi_h ^ b_hi_l, &t_hi, &t_lo);
    t_hi ^= z2_a_h ^ z2_b_h;
    t_lo ^= z2_a_l ^ z2_b_l;
    z2_a_l ^= t_hi;
    z2_b_h ^= t_lo;

    // (a_hi ^ a_lo) * (b_hi ^ b_lo) (64 bits)
    bmul32(a_hiXlo_h, b_hiXlo_h, &z1_a_h, &z1_a_l);
    bmul32(a_hiXlo_l, b_hiXlo_l, &z1_b_h, &z1_b_l);
    bmul32(a_hiXlo_h ^ a_hiXlo_l, b_hiXlo_h ^ b_hiXlo_l, &t_hi, &t_lo);
    t_hi ^= z1_a_h ^ z1_b_h;
    t_lo ^= z1_a_l ^ z1_b_l;
    z1_a_l ^= t_hi;
    z1_b_h ^= t_lo;

    // Another round of Karatsuba for a 128-bit result.
    z1_a_h ^= z0_a_h ^ z2_a_h;
    z1_a_l ^= z0_a_l ^ z2_a_l;
    z1_b_h ^= z0_b_h ^ z2_b_h;
    z1_b_l ^= z0_b_l ^ z2_b_l;
    z_hi_h = ((uint64_t)z2_a_h << 32) | z2_a_l;
    z_hi_l = (((uint64_t)z2_b_h << 32) | z2_b_l) ^ (((uint64_t)z1_a_h << 32) | z1_a_l);
    z_lo_h = (((uint64_t)z0_a_h << 32) | z0_a_l) ^ (((uint64_t)z1_b_h << 32) | z1_b_l);
    z_lo_l = ((uint64_t)z0_b_h << 32) | z0_b_l;

    // Shift left by one to get reflected(a * b).
    z_hi_h = (z_hi_h << 1) | (z_hi_l >> 63);
    z_hi_l = (z_hi_l << 1) | (z_lo_h >> 63);
    z_lo_h = (z_lo_h << 1) | (z_lo_l >> 63);
    z_lo_l <<= 1;

    // Reduce.
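    // As in the 64-bit path: reduction modulo the GHASH polynomial
    // X^128 + X^7 + X^2 + X + 1, in the bit-reflected representation.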
    z_lo_h ^= (z_lo_l << 62) ^ (z_lo_l << 57);
    z_hi_h ^= z_lo_h ^ (z_lo_h >> 1) ^ (z_lo_h >> 2) ^ (z_lo_h >> 7);
    z_hi_l ^= z_lo_l ^ (z_lo_l >> 1) ^ (z_lo_l >> 2) ^ (z_lo_l >> 7);
    z_hi_l ^= (z_lo_h << 63) ^ (z_lo_h << 62) ^ (z_lo_h << 57);

    cc_store64_be(z_hi_l, c + 8);
    cc_store64_be(z_hi_h, c);
}

void
ccmode_gcm_gf_mult(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
#if (CCN_UNIT_SIZE == 8) && CC_DUNIT_SUPPORTED
    ccmode_gcm_gf_mult_64(a, b, c);
#else
    ccmode_gcm_gf_mult_32(a, b, c);
#endif
}
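
/*
 * Usage sketch (illustrative only, not part of this file): GHASH absorbs a
 * 16-byte block X into the running tag Y and multiplies by the hash subkey
 * H in GF(2^128). Here `x`, `h`, and `y` are hypothetical 16-byte buffers:
 *
 *     for (size_t i = 0; i < 16; i++) {
 *         y[i] ^= x[i];                // Y ^= X
 *     }
 *     ccmode_gcm_gf_mult(y, h, y);     // Y = (Y ^ X) * H
 *
 * Both implementations load their operands before storing the result, so
 * c may alias a or b.
 */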