| 1 | /* Copyright (c) (2017-2023) Apple Inc. All rights reserved. |
| 2 | * |
| 3 | * corecrypto is licensed under Apple Inc.’s Internal Use License Agreement (which |
| 4 | * is contained in the License.txt file distributed with corecrypto) and only to |
| 5 | * people who accept that license. IMPORTANT: Any license rights granted to you by |
| 6 | * Apple Inc. (if any) are limited to internal use within your organization only on |
| 7 | * devices and computers you own or control, for the sole purpose of verifying the |
| 8 | * security characteristics and correct functioning of the Apple Software. You may |
| 9 | * not, directly or indirectly, redistribute the Apple Software or any portions thereof. |
| 10 | * |
| 11 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 12 | * |
| 13 | * This file contains Original Code and/or Modifications of Original Code |
| 14 | * as defined in and that are subject to the Apple Public Source License |
| 15 | * Version 2.0 (the 'License'). You may not use this file except in |
| 16 | * compliance with the License. The rights granted to you under the License |
| 17 | * may not be used to create, or enable the creation or redistribution of, |
| 18 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 19 | * circumvent, violate, or enable the circumvention or violation of, any |
| 20 | * terms of an Apple operating system software license agreement. |
| 21 | * |
| 22 | * Please obtain a copy of the License at |
| 23 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 24 | * |
| 25 | * The Original Code and all software distributed under the License are |
| 26 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 27 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 28 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 29 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 30 | * Please see the License for the specific language governing rights and |
| 31 | * limitations under the License. |
| 32 | * |
| 33 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 34 | */ |
| 35 | |
| 36 | #ifndef _CORECRYPTO_CCN_INTERNAL_H |
| 37 | #define _CORECRYPTO_CCN_INTERNAL_H |
| 38 | |
| 39 | #include <corecrypto/ccn.h> |
| 40 | #include "cc_workspaces.h" |
| 41 | #include "cc_memory.h" |
| 42 | #include "cc_internal.h" |
| 43 | |
| 44 | CC_PTRCHECK_CAPABLE_HEADER() |
| 45 | |
#if CCN_UNIT_SIZE == 8

#if CC_DUNIT_SUPPORTED
// Double-width unit (128 bits) via the GCC/Clang TI ("tetra-integer") mode,
// used by the CC_DUNIT_* helper macros below.
typedef unsigned cc_dunit __attribute__((mode(TI)));
#endif

// Unit-width helpers for 64-bit cc_units.
#define cc_clz_nonzero cc_clz64
#define cc_ctz_nonzero cc_ctz64
#define CC_STORE_UNIT_BE(x, out) cc_store64_be(x, out)
#define CC_LOAD_UNIT_BE(x, out) (x = cc_load64_be(out))

#elif CCN_UNIT_SIZE == 4

// Double-width unit (64 bits) for 32-bit cc_units.
typedef uint64_t cc_dunit;

// Unit-width helpers for 32-bit cc_units.
#define cc_clz_nonzero cc_clz32
#define cc_ctz_nonzero cc_ctz32
#define CC_STORE_UNIT_BE(x, out) cc_store32_be(x, out)
#define CC_LOAD_UNIT_BE(x, out) (x = cc_load32_be(out))

#else

#error Unsupported CCN_UNIT_SIZE

#endif
| 71 | |
#if CC_DUNIT_SUPPORTED

// r := (cc_unit)(x + y); tmp holds the full double-wide sum, so the
// carry remains available in its upper half for a following CC_DUNIT_ADC.
#define CC_DUNIT_ADD(r, x, y, tmp) \
    do { \
        tmp = ((cc_dunit)(x)) + (y); \
        r = (cc_unit)tmp; \
    } while (0);

// r := x + y + carry, where the carry is tmp's upper half
// (tmp >> CCN_UNIT_BITS), as left by a preceding CC_DUNIT_ADD/ADC.
#define CC_DUNIT_ADC(r, x, y, tmp) \
    do { \
        cc_unit _c = (tmp) >> CCN_UNIT_BITS; \
        tmp = ((cc_dunit)(x)) + (y) + _c; \
        r = (cc_unit)tmp; \
    } while (0);

// r := (cc_unit)(x - y); tmp keeps the double-wide difference, whose
// sign bit records the borrow for a following CC_DUNIT_SBC.
#define CC_DUNIT_SUB(r, x, y, tmp) \
    do { \
        tmp = ((cc_dunit)(x)) - (y); \
        r = (cc_unit)tmp; \
    } while (0);

// r := x - y - borrow, where the borrow is tmp's top (sign) bit,
// i.e. tmp >> (2 * CCN_UNIT_BITS - 1).
#define CC_DUNIT_SBC(r, x, y, tmp) \
    do { \
        cc_unit _b = (tmp) >> (2 * CCN_UNIT_BITS - 1); \
        tmp = ((cc_dunit)(x)) - (y) - _b; \
        r = (cc_unit)tmp; \
    } while (0);

// (hi,lo) += x * y, splitting the double-wide product across two
// separate accumulator units.
#define CC_DUNIT_MUL(x, y, hi, lo, tmp) \
    do { \
        tmp = (cc_dunit)(x) * (y); \
        lo += (tmp) & CCN_UNIT_MASK; \
        hi += (tmp) >> CCN_UNIT_BITS; \
    } while (0);

// (hi,lo) += (x * y) * i, for a small extra multiplier i.
#define CC_DUNIT_MULI(x, y, hi, lo, tmp, i) \
    do { \
        tmp = (cc_dunit)(x) * (y); \
        lo += ((tmp) & CCN_UNIT_MASK) * (i); \
        hi += ((tmp) >> CCN_UNIT_BITS) * (i); \
    } while (0);

// r := (cc_unit)lo, then (hi,lo) >>= CCN_UNIT_BITS:
// folds lo's overflow into hi and shifts the accumulator pair down one unit.
#define CC_STORE_LO(r, hi, lo) \
    do { \
        r = (cc_unit)lo; \
        hi += lo >> CCN_UNIT_BITS; \
        lo = hi & CCN_UNIT_MASK; \
        hi >>= CCN_UNIT_BITS; \
    } while (0);

#endif
| 130 | |
| 131 | CC_NONNULL((2, 3)) |
| 132 | void ccn_set(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s); |
| 133 | |
| 134 | CC_INLINE |
| 135 | CC_NONNULL((2, 4)) |
| 136 | void |
| 137 | ccn_setn(cc_size n, cc_unit *cc_counted_by (n)r, const cc_size s_size, const cc_unit *cc_counted_by (s_size)s) |
| 138 | { |
| 139 | cc_assert(n > 0 && s_size <= n); |
| 140 | |
| 141 | if (s_size > 0) { |
| 142 | ccn_set(n: s_size, r, s); |
| 143 | } |
| 144 | |
| 145 | ccn_zero(n: n - s_size, r: r + s_size); |
| 146 | } |
| 147 | |
| 148 | CC_INLINE |
| 149 | CC_NONNULL((2)) |
| 150 | void |
| 151 | ccn_clear(cc_size n, cc_unit *cc_sized_by (n)r) |
| 152 | { |
| 153 | cc_clear(ccn_sizeof_n(n), dst: r); |
| 154 | } |
| 155 | |
| 156 | /* Returns the value of bit _k_ of _ccn_, both are only evaluated once. */ |
| 157 | CC_INLINE cc_unit |
| 158 | ccn_bit(const cc_unit *cc_indexable x, size_t k) |
| 159 | { |
| 160 | return 1 & (x[k >> CCN_LOG2_BITS_PER_UNIT] >> (k & (CCN_UNIT_BITS - 1))); |
| 161 | } |
| 162 | |
| 163 | /* |s - t| -> r return 1 iff t > s, 0 otherwise */ |
| 164 | CC_WARN_RESULT |
| 165 | cc_unit ccn_abs(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, const cc_unit *cc_counted_by(n) t); |
| 166 | |
| 167 | /* Returns the number of bits which are zero before the first one bit |
| 168 | * counting from least to most significant bit. */ |
| 169 | CC_WARN_RESULT |
| 170 | CC_NONNULL((2)) |
| 171 | size_t ccn_trailing_zeros(cc_size n, const cc_unit *s); |
| 172 | |
| 173 | /*! @function ccn_shift_right |
| 174 | * @abstract Shifts s to the right by k bits, where 0 <= k < CCN_UNIT_BITS. |
| 175 | * |
| 176 | * @param n Length of r and s |
| 177 | * @param r Resulting big int. |
| 178 | * @param s Big int to shift. |
| 179 | * @param k Number of bits to shift by. |
| 180 | */ |
| 181 | CC_NONNULL_ALL |
| 182 | void ccn_shift_right(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, size_t k) __asm__("_ccn_shift_right" ); |
| 183 | |
| 184 | /*! @function ccn_shift_right_multi |
| 185 | * @abstract Constant-time, SPA-safe, right shift. |
| 186 | * |
| 187 | * @param n Length of r and s as number of cc_units. |
| 188 | * @param r Destination, can overlap with s. |
| 189 | * @param s Input that's shifted by k bits. |
| 190 | * @param k Number of bits by which to shift s to the right. |
| 191 | */ |
| 192 | CC_NONNULL_ALL |
| 193 | void ccn_shift_right_multi(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, size_t k); |
| 194 | |
| 195 | /* s << k -> r return bits shifted out of most significant word in bits [0, n> |
| 196 | * { N bit, scalar -> N bit } N = n * sizeof(cc_unit) * 8 |
| 197 | * the _multi version doesn't return the shifted bits, but does support multiple |
| 198 | * word shifts */ |
| 199 | CC_NONNULL_ALL |
| 200 | void ccn_shift_left(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, size_t k) __asm__("_ccn_shift_left" ); |
| 201 | |
| 202 | CC_NONNULL_ALL |
| 203 | void ccn_shift_left_multi(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, size_t k); |
| 204 | |
| 205 | // Conditionally swap the content of r0 and r1 buffers in constant time |
| 206 | // r0:r1 <- r1*k1 + s0*(k1-1) |
| 207 | CC_NONNULL_ALL |
| 208 | void ccn_cond_swap(cc_size n, cc_unit ki, cc_unit *cc_counted_by(n) r0, cc_unit *cc_counted_by(n) r1); |
| 209 | |
| 210 | /*! @function ccn_cond_shift_right |
| 211 | * @abstract Constant-time, SPA-safe, conditional right shift. |
| 212 | * |
| 213 | * @param n Length of each of r and a as number of cc_units. |
| 214 | * @param s Selector bit (0 or 1). |
| 215 | * @param r Destination, can overlap with a. |
| 216 | * @param a Input that's shifted by k bits, if s=1. |
| 217 | * @param k Number of bits by which to shift a to the right, if s=1. |
| 218 | * (k must not be larger than CCN_UNIT_BITS.) |
| 219 | */ |
| 220 | CC_NONNULL_ALL |
| 221 | void ccn_cond_shift_right(cc_size n, cc_unit s, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) a, size_t k); |
| 222 | |
| 223 | /*! @function ccn_cond_neg |
| 224 | * @abstract Constant-time, SPA-safe, conditional negation. |
| 225 | * |
| 226 | * @param n Length of each of r and x as number of cc_units. |
| 227 | * @param s Selector bit (0 or 1). |
| 228 | * @param r Destination, can overlap with x. |
| 229 | * @param x Input that's negated, if s=1. |
| 230 | */ |
| 231 | void ccn_cond_neg(cc_size n, cc_unit s, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) x); |
| 232 | |
| 233 | /*! @function ccn_cond_shift_right_carry |
| 234 | * @abstract Constant-time, SPA-safe, conditional right shift. |
| 235 | * |
| 236 | * @param n Length of each of r and a as number of cc_units. |
| 237 | * @param s Selector bit (0 or 1). |
| 238 | * @param r Destination, can overlap with a. |
| 239 | * @param a Input that's shifted by k bits, if s=1. |
| 240 | * @param k Number of bits by which to shift a to the right, if s=1. |
| 241 | * (k must not be larger than CCN_UNIT_BITS.) |
| 242 | * @param c Carry bit(s), the most significant bit(s) after shifting, if s=1. |
| 243 | */ |
| 244 | CC_NONNULL_ALL |
| 245 | void ccn_cond_shift_right_carry(cc_size n, cc_unit s, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) a, size_t k, cc_unit c); |
| 246 | |
| 247 | /*! @function ccn_cond_add |
| 248 | * @abstract Constant-time, SPA-safe, conditional addition. |
| 249 | * Computes r:= x + y, iff s = 1. Set r := x otherwise. |
| 250 | * |
| 251 | * @param n Length of each of r, x, and y as number of cc_units. |
| 252 | * @param s Selector bit (0 or 1). |
| 253 | * @param r Destination, can overlap with x or y. |
| 254 | * @param x First addend. |
| 255 | * @param y Second addend. |
| 256 | * |
| 257 | * @return The carry bit, if s=1. 0 otherwise. |
| 258 | */ |
| 259 | CC_WARN_RESULT CC_NONNULL_ALL |
| 260 | cc_unit ccn_cond_add(cc_size n, cc_unit s, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) x, const cc_unit *cc_counted_by(n) y); |
| 261 | |
| 262 | /*! @function ccn_cond_rsub |
| 263 | * @abstract Constant-time, SPA-safe, conditional reverse subtraction. |
| 264 | * Computes r := y - x, iff s = 1. Sets r := x otherwise. |
| 265 | * |
| 266 | * @param n Length of each of r, x, and y as number of cc_units. |
| 267 | * @param s Selector bit (0 or 1). |
| 268 | * @param r Destination, can overlap with x or y. |
| 269 | * @param x Subtrahend. |
| 270 | * @param y Minuend. |
| 271 | * |
| 272 | * @return The carry bit, if s=1. 0 otherwise. |
| 273 | */ |
| 274 | CC_WARN_RESULT CC_NONNULL_ALL |
| 275 | cc_unit ccn_cond_rsub(cc_size n, cc_unit s, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) x, const cc_unit *cc_counted_by(n) y); |
| 276 | |
| 277 | /*! @function ccn_cond_sub |
| 278 | * @abstract Constant-time, SPA-safe, conditional subtraction. |
| 279 | * Computes r := x - y, iff s = 1. Sets r := x otherwise. |
| 280 | * |
| 281 | * @param n Length of each of r, x, and y as number of cc_units. |
| 282 | * @param s Selector bit (0 or 1). |
| 283 | * @param r Destination, can overlap with x or y. |
| 284 | * @param x Minuend. |
| 285 | * @param y Subtrahend. |
| 286 | * |
| 287 | * @return The carry bit, if s=1. 0 otherwise. |
| 288 | */ |
| 289 | CC_WARN_RESULT CC_NONNULL_ALL |
| 290 | cc_unit ccn_cond_sub(cc_size n, cc_unit s, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) x, const cc_unit *cc_counted_by(n) y); |
| 291 | |
| 292 | /*! @function ccn_cond_clear |
| 293 | * @abstract Constant-time, SPA-safe, conditional zeroization. |
| 294 | * Sets r := 0, if s = 1. Does nothing otherwise. |
| 295 | * |
| 296 | * @param n Length of r as number of cc_units. |
| 297 | * @param s Selector bit (0 or 1). |
| 298 | * @param r Destination, can overlap with x or y. |
| 299 | */ |
| 300 | CC_NONNULL_ALL |
| 301 | void ccn_cond_clear(cc_size n, cc_unit s, cc_unit *r); |
| 302 | |
| 303 | /*! @function ccn_mux |
| 304 | * @abstract Constant-time, SPA-safe multiplexer. Sets r = (s ? a : b). |
| 305 | * |
| 306 | * @discussion This works like a normal multiplexer (s & a) | (~s & b) but is |
| 307 | * slightly more complicated and expensive. Out of `s` we build |
| 308 | * half-word masks to hide extreme Hamming weights of operands. |
| 309 | * |
| 310 | * @param n Length of each of r, a, and b as number of cc_units. |
| 311 | * @param s Selector bit (0 or 1). |
| 312 | * @param r Destination, can overlap with a or b. |
| 313 | * @param a Input selected when s=1. |
| 314 | * @param b Input selected when s=0. |
| 315 | */ |
| 316 | CC_NONNULL_ALL |
| 317 | void ccn_mux(cc_size n, cc_unit s, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) a, const cc_unit *cc_counted_by(n) b); |
| 318 | |
| 319 | /*! @function ccn_gcd_ws |
| 320 | * @abstract Computes the greatest common divisor of s and t, |
| 321 | * r = gcd(s,t) / 2^k, and returns k. |
| 322 | * |
| 323 | * @param ws Workspace. |
| 324 | * @param rn Length of r as a number of cc_units. |
| 325 | * @param r Resulting GCD. |
| 326 | * @param sn Length of s as a number of cc_units. |
| 327 | * @param s First number s. |
| 328 | * @param tn Length of t as a number of cc_units. |
 * @param t  Second number t.
| 330 | * |
| 331 | * @return The factor of two to shift r by to compute the actual GCD. |
| 332 | */ |
| 333 | CC_WARN_RESULT |
| 334 | CC_NONNULL_ALL |
| 335 | size_t ccn_gcd_ws(cc_ws_t ws, cc_size rn, cc_unit *cc_counted_by(rn) r, cc_size sn, const cc_unit *cc_counted_by(sn) s, cc_size tn, const cc_unit *cc_counted_by(tn) t); |
| 336 | |
| 337 | /*! @function ccn_lcm_ws |
| 338 | * @abstract Computes lcm(s,t), the least common multiple of s and t. |
| 339 | * |
| 340 | * @param ws Workspace. |
| 341 | * @param n Length of s,t as a number of cc_units. |
| 342 | * @param r2n Resulting LCM of length 2*n. |
| 343 | * @param s First number s. |
 * @param t   Second number t.
| 345 | */ |
| 346 | void ccn_lcm_ws(cc_ws_t ws, cc_size n, cc_unit *cc_unsafe_indexable r2n, const cc_unit *cc_counted_by(n)s, const cc_unit *cc_counted_by(n)t); |
| 347 | |
| 348 | /* s * t -> r_2n r_2n must not overlap with s nor t |
| 349 | * { n bit, n bit -> 2 * n bit } n = count * sizeof(cc_unit) * 8 |
| 350 | * { N bit, N bit -> 2N bit } N = ccn_bitsof(n) */ |
| 351 | CC_NONNULL((2, 3, 4)) |
| 352 | void ccn_mul(cc_size n, cc_unit *cc_unsafe_indexable r_2n, const cc_unit *cc_counted_by(n)s, const cc_unit *cc_counted_by(n)t) __asm__("_ccn_mul" ); |
| 353 | |
| 354 | /* s[0..n) * v -> r[0..n)+return value |
| 355 | * { N bit, sizeof(cc_unit) * 8 bit -> N + sizeof(cc_unit) * 8 bit } N = n * sizeof(cc_unit) * 8 */ |
| 356 | CC_WARN_RESULT |
| 357 | CC_NONNULL((2, 3)) |
| 358 | cc_unit ccn_mul1(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, const cc_unit v); |
| 359 | |
| 360 | /* s[0..n) * v[0..nv] -> r[0..n+nv) |
| 361 | * { n bit, nv bit -> n + nv bit} n = count * sizeof(cc_unit) * 8 |
| 362 | * { N bit, NV bit -> N + NV bit} N = ccn_bitsof(n), NV = ccn_bitsof(nv) |
| 363 | * r, s, and v should not overlap |
| 364 | * Leaks n and nv through timing */ |
| 365 | CC_NONNULL_ALL |
| 366 | void ccn_muln(cc_size n, cc_unit *cc_counted_by(n + nv) r, const cc_unit *cc_counted_by(n) s, cc_size nv, const cc_unit *cc_counted_by(n) v); |
| 367 | |
| 368 | /* s[0..n) * v + r[0..n) -> r[0..n)+return value |
| 369 | * { N bit, sizeof(cc_unit) * 8 bit -> N + sizeof(cc_unit) * 8 bit } N = n * sizeof(cc_unit) * 8 */ |
| 370 | CC_WARN_RESULT |
| 371 | CC_NONNULL((2, 3)) |
| 372 | cc_unit ccn_addmul1(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, const cc_unit v); |
| 373 | |
| 374 | /* s * t -> r_2n r_2n must not overlap with s nor t |
| 375 | * { n bit, n bit -> 2 * n bit } n = count * sizeof(cc_unit) * 8 |
| 376 | * { N bit, N bit -> 2N bit } N = ccn_bitsof(n) |
| 377 | * Provide a workspace for potential speedup */ |
| 378 | CC_NONNULL_ALL |
| 379 | void ccn_mul_ws(cc_ws_t ws, cc_size count, cc_unit *cc_unsafe_indexable r, const cc_unit *cc_counted_by(count)s, const cc_unit *cc_counted_by(count)t); |
| 380 | |
| 381 | /* s^2 -> r |
| 382 | * { n bit -> 2 * n bit } */ |
| 383 | CC_NONNULL_ALL |
| 384 | void ccn_sqr_ws(cc_ws_t ws, cc_size n, cc_unit *cc_unsafe_indexable r, const cc_unit *cc_counted_by(n)s); |
| 385 | |
/*! @function ccn_mod_ws
 * @abstract Computes r = a % d.
 *
 * @discussion Use CCN_DIVMOD_WORKSPACE_N(n) for the workspace.
 *             Implemented on top of ccn_divmod_ws() with no quotient output.
 *
 * @param ws Workspace
 * @param na Length of a as a number of cc_units.
 * @param a The dividend a.
 * @param n Length of r,d as a number of cc_units.
 * @param r The resulting remainder.
 * @param d The divisor d.
 */
#define ccn_mod_ws(ws, na, a, n, r, d) ccn_divmod_ws(ws, na, a, 0, NULL, n, r, d)
// Workspace-free variant; see ccn_divmod() below for the full contract.
#define ccn_mod(na, a, n, r, d) ccn_divmod(na, a, 0, NULL, n, r, d)
| 400 | |
| 401 | /*! @function ccn_neg |
| 402 | * @abstract Computes the two's complement of x. |
| 403 | * |
| 404 | * @param n Length of r and x |
| 405 | * @param r Result of the negation |
| 406 | * @param x Number to negate |
| 407 | */ |
| 408 | CC_NONNULL_ALL |
| 409 | void ccn_neg(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) x); |
| 410 | |
| 411 | /*! @function ccn_invert |
| 412 | * @abstract Computes x^-1 (mod 2^w). |
| 413 | * |
| 414 | * @param x Number to invert |
| 415 | * |
| 416 | * @return x^-1 (mod 2^w) |
| 417 | */ |
| 418 | CC_WARN_RESULT |
| 419 | CC_CONST CC_NONNULL_ALL |
| 420 | CC_INLINE cc_unit |
| 421 | ccn_invert(cc_unit x) |
| 422 | { |
| 423 | cc_assert(x & 1); |
| 424 | |
| 425 | // Initial precision is 5 bits. |
| 426 | cc_unit y = (3 * x) ^ 2; |
| 427 | |
| 428 | // Newton-Raphson iterations. |
| 429 | // Precision doubles with every step. |
| 430 | y *= 2 - y * x; |
| 431 | y *= 2 - y * x; |
| 432 | y *= 2 - y * x; |
| 433 | #if CCN_UNIT_SIZE == 8 |
| 434 | y *= 2 - y * x; |
| 435 | #endif |
| 436 | |
| 437 | cc_assert(y * x == 1); |
| 438 | return y; |
| 439 | } |
| 440 | |
| 441 | /*! @function ccn_div_exact_ws |
| 442 | * @abstract Computes q = a / d where a = 0 (mod d). |
| 443 | * |
| 444 | * @param ws Workspace |
| 445 | * @param n Length of q,a,d as a number of cc_units. |
| 446 | * @param q The resulting exact quotient. |
| 447 | * @param a The dividend a. |
| 448 | * @param d The divisor d. |
| 449 | */ |
| 450 | CC_NONNULL_ALL |
| 451 | void ccn_div_exact_ws(cc_ws_t ws, cc_size n, cc_unit *cc_counted_by(n) q, const cc_unit *cc_counted_by(n) a, const cc_unit *cc_counted_by(n) d); |
| 452 | |
| 453 | /*! @function ccn_divides1 |
| 454 | * @abstract Returns whether q divides x. |
| 455 | * |
| 456 | * @param n Length of x as a number of cc_units. |
| 457 | * @param x The dividend x. |
| 458 | * @param q The divisor q. |
| 459 | * |
| 460 | * @return True if q divides x without remainder, false otherwise. |
| 461 | */ |
| 462 | CC_WARN_RESULT |
| 463 | CC_NONNULL_ALL |
| 464 | bool ccn_divides1(cc_size n, const cc_unit *cc_counted_by(n)x, cc_unit q); |
| 465 | |
| 466 | /*! @function ccn_select |
| 467 | * @abstract Select r[i] in constant-time, not revealing i via cache-timing. |
| 468 | * |
| 469 | * @param start Start index. |
| 470 | * @param end End index (length of r). |
| 471 | * @param r Big int r. |
| 472 | * @param i Offset into r. |
| 473 | * |
 * @return r[i], or zero if i < start or i >= end.
| 475 | */ |
| 476 | CC_WARN_RESULT |
| 477 | CC_INLINE cc_unit |
| 478 | ccn_select(cc_size start, cc_size end, const cc_unit *cc_counted_by(end)r, cc_size i) |
| 479 | { |
| 480 | cc_unit ri = 0; |
| 481 | |
| 482 | for (cc_size j = start; j < end; j++) { |
| 483 | cc_size i_neq_j; // i≠j? |
| 484 | CC_HEAVISIDE_STEP(i_neq_j, i ^ j); |
| 485 | ri |= r[j] & ((cc_unit)i_neq_j - 1); |
| 486 | } |
| 487 | |
| 488 | return ri; |
| 489 | } |
| 490 | |
| 491 | /*! @function ccn_invmod_ws |
| 492 | * @abstract Computes the inverse of x modulo m, r = x^-1 (mod m). |
| 493 | * Returns an error if there's no inverse, i.e. gcd(x,m) ≠ 1. |
| 494 | * |
| 495 | * @discussion This is a very generic version of the binary XGCD algorithm. You |
| 496 | * don't want to use it when you have an odd modulus. |
| 497 | * |
| 498 | * This function is meant to be used by RSA key generation, for |
 * computation of d = e^-1 (mod lcm(p-1,q-1)), where m can be even.
| 500 | * |
| 501 | * x > m is allowed as long as xn == n, i.e. they occupy the same |
| 502 | * number of cc_units. |
| 503 | * |
| 504 | * @param ws Workspace. |
| 505 | * @param n Length of r and m as a number of cc_units. |
| 506 | * @param r The resulting inverse r. |
| 507 | * @param xn Length of x as a number of cc_units. |
| 508 | * @param x The number to invert. |
| 509 | * @param m The modulus. |
| 510 | * |
| 511 | * @return 0 on success, non-zero on failure. See cc_error.h for more details. |
| 512 | */ |
| 513 | CC_WARN_RESULT |
| 514 | int ccn_invmod_ws(cc_ws_t ws, cc_size n, cc_unit *cc_counted_by(n) r, cc_size xn, const cc_unit *cc_counted_by(xn) x, const cc_unit *cc_counted_by(n) m); |
| 515 | |
| 516 | /*! @function ccn_mux_seed_mask |
| 517 | * @abstract Refreshes the internal state of the PRNG used to mask cmov/cswap |
| 518 | * operations with a given seed. |
| 519 | * |
| 520 | * @discussion The seed should be of good entropy, i.e. generated by our default |
| 521 | * RNG. This function should be called before running algorithms that |
| 522 | * defend against side-channel attacks by using cmov/cswap. Examples |
| 523 | * are blinded modular exponentation (for RSA, DH, or MR) and EC |
| 524 | * scalar multiplication. |
| 525 | * |
| 526 | * @param seed A seed value. |
| 527 | */ |
| 528 | void ccn_mux_seed_mask(cc_unit seed); |
| 529 | |
| 530 | /*! @function ccn_divmod |
| 531 | * @abstract Computes a = q * d + r with r < d. |
| 532 | * |
| 533 | * @param na Length of a as a number of cc_units. |
| 534 | * @param a The dividend a. |
| 535 | * @param nq Length of q as a number of cc_units. |
| 536 | * @param q The quotient q. |
| 537 | * @param n Length of r and d as a number of cc_units. |
| 538 | * @param r The remainder r. |
| 539 | * @param d The divisor d. |
| 540 | * |
| 541 | * @return 0 on success, non-zero on failure. See cc_error.h for more details. |
| 542 | */ |
| 543 | CC_NONNULL((2, 7)) CC_WARN_RESULT |
| 544 | int ccn_divmod(cc_size na, const cc_unit *cc_counted_by(na) a, cc_size nq, cc_unit *cc_counted_by(nq) q, cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) d); |
| 545 | |
// Workspace variant of ccn_divmod() above; same parameter semantics,
// caller supplies the workspace ws.
CC_NONNULL((1, 3, 8))
void ccn_divmod_ws(cc_ws_t ws, cc_size na, const cc_unit *cc_counted_by(na) a, cc_size nq, cc_unit *cc_counted_by(nq) q, cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) d);

// Zeroizes r and additional cc_unit buffers passed as varargs; CC_SENTINEL
// indicates the list must be NULL-terminated. Buffer lengths presumably all
// n units — confirm against the implementation.
CC_NONNULL((2)) CC_SENTINEL
void ccn_zero_multi(cc_size n, cc_unit *r, ...);

// r := s + t over count units (workspace variant). Returns a cc_unit —
// presumably the carry, like ccn_addn() below; confirm in the implementation.
CC_NONNULL((3, 4, 5))
cc_unit ccn_add_ws(cc_ws_t ws, cc_size count, cc_unit *r, const cc_unit *s, const cc_unit *t);

// r := s - t over count units (workspace variant). Returns a cc_unit —
// presumably the borrow, like ccn_subn() below; confirm in the implementation.
CC_NONNULL((3, 4, 5))
cc_unit ccn_sub_ws(cc_ws_t ws, cc_size count, cc_unit *r, const cc_unit *s, const cc_unit *t);

// r := s + v, adding the single unit v to the n-unit number s.
CC_NONNULL((3, 4))
cc_unit ccn_add1_ws(cc_ws_t ws, cc_size n, cc_unit *r, const cc_unit *s, cc_unit v);
| 560 | |
| 561 | /* s + t -> r return carry if result doesn't fit in n bits |
| 562 | * { N bit, NT bit -> N bit NT <= N} N = n * sizeof(cc_unit) * 8 */ |
| 563 | CC_NONNULL_ALL |
| 564 | cc_unit ccn_addn(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, cc_size nt, const cc_unit *cc_counted_by(nt) t); |
| 565 | |
| 566 | /* s - v -> r return 1 iff v > s return 0 otherwise. |
| 567 | * { N bit, sizeof(cc_unit) * 8 bit -> N bit } N = n * sizeof(cc_unit) * 8 */ |
| 568 | CC_NONNULL_ALL |
| 569 | cc_unit ccn_sub1(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, cc_unit v); |
| 570 | |
| 571 | /* s - t -> r return 1 iff t > s |
| 572 | * { N bit, NT bit -> N bit NT <= N} N = n * sizeof(cc_unit) * 8 */ |
| 573 | CC_NONNULL_ALL |
| 574 | cc_unit ccn_subn(cc_size n, cc_unit *cc_counted_by(n) r, const cc_unit *cc_counted_by(n) s, cc_size nt, const cc_unit *cc_counted_by(nt) t); |
| 575 | |
| 576 | /* Return the number of used units after stripping leading 0 units. */ |
| 577 | CC_NONNULL_ALL |
| 578 | cc_size ccn_n(cc_size n, const cc_unit *cc_counted_by(n)s); |
| 579 | |
| 580 | /* Make a ccn of size ccn_nof(nbits) units with up to nbits sized random value. */ |
| 581 | CC_NONNULL_ALL |
| 582 | int ccn_random_bits(cc_size nbits, cc_unit *cc_unsafe_indexable r, struct ccrng_state *rng); |
| 583 | |
| 584 | /* Like ccn_random_bits, but uses ccrng_generate_fips under the hood. */ |
| 585 | CC_NONNULL_ALL |
| 586 | int ccn_random_bits_fips(cc_size nbits, cc_unit *cc_unsafe_indexable r, struct ccrng_state *rng); |
| 587 | |
// Joint Sparse Form recoding context for EC double-scalar multiplication.
// Initialized by ccn_recode_jsf_init() and read by the column accessors below.
struct ccn_rjsf_state {
    uint8_t u[2];     // Per-scalar recoding state — see ccn_recode_jsf_init().
    const cc_unit *s; // First scalar (pointer retained, not copied).
    const cc_unit *t; // Second scalar (pointer retained, not copied).
};
| 594 | |
| 595 | /*! @function ccn_recode_jsf_init |
| 596 | * @abstract Initialize Joint Sparse Form recoding for EC scalars s and t. |
| 597 | * |
| 598 | * @param r JSF-recoding context. |
| 599 | * @param nbits Max. bit length of s and t. |
| 600 | * @param s Scalar to be recoded. |
| 601 | * @param t Scalar to be recoded. |
| 602 | */ |
| 603 | CC_NONNULL_ALL |
| 604 | void ccn_recode_jsf_init(struct ccn_rjsf_state *r, size_t nbits, const cc_unit *s, const cc_unit *t); |
| 605 | |
| 606 | /*! @function ccn_recode_jsf_column |
| 607 | * @abstract Retrieve JSF-recoded digits for column k. |
| 608 | * |
| 609 | * @param r JSF-recoding context. |
| 610 | * @param k Column index. |
| 611 | * @param c Digits (output). |
| 612 | */ |
| 613 | CC_NONNULL_ALL |
| 614 | void ccn_recode_jsf_column(struct ccn_rjsf_state *r, size_t k, int c[2]); |
| 615 | |
| 616 | /*! @function ccn_recode_jsf_index |
| 617 | * @abstract Retrieve the lookup table index for given column digits. |
| 618 | * |
| 619 | * @discussion For EC double-scalar multiplication, we assume a lookup table |
| 620 | * holding the four values [P, Q, P+Q, P-Q], in the same order. |
| 621 | * |
| 622 | * @param c Column digits. |
| 623 | * |
| 624 | * @return The lookup table index. |
| 625 | */ |
| 626 | CC_NONNULL_ALL CC_WARN_RESULT |
| 627 | size_t ccn_recode_jsf_index(int c[2]); |
| 628 | |
| 629 | /*! @function ccn_recode_jsf_direction |
| 630 | * @abstract Retrieve the "direction" for given column digits. |
| 631 | * |
| 632 | * @discussion For EC double-scalar multiplication, we assume a lookup table |
| 633 | * holding the four values [P, Q, P+Q, P-Q]. Negating each of |
| 634 | * these also yields [-P, -Q, -P-Q, -P+Q]. |
| 635 | * |
| 636 | * An EC double-and-add algorithm will either add or subtract a |
| 637 | * precomputed point to cover all possible digit combinations of two |
| 638 | * JSF-recoded EC scalars. |
| 639 | * |
| 640 | * @param c Column digits. |
| 641 | * |
| 642 | * @return The "direction". 1 for addition. -1 for subtraction. |
| 643 | */ |
| 644 | CC_NONNULL_ALL CC_WARN_RESULT |
| 645 | int ccn_recode_jsf_direction(int c[2]); |
| 646 | |
| 647 | /*! @function ccn_read_le_bytes |
| 648 | * @abstract Copies a number given as little-endian bytes into `out`. |
| 649 | * |
| 650 | * @param n Number of limbs of `out`. |
| 651 | * @param in Number to parse as little-endian bytes. |
| 652 | * @param out Output. |
| 653 | */ |
| 654 | CC_NONNULL_ALL |
| 655 | CC_INLINE void |
| 656 | ccn_read_le_bytes(cc_size n, const uint8_t *in, cc_unit *out) |
| 657 | { |
| 658 | for (cc_size i = 0; i < n; i++) { |
| 659 | out[i] = cc_load_le(y: &in[i * CCN_UNIT_SIZE]); |
| 660 | } |
| 661 | } |
| 662 | |
| 663 | /*! @function ccn_write_le_bytes |
| 664 | * @abstract Encodes a number as little-endian bytes into `out`. |
| 665 | * |
| 666 | * @param n Number of limbs of `in`. |
| 667 | * @param in Number to encode as little-endian bytes. |
| 668 | * @param out Output. |
| 669 | */ |
| 670 | CC_NONNULL_ALL |
| 671 | CC_INLINE void |
| 672 | ccn_write_le_bytes(cc_size n, const cc_unit *in, uint8_t *out) |
| 673 | { |
| 674 | for (cc_size i = 0; i < n; i++) { |
| 675 | cc_store_le(x: in[i], y: &out[i * CCN_UNIT_SIZE]); |
| 676 | } |
| 677 | } |
| 678 | |
| 679 | /*! @function ccn_recode_ssw |
| 680 | * @abstract Recodes a given number into signed sliding windows. |
| 681 | * |
| 682 | * @param n Number of limbs of `s`. |
| 683 | * @param s Number to recode. |
| 684 | * @param w Recode width, for windows in range (-2^w,2^w). |
| 685 | * @param r Output for the computed signed sliding windows. |
| 686 | */ |
| 687 | CC_NONNULL_ALL |
| 688 | void ccn_recode_ssw(cc_size n, const cc_unit *s, int w, int8_t *r); |
| 689 | |
| 690 | #endif // _CORECRYPTO_CCN_INTERNAL_H |
| 691 | |