/* Copyright (c) (2011,2014,2015,2018,2019,2021,2023) Apple Inc. All rights reserved.
 *
 * corecrypto is licensed under Apple Inc.’s Internal Use License Agreement (which
 * is contained in the License.txt file distributed with corecrypto) and only to
 * people who accept that license. IMPORTANT: Any license rights granted to you by
 * Apple Inc. (if any) are limited to internal use within your organization only on
 * devices and computers you own or control, for the sole purpose of verifying the
 * security characteristics and correct functioning of the Apple Software. You may
 * not, directly or indirectly, redistribute the Apple Software or any portions thereof.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <corecrypto/cc_config.h>
#include "ccmode_internal.h"
#include "ccn_internal.h"

#if (CCN_UNIT_SIZE == 8) && CC_DUNIT_SUPPORTED

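/*
 * The bmul64() helper below computes a 64x64 -> 128-bit carry-less
 * (GF(2)[x]) product out of ordinary integer multiplications. Each mask
 * m1..m5 keeps every fifth bit of an operand, so a masked value has at
 * most ceil(64/5) = 13 bits set. In the integer product of two masked
 * values, at most 13 partial products can collide on any one bit position,
 * and since 13 < 2^5 the carries die out before they reach the next bit
 * kept by the same mask. Each of the 25 cross products is therefore
 * correct mod 2 at the bit positions selected for it, and the masking of z
 * discards the stray carry bits. Because there are no key-dependent table
 * lookups, this runs in constant time on CPUs whose integer multiplier is
 * constant-time.
 */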
// Binary multiplication, x * y = (r_hi << 64) | r_lo.
static void
bmul64(uint64_t x, uint64_t y, uint64_t *r_hi, uint64_t *r_lo)
{
    cc_dunit x1, x2, x3, x4, x5;
    cc_dunit y1, y2, y3, y4, y5;
    cc_dunit r, z;

    const cc_unit m1 = 0x1084210842108421;
    const cc_unit m2 = 0x2108421084210842;
    const cc_unit m3 = 0x4210842108421084;
    const cc_unit m4 = 0x8421084210842108;
    const cc_unit m5 = 0x0842108421084210;

    x1 = x & m1;
    y1 = y & m1;
    x2 = x & m2;
    y2 = y & m2;
    x3 = x & m3;
    y3 = y & m3;
    x4 = x & m4;
    y4 = y & m4;
    x5 = x & m5;
    y5 = y & m5;

    z = (x1 * y1) ^ (x2 * y5) ^ (x3 * y4) ^ (x4 * y3) ^ (x5 * y2);
    r = z & (((cc_dunit)m2 << 64) | m1);
    z = (x1 * y2) ^ (x2 * y1) ^ (x3 * y5) ^ (x4 * y4) ^ (x5 * y3);
    r |= z & (((cc_dunit)m3 << 64) | m2);
    z = (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y5) ^ (x5 * y4);
    r |= z & (((cc_dunit)m4 << 64) | m3);
    z = (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1) ^ (x5 * y5);
    r |= z & (((cc_dunit)m5 << 64) | m4);
    z = (x1 * y5) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2) ^ (x5 * y1);
    r |= z & (((cc_dunit)m1 << 64) | m5);

    *r_hi = (uint64_t)(r >> 64);
    *r_lo = (uint64_t)r;
}

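/*
 * c = a * b in GF(2^128) for 16-byte GHASH field elements, using GCM's
 * reflected bit order and the polynomial x^128 + x^7 + x^2 + x + 1.
 * The result is computed into locals before c is written, so c may alias
 * a or b.
 */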
void
ccmode_gcm_gf_mult_64(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
    cc_unit a_lo, a_hi, b_lo, b_hi;
    cc_unit z0_lo, z0_hi, z1_lo, z1_hi, z2_lo, z2_hi;
    cc_dunit z_hi, z_lo;

    a_lo = cc_load64_be(a + 8);
    a_hi = cc_load64_be(a);

    b_lo = cc_load64_be(b + 8);
    b_hi = cc_load64_be(b);

    // Binary Karatsuba multiplication z = a * b: three 64x64 carry-less
    // products, with the middle term (a_hi ^ a_lo) * (b_hi ^ b_lo) folded
    // against z0 and z2.
    bmul64(a_lo, b_lo, &z0_hi, &z0_lo);
    bmul64(a_hi, b_hi, &z2_hi, &z2_lo);
    bmul64(a_hi ^ a_lo, b_hi ^ b_lo, &z1_hi, &z1_lo);
    z1_hi ^= z2_hi ^ z0_hi;
    z1_lo ^= z2_lo ^ z0_lo;
    z_hi = ((cc_dunit)z2_hi << 64) | (z2_lo ^ z1_hi);
    z_lo = (((cc_dunit)z0_hi << 64) | z0_lo) ^ (((cc_dunit)z1_lo) << 64);

    // The big-endian loads leave the operands bit-reversed relative to
    // GCM's bit order, so z holds reflected(a * b) shifted right by one;
    // shift left by one to get reflected(a * b).
    z_hi = (z_hi << 1) | (z_lo >> 127);
    z_lo <<= 1;

    // Reduce modulo x^128 + x^7 + x^2 + x + 1. In the reflected
    // representation z_lo holds the high-degree coefficients: first fold
    // back the few reduced bits that still land above degree 127, then
    // fold all of z_lo into z_hi.
    z_lo ^= (z_lo << 126) ^ (z_lo << 121);
    z_hi ^= z_lo ^ (z_lo >> 1) ^ (z_lo >> 2) ^ (z_lo >> 7);

    cc_store64_be((cc_unit)z_hi, c + 8);
    cc_store64_be((cc_unit)(z_hi >> 64), c);
}

#endif

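/*
 * 32-bit variant of the same trick, for builds without a 128-bit cc_dunit:
 * each mask keeps every fourth bit, so a masked operand has at most 8 bits
 * set and at most 8 partial products can collide on a bit position; since
 * 8 < 2^4, carries never reach the next kept bit.
 */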
// Binary multiplication, x * y = (r_hi << 32) | r_lo.
static void
bmul32(uint32_t x, uint32_t y, uint32_t *r_hi, uint32_t *r_lo)
{
    uint32_t x0, x1, x2, x3;
    uint32_t y0, y1, y2, y3;
    uint64_t z, z0, z1, z2, z3;

    const uint32_t m1 = 0x11111111;
    const uint32_t m2 = 0x22222222;
    const uint32_t m4 = 0x44444444;
    const uint32_t m8 = 0x88888888;

    x0 = x & m1;
    x1 = x & m2;
    x2 = x & m4;
    x3 = x & m8;
    y0 = y & m1;
    y1 = y & m2;
    y2 = y & m4;
    y3 = y & m8;

    z0 = ((uint64_t)x0 * y0) ^ ((uint64_t)x1 * y3) ^ ((uint64_t)x2 * y2) ^ ((uint64_t)x3 * y1);
    z1 = ((uint64_t)x0 * y1) ^ ((uint64_t)x1 * y0) ^ ((uint64_t)x2 * y3) ^ ((uint64_t)x3 * y2);
    z2 = ((uint64_t)x0 * y2) ^ ((uint64_t)x1 * y1) ^ ((uint64_t)x2 * y0) ^ ((uint64_t)x3 * y3);
    z3 = ((uint64_t)x0 * y3) ^ ((uint64_t)x1 * y2) ^ ((uint64_t)x2 * y1) ^ ((uint64_t)x3 * y0);

    z0 &= ((uint64_t)m1 << 32) | m1;
    z1 &= ((uint64_t)m2 << 32) | m2;
    z2 &= ((uint64_t)m4 << 32) | m4;
    z3 &= ((uint64_t)m8 << 32) | m8;
    z = z0 | z1 | z2 | z3;

    *r_hi = (uint32_t)(z >> 32);
    *r_lo = (uint32_t)z;
}

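/*
 * Same computation as ccmode_gcm_gf_mult_64, with every 64-bit quantity
 * split into 32-bit halves: two levels of Karatsuba reduce the 128x128-bit
 * product to nine 32x32-bit carry-less multiplications.
 */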
void
ccmode_gcm_gf_mult_32(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
    uint32_t a_hi_h, a_hi_l, a_lo_h, a_lo_l;
    uint32_t b_hi_h, b_hi_l, b_lo_h, b_lo_l;

    uint64_t z_hi_h, z_hi_l, z_lo_h, z_lo_l;
    uint32_t z0_a_h, z0_a_l, z0_b_h, z0_b_l;
    uint32_t z1_a_h, z1_a_l, z1_b_h, z1_b_l;
    uint32_t z2_a_h, z2_a_l, z2_b_h, z2_b_l;

    uint32_t t_hi, t_lo;

    a_lo_l = cc_load32_be(a + 12);
    a_lo_h = cc_load32_be(a + 8);
    a_hi_l = cc_load32_be(a + 4);
    a_hi_h = cc_load32_be(a);

    uint32_t a_hiXlo_h = a_hi_h ^ a_lo_h;
    uint32_t a_hiXlo_l = a_hi_l ^ a_lo_l;

    b_lo_l = cc_load32_be(b + 12);
    b_lo_h = cc_load32_be(b + 8);
    b_hi_l = cc_load32_be(b + 4);
    b_hi_h = cc_load32_be(b);

    uint32_t b_hiXlo_h = b_hi_h ^ b_lo_h;
    uint32_t b_hiXlo_l = b_hi_l ^ b_lo_l;

    // Binary Karatsuba multiplication z = a * b.

    // a_lo * b_lo (64 bits)
    bmul32(a_lo_h, b_lo_h, &z0_a_h, &z0_a_l);
    bmul32(a_lo_l, b_lo_l, &z0_b_h, &z0_b_l);
    bmul32(a_lo_h ^ a_lo_l, b_lo_h ^ b_lo_l, &t_hi, &t_lo);
    t_hi ^= z0_a_h ^ z0_b_h;
    t_lo ^= z0_a_l ^ z0_b_l;
    z0_a_l ^= t_hi;
    z0_b_h ^= t_lo;

    // a_hi * b_hi (64 bits)
    bmul32(a_hi_h, b_hi_h, &z2_a_h, &z2_a_l);
    bmul32(a_hi_l, b_hi_l, &z2_b_h, &z2_b_l);
    bmul32(a_hi_h ^ a_hi_l, b_hi_h ^ b_hi_l, &t_hi, &t_lo);
    t_hi ^= z2_a_h ^ z2_b_h;
    t_lo ^= z2_a_l ^ z2_b_l;
    z2_a_l ^= t_hi;
    z2_b_h ^= t_lo;

    // (a_hi ^ a_lo) * (b_hi ^ b_lo) (64 bits)
    bmul32(a_hiXlo_h, b_hiXlo_h, &z1_a_h, &z1_a_l);
    bmul32(a_hiXlo_l, b_hiXlo_l, &z1_b_h, &z1_b_l);
    bmul32(a_hiXlo_h ^ a_hiXlo_l, b_hiXlo_h ^ b_hiXlo_l, &t_hi, &t_lo);
    t_hi ^= z1_a_h ^ z1_b_h;
    t_lo ^= z1_a_l ^ z1_b_l;
    z1_a_l ^= t_hi;
    z1_b_h ^= t_lo;

    // Another round of Karatsuba for a 128-bit result.
    z1_a_h ^= z0_a_h ^ z2_a_h;
    z1_a_l ^= z0_a_l ^ z2_a_l;
    z1_b_h ^= z0_b_h ^ z2_b_h;
    z1_b_l ^= z0_b_l ^ z2_b_l;
    z_hi_h = ((uint64_t)z2_a_h << 32) | z2_a_l;
    z_hi_l = (((uint64_t)z2_b_h << 32) | z2_b_l) ^ (((uint64_t)z1_a_h << 32) | z1_a_l);
    z_lo_h = (((uint64_t)z0_a_h << 32) | z0_a_l) ^ (((uint64_t)z1_b_h << 32) | z1_b_l);
    z_lo_l = ((uint64_t)z0_b_h << 32) | z0_b_l;

    // Shift left by one to get reflected(a * b).
    z_hi_h = (z_hi_h << 1) | (z_hi_l >> 63);
    z_hi_l = (z_hi_l << 1) | (z_lo_h >> 63);
    z_lo_h = (z_lo_h << 1) | (z_lo_l >> 63);
    z_lo_l <<= 1;

    // Reduce modulo x^128 + x^7 + x^2 + x + 1 (reflected), exactly as in
    // the 64-bit path, with the four 64-bit words standing in for the two
    // 128-bit halves.
    z_lo_h ^= (z_lo_l << 62) ^ (z_lo_l << 57);
    z_hi_h ^= z_lo_h ^ (z_lo_h >> 1) ^ (z_lo_h >> 2) ^ (z_lo_h >> 7);
    z_hi_l ^= z_lo_l ^ (z_lo_l >> 1) ^ (z_lo_l >> 2) ^ (z_lo_l >> 7);
    z_hi_l ^= (z_lo_h << 63) ^ (z_lo_h << 62) ^ (z_lo_h << 57);

    cc_store64_be(z_hi_l, c + 8);
    cc_store64_be(z_hi_h, c);
}

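/*
 * Compile-time dispatch: use the 64-bit path when cc_dunit provides a
 * 128-bit integer type, and fall back to the 32-bit path otherwise.
 */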
void
ccmode_gcm_gf_mult(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
#if (CCN_UNIT_SIZE == 8) && CC_DUNIT_SUPPORTED
    ccmode_gcm_gf_mult_64(a, b, c);
#else
    ccmode_gcm_gf_mult_32(a, b, c);
#endif
}
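
/*
 * Usage sketch (illustrative only, not part of this file): one GHASH update
 * step folds a 16-byte block into the accumulator x and multiplies by the
 * hash key h, i.e. x = (x ^ block) * h. Assuming cc_xor() from
 * <corecrypto/cc.h>:
 *
 *     unsigned char x[16] = { 0 };        // GHASH accumulator
 *     cc_xor(16, x, x, block);            // fold in the next 16-byte block
 *     ccmode_gcm_gf_mult(x, h, x);        // multiply by the hash key
 */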