1 /*
2 * Copyright 2011-2024 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26 /*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30 #include "internal/deprecated.h"
31
32 /*
33 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
34 *
35 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
36 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
37 * work which got its smarts from Daniel J. Bernstein's work on the same.
38 */
39
40 #include <openssl/opensslconf.h>
41
42 #include <stdint.h>
43 #include <string.h>
44 #include <openssl/err.h>
45 #include "ec_local.h"
46
47 #include "internal/numbers.h"
48
49 #ifndef INT128_MAX
50 #error "Your compiler doesn't appear to support 128-bit integer types"
51 #endif
52
/* Convenient short aliases for the fixed-width integer types used below. */
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;

/*
 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
 * can serialize an element of this field into 32 bytes. We call this an
 * felem_bytearray.
 */

typedef u8 felem_bytearray[32];
64
/*
 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
 * values are big-endian. Order of entries: p, a, b, Gx, Gy.
 */
static const felem_bytearray nistp256_curve_params[5] = {
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
    {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */
     0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
     0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
     0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
    {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
     0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
     0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
     0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
    {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
     0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
     0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
     0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
};
91
/*-
 * The representation of field elements.
 * ------------------------------------
 *
 * We represent field elements with either four 128-bit values, eight 128-bit
 * values, or four 64-bit values. The field element represented is:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192  (mod p)
 * or:
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448  (mod p)
 *
 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
 * apart, but are 128-bits wide, the most significant bits of each limb overlap
 * with the least significant bits of the next.
 *
 * A field element with four limbs is an 'felem'. One with eight limbs is a
 * 'longfelem'
 *
 * A field element with four, 64-bit values is called a 'smallfelem'. Small
 * values are used as intermediate values before multiplication.
 */

#define NLIMBS 4

typedef uint128_t limb;
typedef limb felem[NLIMBS];
typedef limb longfelem[NLIMBS * 2];
typedef u64 smallfelem[NLIMBS];

/* This is the value of the prime as four 64-bit words, little-endian. */
static const u64 kPrime[4] = {
    0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul
};
/* Mask selecting the low 63 bits of a u64 (used in felem_shrink). */
static const u64 bottom63bits = 0x7ffffffffffffffful;
125
126 /*
127 * bin32_to_felem takes a little-endian byte array and converts it into felem
128 * form. This assumes that the CPU is little-endian.
129 */
bin32_to_felem(felem out,const u8 in[32])130 static void bin32_to_felem(felem out, const u8 in[32])
131 {
132 out[0] = *((u64 *)&in[0]);
133 out[1] = *((u64 *)&in[8]);
134 out[2] = *((u64 *)&in[16]);
135 out[3] = *((u64 *)&in[24]);
136 }
137
138 /*
139 * smallfelem_to_bin32 takes a smallfelem and serializes into a little
140 * endian, 32 byte array. This assumes that the CPU is little-endian.
141 */
smallfelem_to_bin32(u8 out[32],const smallfelem in)142 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
143 {
144 *((u64 *)&out[0]) = in[0];
145 *((u64 *)&out[8]) = in[1];
146 *((u64 *)&out[16]) = in[2];
147 *((u64 *)&out[24]) = in[3];
148 }
149
150 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
BN_to_felem(felem out,const BIGNUM * bn)151 static int BN_to_felem(felem out, const BIGNUM *bn)
152 {
153 felem_bytearray b_out;
154 int num_bytes;
155
156 if (BN_is_negative(bn)) {
157 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
158 return 0;
159 }
160 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
161 if (num_bytes < 0) {
162 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
163 return 0;
164 }
165 bin32_to_felem(out, b_out);
166 return 1;
167 }
168
169 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
smallfelem_to_BN(BIGNUM * out,const smallfelem in)170 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
171 {
172 felem_bytearray b_out;
173 smallfelem_to_bin32(b_out, in);
174 return BN_lebin2bn(b_out, sizeof(b_out), out);
175 }
176
177 /*-
178 * Field operations
179 * ----------------
180 */
181
/* smallfelem_one sets |out| to the field element 1. */
static void smallfelem_one(smallfelem out)
{
    unsigned i;

    out[0] = 1;
    for (i = 1; i < NLIMBS; i++)
        out[i] = 0;
}
189
/* smallfelem_assign copies |in| into |out|. */
static void smallfelem_assign(smallfelem out, const smallfelem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
197
/* felem_assign copies |in| into |out|. */
static void felem_assign(felem out, const felem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
205
206 /* felem_sum sets out = out + in. */
felem_sum(felem out,const felem in)207 static void felem_sum(felem out, const felem in)
208 {
209 out[0] += in[0];
210 out[1] += in[1];
211 out[2] += in[2];
212 out[3] += in[3];
213 }
214
215 /* felem_small_sum sets out = out + in. */
felem_small_sum(felem out,const smallfelem in)216 static void felem_small_sum(felem out, const smallfelem in)
217 {
218 out[0] += in[0];
219 out[1] += in[1];
220 out[2] += in[2];
221 out[3] += in[3];
222 }
223
224 /* felem_scalar sets out = out * scalar */
felem_scalar(felem out,const u64 scalar)225 static void felem_scalar(felem out, const u64 scalar)
226 {
227 out[0] *= scalar;
228 out[1] *= scalar;
229 out[2] *= scalar;
230 out[3] *= scalar;
231 }
232
233 /* longfelem_scalar sets out = out * scalar */
longfelem_scalar(longfelem out,const u64 scalar)234 static void longfelem_scalar(longfelem out, const u64 scalar)
235 {
236 out[0] *= scalar;
237 out[1] *= scalar;
238 out[2] *= scalar;
239 out[3] *= scalar;
240 out[4] *= scalar;
241 out[5] *= scalar;
242 out[6] *= scalar;
243 out[7] *= scalar;
244 }
245
#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
#define two105 (((limb)1) << 105)
#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)

/*
 * zero105 is 0 mod p, with every limb close to 2^105. Adding it to an felem
 * before a subtraction keeps each limb non-negative (see felem_diff).
 */
static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
252
253 /*-
254 * smallfelem_neg sets |out| to |-small|
255 * On exit:
256 * out[i] < out[i] + 2^105
257 */
smallfelem_neg(felem out,const smallfelem small)258 static void smallfelem_neg(felem out, const smallfelem small)
259 {
260 /* In order to prevent underflow, we subtract from 0 mod p. */
261 out[0] = zero105[0] - small[0];
262 out[1] = zero105[1] - small[1];
263 out[2] = zero105[2] - small[2];
264 out[3] = zero105[3] - small[3];
265 }
266
267 /*-
268 * felem_diff subtracts |in| from |out|
269 * On entry:
270 * in[i] < 2^104
271 * On exit:
272 * out[i] < out[i] + 2^105
273 */
felem_diff(felem out,const felem in)274 static void felem_diff(felem out, const felem in)
275 {
276 /*
277 * In order to prevent underflow, we add 0 mod p before subtracting.
278 */
279 out[0] += zero105[0];
280 out[1] += zero105[1];
281 out[2] += zero105[2];
282 out[3] += zero105[3];
283
284 out[0] -= in[0];
285 out[1] -= in[1];
286 out[2] -= in[2];
287 out[3] -= in[3];
288 }
289
#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
#define two107 (((limb)1) << 107)
#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)

/*
 * zero107 is 0 mod p, with every limb close to 2^107; a larger variant of
 * zero105 for subtractions with bigger inputs (see felem_diff_zero107).
 */
static const felem zero107 = {
    two107m43m11, two107, two107m43p11, two107m43p11
};
298
299 /*-
300 * An alternative felem_diff for larger inputs |in|
301 * felem_diff_zero107 subtracts |in| from |out|
302 * On entry:
303 * in[i] < 2^106
304 * On exit:
305 * out[i] < out[i] + 2^107
306 */
felem_diff_zero107(felem out,const felem in)307 static void felem_diff_zero107(felem out, const felem in)
308 {
309 /*
310 * In order to prevent underflow, we add 0 mod p before subtracting.
311 */
312 out[0] += zero107[0];
313 out[1] += zero107[1];
314 out[2] += zero107[2];
315 out[3] += zero107[3];
316
317 out[0] -= in[0];
318 out[1] -= in[1];
319 out[2] -= in[2];
320 out[3] -= in[3];
321 }
322
323 /*-
324 * longfelem_diff subtracts |in| from |out|
325 * On entry:
326 * in[i] < 7*2^67
327 * On exit:
328 * out[i] < out[i] + 2^70 + 2^40
329 */
longfelem_diff(longfelem out,const longfelem in)330 static void longfelem_diff(longfelem out, const longfelem in)
331 {
332 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
333 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
334 static const limb two70 = (((limb)1) << 70);
335 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
336 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
337
338 /* add 0 mod p to avoid underflow */
339 out[0] += two70m8p6;
340 out[1] += two70p40;
341 out[2] += two70;
342 out[3] += two70m40m38p6;
343 out[4] += two70m6;
344 out[5] += two70m6;
345 out[6] += two70m6;
346 out[7] += two70m6;
347
348 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
349 out[0] -= in[0];
350 out[1] -= in[1];
351 out[2] -= in[2];
352 out[3] -= in[3];
353 out[4] -= in[4];
354 out[5] -= in[5];
355 out[6] -= in[6];
356 out[7] -= in[7];
357 }
358
#define two64m0 (((limb)1) << 64) - 1
#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
#define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
#define two64m32 (((limb)1) << 64) - (((limb)1) << 32)

/* zero110 is 0 mod p; used by felem_shrink to keep limbs non-negative. */
static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
366
/*-
 * felem_shrink converts an felem into a smallfelem. The result isn't quite
 * minimal as the value may be greater than p.
 *
 * All branches here are on public data: the conditional subtraction of
 * kPrime is done with arithmetic masks, so the function is constant time.
 *
 * On entry:
 *   in[i] < 2^109
 * On exit:
 *   out[i] < 2^64
 */
static void felem_shrink(smallfelem out, const felem in)
{
    felem tmp;
    u64 a, b, mask;
    u64 high, low;
    static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */

    /* Carry 2->3 */
    tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
    /* tmp[3] < 2^110 */

    tmp[2] = zero110[2] + (u64)in[2];
    tmp[0] = zero110[0] + in[0];
    tmp[1] = zero110[1] + in[1];
    /* tmp[0] < 2^110, tmp[1] < 2^111, tmp[2] < 2^65 */

    /*
     * We perform two partial reductions where we eliminate the high-word of
     * tmp[3]. We don't update the other words till the end.
     */
    a = tmp[3] >> 64;           /* a < 2^46 */
    tmp[3] = (u64)tmp[3];
    tmp[3] -= a;
    tmp[3] += ((limb)a) << 32;
    /* tmp[3] < 2^79 */

    b = a;
    a = tmp[3] >> 64;           /* a < 2^15 */
    b += a;                     /* b < 2^46 + 2^15 < 2^47 */
    tmp[3] = (u64)tmp[3];
    tmp[3] -= a;
    tmp[3] += ((limb)a) << 32;
    /* tmp[3] < 2^64 + 2^47 */

    /*
     * This adjusts the other two words to complete the two partial
     * reductions.
     */
    tmp[0] += b;
    tmp[1] -= (((limb)b) << 32);

    /*
     * In order to make space in tmp[3] for the carry from 2 -> 3, we
     * conditionally subtract kPrime if tmp[3] is large enough.
     */
    high = (u64)(tmp[3] >> 64);
    /* As tmp[3] < 2^65, high is either 1 or 0 */
    high = 0 - high;
    /*-
     * high is:
     *   all ones   if the high word of tmp[3] is 1
     *   all zeros  if the high word of tmp[3] if 0
     */
    low = (u64)tmp[3];
    mask = 0 - (low >> 63);
    /*-
     * mask is:
     *   all ones   if the MSB of low is 1
     *   all zeros  if the MSB of low if 0
     */
    low &= bottom63bits;
    low -= kPrime3Test;
    /* if low was greater than kPrime3Test then the MSB is zero */
    low = ~low;
    low = 0 - (low >> 63);
    /*-
     * low is:
     *   all ones   if low was > kPrime3Test
     *   all zeros  if low was <= kPrime3Test
     */
    mask = (mask & low) | high;
    tmp[0] -= mask & kPrime[0];
    tmp[1] -= mask & kPrime[1];
    /* kPrime[2] is zero, so omitted */
    tmp[3] -= mask & kPrime[3];
    /* tmp[3] < 2^64 - 2^32 + 1 */

    /* Propagate the remaining carries so every limb fits in 64 bits. */
    tmp[1] += ((u64)(tmp[0] >> 64));
    tmp[0] = (u64)tmp[0];
    tmp[2] += ((u64)(tmp[1] >> 64));
    tmp[1] = (u64)tmp[1];
    tmp[3] += ((u64)(tmp[2] >> 64));
    tmp[2] = (u64)tmp[2];
    /* tmp[i] < 2^64 */

    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}
466
467 /* smallfelem_expand converts a smallfelem to an felem */
smallfelem_expand(felem out,const smallfelem in)468 static void smallfelem_expand(felem out, const smallfelem in)
469 {
470 out[0] = in[0];
471 out[1] = in[1];
472 out[2] = in[2];
473 out[3] = in[3];
474 }
475
/*-
 * smallfelem_square sets |out| = |small|^2
 *
 * Schoolbook squaring: each cross product small[i]*small[j] (i != j)
 * contributes twice, either via an explicit double-add or a deferred
 * "*= 2" once all terms of that limb are accumulated.
 *
 * On entry:
 *   small[i] < 2^64
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void smallfelem_square(longfelem out, const smallfelem small)
{
    limb a;
    u64 high, low;

    /* small[0] * small[0] */
    a = ((uint128_t)small[0]) * small[0];
    low = a;
    high = a >> 64;
    out[0] = low;
    out[1] = high;

    /* small[0] * small[1], counted twice */
    a = ((uint128_t)small[0]) * small[1];
    low = a;
    high = a >> 64;
    out[1] += low;
    out[1] += low;
    out[2] = high;

    /* small[0] * small[2]; the "*= 2" also doubles the high word above */
    a = ((uint128_t)small[0]) * small[2];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[2] *= 2;
    out[3] = high;

    /* small[0] * small[3] */
    a = ((uint128_t)small[0]) * small[3];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] = high;

    /* small[1] * small[2] */
    a = ((uint128_t)small[1]) * small[2];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[3] *= 2;
    out[4] += high;

    /* small[1] * small[1] (diagonal term, counted once) */
    a = ((uint128_t)small[1]) * small[1];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] += high;

    /* small[1] * small[3] */
    a = ((uint128_t)small[1]) * small[3];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[4] *= 2;
    out[5] = high;

    /* small[2] * small[3] */
    a = ((uint128_t)small[2]) * small[3];
    low = a;
    high = a >> 64;
    out[5] += low;
    out[5] *= 2;
    out[6] = high;
    out[6] += high;

    /* small[2] * small[2] (diagonal term) */
    a = ((uint128_t)small[2]) * small[2];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] += high;

    /* small[3] * small[3] (diagonal term) */
    a = ((uint128_t)small[3]) * small[3];
    low = a;
    high = a >> 64;
    out[6] += low;
    out[7] = high;
}
554
555 /*-
556 * felem_square sets |out| = |in|^2
557 * On entry:
558 * in[i] < 2^109
559 * On exit:
560 * out[i] < 7 * 2^64 < 2^67
561 */
felem_square(longfelem out,const felem in)562 static void felem_square(longfelem out, const felem in)
563 {
564 u64 small[4];
565 felem_shrink(small, in);
566 smallfelem_square(out, small);
567 }
568
/*-
 * smallfelem_mul sets |out| = |small1| * |small2|
 *
 * Schoolbook multiplication: for each (i, j) the 128-bit product
 * small1[i]*small2[j] is split into a low and a high 64-bit word which
 * are accumulated into out[i+j] and out[i+j+1] respectively. At most
 * seven 64-bit words land in any one limb, hence the exit bound.
 *
 * On entry:
 *   small1[i] < 2^64
 *   small2[i] < 2^64
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void smallfelem_mul(longfelem out, const smallfelem small1,
                           const smallfelem small2)
{
    limb a;
    u64 high, low;

    a = ((uint128_t)small1[0]) * small2[0];
    low = a;
    high = a >> 64;
    out[0] = low;
    out[1] = high;

    a = ((uint128_t)small1[0]) * small2[1];
    low = a;
    high = a >> 64;
    out[1] += low;
    out[2] = high;

    a = ((uint128_t)small1[1]) * small2[0];
    low = a;
    high = a >> 64;
    out[1] += low;
    out[2] += high;

    a = ((uint128_t)small1[0]) * small2[2];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] = high;

    a = ((uint128_t)small1[1]) * small2[1];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] += high;

    a = ((uint128_t)small1[2]) * small2[0];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] += high;

    a = ((uint128_t)small1[0]) * small2[3];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] = high;

    a = ((uint128_t)small1[1]) * small2[2];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] += high;

    a = ((uint128_t)small1[2]) * small2[1];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] += high;

    a = ((uint128_t)small1[3]) * small2[0];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] += high;

    a = ((uint128_t)small1[1]) * small2[3];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] = high;

    a = ((uint128_t)small1[2]) * small2[2];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] += high;

    a = ((uint128_t)small1[3]) * small2[1];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] += high;

    a = ((uint128_t)small1[2]) * small2[3];
    low = a;
    high = a >> 64;
    out[5] += low;
    out[6] = high;

    a = ((uint128_t)small1[3]) * small2[2];
    low = a;
    high = a >> 64;
    out[5] += low;
    out[6] += high;

    a = ((uint128_t)small1[3]) * small2[3];
    low = a;
    high = a >> 64;
    out[6] += low;
    out[7] = high;
}
679
680 /*-
681 * felem_mul sets |out| = |in1| * |in2|
682 * On entry:
683 * in1[i] < 2^109
684 * in2[i] < 2^109
685 * On exit:
686 * out[i] < 7 * 2^64 < 2^67
687 */
felem_mul(longfelem out,const felem in1,const felem in2)688 static void felem_mul(longfelem out, const felem in1, const felem in2)
689 {
690 smallfelem small1, small2;
691 felem_shrink(small1, in1);
692 felem_shrink(small2, in2);
693 smallfelem_mul(out, small1, small2);
694 }
695
696 /*-
697 * felem_small_mul sets |out| = |small1| * |in2|
698 * On entry:
699 * small1[i] < 2^64
700 * in2[i] < 2^109
701 * On exit:
702 * out[i] < 7 * 2^64 < 2^67
703 */
felem_small_mul(longfelem out,const smallfelem small1,const felem in2)704 static void felem_small_mul(longfelem out, const smallfelem small1,
705 const felem in2)
706 {
707 smallfelem small2;
708 felem_shrink(small2, in2);
709 smallfelem_mul(out, small1, small2);
710 }
711
#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
#define two100 (((limb)1) << 100)
#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
/*
 * zero100 is 0 mod p, with every limb close to 2^100; added in felem_reduce
 * so the subtractions in felem_reduce_ cannot underflow.
 */
static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
717
/*-
 * Internal function for the different flavours of felem_reduce.
 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
 *
 * Each high limb in[k] represents a multiple of 2^(64*k); the bracketed
 * lists below give, for each one, the (bit-shift, sign) pairs by which
 * 2^(64*k) is congruent to a sum of lower powers mod p.
 *
 * On entry:
 *   out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
 *   out[1] >= in[7] + 2^32*in[4]
 *   out[2] >= in[5] + 2^32*in[5]
 *   out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
 * On exit:
 *   out[0] <= out[0] + in[4] + 2^32*in[5]
 *   out[1] <= out[1] + in[5] + 2^33*in[6]
 *   out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
 *   out[3] <= out[3] + 2^32*in[4] + 3*in[7]
 */
static void felem_reduce_(felem out, const longfelem in)
{
    int128_t c;
    /* combine common terms from below */
    c = in[4] + (in[5] << 32);
    out[0] += c;
    out[3] -= c;

    c = in[5] - in[7];
    out[1] += c;
    out[2] -= c;

    /* the remaining terms */
    /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
    out[1] -= (in[4] << 32);
    out[3] += (in[4] << 32);

    /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
    out[2] -= (in[5] << 32);

    /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
    out[0] -= in[6];
    out[0] -= (in[6] << 32);
    out[1] += (in[6] << 33);
    out[2] += (in[6] * 2);
    out[3] -= (in[6] << 32);

    /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
    out[0] -= in[7];
    out[0] -= (in[7] << 32);
    out[2] += (in[7] << 33);
    out[3] += (in[7] * 3);
}
765
766 /*-
767 * felem_reduce converts a longfelem into an felem.
768 * To be called directly after felem_square or felem_mul.
769 * On entry:
770 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
772 * On exit:
773 * out[i] < 2^101
774 */
static void felem_reduce(felem out, const longfelem in)
{
    /* Add 0 mod p (zero100) so the subtractions in felem_reduce_ stay
     * non-negative; the bound analysis below depends on this. */
    out[0] = zero100[0] + in[0];
    out[1] = zero100[1] + in[1];
    out[2] = zero100[2] + in[2];
    out[3] = zero100[3] + in[3];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
     * out[1] > 2^100 - 2^64 - 7*2^96 > 0
     * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
     * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
     *
     * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
     * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
     * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
     * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
     */
}
796
/*-
 * felem_reduce_zero105 converts a larger longfelem into an felem.
 * Like felem_reduce but adds zero105 (limbs near 2^105) instead of
 * zero100, to absorb the larger inputs.
 * On entry:
 *   in[0] < 2^71  (NOTE(review): presumably this bound applies to every
 *   in[i], as the analysis below uses 2^71 for all limbs — confirm)
 * On exit:
 *   out[i] < 2^106
 */
static void felem_reduce_zero105(felem out, const longfelem in)
{
    out[0] = zero105[0] + in[0];
    out[1] = zero105[1] + in[1];
    out[2] = zero105[2] + in[2];
    out[3] = zero105[3] + in[3];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
     * out[1] > 2^105 - 2^71 - 2^103 > 0
     * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
     * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
     *
     * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
     * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
     */
}
825
826 /*
827 * subtract_u64 sets *result = *result - v and *carry to one if the
828 * subtraction underflowed.
829 */
subtract_u64(u64 * result,u64 * carry,u64 v)830 static void subtract_u64(u64 *result, u64 *carry, u64 v)
831 {
832 uint128_t r = *result;
833 r -= v;
834 *carry = (r >> 64) & 1;
835 *result = (u64)r;
836 }
837
/*
 * felem_contract converts |in| to its unique, minimal representation. On
 * entry: in[i] < 2^109
 */
static void felem_contract(smallfelem out, const felem in)
{
    unsigned i;
    u64 all_equal_so_far = 0, result = 0, carry;

    felem_shrink(out, in);
    /* small is minimal except that the value might be > p */

    /* all-ones sentinel: unsigned wrap of 0 - 1 */
    all_equal_so_far--;
    /*
     * We are doing a constant time test if out >= kPrime. We need to compare
     * each u64, from most-significant to least significant. For each one, if
     * all words so far have been equal (m is all ones) then a non-equal
     * result is the answer. Otherwise we continue.
     */
    /* i counts 3,2,1,0; the unsigned counter wraps past 0 to terminate */
    for (i = 3; i < 4; i--) {
        u64 equal;
        uint128_t a = ((uint128_t)kPrime[i]) - out[i];
        /*
         * if out[i] > kPrime[i] then a will underflow and the high 64-bits
         * will all be set.
         */
        result |= all_equal_so_far & ((u64)(a >> 64));

        /*
         * if kPrime[i] == out[i] then |equal| will be all zeros and the
         * decrement will make it all ones.
         */
        equal = kPrime[i] ^ out[i];
        equal--;
        /* fold every bit of |equal| down into its top bit */
        equal &= equal << 32;
        equal &= equal << 16;
        equal &= equal << 8;
        equal &= equal << 4;
        equal &= equal << 2;
        equal &= equal << 1;
        equal = 0 - (equal >> 63);

        all_equal_so_far &= equal;
    }

    /*
     * if all_equal_so_far is still all ones then the two values are equal
     * and so out >= kPrime is true.
     */
    result |= all_equal_so_far;

    /* if out >= kPrime then we subtract kPrime. */
    subtract_u64(&out[0], &carry, result & kPrime[0]);
    subtract_u64(&out[1], &carry, carry);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[1], &carry, result & kPrime[1]);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[2], &carry, result & kPrime[2]);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[3], &carry, result & kPrime[3]);
}
904
/* smallfelem_square_contract sets |out| = |in|^2, fully reduced mod p. */
static void smallfelem_square_contract(smallfelem out, const smallfelem in)
{
    longfelem product;
    felem reduced;

    smallfelem_square(product, in);
    felem_reduce(reduced, product);
    felem_contract(out, reduced);
}
914
/* smallfelem_mul_contract sets |out| = |in1| * |in2|, fully reduced mod p. */
static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
                                    const smallfelem in2)
{
    longfelem product;
    felem reduced;

    smallfelem_mul(product, in1, in2);
    felem_reduce(reduced, product);
    felem_contract(out, reduced);
}
925
/*-
 * smallfelem_is_zero returns a limb with all bits set if |in| == 0 (mod p)
 * and 0 otherwise. Since the input may be non-minimal (< 2p), both the
 * representations 0 and p must be detected; this is done branch-free.
 * On entry:
 *   small[i] < 2^64
 */
static limb smallfelem_is_zero(const smallfelem small)
{
    limb result;
    u64 is_p;

    /* OR of all limbs is zero iff the value is exactly 0 */
    u64 is_zero = small[0] | small[1] | small[2] | small[3];
    is_zero--;
    /* fold every bit down into the top bit, then spread it to all bits */
    is_zero &= is_zero << 32;
    is_zero &= is_zero << 16;
    is_zero &= is_zero << 8;
    is_zero &= is_zero << 4;
    is_zero &= is_zero << 2;
    is_zero &= is_zero << 1;
    is_zero = 0 - (is_zero >> 63);

    /* same trick for equality with p: XOR of limbs is zero iff equal */
    is_p = (small[0] ^ kPrime[0]) | (small[1] ^ kPrime[1]) | (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
    is_p--;
    is_p &= is_p << 32;
    is_p &= is_p << 16;
    is_p &= is_p << 8;
    is_p &= is_p << 4;
    is_p &= is_p << 2;
    is_p &= is_p << 1;
    is_p = 0 - (is_p >> 63);

    is_zero |= is_p;

    /* widen the 64-bit mask to a full 128-bit limb mask */
    result = is_zero;
    result |= ((limb)is_zero) << 64;
    return result;
}
963
smallfelem_is_zero_int(const void * small)964 static int smallfelem_is_zero_int(const void *small)
965 {
966 return (int)(smallfelem_is_zero(small) & ((limb)1));
967 }
968
/*-
 * felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 *   a^p = a (mod p)
 *   a^{p-1} = 1 (mod p)
 *   a^{p-2} = a^{-1} (mod p)
 *
 * The exponent p-2 = 2^256 - 2^224 + 2^192 + 2^96 - 3 is built with a
 * fixed square-and-multiply addition chain; the annotations track the
 * exponent accumulated in each temporary. Being a fixed chain, the work
 * is independent of the value of |in|.
 */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2;
    /* each e_I will hold |in|^{2^I - 1} */
    felem e2, e4, e8, e16, e32, e64;
    longfelem tmp;
    unsigned i;

    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2^1 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */
    felem_assign(e2, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^2 */
    felem_mul(tmp, ftmp, e2);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^0 */
    felem_assign(e4, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^5 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 2^2 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^7 - 2^3 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^4 */
    felem_mul(tmp, ftmp, e4);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^0 */
    felem_assign(e8, ftmp);
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^16 - 2^8 */
    felem_mul(tmp, ftmp, e8);
    felem_reduce(ftmp, tmp);    /* 2^16 - 2^0 */
    felem_assign(e16, ftmp);
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^32 - 2^16 */
    felem_mul(tmp, ftmp, e16);
    felem_reduce(ftmp, tmp);    /* 2^32 - 2^0 */
    felem_assign(e32, ftmp);
    for (i = 0; i < 32; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^64 - 2^32 */
    felem_assign(e64, ftmp);
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^64 - 2^32 + 2^0 */
    for (i = 0; i < 192; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^256 - 2^224 + 2^192 */

    felem_mul(tmp, e64, e32);
    felem_reduce(ftmp2, tmp);   /* 2^64 - 2^0 */
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^80 - 2^16 */
    felem_mul(tmp, ftmp2, e16);
    felem_reduce(ftmp2, tmp);   /* 2^80 - 2^0 */
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^88 - 2^8 */
    felem_mul(tmp, ftmp2, e8);
    felem_reduce(ftmp2, tmp);   /* 2^88 - 2^0 */
    for (i = 0; i < 4; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^92 - 2^4 */
    felem_mul(tmp, ftmp2, e4);
    felem_reduce(ftmp2, tmp);   /* 2^92 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^93 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^2 */
    felem_mul(tmp, ftmp2, e2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^95 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 2^2 */
    felem_mul(tmp, ftmp2, in);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 3 */

    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(out, tmp);     /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
}
1070
/* smallfelem_inv_contract sets |out| = |in|^{-1}, fully reduced mod p. */
static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
{
    felem widened;

    smallfelem_expand(widened, in);
    felem_inv(widened, widened);
    felem_contract(out, widened);
}
1079
1080 /*-
1081 * Group operations
1082 * ----------------
1083 *
1084 * Building on top of the field operations we have the operations on the
1085 * elliptic curve group itself. Points on the curve are represented in Jacobian
1086 * coordinates
1087 */
1088
1089 /*-
1090 * point_double calculates 2*(x_in, y_in, z_in)
1091 *
1092 * The method is taken from:
1093 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1094 *
1095 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1096 * while x_out == y_in is not (maybe this works, but it's not tested).
1097 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    longfelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
    /* small1/small2 hold shrunken copies of gamma/alpha for fast mults */
    smallfelem small1, small2;

    felem_assign(ftmp, x_in);
    /* ftmp[i] < 2^106 */
    felem_assign(ftmp2, x_in);
    /* ftmp2[i] < 2^106 */

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);
    /* delta[i] < 2^101 */

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);
    /* gamma[i] < 2^101 */
    felem_shrink(small1, gamma);

    /* beta = x*gamma */
    felem_small_mul(tmp, small1, x_in);
    felem_reduce(beta, tmp);
    /* beta[i] < 2^101 */

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^105 + 2^106 < 2^107 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^107 < 2^109 */
    felem_mul(tmp, ftmp, ftmp2);
    felem_reduce(alpha, tmp);
    /* alpha[i] < 2^101 */
    felem_shrink(small2, alpha);

    /* x' = alpha^2 - 8*beta */
    smallfelem_square(tmp, small2);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^101 = 2^104 */
    felem_diff(x_out, ftmp);
    /* x_out[i] < 2^105 + 2^101 < 2^106 */

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^101 + 2^101 = 2^102 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^106 + 2^106 = 2^107 */
    felem_square(tmp, ftmp);
    felem_reduce(z_out, tmp);
    felem_diff(z_out, delta);
    /* z_out[i] < 2^105 + 2^101 < 2^106 */

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^101 = 2^103 */
    felem_diff_zero107(beta, x_out);
    /* beta[i] < 2^107 + 2^103 < 2^108 */
    felem_small_mul(tmp, small2, beta);
    /* tmp[i] < 7 * 2^64 < 2^67 */
    smallfelem_square(tmp2, small1);
    /* tmp2[i] < 7 * 2^64 */
    longfelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */
}
1175
1176 /*
1177 * point_double_small is the same as point_double, except that it operates on
1178 * smallfelems
1179 */
1180 static void
point_double_small(smallfelem x_out,smallfelem y_out,smallfelem z_out,const smallfelem x_in,const smallfelem y_in,const smallfelem z_in)1181 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1182 const smallfelem x_in, const smallfelem y_in,
1183 const smallfelem z_in)
1184 {
1185 felem felem_x_out, felem_y_out, felem_z_out;
1186 felem felem_x_in, felem_y_in, felem_z_in;
1187
1188 smallfelem_expand(felem_x_in, x_in);
1189 smallfelem_expand(felem_y_in, y_in);
1190 smallfelem_expand(felem_z_in, z_in);
1191 point_double(felem_x_out, felem_y_out, felem_z_out,
1192 felem_x_in, felem_y_in, felem_z_in);
1193 felem_shrink(x_out, felem_x_out);
1194 felem_shrink(y_out, felem_y_out);
1195 felem_shrink(z_out, felem_z_out);
1196 }
1197
1198 /* copy_conditional copies in to out iff mask is all ones. */
/* copy_conditional copies in to out iff mask is all ones. */
static void copy_conditional(felem out, const felem in, limb mask)
{
    unsigned idx;

    for (idx = 0; idx < NLIMBS; ++idx) {
        /* diff is zero when mask is zero, so out is left untouched */
        const limb diff = mask & (in[idx] ^ out[idx]);

        out[idx] ^= diff;
    }
}
1207
1208 /* copy_small_conditional copies in to out iff mask is all ones. */
/* copy_small_conditional copies in to out iff mask is all ones. */
static void copy_small_conditional(felem out, const smallfelem in, limb mask)
{
    const u64 mask64 = mask;
    unsigned idx;

    /* bitwise select: take in[idx] where mask is set, keep out[idx] otherwise */
    for (idx = 0; idx < NLIMBS; ++idx)
        out[idx] = ((limb)(in[idx] & mask64)) | (out[idx] & ~mask);
}
1217
1218 /*-
1219 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1220 *
1221 * The method is taken from:
1222 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1223 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1224 *
1225 * This function includes a branch for checking whether the two input points
1226 * are equal, (while not equal to the point at infinity). This case never
1227 * happens during single point multiplication, so there is no timing leak for
1228 * ECDH or ECDSA signing.
1229 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const smallfelem x2,
                      const smallfelem y2, const smallfelem z2)
{
    /* intermediate names (u1, u2, s1, s2, h, r, I, J, V) follow add-2007-bl */
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
    longfelem tmp, tmp2;
    smallfelem small1, small2, small3, small4, small5;
    limb x_equal, y_equal, z1_is_zero, z2_is_zero;
    limb points_equal;

    felem_shrink(small3, z1);

    /* infinity flags are computed up front; used for the final selects */
    z1_is_zero = smallfelem_is_zero(small3);
    z2_is_zero = smallfelem_is_zero(z2);

    /* ftmp = z1z1 = z1**2 */
    smallfelem_square(tmp, small3);
    felem_reduce(ftmp, tmp);
    /* ftmp[i] < 2^101 */
    felem_shrink(small1, ftmp);

    if (!mixed) {
        /* ftmp2 = z2z2 = z2**2 */
        smallfelem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);
        /* ftmp2[i] < 2^101 */
        felem_shrink(small2, ftmp2);

        felem_shrink(small5, x1);

        /* u1 = ftmp3 = x1*z2z2 */
        smallfelem_mul(tmp, small5, small2);
        felem_reduce(ftmp3, tmp);
        /* ftmp3[i] < 2^101 */

        /* ftmp5 = z1 + z2 */
        felem_assign(ftmp5, z1);
        felem_small_sum(ftmp5, z2);
        /* ftmp5[i] < 2^107 */

        /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
        felem_square(tmp, ftmp5);
        felem_reduce(ftmp5, tmp);
        /* ftmp2 = z2z2 + z1z1 */
        felem_sum(ftmp2, ftmp);
        /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
        felem_diff(ftmp5, ftmp2);
        /* ftmp5[i] < 2^105 + 2^101 < 2^106 */

        /* ftmp2 = z2 * z2z2 */
        smallfelem_mul(tmp, small2, z2);
        felem_reduce(ftmp2, tmp);

        /* s1 = ftmp2 = y1 * z2**3 */
        felem_mul(tmp, y1, ftmp2);
        felem_reduce(ftmp6, tmp);
        /* ftmp6[i] < 2^101 */
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* u1 = ftmp3 = x1*z2z2 */
        felem_assign(ftmp3, x1);
        /* ftmp3[i] < 2^106 */

        /* ftmp5 = 2z1z2 */
        felem_assign(ftmp5, z1);
        felem_scalar(ftmp5, 2);
        /* ftmp5[i] < 2*2^106 = 2^107 */

        /* s1 = ftmp2 = y1 * z2**3 */
        felem_assign(ftmp6, y1);
        /* ftmp6[i] < 2^106 */
    }

    /* u2 = x2*z1z1 */
    smallfelem_mul(tmp, x2, small1);
    felem_reduce(ftmp4, tmp);

    /* h = ftmp4 = u2 - u1 */
    felem_diff_zero107(ftmp4, ftmp3);
    /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
    felem_shrink(small4, ftmp4);

    x_equal = smallfelem_is_zero(small4);

    /* z_out = ftmp5 * h */
    felem_small_mul(tmp, small4, ftmp5);
    felem_reduce(z_out, tmp);
    /* z_out[i] < 2^101 */

    /* ftmp = z1 * z1z1 */
    smallfelem_mul(tmp, small1, small3);
    felem_reduce(ftmp, tmp);

    /* s2 = tmp = y2 * z1**3 */
    felem_small_mul(tmp, y2, ftmp);
    felem_reduce(ftmp5, tmp);

    /* r = ftmp5 = (s2 - s1)*2 */
    felem_diff_zero107(ftmp5, ftmp6);
    /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2^109 */
    felem_shrink(small1, ftmp5);
    y_equal = smallfelem_is_zero(small1);

    /*
     * The formulae are incorrect if the points are equal, in affine coordinates
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
     * happens.
     *
     * We use bitwise operations to avoid potential side-channels introduced by
     * the short-circuiting behaviour of boolean operators.
     *
     * The special case of either point being the point at infinity (z1 and/or
     * z2 are zero), is handled separately later on in this function, so we
     * avoid jumping to point_double here in those special cases.
     */
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));

    if (points_equal) {
        /*
         * This is obviously not constant-time but, as mentioned before, this
         * case never happens during single point multiplication, so there is no
         * timing leak for ECDH or ECDSA signing.
         */
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* I = ftmp = (2h)**2 */
    felem_assign(ftmp, ftmp4);
    felem_scalar(ftmp, 2);
    /* ftmp[i] < 2*2^108 = 2^109 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* J = ftmp2 = h * I */
    felem_mul(tmp, ftmp4, ftmp);
    felem_reduce(ftmp2, tmp);

    /* V = ftmp4 = U1 * I */
    felem_mul(tmp, ftmp3, ftmp);
    felem_reduce(ftmp4, tmp);

    /* x_out = r**2 - J - 2V */
    smallfelem_square(tmp, small1);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp3, ftmp4);
    felem_scalar(ftmp4, 2);
    felem_sum(ftmp4, ftmp2);
    /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
    felem_diff(x_out, ftmp4);
    /* x_out[i] < 2^105 + 2^101 */

    /* y_out = r(V-x_out) - 2 * s1 * J */
    felem_diff_zero107(ftmp3, x_out);
    /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
    felem_small_mul(tmp, small1, ftmp3);
    felem_mul(tmp2, ftmp6, ftmp2);
    longfelem_scalar(tmp2, 2);
    /* tmp2[i] < 2*2^67 = 2^68 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */

    /*
     * Handle the points at infinity in constant time: if z1 == 0 the result
     * is (x2, y2, z2); if z2 == 0 the result is (x1, y1, z1).
     */
    copy_small_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_small_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_small_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}
1410
1411 /*
1412 * point_add_small is the same as point_add, except that it operates on
1413 * smallfelems
1414 */
/*
 * point_add_small adds two points held in smallfelem coordinates by
 * expanding the first to felems and delegating to point_add (non-mixed).
 */
static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
                            smallfelem x1, smallfelem y1, smallfelem z1,
                            smallfelem x2, smallfelem y2, smallfelem z2)
{
    felem wide_x3, wide_y3, wide_z3;
    felem wide_x1, wide_y1, wide_z1;

    smallfelem_expand(wide_x1, x1);
    smallfelem_expand(wide_y1, y1);
    smallfelem_expand(wide_z1, z1);
    /* mixed == 0: the second point carries a general z2 */
    point_add(wide_x3, wide_y3, wide_z3,
              wide_x1, wide_y1, wide_z1, 0, x2, y2, z2);
    felem_shrink(x3, wide_x3);
    felem_shrink(y3, wide_y3);
    felem_shrink(z3, wide_z3);
}
1430
1431 /*-
1432 * Base point pre computation
1433 * --------------------------
1434 *
1435 * Two different sorts of precomputed tables are used in the following code.
 * Each contains various points on the curve, where each point is three field
1437 * elements (x, y, z).
1438 *
1439 * For the base point table, z is usually 1 (0 for the point at infinity).
1440 * This table has 2 * 16 elements, starting with the following:
1441 * index | bits | point
1442 * ------+---------+------------------------------
1443 * 0 | 0 0 0 0 | 0G
1444 * 1 | 0 0 0 1 | 1G
1445 * 2 | 0 0 1 0 | 2^64G
1446 * 3 | 0 0 1 1 | (2^64 + 1)G
1447 * 4 | 0 1 0 0 | 2^128G
1448 * 5 | 0 1 0 1 | (2^128 + 1)G
1449 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1450 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1451 * 8 | 1 0 0 0 | 2^192G
1452 * 9 | 1 0 0 1 | (2^192 + 1)G
1453 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1454 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1455 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1456 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1457 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1458 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1459 * followed by a copy of this with each element multiplied by 2^32.
1460 *
1461 * The reason for this is so that we can clock bits into four different
1462 * locations when doing simple scalar multiplies against the base point,
1463 * and then another four locations using the second 16 elements.
1464 *
1465 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1466
1467 /* gmul is the table of precomputed base points */
static const smallfelem gmul[2][16][3] = {
    /* first sub-table: multiples with bits at positions 0, 64, 128, 192 */
    { { { 0, 0, 0, 0 },
        { 0, 0, 0, 0 },
        { 0, 0, 0, 0 } },
      { { 0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
          0x6b17d1f2e12c4247 },
        { 0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
          0x4fe342e2fe1a7f9b },
        { 1, 0, 0, 0 } },
      { { 0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
          0x0fa822bc2811aaa5 },
        { 0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
          0xbff44ae8f5dba80d },
        { 1, 0, 0, 0 } },
      { { 0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
          0x300a4bbc89d6726f },
        { 0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
          0x72aac7e0d09b4644 },
        { 1, 0, 0, 0 } },
      { { 0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
          0x447d739beedb5e67 },
        { 0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
          0x2d4825ab834131ee },
        { 1, 0, 0, 0 } },
      { { 0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
          0xef9519328a9c72ff },
        { 0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
          0x611e9fc37dbb2c9b },
        { 1, 0, 0, 0 } },
      { { 0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
          0x550663797b51f5d8 },
        { 0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
          0x157164848aecb851 },
        { 1, 0, 0, 0 } },
      { { 0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
          0xeb5d7745b21141ea },
        { 0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
          0xeafd72ebdbecc17b },
        { 1, 0, 0, 0 } },
      { { 0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
          0xa6d39677a7849276 },
        { 0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
          0x674f84749b0b8816 },
        { 1, 0, 0, 0 } },
      { { 0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
          0x4e769e7672c9ddad },
        { 0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
          0x42b99082de830663 },
        { 1, 0, 0, 0 } },
      { { 0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
          0x78878ef61c6ce04d },
        { 0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
          0xb6cb3f5d7b72c321 },
        { 1, 0, 0, 0 } },
      { { 0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
          0x0c88bc4d716b1287 },
        { 0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
          0xdd5ddea3f3901dc6 },
        { 1, 0, 0, 0 } },
      { { 0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
          0x68f344af6b317466 },
        { 0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
          0x31b9c405f8540a20 },
        { 1, 0, 0, 0 } },
      { { 0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
          0x4052bf4b6f461db9 },
        { 0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
          0xfecf4d5190b0fc61 },
        { 1, 0, 0, 0 } },
      { { 0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
          0x1eddbae2c802e41a },
        { 0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
          0x43104d86560ebcfc },
        { 1, 0, 0, 0 } },
      { { 0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
          0xb48e26b484f7a21c },
        { 0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
          0xfac015404d4d3dab },
        { 1, 0, 0, 0 } } },
    /* second sub-table: the same multiples, each multiplied by 2^32 */
    { { { 0, 0, 0, 0 },
        { 0, 0, 0, 0 },
        { 0, 0, 0, 0 } },
      { { 0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
          0x7fe36b40af22af89 },
        { 0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
          0xe697d45825b63624 },
        { 1, 0, 0, 0 } },
      { { 0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
          0x4a5b506612a677a6 },
        { 0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
          0xeb13461ceac089f1 },
        { 1, 0, 0, 0 } },
      { { 0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
          0x0781b8291c6a220a },
        { 0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
          0x690cde8df0151593 },
        { 1, 0, 0, 0 } },
      { { 0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
          0x8a535f566ec73617 },
        { 0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
          0x0455c08468b08bd7 },
        { 1, 0, 0, 0 } },
      { { 0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
          0x06bada7ab77f8276 },
        { 0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
          0x5b476dfd0e6cb18a },
        { 1, 0, 0, 0 } },
      { { 0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
          0x3e29864e8a2ec908 },
        { 0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
          0x239b90ea3dc31e7e },
        { 1, 0, 0, 0 } },
      { { 0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
          0x820f4dd949f72ff7 },
        { 0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
          0x140406ec783a05ec },
        { 1, 0, 0, 0 } },
      { { 0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
          0x68f6b8542783dfee },
        { 0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
          0xcbe1feba92e40ce6 },
        { 1, 0, 0, 0 } },
      { { 0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
          0xd0b2f94d2f420109 },
        { 0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
          0x971459828b0719e5 },
        { 1, 0, 0, 0 } },
      { { 0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
          0x961610004a866aba },
        { 0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
          0x7acb9fadcee75e44 },
        { 1, 0, 0, 0 } },
      { { 0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
          0x24eb9acca333bf5b },
        { 0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
          0x69f891c5acd079cc },
        { 1, 0, 0, 0 } },
      { { 0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
          0xe51f547c5972a107 },
        { 0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
          0x1c309a2b25bb1387 },
        { 1, 0, 0, 0 } },
      { { 0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
          0x20b87b8aa2c4e503 },
        { 0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
          0xf5c6fa49919776be },
        { 1, 0, 0, 0 } },
      { { 0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
          0x1ed7d1b9332010b9 },
        { 0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
          0x3a2b03f03217257a },
        { 1, 0, 0, 0 } },
      { { 0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
          0x15fee545c78dd9f6 },
        { 0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
          0x4ab5b6b2b8753f81 },
        { 1, 0, 0, 0 } } }
};
1626
1627 /*
1628 * select_point selects the |idx|th point from a precomputation table and
1629 * copies it to out.
1630 */
/*
 * select_point selects the |idx|th point from a precomputation table and
 * copies it to out.
 *
 * The access pattern is constant-time: every table entry is read, and each
 * is combined under a mask that is all-ones only when i == idx.
 */
static void select_point(const u64 idx, unsigned int size,
                         const smallfelem pre_comp[16][3], smallfelem out[3])
{
    unsigned i, j;
    u64 *outlimbs = &out[0][0];

    memset(out, 0, sizeof(*out) * 3);

    for (i = 0; i < size; i++) {
        /* read through a const-qualified pointer; no const-discarding cast */
        const u64 *inlimbs = &pre_comp[i][0][0];
        u64 mask = i ^ idx;

        /* fold all bits of (i ^ idx) into bit 0, then subtract 1 so the
         * mask is all-ones iff i == idx, all-zeros otherwise */
        mask |= mask >> 4;
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;
        mask--;
        for (j = 0; j < NLIMBS * 3; j++)
            outlimbs[j] |= inlimbs[j] & mask;
    }
}
1651
1652 /* get_bit returns the |i|th bit in |in| */
get_bit(const felem_bytearray in,int i)1653 static char get_bit(const felem_bytearray in, int i)
1654 {
1655 if ((i < 0) || (i >= 256))
1656 return 0;
1657 return (in[i >> 3] >> (i & 7)) & 1;
1658 }
1659
1660 /*
1661 * Interleaved point multiplication using precomputed point multiples: The
1662 * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars
1663 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1664 * generator, using certain (large) precomputed multiples in g_pre_comp.
1665 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1666 */
static void batch_mul(felem x_out, felem y_out, felem z_out,
                      const felem_bytearray scalars[],
                      const unsigned num_points, const u8 *g_scalar,
                      const int mixed, const smallfelem pre_comp[][17][3],
                      const smallfelem g_pre_comp[2][16][3])
{
    int i, skip;
    unsigned num, gen_mul = (g_scalar != NULL);
    felem nq[3], ftmp;
    smallfelem tmp[3];
    u64 bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, sizeof(nq));

    /*
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
     * of the generator (two in each of the last 32 rounds) and additions of
     * other points multiples (every 5th round).
     */
    skip = 1; /* save two point operations in the first
               * round */
    /* only 32 rounds are needed when just the generator scalar is present */
    for (i = (num_points ? 255 : 31); i >= 0; --i) {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 31)) {
            /* first, look 32 bits upwards */
            bits = get_bit(g_scalar, i + 224) << 3;
            bits |= get_bit(g_scalar, i + 160) << 2;
            bits |= get_bit(g_scalar, i + 96) << 1;
            bits |= get_bit(g_scalar, i + 32);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[1], tmp);

            if (!skip) {
                /* Arg 1 below is for "mixed" */
                point_add(nq[0], nq[1], nq[2],
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
            } else {
                /* first addition: just load the selected point into nq */
                smallfelem_expand(nq[0], tmp[0]);
                smallfelem_expand(nq[1], tmp[1]);
                smallfelem_expand(nq[2], tmp[2]);
                skip = 0;
            }

            /* second, look at the current position */
            bits = get_bit(g_scalar, i + 192) << 3;
            bits |= get_bit(g_scalar, i + 128) << 2;
            bits |= get_bit(g_scalar, i + 64) << 1;
            bits |= get_bit(g_scalar, i);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[0], tmp);
            /* Arg 1 below is for "mixed" */
            point_add(nq[0], nq[1], nq[2],
                      nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0)) {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num) {
                /* gather a 6-bit window (overlapping the previous window
                 * by one bit) for signed-digit recoding */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /*
                 * select the point to add or subtract, in constant time
                 */
                select_point(digit, 17, pre_comp[num], tmp);
                smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
                                               * point */
                /* keep the positive Y when sign == 0, in constant time */
                copy_small_conditional(ftmp, tmp[1], (((limb)sign) - 1));
                felem_contract(tmp[1], ftmp);

                if (!skip) {
                    point_add(nq[0], nq[1], nq[2],
                              nq[0], nq[1], nq[2],
                              mixed, tmp[0], tmp[1], tmp[2]);
                } else {
                    /* first addition: just load the selected point into nq */
                    smallfelem_expand(nq[0], tmp[0]);
                    smallfelem_expand(nq[1], tmp[1]);
                    smallfelem_expand(nq[2], tmp[2]);
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}
1766
1767 /* Precomputation for the group generator. */
struct nistp256_pre_comp_st {
    /* precomputed generator multiples, same layout as the static gmul table */
    smallfelem g_pre_comp[2][16][3];
    /* reference count; managed via CRYPTO_UP_REF/CRYPTO_DOWN_REF */
    CRYPTO_REF_COUNT references;
};
1772
/*
 * Returns the EC_METHOD table for this 64-bit NIST P-256 implementation.
 * Field/point operations specific to this file are plugged in; everything
 * else falls back to the simple or nist generic implementations.
 */
const EC_METHOD *EC_GFp_nistp256_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ossl_ec_GFp_nistp256_group_init,
        ossl_ec_GFp_simple_group_finish,
        ossl_ec_GFp_simple_group_clear_finish,
        ossl_ec_GFp_nist_group_copy,
        ossl_ec_GFp_nistp256_group_set_curve,
        ossl_ec_GFp_simple_group_get_curve,
        ossl_ec_GFp_simple_group_get_degree,
        ossl_ec_group_simple_order_bits,
        ossl_ec_GFp_simple_group_check_discriminant,
        ossl_ec_GFp_simple_point_init,
        ossl_ec_GFp_simple_point_finish,
        ossl_ec_GFp_simple_point_clear_finish,
        ossl_ec_GFp_simple_point_copy,
        ossl_ec_GFp_simple_point_set_to_infinity,
        ossl_ec_GFp_simple_point_set_affine_coordinates,
        ossl_ec_GFp_nistp256_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */,
        0 /* point2oct */,
        0 /* oct2point */,
        ossl_ec_GFp_simple_add,
        ossl_ec_GFp_simple_dbl,
        ossl_ec_GFp_simple_invert,
        ossl_ec_GFp_simple_is_at_infinity,
        ossl_ec_GFp_simple_is_on_curve,
        ossl_ec_GFp_simple_cmp,
        ossl_ec_GFp_simple_make_affine,
        ossl_ec_GFp_simple_points_make_affine,
        ossl_ec_GFp_nistp256_points_mul,
        ossl_ec_GFp_nistp256_precompute_mult,
        ossl_ec_GFp_nistp256_have_precompute_mult,
        ossl_ec_GFp_nist_field_mul,
        ossl_ec_GFp_nist_field_sqr,
        0 /* field_div */,
        ossl_ec_GFp_simple_field_inv,
        0 /* field_encode */,
        0 /* field_decode */,
        0, /* field_set_to_one */
        ossl_ec_key_simple_priv2oct,
        ossl_ec_key_simple_oct2priv,
        0, /* set private */
        ossl_ec_key_simple_generate_key,
        ossl_ec_key_simple_check_key,
        ossl_ec_key_simple_generate_public_key,
        0, /* keycopy */
        0, /* keyfinish */
        ossl_ecdh_simple_compute_key,
        ossl_ecdsa_simple_sign_setup,
        ossl_ecdsa_simple_sign_sig,
        ossl_ecdsa_simple_verify_sig,
        0, /* field_inverse_mod_ord */
        0, /* blind_coordinates */
        0, /* ladder_pre */
        0, /* ladder_step */
        0 /* ladder_post */
    };

    return &ret;
}
1836
1837 /******************************************************************************/
1838 /*
1839 * FUNCTIONS TO MANAGE PRECOMPUTATION
1840 */
1841
nistp256_pre_comp_new(void)1842 static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)
1843 {
1844 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1845
1846 if (ret == NULL)
1847 return ret;
1848
1849 if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1850 OPENSSL_free(ret);
1851 return NULL;
1852 }
1853 return ret;
1854 }
1855
/* "Duplicates" the precomputation by bumping its reference count. */
NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
{
    int refs;

    if (p == NULL)
        return NULL;

    CRYPTO_UP_REF(&p->references, &refs);
    return p;
}
1863
/* Drops one reference; frees the structure when the count reaches zero. */
void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
{
    int refs;

    if (pre == NULL)
        return;

    CRYPTO_DOWN_REF(&pre->references, &refs);
    REF_PRINT_COUNT("EC_nistp256", refs, pre);
    if (refs > 0)
        return;
    REF_ASSERT_ISNT(refs < 0);

    CRYPTO_FREE_REF(&pre->references);
    OPENSSL_free(pre);
}
1880
1881 /******************************************************************************/
1882 /*
1883 * OPENSSL EC_METHOD FUNCTIONS
1884 */
1885
/* Initialises the group via the simple method and records that a == -3. */
int ossl_ec_GFp_nistp256_group_init(EC_GROUP *group)
{
    int ok = ossl_ec_GFp_simple_group_init(group);

    group->a_is_minus3 = 1;
    return ok;
}
1893
int ossl_ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
                                         const BIGNUM *a, const BIGNUM *b,
                                         BN_CTX *ctx)
{
    int ret = 0;
    BIGNUM *curve_p, *curve_a, *curve_b;
#ifndef FIPS_MODULE
    BN_CTX *new_ctx = NULL;

    /* allocate a context locally if the caller did not supply one */
    if (ctx == NULL)
        ctx = new_ctx = BN_CTX_new();
#endif
    if (ctx == NULL)
        return 0;

    BN_CTX_start(ctx);
    curve_p = BN_CTX_get(ctx);
    curve_a = BN_CTX_get(ctx);
    curve_b = BN_CTX_get(ctx);
    /* only the last BN_CTX_get() result needs to be checked */
    if (curve_b == NULL)
        goto err;
    BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
    BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
    BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
    /* this method only supports the canonical NIST P-256 parameters */
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
        goto err;
    }
    group->field_mod_func = BN_nist_mod_256;
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
 err:
    BN_CTX_end(ctx);
#ifndef FIPS_MODULE
    BN_CTX_free(new_ctx);
#endif
    return ret;
}
1931
1932 /*
1933 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1934 * (X/Z^2, Y/Z^3)
1935 */
int ossl_ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
                                                      const EC_POINT *point,
                                                      BIGNUM *x, BIGNUM *y,
                                                      BN_CTX *ctx)
{
    felem z1, z2, x_in, y_in;
    smallfelem x_out, y_out;
    longfelem tmp;

    /* the point at infinity has no affine representation */
    if (EC_POINT_is_at_infinity(group, point)) {
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
        return 0;
    }
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) || (!BN_to_felem(z1, point->Z)))
        return 0;
    /* z2 = Z^-1 */
    felem_inv(z2, z1);
    /* z1 = Z^-2 */
    felem_square(tmp, z2);
    felem_reduce(z1, tmp);
    /* x' = X * Z^-2 */
    felem_mul(tmp, x_in, z1);
    felem_reduce(x_in, tmp);
    felem_contract(x_out, x_in);
    if (x != NULL) {
        if (!smallfelem_to_BN(x, x_out)) {
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
            return 0;
        }
    }
    /* z1 = Z^-2 * Z^-1 = Z^-3 */
    felem_mul(tmp, z1, z2);
    felem_reduce(z1, tmp);
    /* y' = Y * Z^-3 */
    felem_mul(tmp, y_in, z1);
    felem_reduce(y_in, tmp);
    felem_contract(y_out, y_in);
    if (y != NULL) {
        if (!smallfelem_to_BN(y, y_out)) {
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
            return 0;
        }
    }
    return 1;
}
1976
1977 /* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
/* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
static void make_points_affine(size_t num, smallfelem points[][3],
                               smallfelem tmp_smallfelems[])
{
    /*
     * Runs in constant time, unless an input is the point at infinity (which
     * normally shouldn't happen).
     *
     * The generic batch-inversion helper is parameterised with this file's
     * smallfelem primitives via function-pointer casts to void*.
     */
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
                                                  points,
                                                  sizeof(smallfelem),
                                                  tmp_smallfelems,
                                                  (void (*)(void *))smallfelem_one,
                                                  smallfelem_is_zero_int,
                                                  (void (*)(void *, const void *))
                                                  smallfelem_assign,
                                                  (void (*)(void *, const void *))
                                                  smallfelem_square_contract,
                                                  (void (*)(void *, const void *,
                                                            const void *))
                                                  smallfelem_mul_contract,
                                                  (void (*)(void *, const void *))
                                                  smallfelem_inv_contract,
                                                  /* nothing to contract */
                                                  (void (*)(void *, const void *))
                                                  smallfelem_assign);
}
2004
2005 /*
2006 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2007 * values Result is stored in r (r can equal one of the inputs).
2008 */
int ossl_ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
                                    const BIGNUM *scalar, size_t num,
                                    const EC_POINT *points[],
                                    const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    int mixed = 0;              /* use mixed (affine+Jacobian) addition? */
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;   /* little-endian encoding of |scalar| */
    felem_bytearray *secrets = NULL;    /* encodings of the point scalars */
    smallfelem(*pre_comp)[17][3] = NULL;        /* per-point multiples 0G..16G */
    smallfelem *tmp_smallfelems = NULL; /* scratch for make_points_affine */
    unsigned i;
    int num_bytes;
    int have_pre_comp = 0;      /* generator precomputation is usable? */
    size_t num_points = num;
    smallfelem x_in, y_in, z_in;
    felem x_out, y_out, z_out;
    NISTP256_PRE_COMP *pre = NULL;
    const smallfelem(*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    BN_CTX_start(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    z = BN_CTX_get(ctx);
    tmp_scalar = BN_CTX_get(ctx);
    if (tmp_scalar == NULL)     /* earlier BN_CTX_get failures imply this one */
        goto err;

    if (scalar != NULL) {
        pre = group->pre_comp.nistp256;
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
        else
            /* try to use the standard precomputation */
            g_pre_comp = &gmul[0];
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!smallfelem_to_BN(x, g_pre_comp[0][1][0])
            || !smallfelem_to_BN(y, g_pre_comp[0][1][1])
            || !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
            goto err;
        }
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
                                                                generator,
                                                                x, y, z, ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points++;
    }
    if (num_points > 0) {
        if (num_points >= 3) {
            /*
             * unless we precompute multiples for just one or two points,
             * converting those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
        pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
        if (mixed)
            /* 17 multiples per point, plus one scratch element */
            tmp_smallfelems =
                OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_smallfelems == NULL)))
            goto err;

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        memset(secrets, 0, sizeof(*secrets) * num_points);
        memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
        for (i = 0; i < num_points; ++i) {
            if (i == num) {
                /*
                 * we didn't have a valid precomputation, so we pick the
                 * generator
                 */
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else {
                /* the i^th point */
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^256 */
                if ((BN_num_bits(p_scalar) > 256)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
                                               secrets[i], sizeof(secrets[i]));
                } else {
                    num_bytes = BN_bn2lebinpad(p_scalar,
                                               secrets[i], sizeof(secrets[i]));
                }
                if (num_bytes < 0) {
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
                    goto err;
                }
                /* precompute multiples: pre_comp[i][j] = j * p */
                if ((!BN_to_felem(x_out, p->X))
                    || (!BN_to_felem(y_out, p->Y))
                    || (!BN_to_felem(z_out, p->Z)))
                    goto err;
                felem_shrink(pre_comp[i][1][0], x_out);
                felem_shrink(pre_comp[i][1][1], y_out);
                felem_shrink(pre_comp[i][1][2], z_out);
                for (j = 2; j <= 16; ++j) {
                    /* odd j: (j-1)*p + p; even j: double (j/2)*p */
                    if (j & 1) {
                        point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
                                        pre_comp[i][j][2], pre_comp[i][1][0],
                                        pre_comp[i][1][1], pre_comp[i][1][2],
                                        pre_comp[i][j - 1][0],
                                        pre_comp[i][j - 1][1],
                                        pre_comp[i][j - 1][2]);
                    } else {
                        point_double_small(pre_comp[i][j][0],
                                           pre_comp[i][j][1],
                                           pre_comp[i][j][2],
                                           pre_comp[i][j / 2][0],
                                           pre_comp[i][j / 2][1],
                                           pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^256 */
        if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
        } else {
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
        }
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
    } else {
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
    }
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!smallfelem_to_BN(x, x_in))
        || (!smallfelem_to_BN(y, y_in))
        || (!smallfelem_to_BN(z, z_in))) {
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
        goto err;
    }
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
                                                             ctx);

 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
    OPENSSL_free(secrets);
    OPENSSL_free(pre_comp);
    OPENSSL_free(tmp_smallfelems);
    return ret;
}
2205
/*
 * Build and attach generator-multiple precomputation for |group|.
 * If the group's generator is the standard P-256 generator, the built-in
 * table |gmul| is copied; otherwise two 16-entry tables of generator
 * multiples are computed from scratch.  Returns 1 on success, 0 on error.
 */
int ossl_ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP256_PRE_COMP *pre = NULL;
    int i, j;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    smallfelem tmp_smallfelems[32];     /* scratch for make_points_affine(31, ...) */
    felem x_tmp, y_tmp, z_tmp;
#ifndef FIPS_MODULE
    BN_CTX *new_ctx = NULL;
#endif

    /* throw away old precomputation */
    EC_pre_comp_free(group);

#ifndef FIPS_MODULE
    if (ctx == NULL)
        ctx = new_ctx = BN_CTX_new();
#endif
    if (ctx == NULL)
        return 0;

    BN_CTX_start(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    if (y == NULL)              /* an earlier BN_CTX_get failure implies this */
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    /* build the standard generator point from the stored curve parameters */
    BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp256_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    /* custom generator: seed table 0 entry 1 with G itself */
    if ((!BN_to_felem(x_tmp, group->generator->X))
        || (!BN_to_felem(y_tmp, group->generator->Y))
        || (!BN_to_felem(z_tmp, group->generator->Z)))
        goto err;
    felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
    felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
    felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
    /*
     * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
     * 2^160*G, 2^224*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        /* table 1 entry i = 2^32 * (table 0 entry i): 32 doublings total */
        point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                           pre->g_pre_comp[0][i][1],
                           pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2],
                               pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        /* table 0 entry 2i = 2^32 * (table 1 entry i): another 32 doublings */
        point_double_small(pre->g_pre_comp[0][2 * i][0],
                           pre->g_pre_comp[0][2 * i][1],
                           pre->g_pre_comp[0][2 * i][2],
                           pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2],
                               pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
        point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                        pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                        pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                        pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                        pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                        pre->g_pre_comp[i][4][2]);
        /*
         * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
         */
        point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                        pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                        pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^32*G */
            point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
                            pre->g_pre_comp[i][2 * j + 1][1],
                            pre->g_pre_comp[i][2 * j + 1][2],
                            pre->g_pre_comp[i][2 * j][0],
                            pre->g_pre_comp[i][2 * j][1],
                            pre->g_pre_comp[i][2 * j][2],
                            pre->g_pre_comp[i][1][0],
                            pre->g_pre_comp[i][1][1],
                            pre->g_pre_comp[i][1][2]);
        }
    }
    /* convert all 31 non-infinity entries to affine form for mixed addition */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);

 done:
    SETPRECOMP(group, nistp256, pre);
    pre = NULL;                 /* ownership transferred to |group| */
    ret = 1;

 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
#ifndef FIPS_MODULE
    BN_CTX_free(new_ctx);
#endif
    EC_nistp256_pre_comp_free(pre);     /* no-op on the success path */
    return ret;
}
2350
/*
 * Return nonzero iff |group| currently carries NIST P-256 generator
 * precomputation (i.e. ossl_ec_GFp_nistp256_precompute_mult() has been
 * run and its table is still attached).
 */
int ossl_ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
{
    return HAVEPRECOMP(group, nistp256);
}
2355