1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
4 * This package is an SSL implementation written
5 * by Eric Young (eay@cryptsoft.com).
6 * The implementation was written so as to conform with Netscapes SSL.
8 * This library is free for commercial and non-commercial use as long as
9 * the following conditions are aheared to. The following conditions
10 * apply to all code found in this distribution, be it the RC4, RSA,
11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
12 * included with this distribution is covered by the same copyright terms
13 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 * Copyright remains Eric Young's, and as such any Copyright notices in
16 * the code are not to be removed.
17 * If this package is used in a product, Eric Young should be given attribution
18 * as the author of the parts of the library used.
19 * This can be in the form of a textual message at program startup or
20 * in documentation (online or textual) provided with the package.
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
25 * 1. Redistributions of source code must retain the copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 3. All advertising materials mentioning features or use of this software
31 * must display the following acknowledgement:
32 * "This product includes cryptographic software written by
33 * Eric Young (eay@cryptsoft.com)"
34 * The word 'cryptographic' can be left out if the rouines from the library
35 * being used are not cryptographic related :-).
36 * 4. If you include any Windows specific code (or a derivative thereof) from
37 * the apps directory (application code) you must include an acknowledgement:
38 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * The licence and distribution terms for any publically available version or
53 * derivative of this code cannot be changed. i.e. this code cannot simply be
54 * copied and put under another distribution licence
55 * [including the GNU Public Licence.] */
57 #include <openssl/bn.h>
64 // This file has two other implementations: x86 assembly language in
65 // asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
66 #if defined(OPENSSL_NO_ASM) || \
67 !(defined(OPENSSL_X86) || \
68 (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
71 #define mul_add(r, a, w, c) \
74 t = (BN_ULLONG)(w) * (a) + (r) + (c); \
79 #define mul(r, a, w, c) \
82 t = (BN_ULLONG)(w) * (a) + (c); \
87 #define sqr(r0, r1, a) \
90 t = (BN_ULLONG)(a) * (a); \
97 #define mul_add(r, a, w, c) \
99 BN_ULONG high, low, ret, tmp = (a); \
101 BN_UMULT_LOHI(low, high, w, tmp); \
103 (c) = (ret < (c)) ? 1 : 0; \
106 (c) += (ret < low) ? 1 : 0; \
110 #define mul(r, a, w, c) \
112 BN_ULONG high, low, ret, ta = (a); \
113 BN_UMULT_LOHI(low, high, w, ta); \
116 (c) += (ret < low) ? 1 : 0; \
120 #define sqr(r0, r1, a) \
122 BN_ULONG tmp = (a); \
123 BN_UMULT_LOHI(r0, r1, tmp, tmp); \
128 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
137 mul_add(rp[0], ap[0], w, c1);
138 mul_add(rp[1], ap[1], w, c1);
139 mul_add(rp[2], ap[2], w, c1);
140 mul_add(rp[3], ap[3], w, c1);
147 mul_add(rp[0], ap[0], w, c1);
156 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
165 mul(rp[0], ap[0], w, c1);
166 mul(rp[1], ap[1], w, c1);
167 mul(rp[2], ap[2], w, c1);
168 mul(rp[3], ap[3], w, c1);
174 mul(rp[0], ap[0], w, c1);
182 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
188 sqr(r[0], r[1], a[0]);
189 sqr(r[2], r[3], a[1]);
190 sqr(r[4], r[5], a[2]);
191 sqr(r[6], r[7], a[3]);
197 sqr(r[0], r[1], a[0]);
205 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
214 ll += (BN_ULLONG)a[0] + b[0];
217 ll += (BN_ULLONG)a[1] + b[1];
220 ll += (BN_ULLONG)a[2] + b[2];
223 ll += (BN_ULLONG)a[3] + b[3];
232 ll += (BN_ULLONG)a[0] + b[0];
245 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
301 BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
355 // mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
356 // mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
357 // sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
358 // sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
362 // Keep in mind that additions to the multiplication result cannot overflow,
363 // because its high half cannot be all-ones.
364 #define mul_add_c(a, b, c0, c1, c2) \
367 BN_ULLONG t = (BN_ULLONG)(a) * (b); \
368 t += (c0); /* no carry */ \
369 (c0) = (BN_ULONG)Lw(t); \
370 hi = (BN_ULONG)Hw(t); \
377 #define mul_add_c2(a, b, c0, c1, c2) \
380 BN_ULLONG t = (BN_ULLONG)(a) * (b); \
381 BN_ULLONG tt = t + (c0); /* no carry */ \
382 (c0) = (BN_ULONG)Lw(tt); \
383 hi = (BN_ULONG)Hw(tt); \
388 t += (c0); /* no carry */ \
389 (c0) = (BN_ULONG)Lw(t); \
390 hi = (BN_ULONG)Hw(t); \
397 #define sqr_add_c(a, i, c0, c1, c2) \
400 BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
401 t += (c0); /* no carry */ \
402 (c0) = (BN_ULONG)Lw(t); \
403 hi = (BN_ULONG)Hw(t); \
410 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
414 // Keep in mind that additions to hi can not overflow, because the high word of
415 // a multiplication result cannot be all-ones.
416 #define mul_add_c(a, b, c0, c1, c2) \
418 BN_ULONG ta = (a), tb = (b); \
420 BN_UMULT_LOHI(lo, hi, ta, tb); \
422 hi += ((c0) < lo) ? 1 : 0; \
424 (c2) += ((c1) < hi) ? 1 : 0; \
427 #define mul_add_c2(a, b, c0, c1, c2) \
429 BN_ULONG ta = (a), tb = (b); \
430 BN_ULONG lo, hi, tt; \
431 BN_UMULT_LOHI(lo, hi, ta, tb); \
433 tt = hi + (((c0) < lo) ? 1 : 0); \
435 (c2) += ((c1) < tt) ? 1 : 0; \
437 hi += (c0 < lo) ? 1 : 0; \
439 (c2) += ((c1) < hi) ? 1 : 0; \
442 #define sqr_add_c(a, i, c0, c1, c2) \
444 BN_ULONG ta = (a)[i]; \
446 BN_UMULT_LOHI(lo, hi, ta, ta); \
448 hi += (c0 < lo) ? 1 : 0; \
450 (c2) += ((c1) < hi) ? 1 : 0; \
453 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
457 void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
463 mul_add_c(a[0], b[0], c1, c2, c3);
466 mul_add_c(a[0], b[1], c2, c3, c1);
467 mul_add_c(a[1], b[0], c2, c3, c1);
470 mul_add_c(a[2], b[0], c3, c1, c2);
471 mul_add_c(a[1], b[1], c3, c1, c2);
472 mul_add_c(a[0], b[2], c3, c1, c2);
475 mul_add_c(a[0], b[3], c1, c2, c3);
476 mul_add_c(a[1], b[2], c1, c2, c3);
477 mul_add_c(a[2], b[1], c1, c2, c3);
478 mul_add_c(a[3], b[0], c1, c2, c3);
481 mul_add_c(a[4], b[0], c2, c3, c1);
482 mul_add_c(a[3], b[1], c2, c3, c1);
483 mul_add_c(a[2], b[2], c2, c3, c1);
484 mul_add_c(a[1], b[3], c2, c3, c1);
485 mul_add_c(a[0], b[4], c2, c3, c1);
488 mul_add_c(a[0], b[5], c3, c1, c2);
489 mul_add_c(a[1], b[4], c3, c1, c2);
490 mul_add_c(a[2], b[3], c3, c1, c2);
491 mul_add_c(a[3], b[2], c3, c1, c2);
492 mul_add_c(a[4], b[1], c3, c1, c2);
493 mul_add_c(a[5], b[0], c3, c1, c2);
496 mul_add_c(a[6], b[0], c1, c2, c3);
497 mul_add_c(a[5], b[1], c1, c2, c3);
498 mul_add_c(a[4], b[2], c1, c2, c3);
499 mul_add_c(a[3], b[3], c1, c2, c3);
500 mul_add_c(a[2], b[4], c1, c2, c3);
501 mul_add_c(a[1], b[5], c1, c2, c3);
502 mul_add_c(a[0], b[6], c1, c2, c3);
505 mul_add_c(a[0], b[7], c2, c3, c1);
506 mul_add_c(a[1], b[6], c2, c3, c1);
507 mul_add_c(a[2], b[5], c2, c3, c1);
508 mul_add_c(a[3], b[4], c2, c3, c1);
509 mul_add_c(a[4], b[3], c2, c3, c1);
510 mul_add_c(a[5], b[2], c2, c3, c1);
511 mul_add_c(a[6], b[1], c2, c3, c1);
512 mul_add_c(a[7], b[0], c2, c3, c1);
515 mul_add_c(a[7], b[1], c3, c1, c2);
516 mul_add_c(a[6], b[2], c3, c1, c2);
517 mul_add_c(a[5], b[3], c3, c1, c2);
518 mul_add_c(a[4], b[4], c3, c1, c2);
519 mul_add_c(a[3], b[5], c3, c1, c2);
520 mul_add_c(a[2], b[6], c3, c1, c2);
521 mul_add_c(a[1], b[7], c3, c1, c2);
524 mul_add_c(a[2], b[7], c1, c2, c3);
525 mul_add_c(a[3], b[6], c1, c2, c3);
526 mul_add_c(a[4], b[5], c1, c2, c3);
527 mul_add_c(a[5], b[4], c1, c2, c3);
528 mul_add_c(a[6], b[3], c1, c2, c3);
529 mul_add_c(a[7], b[2], c1, c2, c3);
532 mul_add_c(a[7], b[3], c2, c3, c1);
533 mul_add_c(a[6], b[4], c2, c3, c1);
534 mul_add_c(a[5], b[5], c2, c3, c1);
535 mul_add_c(a[4], b[6], c2, c3, c1);
536 mul_add_c(a[3], b[7], c2, c3, c1);
539 mul_add_c(a[4], b[7], c3, c1, c2);
540 mul_add_c(a[5], b[6], c3, c1, c2);
541 mul_add_c(a[6], b[5], c3, c1, c2);
542 mul_add_c(a[7], b[4], c3, c1, c2);
545 mul_add_c(a[7], b[5], c1, c2, c3);
546 mul_add_c(a[6], b[6], c1, c2, c3);
547 mul_add_c(a[5], b[7], c1, c2, c3);
550 mul_add_c(a[6], b[7], c2, c3, c1);
551 mul_add_c(a[7], b[6], c2, c3, c1);
554 mul_add_c(a[7], b[7], c3, c1, c2);
559 void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
565 mul_add_c(a[0], b[0], c1, c2, c3);
568 mul_add_c(a[0], b[1], c2, c3, c1);
569 mul_add_c(a[1], b[0], c2, c3, c1);
572 mul_add_c(a[2], b[0], c3, c1, c2);
573 mul_add_c(a[1], b[1], c3, c1, c2);
574 mul_add_c(a[0], b[2], c3, c1, c2);
577 mul_add_c(a[0], b[3], c1, c2, c3);
578 mul_add_c(a[1], b[2], c1, c2, c3);
579 mul_add_c(a[2], b[1], c1, c2, c3);
580 mul_add_c(a[3], b[0], c1, c2, c3);
583 mul_add_c(a[3], b[1], c2, c3, c1);
584 mul_add_c(a[2], b[2], c2, c3, c1);
585 mul_add_c(a[1], b[3], c2, c3, c1);
588 mul_add_c(a[2], b[3], c3, c1, c2);
589 mul_add_c(a[3], b[2], c3, c1, c2);
592 mul_add_c(a[3], b[3], c1, c2, c3);
597 void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
603 sqr_add_c(a, 0, c1, c2, c3);
606 sqr_add_c2(a, 1, 0, c2, c3, c1);
609 sqr_add_c(a, 1, c3, c1, c2);
610 sqr_add_c2(a, 2, 0, c3, c1, c2);
613 sqr_add_c2(a, 3, 0, c1, c2, c3);
614 sqr_add_c2(a, 2, 1, c1, c2, c3);
617 sqr_add_c(a, 2, c2, c3, c1);
618 sqr_add_c2(a, 3, 1, c2, c3, c1);
619 sqr_add_c2(a, 4, 0, c2, c3, c1);
622 sqr_add_c2(a, 5, 0, c3, c1, c2);
623 sqr_add_c2(a, 4, 1, c3, c1, c2);
624 sqr_add_c2(a, 3, 2, c3, c1, c2);
627 sqr_add_c(a, 3, c1, c2, c3);
628 sqr_add_c2(a, 4, 2, c1, c2, c3);
629 sqr_add_c2(a, 5, 1, c1, c2, c3);
630 sqr_add_c2(a, 6, 0, c1, c2, c3);
633 sqr_add_c2(a, 7, 0, c2, c3, c1);
634 sqr_add_c2(a, 6, 1, c2, c3, c1);
635 sqr_add_c2(a, 5, 2, c2, c3, c1);
636 sqr_add_c2(a, 4, 3, c2, c3, c1);
639 sqr_add_c(a, 4, c3, c1, c2);
640 sqr_add_c2(a, 5, 3, c3, c1, c2);
641 sqr_add_c2(a, 6, 2, c3, c1, c2);
642 sqr_add_c2(a, 7, 1, c3, c1, c2);
645 sqr_add_c2(a, 7, 2, c1, c2, c3);
646 sqr_add_c2(a, 6, 3, c1, c2, c3);
647 sqr_add_c2(a, 5, 4, c1, c2, c3);
650 sqr_add_c(a, 5, c2, c3, c1);
651 sqr_add_c2(a, 6, 4, c2, c3, c1);
652 sqr_add_c2(a, 7, 3, c2, c3, c1);
655 sqr_add_c2(a, 7, 4, c3, c1, c2);
656 sqr_add_c2(a, 6, 5, c3, c1, c2);
659 sqr_add_c(a, 6, c1, c2, c3);
660 sqr_add_c2(a, 7, 5, c1, c2, c3);
663 sqr_add_c2(a, 7, 6, c2, c3, c1);
666 sqr_add_c(a, 7, c3, c1, c2);
671 void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
677 sqr_add_c(a, 0, c1, c2, c3);
680 sqr_add_c2(a, 1, 0, c2, c3, c1);
683 sqr_add_c(a, 1, c3, c1, c2);
684 sqr_add_c2(a, 2, 0, c3, c1, c2);
687 sqr_add_c2(a, 3, 0, c1, c2, c3);
688 sqr_add_c2(a, 2, 1, c1, c2, c3);
691 sqr_add_c(a, 2, c2, c3, c1);
692 sqr_add_c2(a, 3, 1, c2, c3, c1);
695 sqr_add_c2(a, 3, 2, c3, c1, c2);
698 sqr_add_c(a, 3, c1, c2, c3);