|
|
1.1.1.2 ! root 1: /* C source code for multiprecision arithmetic library routines. ! 2: Implemented Nov 86 by Philip Zimmermann ! 3: Last revised 27 Nov 91 by PRZ ! 4: ! 5: Boulder Software Engineering ! 6: 3021 Eleventh Street ! 7: Boulder, CO 80304 ! 8: (303) 541-0140 ! 9: ! 10: (c) Copyright 1986-92 by Philip Zimmermann. All rights reserved. ! 11: The author assumes no liability for damages resulting from the use ! 12: of this software, even if the damage results from defects in this ! 13: software. No warranty is expressed or implied. The use of this ! 14: cryptographic software for developing weapon systems is expressly ! 15: forbidden. ! 16: ! 17: These routines implement all of the multiprecision arithmetic ! 18: necessary for number-theoretic cryptographic algorithms such as ! 19: ElGamal, Diffie-Hellman, Rabin, or factoring studies for large ! 20: composite numbers, as well as Rivest-Shamir-Adleman (RSA) public ! 21: key cryptography. ! 22: ! 23: Although originally developed in Microsoft C for the IBM PC, this code ! 24: contains few machine dependencies. It assumes 2's complement ! 25: arithmetic. It can be adapted to 8-bit, 16-bit, or 32-bit machines, ! 26: lowbyte-highbyte order or highbyte-lowbyte order. This version ! 27: has been converted to ANSI C. ! 28: ! 29: ! 30: The internal representation for these extended precision integer ! 31: "registers" is an array of "units". A unit is a machine word, which ! 32: is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit ! 33: unsigned integer, depending on the machine's word size. For example, ! 34: an IBM PC or AT uses a unit size of 16 bits. To perform arithmetic ! 35: on these huge precision integers, we pass pointers to these unit ! 36: arrays to various subroutines. A pointer to an array of units is of ! 37: type unitptr. This is a pointer to a huge integer "register". ! 38: ! 39: When calling a subroutine, we always pass a pointer to the BEGINNING ! 40: of the array of units, regardless of the byte order of the machine. ! 41: On a lowbyte-first machine, such as the Intel 80x86, this unitptr ! 42: points to the LEAST significant unit, and the array of units increases ! 43: significance to the right. On a highbyte-first machine, such as the ! 44: Motorola 680x0, this unitptr points to the MOST significant unit, and ! 45: the array of units decreases significance to the right. ! 46: ! 47: Modified 8 Apr 92 - HAJK ! 48: Implement new VAX/VMS primitive support. ! 49: ! 50: Modified 30 Sep 92 -Castor Fu <[email protected]> ! 51: Upgraded PORTABLE support to allow sizeof(unit) == sizeof(long) ! 52: ! 53: Modified 28 Nov 92 - Thad Smith ! 54: Added Smith modmult, generalized non-portable support. ! 55: */ ! 56: ! 57: /* #define COUNTMULTS */ /* count modmults for performance studies */ ! 58: ! 59: #ifdef DEBUG ! 60: #ifdef MSDOS ! 61: #include <conio.h> ! 62: #define poll_for_break() {while (kbhit()) getch();} ! 63: #endif /* MSDOS */ ! 64: #endif /* DEBUG */ ! 65: ! 66: #ifndef poll_for_break ! 67: #define poll_for_break() /* stub */ ! 68: #endif ! 69: ! 70: #include "mpilib.h" ! 71: ! 72: #ifdef mp_smula ! 73: #ifdef mp_smul ! 74: Error: Both mp_smula and mp_smul cannot be defined. ! 75: #else ! 76: #define mp_smul mp_smula ! 77: #endif ! 78: #endif ! 79: ! 80: /* set macros for MULTUNIT */ ! 81: #ifdef MUNIT8 ! 82: #define MULTUNITSIZE 8 ! 83: typedef unsigned char MULTUNIT; ! 84: #ifdef UNIT8 ! 85: #define MULTUNIT_SIZE_SAME ! 86: #endif ! 87: #else /* not MUNIT8 */ ! 88: #ifdef MUNIT32 ! 89: #define MULTUNITSIZE 32 ! 90: typedef unsigned long MULTUNIT; ! 91: #ifdef UNIT32 ! 92: #define MULTUNIT_SIZE_SAME ! 93: #else ! 94: /* #error is not portable, this has the same effect */ ! 95: #include "UNITSIZE cannot be smaller than MULTUNITSIZE" ! 96: #endif ! 97: #else /* assume MUNIT16 */ ! 98: #define MULTUNITSIZE 16 ! 99: typedef unsigned short MULTUNIT; ! 100: #ifdef UNIT16 ! 101: #define MULTUNIT_SIZE_SAME ! 102: #endif /* UNIT16 */ ! 103: #ifdef UNIT8 ! 104: #include "UNITSIZE cannot be smaller than MULTUNITSIZE" ! 105: #endif /* UNIT8 */ ! 106: #endif /* MUNIT32 */ ! 107: #endif /* MUNIT8 */ ! 108: ! 109: #define MULTUNIT_msb ((MULTUNIT)1 << (MULTUNITSIZE-1)) /* msb of MULTUNIT */ ! 110: #define DMULTUNIT_msb (1L << (2*MULTUNITSIZE-1)) ! 111: #define MULTUNIT_mask ((MULTUNIT)((1L << MULTUNITSIZE)-1)) ! 112: #define MULTUNITs_perunit (UNITSIZE/MULTUNITSIZE) ! 113: ! 114: ! 115: void mp_smul (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier); ! 116: void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier); ! 117: ! 118: short global_precision = 0; /* units of precision for all routines */ ! 119: /* global_precision is the unit precision last set by set_precision. ! 120: Initially, set_precision() should be called to define global_precision ! 121: before using any of these other multiprecision library routines. ! 122: i.e.: set_precision(MAX_UNIT_PRECISION); ! 123: */ ! 124: ! 125: /*************** multiprecision library primitives ****************/ ! 126: /* The following portable C primitives should be recoded in assembly. ! 127: The entry point name should be defined, in "mpilib.h" to the external ! 128: entry point name. If undefined, the C version will be used. ! 129: */ ! 130: ! 131: typedef unsigned long int ulint; ! 132: ! 133: #ifndef mp_addc ! 134: boolean mp_addc ! 135: (register unitptr r1,register unitptr r2,register boolean carry) ! 136: /* multiprecision add with carry r2 to r1, result in r1 */ ! 137: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 138: { register unit x; ! 139: short precision; /* number of units to add */ ! 140: precision = global_precision; ! 141: make_lsbptr(r1,precision); ! 142: make_lsbptr(r2,precision); ! 143: while (precision--) ! 144: { ! 145: if (carry) ! 146: { x = *r1 + *r2 + 1; ! 147: carry = (*r2 >= (unit)(~ *r1)); ! 148: } else ! 149: { x = *r1 + *r2; ! 150: carry = (x < *r1) ; ! 151: } ! 152: post_higherunit(r2); ! 153: *post_higherunit(r1) = x; ! 154: } ! 155: return(carry); /* return the final carry flag bit */ ! 156: } /* mp_addc */ ! 157: #endif /* mp_addc */ ! 158: ! 159: #ifndef mp_subb ! 160: boolean mp_subb ! 161: (register unitptr r1,register unitptr r2,register boolean borrow) ! 162: /* multiprecision subtract with borrow, r2 from r1, result in r1 */ ! 163: /* borrow is incoming borrow flag-- value should be 0 or 1 */ ! 164: { register unit x; ! 165: short precision; /* number of units to subtract */ ! 166: precision = global_precision; ! 167: make_lsbptr(r1,precision); ! 168: make_lsbptr(r2,precision); ! 169: while (precision--) ! 170: { if (borrow) ! 171: { x = *r1 - *r2 - 1; ! 172: borrow = (*r1 <= *r2); ! 173: } else ! 174: { x = *r1 - *r2; ! 175: borrow = (*r1 < *r2); ! 176: } ! 177: post_higherunit(r2); ! 178: *post_higherunit(r1) = x; ! 179: } ! 180: return(borrow); /* return the final carry/borrow flag bit */ ! 181: } /* mp_subb */ ! 182: #endif /* mp_subb */ ! 183: ! 184: #ifndef mp_rotate_left ! 185: boolean mp_rotate_left(register unitptr r1,register boolean carry) ! 186: /* multiprecision rotate left 1 bit with carry, result in r1. */ ! 187: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 188: { register int precision; /* number of units to rotate */ ! 189: unsigned int mcarry = carry, nextcarry; /* int is supposed to be ! 190: * the efficient size for ops*/ ! 191: precision = global_precision; ! 192: make_lsbptr(r1,precision); ! 193: while (precision--) ! 194: { ! 195: nextcarry = (((signedunit) *r1) < 0); ! 196: *r1 = (*r1 << 1) | mcarry; ! 197: mcarry = nextcarry; ! 198: pre_higherunit(r1); ! 199: } ! 200: return(nextcarry); /* return the final carry flag bit */ ! 201: } /* mp_rotate_left */ ! 202: #endif /* mp_rotate_left */ ! 203: ! 204: /************** end of primitives that should be in assembly *************/ ! 205: ! 206: ! 207: /* The mp_shift_right_bits function is not called in any time-critical ! 208: situations in public-key cryptographic functions, so it doesn't ! 209: need to be coded in assembly language. ! 210: */ ! 211: void mp_shift_right_bits(register unitptr r1,register short bits) ! 212: /* multiprecision shift right bits, result in r1. ! 213: bits is how many bits to shift, must be <= UNITSIZE. ! 214: */ ! 215: { register short precision; /* number of units to shift */ ! 216: register unit carry,nextcarry,bitmask; ! 217: register short unbits; ! 218: if (bits==0) return; /* shift zero bits is a no-op */ ! 219: carry = 0; ! 220: bitmask = power_of_2(bits)-1; ! 221: unbits = UNITSIZE-bits; /* shift bits must be <= UNITSIZE */ ! 222: precision = global_precision; ! 223: make_msbptr(r1,precision); ! 224: if (bits == UNITSIZE) { ! 225: while (precision--) { ! 226: nextcarry = *r1; ! 227: *r1 = carry; ! 228: carry = nextcarry; ! 229: pre_lowerunit(r1); ! 230: } ! 231: } else { ! 232: while (precision--) ! 233: { nextcarry = *r1 & bitmask; ! 234: *r1 >>= bits; ! 235: *r1 |= carry << unbits; ! 236: carry = nextcarry; ! 237: pre_lowerunit(r1); ! 238: } ! 239: } ! 240: } /* mp_shift_right_bits */ ! 241: ! 242: ! 243: #ifndef mp_compare ! 244: short mp_compare(register unitptr r1,register unitptr r2) ! 245: /* Compares multiprecision integers *r1, *r2, and returns: ! 246: -1 iff *r1 < *r2 ! 247: 0 iff *r1 == *r2 ! 248: +1 iff *r1 > *r2 ! 249: */ ! 250: { register short precision; /* number of units to compare */ ! 251: ! 252: precision = global_precision; ! 253: make_msbptr(r1,precision); ! 254: make_msbptr(r2,precision); ! 255: do ! 256: { if (*r1 < *r2) ! 257: return(-1); ! 258: if (*post_lowerunit(r1) > *post_lowerunit(r2)) ! 259: return(1); ! 260: } while (--precision); ! 261: return(0); /* *r1 == *r2 */ ! 262: } /* mp_compare */ ! 263: #endif /* mp_compare */ ! 264: ! 265: ! 266: boolean mp_inc(register unitptr r) ! 267: /* Increment multiprecision integer r. */ ! 268: { register short precision; ! 269: precision = global_precision; ! 270: make_lsbptr(r,precision); ! 271: do ! 272: { if ( ++(*r) ) return(0); /* no carry */ ! 273: post_higherunit(r); ! 274: } while (--precision); ! 275: return(1); /* return carry set */ ! 276: } /* mp_inc */ ! 277: ! 278: ! 279: boolean mp_dec(register unitptr r) ! 280: /* Decrement multiprecision integer r. */ ! 281: { register short precision; ! 282: precision = global_precision; ! 283: make_lsbptr(r,precision); ! 284: do ! 285: { if ( (signedunit) (--(*r)) != (signedunit) -1 ) ! 286: return(0); /* no borrow */ ! 287: post_higherunit(r); ! 288: } while (--precision); ! 289: return(1); /* return borrow set */ ! 290: } /* mp_dec */ ! 291: ! 292: ! 293: void mp_neg(register unitptr r) ! 294: /* Compute 2's complement, the arithmetic negative, of r */ ! 295: { register short precision; /* number of units to negate */ ! 296: precision = global_precision; ! 297: mp_dec(r); /* 2's complement is 1's complement plus 1 */ ! 298: do /* now do 1's complement */ ! 299: { *r = ~(*r); ! 300: r++; ! 301: } while (--precision); ! 302: } /* mp_neg */ ! 303: ! 304: #ifndef mp_move ! 305: void mp_move(register unitptr dst,register unitptr src) ! 306: { register short precision; /* number of units to move */ ! 307: precision = global_precision; ! 308: do { *dst++ = *src++; } while (--precision); ! 309: } /* mp_move */ ! 310: #endif /* mp_move */ ! 311: ! 312: void mp_init(register unitptr r, word16 value) ! 313: /* Init multiprecision register r with short value. */ ! 314: { /* Note that mp_init doesn't extend sign bit for >32767 */ ! 315: ! 316: unitfill0( r, global_precision); ! 317: make_lsbptr(r,global_precision); ! 318: *post_higherunit(r) = value; ! 319: #ifdef UNIT8 ! 320: *post_higherunit(r) = value >> UNITSIZE; ! 321: #endif ! 322: } /* mp_init */ ! 323: ! 324: ! 325: short significance(register unitptr r) ! 326: /* Returns number of significant units in r */ ! 327: { register short precision; ! 328: precision = global_precision; ! 329: make_msbptr(r,precision); ! 330: do ! 331: { if (*post_lowerunit(r)) ! 332: return(precision); ! 333: } while (--precision); ! 334: return(precision); ! 335: } /* significance */ ! 336: ! 337: ! 338: #ifndef unitfill0 ! 339: void unitfill0(unitptr r,word16 unitcount) ! 340: /* Zero-fill the unit buffer r. */ ! 341: { while (unitcount--) *r++ = 0; ! 342: } /* unitfill0 */ ! 343: #endif /* unitfill0 */ ! 344: ! 345: ! 346: int mp_udiv(register unitptr remainder,register unitptr quotient, ! 347: register unitptr dividend,register unitptr divisor) ! 348: /* Unsigned divide, treats both operands as positive. */ ! 349: { int bits; ! 350: short dprec; ! 351: register unit bitmask; ! 352: if (testeq(divisor,0)) ! 353: return(-1); /* zero divisor means divide error */ ! 354: mp_init0(remainder); ! 355: mp_init0(quotient); ! 356: /* normalize and compute number of bits in dividend first */ ! 357: init_bitsniffer(dividend,bitmask,dprec,bits); ! 358: /* rescale quotient to same precision (dprec) as dividend */ ! 359: rescale(quotient,global_precision,dprec); ! 360: make_msbptr(quotient,dprec); ! 361: ! 362: while (bits--) ! 363: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0)); ! 364: if (mp_compare(remainder,divisor) >= 0) ! 365: { mp_sub(remainder,divisor); ! 366: stuff_bit(quotient,bitmask); ! 367: } ! 368: bump_2bitsniffers(dividend,quotient,bitmask); ! 369: } ! 370: return(0); ! 371: } /* mp_udiv */ ! 372: ! 373: ! 374: #ifdef UPTON_OR_SMITH ! 375: #define RECIPMARGIN 0 /* extra margin bits used by mp_recip() */ ! 376: ! 377: int mp_recip(register unitptr quotient,register unitptr divisor) ! 378: /* Compute reciprocal (quotient) as 1/divisor. Used by faster modmult. */ ! 379: { int bits; ! 380: short qprec; ! 381: register unit bitmask; ! 382: unit remainder[MAX_UNIT_PRECISION]; ! 383: if (testeq(divisor,0)) ! 384: return(-1); /* zero divisor means divide error */ ! 385: mp_init0(remainder); ! 386: mp_init0(quotient); ! 387: ! 388: /* normalize and compute number of bits in quotient first */ ! 389: bits = countbits(divisor) + RECIPMARGIN; ! 390: bitmask = bitmsk(bits); /* bitmask within a single unit */ ! 391: qprec = bits2units(bits+1); ! 392: mp_setbit(remainder,(bits-RECIPMARGIN)-1); ! 393: /* rescale quotient to precision of divisor + RECIPMARGIN bits */ ! 394: rescale(quotient,global_precision,qprec); ! 395: make_msbptr(quotient,qprec); ! 396: ! 397: while (bits--) ! 398: { mp_shift_left(remainder); ! 399: if (mp_compare(remainder,divisor) >= 0) ! 400: { mp_sub(remainder,divisor); ! 401: stuff_bit(quotient,bitmask); ! 402: } ! 403: bump_bitsniffer(quotient,bitmask); ! 404: } ! 405: mp_init0(remainder); /* burn sensitive data left on stack */ ! 406: return(0); ! 407: } /* mp_recip */ ! 408: #endif /* UPTON_OR_SMITH */ ! 409: ! 410: ! 411: int mp_div(register unitptr remainder,register unitptr quotient, ! 412: register unitptr dividend,register unitptr divisor) ! 413: /* Signed divide, either or both operands may be negative. */ ! 414: { boolean dvdsign,dsign; ! 415: int status; ! 416: dvdsign = (boolean)(mp_tstminus(dividend)!=0); ! 417: dsign = (boolean)(mp_tstminus(divisor)!=0); ! 418: if (dvdsign) mp_neg(dividend); ! 419: if (dsign) mp_neg(divisor); ! 420: status = mp_udiv(remainder,quotient,dividend,divisor); ! 421: if (dvdsign) mp_neg(dividend); /* repair caller's dividend */ ! 422: if (dsign) mp_neg(divisor); /* repair caller's divisor */ ! 423: if (status<0) return(status); /* divide error? */ ! 424: if (dvdsign) mp_neg(remainder); ! 425: if (dvdsign ^ dsign) mp_neg(quotient); ! 426: return(status); ! 427: } /* mp_div */ ! 428: ! 429: ! 430: word16 mp_shortdiv(register unitptr quotient, ! 431: register unitptr dividend,register word16 divisor) ! 432: /* This function does a fast divide and mod on a multiprecision dividend ! 433: using a short integer divisor returning a short integer remainder. ! 434: This is an unsigned divide. It treats both operands as positive. ! 435: It is used mainly for faster printing of large numbers in base 10. ! 436: */ ! 437: { int bits; ! 438: short dprec; ! 439: register unit bitmask; ! 440: register word16 remainder; ! 441: if (!divisor) /* if divisor == 0 */ ! 442: return(-1); /* zero divisor means divide error */ ! 443: remainder=0; ! 444: mp_init0(quotient); ! 445: /* normalize and compute number of bits in dividend first */ ! 446: init_bitsniffer(dividend,bitmask,dprec,bits); ! 447: /* rescale quotient to same precision (dprec) as dividend */ ! 448: rescale(quotient,global_precision,dprec); ! 449: make_msbptr(quotient,dprec); ! 450: ! 451: while (bits--) ! 452: { remainder <<= 1; ! 453: if (sniff_bit(dividend,bitmask)) ! 454: remainder++; ! 455: if (remainder >= divisor) ! 456: { remainder -= divisor; ! 457: stuff_bit(quotient,bitmask); ! 458: } ! 459: bump_2bitsniffers(dividend,quotient,bitmask); ! 460: } ! 461: return(remainder); ! 462: } /* mp_shortdiv */ ! 463: ! 464: ! 465: int mp_mod(register unitptr remainder, ! 466: register unitptr dividend,register unitptr divisor) ! 467: /* Unsigned divide, treats both operands as positive. */ ! 468: { int bits; ! 469: short dprec; ! 470: register unit bitmask; ! 471: if (testeq(divisor,0)) ! 472: return(-1); /* zero divisor means divide error */ ! 473: mp_init0(remainder); ! 474: /* normalize and compute number of bits in dividend first */ ! 475: init_bitsniffer(dividend,bitmask,dprec,bits); ! 476: ! 477: while (bits--) ! 478: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0)); ! 479: msub(remainder,divisor); ! 480: bump_bitsniffer(dividend,bitmask); ! 481: } ! 482: return(0); ! 483: } /* mp_mod */ ! 484: ! 485: ! 486: word16 mp_shortmod(register unitptr dividend,register word16 divisor) ! 487: /* This function does a fast mod operation on a multiprecision dividend ! 488: using a short integer modulus returning a short integer remainder. ! 489: This is an unsigned divide. It treats both operands as positive. ! 490: It is used mainly for fast sieve searches for large primes. ! 491: */ ! 492: { int bits; ! 493: short dprec; ! 494: register unit bitmask; ! 495: register word16 remainder; ! 496: if (!divisor) /* if divisor == 0 */ ! 497: return(-1); /* zero divisor means divide error */ ! 498: remainder=0; ! 499: /* normalize and compute number of bits in dividend first */ ! 500: init_bitsniffer(dividend,bitmask,dprec,bits); ! 501: ! 502: while (bits--) ! 503: { remainder <<= 1; ! 504: if (sniff_bit(dividend,bitmask)) ! 505: remainder++; ! 506: if (remainder >= divisor) remainder -= divisor; ! 507: bump_bitsniffer(dividend,bitmask); ! 508: } ! 509: return(remainder); ! 510: } /* mp_shortmod */ ! 511: ! 512: ! 513: ! 514: #ifdef COMB_MULT /* use faster "comb" multiply algorithm */ ! 515: /* We are skipping this code because it has a bug... */ ! 516: ! 517: int mp_mult(register unitptr prod, ! 518: register unitptr multiplicand, register unitptr multiplier) ! 519: /* Computes multiprecision prod = multiplicand * multiplier */ ! 520: { /* Uses interleaved comb multiply algorithm. ! 521: This improved multiply more than twice as fast as a Russian ! 522: peasant multiply, because it does a lot fewer shifts. ! 523: Must have global_precision set to the size of the multiplicand ! 524: plus UNITSIZE-1 SLOP_BITS. Produces a product that is the sum ! 525: of the lengths of the multiplier and multiplicand. ! 526: ! 527: BUG ALERT: Unfortunately, this code has a bug. It fails for ! 528: some numbers. One such example: ! 529: x= 59DE 60CE 2345 8091 A02B 2A1C DBC3 8BE5 ! 530: x*x= 59DE 60CE 2345 26B3 993B 67A5 2499 0B7D ! 531: 52C8 CDC7 AFB3 61C8 243C 741B ! 532: --which is obviously wrong. The answer should be: ! 533: x*x= 1F8C 607B 5EA6 C061 2714 04A9 A0C6 A17A ! 534: C9AB 6095 C62F 3756 3843 E4D0 3950 7AD9 ! 535: We'll have to fix this some day. In the meantime, we'll ! 536: just have the compiler skip over this code. ! 537: */ ! 538: int bits; ! 539: register unit bitmask; ! 540: unitptr product, mplier, temp; ! 541: short mprec,mprec2; ! 542: unit mplicand[MAX_UNIT_PRECISION]; ! 543: mp_init(prod,0); /* better clear full width--double precision */ ! 544: if (testeq(multiplicand,0)) ! 545: return(0); /* zero multiplicand means zero product */ ! 546: ! 547: mp_move(mplicand,multiplicand); /* save it from damage */ ! 548: ! 549: normalize(multiplier,mprec); ! 550: if (!mprec) ! 551: return(0); ! 552: ! 553: make_lsbptr(multiplier,mprec); ! 554: bitmask = 1; /* start scan at LSB of multiplier */ ! 555: ! 556: do /* UNITSIZE times */ ! 557: { /* do for bits 0-15 */ ! 558: product = prod; ! 559: mplier = multiplier; ! 560: mprec2 = mprec; ! 561: while (mprec2--) /* do for each word in multiplier */ ! 562: { ! 563: if (sniff_bit(mplier,bitmask)) ! 564: { if (mp_addc(product,multiplicand,0)) /* ripple carry */ ! 565: { /* After 1st time thru, this is rarely encountered. */ ! 566: temp = msbptr(product,global_precision); ! 567: pre_higherunit(temp); ! 568: /* temp now points to LSU of carry region. */ ! 569: unmake_lsbptr(temp,global_precision); ! 570: mp_inc(temp); ! 571: } /* ripple carry */ ! 572: } ! 573: pre_higherunit(mplier); ! 574: pre_higherunit(product); ! 575: } ! 576: if (!(bitmask <<= 1)) ! 577: break; ! 578: mp_shift_left(multiplicand); ! 579: ! 580: } while (TRUE); ! 581: ! 582: mp_move(multiplicand,mplicand); /* recover */ ! 583: ! 584: return(0); /* normal return */ ! 585: } /* mp_mult */ ! 586: ! 587: #endif /* COMB_MULT */ ! 588: ! 589: ! 590: /* Because the "comb" multiply has a bug, we will use the slower ! 591: Russian peasant multiply instead. Fortunately, the mp_mult ! 592: function is not called from any time-critical code. ! 593: */ ! 594: ! 595: int mp_mult(register unitptr prod, ! 596: register unitptr multiplicand,register unitptr multiplier) ! 597: /* Computes multiprecision prod = multiplicand * multiplier */ ! 598: { /* Uses "Russian peasant" multiply algorithm. */ ! 599: int bits; ! 600: register unit bitmask; ! 601: short mprec; ! 602: mp_init(prod,0); ! 603: if (testeq(multiplicand,0)) ! 604: return(0); /* zero multiplicand means zero product */ ! 605: /* normalize and compute number of bits in multiplier first */ ! 606: init_bitsniffer(multiplier,bitmask,mprec,bits); ! 607: ! 608: while (bits--) ! 609: { mp_shift_left(prod); ! 610: if (sniff_bit(multiplier,bitmask)) ! 611: mp_add(prod,multiplicand); ! 612: bump_bitsniffer(multiplier,bitmask); ! 613: } ! 614: return(0); ! 615: } /* mp_mult */ ! 616: ! 617: ! 618: ! 619: /* mp_modmult computes a multiprecision multiply combined with a ! 620: modulo operation. This is the most time-critical function in ! 621: this multiprecision arithmetic library for performing modulo ! 622: exponentiation. We experimented with different versions of modmult, ! 623: depending on the machine architecture and performance requirements. ! 624: We will either use a Russian Peasant modmult (peasant_modmult), ! 625: Charlie Merritt's modmult (merritt_modmult), Jimmy Upton's ! 626: modmult (upton_modmult), or Thad Smith's modmult (smith_modmult). ! 627: On machines with a hardware atomic multiply instruction, ! 628: Smith's modmult is fastest. It can utilize assembly subroutines to ! 629: speed up the hardware multiply logic and trial quotient calculation. ! 630: If the machine lacks a fast hardware multiply, Merritt's modmult ! 631: is preferred, which doesn't call any assembly multiply routine. ! 632: We use the alias names mp_modmult, stage_modulus, and modmult_burn ! 633: for the corresponding true names, which depend on what flavor of ! 634: modmult we are using. ! 635: ! 636: Before making the first call to mp_modmult, you must set up the ! 637: modulus-dependant precomputated tables by calling stage_modulus. ! 638: After making all the calls to mp_modmult, you call modmult_burn to ! 639: erase the tables created by stage_modulus that were left in memory. ! 640: */ ! 641: ! 642: #ifdef COUNTMULTS ! 643: /* "number of modmults" counters, used for performance studies. */ ! 644: static unsigned int tally_modmults = 0; ! 645: static unsigned int tally_modsquares = 0; ! 646: #endif /* COUNTMULTS */ ! 647: ! 648: ! 649: #ifdef PEASANT ! 650: /* Conventional Russian peasant multiply with modulo algorithm. */ ! 651: ! 652: static unitptr pmodulus = 0; /* used only by mp_modmult */ ! 653: ! 654: int stage_peasant_modulus(unitptr n) ! 655: /* Must pass modulus to stage_modulus before calling modmult. ! 656: Assumes that global_precision has already been adjusted to the ! 657: size of the modulus, plus SLOP_BITS. ! 658: */ ! 659: { /* For this simple version of modmult, just copy unit pointer. */ ! 660: pmodulus = n; ! 661: return(0); /* normal return */ ! 662: } /* stage_peasant_modulus */ ! 663: ! 664: ! 665: int peasant_modmult(register unitptr prod, ! 666: unitptr multiplicand,register unitptr multiplier) ! 667: { /* "Russian peasant" multiply algorithm, combined with a modulo ! 668: operation. This is a simple naive replacement for Merritt's ! 669: faster modmult algorithm. References global unitptr "modulus". ! 670: Computes: prod = (multiplicand*multiplier) mod modulus ! 671: WARNING: All the arguments must be less than the modulus! ! 672: */ ! 673: int bits; ! 674: register unit bitmask; ! 675: short mprec; ! 676: mp_init(prod,0); ! 677: /* if (testeq(multiplicand,0)) ! 678: return(0); */ /* zero multiplicand means zero product */ ! 679: /* normalize and compute number of bits in multiplier first */ ! 680: init_bitsniffer(multiplier,bitmask,mprec,bits); ! 681: ! 682: while (bits--) ! 683: { mp_shift_left(prod); ! 684: msub(prod,pmodulus); /* turns mult into modmult */ ! 685: if (sniff_bit(multiplier,bitmask)) ! 686: { mp_add(prod,multiplicand); ! 687: msub(prod,pmodulus); /* turns mult into modmult */ ! 688: } ! 689: bump_bitsniffer(multiplier,bitmask); ! 690: } ! 691: return(0); ! 692: } /* peasant_modmult */ ! 693: ! 694: ! 695: /* If we are using a version of mp_modmult that uses internal tables ! 696: in memory, we have to call modmult_burn() at the end of mp_modexp. ! 697: This is so that no sensitive data is left in memory after the program ! 698: exits. The Russian peasant method doesn't use any such tables. ! 699: */ ! 700: void peasant_burn(void) ! 701: /* Alias for modmult_burn, called only from mp_modexp(). Destroys ! 702: internal modmult tables. This version does nothing because no ! 703: tables are used by the Russian peasant modmult. */ ! 704: { } /* peasant_burn */ ! 705: ! 706: #endif /* PEASANT */ ! 707: ! 708: ! 709: #ifdef MERRITT ! 710: /*=========================================================================*/ ! 711: /* ! 712: This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ. ! 713: Also refined by Zhahai Stewart to reduce the number of subtracts. ! 714: Modified by Raymond Brand to reduce the number of SLOP_BITS by 1. ! 715: It performs a multiply combined with a modulo operation, without ! 716: going into "double precision". It is faster than the Russian peasant ! 717: method, and still works well on machines that lack a fast hardware ! 718: multiply instruction. ! 719: */ ! 720: ! 721: /* The following support functions, macros, and data structures ! 722: are used only by Merritt's modmult algorithm... */ ! 723: ! 724: static void mp_lshift_unit(register unitptr r1) ! 725: /* Shift r1 1 whole unit to the left. Used only by modmult function. */ ! 726: { register short precision; ! 727: register unitptr r2; ! 728: precision = global_precision; ! 729: make_msbptr(r1,precision); ! 730: r2 = r1; ! 731: while (--precision) ! 732: *post_lowerunit(r1) = *pre_lowerunit(r2); ! 733: *r1 = 0; ! 734: } /* mp_lshift_unit */ ! 735: ! 736: ! 737: /* moduli_buf contains shifted images of the modulus, set by stage_modulus */ ! 738: static unit moduli_buf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0}; ! 739: static unitptr moduli[UNITSIZE] = /* contains pointers into moduli_buf */ ! 740: { 0 ! 741: ,&moduli_buf[ 0][0], &moduli_buf[ 1][0], &moduli_buf[ 2][0], &moduli_buf[ 3][0], ! 742: &moduli_buf[ 4][0], &moduli_buf[ 5][0], &moduli_buf[ 6][0] ! 743: #ifndef UNIT8 ! 744: ,&moduli_buf[ 7][0] ! 745: ,&moduli_buf[ 8][0], &moduli_buf[ 9][0], &moduli_buf[10][0], &moduli_buf[11][0], ! 746: &moduli_buf[12][0], &moduli_buf[13][0], &moduli_buf[14][0] ! 747: #ifndef UNIT16 /* and not UNIT8 */ ! 748: ,&moduli_buf[15][0] ! 749: ,&moduli_buf[16][0], &moduli_buf[17][0], &moduli_buf[18][0], &moduli_buf[19][0], ! 750: &moduli_buf[20][0], &moduli_buf[21][0], &moduli_buf[22][0], &moduli_buf[23][0], ! 751: &moduli_buf[24][0], &moduli_buf[25][0], &moduli_buf[26][0], &moduli_buf[27][0], ! 752: &moduli_buf[28][0], &moduli_buf[29][0], &moduli_buf[30][0] ! 753: #endif /* UNIT16 and UNIT8 not defined */ ! 754: #endif /* UNIT8 not defined */ ! 755: }; ! 756: ! 757: /* To optimize msubs, need following 2 unit arrays, each filled ! 758: with the most significant unit and next-to-most significant unit ! 759: of the preshifted images of the modulus. */ ! 760: static unit msu_moduli[UNITSIZE] = {0}; /* most signif. unit */ ! 761: static unit nmsu_moduli[UNITSIZE] = {0}; /* next-most signif. unit */ ! 762: ! 763: /* mpdbuf contains preshifted images of the multiplicand, mod n. ! 764: It is used only by mp_modmult. It could be staticly declared ! 765: inside of mp_modmult, but we put it outside mp_modmult so that ! 766: it can be wiped clean by modmult_burn(), which is called at the ! 767: end of mp_modexp. This is so that no sensitive data is left in ! 768: memory after the program exits. ! 769: */ ! 770: static unit mpdbuf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0}; ! 771: ! 772: ! 773: static void stage_mp_images(unitptr images[UNITSIZE],unitptr r) ! 774: /* Computes UNITSIZE images of r, each shifted left 1 more bit. ! 775: Used only by modmult function. ! 776: */ ! 777: { short int i; ! 778: images[0] = r; /* no need to move the first image, just copy ptr */ ! 779: for (i=1; i<UNITSIZE; i++) ! 780: { mp_move(images[i],images[i-1]); ! 781: mp_shift_left(images[i]); ! 782: msub(images[i],moduli[0]); ! 783: } ! 784: } /* stage_mp_images */ ! 785: ! 786: ! 787: int stage_merritt_modulus(unitptr n) ! 788: /* Computes UNITSIZE images of modulus, each shifted left 1 more bit. ! 789: Before calling mp_modmult, you must first stage the moduli images by ! 790: calling stage_modulus. n is the pointer to the modulus. ! 791: Assumes that global_precision has already been adjusted to the ! 792: size of the modulus, plus SLOP_BITS. ! 793: */ ! 794: { short int i; ! 795: unitptr msu; /* ptr to most significant unit, for faster msubs */ ! 796: moduli[0] = n; /* no need to move the first image, just copy ptr */ ! 797: ! 798: /* used by optimized msubs macro... */ ! 799: msu = msbptr(n,global_precision); /* needed by msubs */ ! 800: msu_moduli[0] = *post_lowerunit(msu); ! 801: nmsu_moduli[0] = *msu; ! 802: ! 803: for (i=1; i<UNITSIZE; i++) ! 804: { mp_move(moduli[i],moduli[i-1]); ! 805: mp_shift_left(moduli[i]); ! 806: ! 807: /* used by optimized msubs macro... */ ! 808: msu = msbptr(moduli[i],global_precision); /* needed by msubs */ ! 809: msu_moduli[i] = *post_lowerunit(msu); /* for faster msubs */ ! 810: nmsu_moduli[i] = *msu; ! 811: } ! 812: return(0); /* normal return */ ! 813: } /* stage_merritt_modulus */ ! 814: ! 815: ! 816: /* The following macros, sniffadd and msubs, are used by modmult... */ ! 817: #define sniffadd(i) if (*multiplier & power_of_2(i)) mp_add(prod,mpd[i]) ! 818: ! 819: /* Unoptimized msubs macro (msubs0) follows... */ ! 820: /* #define msubs0(i) msub(prod,moduli[i]) ! 821: */ ! 822: ! 823: /* To optimize msubs, compare the most significant units of the ! 824: product and the shifted modulus before deciding to call the full ! 825: mp_compare routine. Better still, compare the upper two units ! 826: before deciding to call mp_compare. ! 827: Optimization of msubs relies heavily on standard C left-to-right ! 828: parsimonious evaluation of logical expressions. ! 829: */ ! 830: ! 831: /* Partially-optimized msubs macro (msubs1) follows... */ ! 832: /* #define msubs1(i) if ( \ ! 833: ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \ ! 834: (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \ ! 835: ) mp_sub(prod,moduli[i]) ! 836: */ ! 837: ! 838: /* Fully-optimized msubs macro (msubs2) follows... */ ! 839: #define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \ ! 840: (p_m==0) && ( (*nmsu_prod>nmsu_moduli[i]) || ( \ ! 841: (*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \ ! 842: mp_sub(prod,moduli[i]) ! 843: ! 844: ! 845: int merritt_modmult(register unitptr prod, ! 846: unitptr multiplicand,register unitptr multiplier) ! 847: /* Performs combined multiply/modulo operation. ! 848: Computes: prod = (multiplicand*multiplier) mod modulus ! 849: WARNING: All the arguments must be less than the modulus! ! 850: Assumes the modulus has been predefined by first calling ! 851: stage_modulus. ! 852: */ ! 853: { ! 854: /* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro...*/ ! 855: register signedunit p_m; ! 856: register unitptr msu_prod; /* ptr to most significant unit of product */ ! 857: register unitptr nmsu_prod; /* next-most signif. unit of product */ ! 858: short mprec; /* precision of multiplier, in units */ ! 859: /* Array mpd contains a list of pointers to preshifted images of ! 860: the multiplicand: */ ! 861: static unitptr mpd[UNITSIZE] = ! 862: { 0, &mpdbuf[ 0][0], &mpdbuf[ 1][0], &mpdbuf[ 2][0], ! 863: &mpdbuf[ 3][0], &mpdbuf[ 4][0], &mpdbuf[ 5][0], &mpdbuf[ 6][0] ! 864: #ifndef UNIT8 ! 865: ,&mpdbuf[ 7][0], &mpdbuf[ 8][0], &mpdbuf[ 9][0], &mpdbuf[10][0], ! 866: &mpdbuf[11][0], &mpdbuf[12][0], &mpdbuf[13][0], &mpdbuf[14][0] ! 867: #ifndef UNIT16 /* and not UNIT8 */ ! 868: ,&mpdbuf[15][0], &mpdbuf[16][0], &mpdbuf[17][0], &mpdbuf[18][0], ! 869: &mpdbuf[19][0], &mpdbuf[20][0], &mpdbuf[21][0], &mpdbuf[22][0], ! 870: &mpdbuf[23][0], &mpdbuf[24][0], &mpdbuf[25][0], &mpdbuf[26][0], ! 871: &mpdbuf[27][0], &mpdbuf[28][0], &mpdbuf[29][0], &mpdbuf[30][0] ! 872: #endif /* UNIT16 and UNIT8 not defined */ ! 873: #endif /* UNIT8 not defined */ ! 874: }; ! 875: ! 876: /* Compute preshifted images of multiplicand, mod n: */ ! 877: stage_mp_images(mpd,multiplicand); ! 878: ! 879: /* To optimize msubs, set up msu_prod and nmsu_prod: */ ! 880: msu_prod = msbptr(prod,global_precision); /* Get ptr to MSU of prod */ ! 881: nmsu_prod = msu_prod; ! 882: post_lowerunit(nmsu_prod); /* Get next-MSU of prod */ ! 883: ! 884: /* To understand this algorithm, it would be helpful to first ! 885: study the conventional Russian peasant modmult algorithm. ! 886: This one does about the same thing as Russian peasant, but ! 887: is organized differently to save some steps. It loops ! 888: through the multiplier a word (unit) at a time, instead of ! 889: a bit at a time. It word-shifts the product instead of ! 890: bit-shifting it, so it should be faster. It also does about ! 891: half as many subtracts as Russian peasant. ! 892: */ ! 893: ! 894: mp_init(prod,0); /* Initialize product to 0. */ ! 895: ! 896: /* The way mp_modmult is actually used in cryptographic ! 897: applications, there will NEVER be a zero multiplier or ! 898: multiplicand. So there is no need to optimize for that ! 899: condition. ! 900: */ ! 901: /* if (testeq(multiplicand,0)) ! 902: return(0); */ /* zero multiplicand means zero product */ ! 903: /* Normalize and compute number of units in multiplier first: */ ! 904: normalize(multiplier,mprec); ! 905: if (mprec==0) /* if precision of multiplier is 0 */ ! 906: return(0); /* zero multiplier means zero product */ ! 907: make_msbptr(multiplier,mprec); /* start at MSU of multiplier */ ! 908: ! 909: while (mprec--) /* Loop for the number of units in the multiplier */ ! 910: { ! 911: /* Shift the product one whole unit to the left. ! 912: This is part of the multiply phase of modmult. ! 913: */ ! 914: ! 915: mp_lshift_unit(prod); ! 916: ! 917: /* The product may have grown by as many as UNITSIZE ! 918: bits. That's why we have global_precision set to the ! 919: size of the modulus plus UNITSIZE slop bits. ! 920: Now reduce the product back down by conditionally ! 921: subtracting the preshifted images of the modulus. ! 922: This is part of the modulo reduction phase of modmult. ! 923: The following loop is unrolled for speed, using macros... ! 924: ! 925: for (i=UNITSIZE-1; i>=LOG_UNITSIZE; i--) ! 926: if (mp_compare(prod,moduli[i]) >= 0) ! 927: mp_sub(prod,moduli[i]); ! 928: */ ! 929: ! 930: #ifndef UNIT8 ! 931: #ifndef UNIT16 /* and not UNIT8 */ ! 932: msubs(31); ! 933: msubs(30); ! 934: msubs(29); ! 935: msubs(28); ! 936: msubs(27); ! 937: msubs(26); ! 938: msubs(25); ! 939: msubs(24); ! 940: msubs(23); ! 941: msubs(22); ! 942: msubs(21); ! 943: msubs(20); ! 944: msubs(19); ! 945: msubs(18); ! 946: msubs(17); ! 947: msubs(16); ! 948: #endif /* not UNIT16 and not UNIT8 */ ! 949: msubs(15); ! 950: msubs(14); ! 951: msubs(13); ! 952: msubs(12); ! 953: msubs(11); ! 954: msubs(10); ! 955: msubs(9); ! 956: msubs(8); ! 957: #endif /* not UNIT8 */ ! 958: msubs(7); ! 959: msubs(6); ! 960: msubs(5); ! 961: #ifndef UNIT32 ! 962: msubs(4); ! 963: #ifndef UNIT16 ! 964: msubs(3); ! 965: #endif ! 966: #endif ! 967: ! 968: /* Sniff each bit in the current unit of the multiplier, ! 969: and conditionally add the corresponding preshifted ! 970: image of the multiplicand to the product. ! 971: This is also part of the multiply phase of modmult. ! 972: ! 973: The following loop is unrolled for speed, using macros... ! 974: ! 975: for (i=UNITSIZE-1; i>=0; i--) ! 976: if (*multiplier & power_of_2(i)) ! 977: mp_add(prod,mpd[i]); ! 978: */ ! 979: #ifndef UNIT8 ! 980: #ifndef UNIT16 /* and not UNIT8 */ ! 981: sniffadd(31); ! 982: sniffadd(30); ! 983: sniffadd(29); ! 984: sniffadd(28); ! 985: sniffadd(27); ! 986: sniffadd(26); ! 987: sniffadd(25); ! 988: sniffadd(24); ! 989: sniffadd(23); ! 990: sniffadd(22); ! 991: sniffadd(21); ! 992: sniffadd(20); ! 993: sniffadd(19); ! 994: sniffadd(18); ! 995: sniffadd(17); ! 996: sniffadd(16); ! 997: #endif /* not UNIT16 and not UNIT8 */ ! 998: sniffadd(15); ! 999: sniffadd(14); ! 1000: sniffadd(13); ! 1001: sniffadd(12); ! 1002: sniffadd(11); ! 1003: sniffadd(10); ! 1004: sniffadd(9); ! 1005: sniffadd(8); ! 1006: #endif /* not UNIT8 */ ! 1007: sniffadd(7); ! 1008: sniffadd(6); ! 1009: sniffadd(5); ! 1010: sniffadd(4); ! 1011: sniffadd(3); ! 1012: sniffadd(2); ! 1013: sniffadd(1); ! 1014: sniffadd(0); ! 1015: ! 1016: /* The product may have grown by as many as LOG_UNITSIZE+1 ! 1017: bits. ! 1018: Now reduce the product back down by conditionally ! 1019: subtracting LOG_UNITSIZE+1 preshifted images of the ! 1020: modulus. This is the modulo reduction phase of modmult. ! 1021: ! 1022: The following loop is unrolled for speed, using macros... ! 1023: ! 1024: for (i=LOG_UNITSIZE; i>=0; i--) ! 1025: if (mp_compare(prod,moduli[i]) >= 0) ! 1026: mp_sub(prod,moduli[i]); ! 1027: */ ! 1028: ! 1029: #ifndef UNIT8 ! 1030: #ifndef UNIT16 ! 1031: msubs(5); ! 1032: #endif ! 1033: msubs(4); ! 1034: #endif ! 1035: msubs(3); ! 1036: msubs(2); ! 1037: msubs(1); ! 1038: msubs(0); ! 1039: ! 1040: /* Bump pointer to next lower unit of multiplier: */ ! 1041: post_lowerunit(multiplier); ! 1042: ! 1043: } /* Loop for the number of units in the multiplier */ ! 1044: ! 1045: return(0); /* normal return */ ! 1046: ! 1047: } /* merritt_modmult */ ! 1048: ! 1049: ! 1050: #undef msubs ! 1051: #undef sniffadd ! 1052: ! 1053: ! 1054: /* Merritt's mp_modmult function leaves some internal tables in memory, ! 1055: so we have to call modmult_burn() at the end of mp_modexp. ! 1056: This is so that no cryptographically sensitive data is left in memory ! 1057: after the program exits. ! 1058: */ ! 1059: void merritt_burn(void) ! 1060: /* Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */ ! 1061: { unitfill0(&(mpdbuf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION); ! 1062: unitfill0(&(moduli_buf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION); ! 1063: unitfill0(msu_moduli,UNITSIZE); ! 1064: unitfill0(nmsu_moduli,UNITSIZE); ! 1065: } /* merritt_burn() */ ! 1066: ! 1067: /******* end of Merritt's MODMULT stuff. *******/ ! 1068: /*=========================================================================*/ ! 1069: #endif /* MERRITT */ ! 1070: ! 1071: ! 1072: #ifdef UPTON_OR_SMITH /* Used by Upton's and Smith's modmult algorithms */ ! 1073: ! 1074: /* Jimmy Upton's multiprecision modmult algorithm in C. ! 1075: Performs a multiply combined with a modulo operation. ! 1076: ! 1077: The following support functions and data structures ! 1078: are used only by Upton's modmult algorithm... ! 1079: */ ! 1080: ! 1081: short munit_prec; /* global_precision expressed in MULTUNITs */ ! 1082: ! 1083: /* Note that since the SPARC CPU has no hardware integer multiply ! 1084: instruction, there is not that much advantage in having an ! 1085: assembly version of mp_smul on that machine. It might be faster ! 1086: to use Merritt's modmult instead of Upton's modmult on the SPARC. ! 1087: */ ! 1088: ! 1089: /* ! 1090: Multiply the single-word multiplier times the multiprecision integer ! 1091: in multiplicand, accumulating result in prod. The resulting ! 1092: multiprecision prod will be 1 word longer than the multiplicand. ! 1093: multiplicand is munit_prec words long. We add into prod, so caller ! 1094: should zero it out first. For best results, this time-critical ! 1095: function should be implemented in assembly. ! 1096: NOTE: Unlike other functions in the multiprecision arithmetic ! 1097: library, both multiplicand and prod are pointing at the LSB, ! 1098: regardless of byte order of the machine. On an 80x86, this makes ! 1099: no difference. But if this assembly function is implemented ! 1100: on a 680x0, it becomes important. ! 1101: */ ! 1102: /* Note that this has been modified from the previous version to allow ! 1103: better support for Smith's modmult: ! 1104: The final carry bit is added to the existing product ! 1105: array, rather than simply stored. ! 1106: */ ! 1107: ! 1108: #ifndef mp_smul ! 1109: void mp_smul (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier) ! 1110: { ! 1111: short i; ! 1112: unsigned long p, carry; ! 1113: ! 1114: carry = 0; ! 1115: for (i=0; i<munit_prec; ++i) ! 1116: { p = (unsigned long)multiplier * *post_higherunit(multiplicand); ! 1117: p += *prod + carry; ! 1118: *post_higherunit(prod) = (MULTUNIT)p; ! 1119: carry = p >> MULTUNITSIZE; ! 1120: } ! 1121: /* Add carry to the next higher word of product / dividend */ ! 1122: *prod += (MULTUNIT)carry; ! 1123: } /* mp_smul */ ! 1124: #endif ! 1125: ! 1126: /* mp_dmul is a double-precision multiply multiplicand times multiplier, ! 1127: result into prod. prod must be pointing at a "double precision" ! 1128: buffer. E.g. If global_precision is 10 words, prod must be ! 1129: pointing at a 20-word buffer. ! 1130: */ ! 1131: #ifndef mp_dmul ! 1132: void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier) ! 1133: { ! 1134: register int i; ! 1135: register MULTUNIT *p_multiplicand, *p_multiplier; ! 1136: register MULTUNIT *prodp; ! 1137: ! 1138: ! 1139: unitfill0(prod,global_precision*2); /* Pre-zero prod */ ! 1140: /* Calculate precision in units of MULTUNIT */ ! 1141: munit_prec = global_precision * UNITSIZE / MULTUNITSIZE; ! 1142: p_multiplicand = (MULTUNIT *)multiplicand; ! 1143: p_multiplier = (MULTUNIT *)multiplier; ! 1144: prodp = (MULTUNIT *)prod; ! 1145: make_lsbptr(p_multiplicand,munit_prec); ! 1146: make_lsbptr(p_multiplier,munit_prec); ! 1147: make_lsbptr(prodp,munit_prec*2); ! 1148: /* Multiply multiplicand by each word in multiplier, accumulating prod: */ ! 1149: for (i=0; i<munit_prec; ++i) ! 1150: mp_smul (post_higherunit(prodp), ! 1151: p_multiplicand, *post_higherunit(p_multiplier)); ! 1152: } /* mp_dmul */ ! 1153: #endif /* mp_dmul */ ! 1154: ! 1155: static unit ALIGN modulus[MAX_UNIT_PRECISION]; ! 1156: static short nbits; /* number of modulus significant bits */ ! 1157: #endif /* UPTON_OR_SMITH */ ! 1158: ! 1159: #ifdef UPTON ! 1160: ! 1161: /* These scratchpad arrays are used only by upton_modmult (mp_modmult). ! 1162: Some of them could be staticly declared inside of mp_modmult, but we ! 1163: put them outside mp_modmult so that they can be wiped clean by ! 1164: modmult_burn(), which is called at the end of mp_modexp. This is ! 1165: so that no sensitive data is left in memory after the program exits. ! 1166: */ ! 1167: ! 1168: static unit ALIGN reciprocal[MAX_UNIT_PRECISION]; ! 1169: static unit ALIGN dhi[MAX_UNIT_PRECISION]; ! 1170: static unit ALIGN d_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1171: static unit ALIGN e_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1172: static unit ALIGN f_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1173: ! 1174: static short nbitsDivUNITSIZE; ! 1175: static short nbitsModUNITSIZE; ! 1176: ! 1177: /* stage_upton_modulus() is aliased to stage_modulus(). ! 1178: Prepare for an Upton modmult. Calculate the reciprocal of modulus, ! 1179: and save both. Note that reciprocal will have one more bit than ! 1180: modulus. ! 1181: Assumes that global_precision has already been adjusted to the ! 1182: size of the modulus, plus SLOP_BITS. ! 1183: */ ! 1184: int stage_upton_modulus(unitptr n) ! 1185: { ! 1186: mp_move(modulus, n); ! 1187: mp_recip(reciprocal, modulus); ! 1188: nbits = countbits(modulus); ! 1189: nbitsDivUNITSIZE = nbits / UNITSIZE; ! 1190: nbitsModUNITSIZE = nbits % UNITSIZE; ! 1191: return(0); /* normal return */ ! 1192: } /* stage_upton_modulus */ ! 1193: ! 1194: ! 1195: /* Upton's algorithm performs a multiply combined with a modulo operation. ! 1196: Computes: prod = (multiplicand*multiplier) mod modulus ! 1197: WARNING: All the arguments must be less than the modulus! ! 1198: References global unitptr modulus and reciprocal. ! 1199: The reciprocal of modulus is 1 bit longer than the modulus. ! 1200: upton_modmult() is aliased to mp_modmult(). ! 1201: */ ! 1202: int upton_modmult (unitptr prod, unitptr multiplicand, unitptr multiplier) ! 1203: { ! 1204: unitptr d = d_data; ! 1205: unitptr d1 = d_data; ! 1206: unitptr e = e_data; ! 1207: unitptr f = f_data; ! 1208: short orig_precision; ! 1209: ! 1210: orig_precision = global_precision; ! 1211: mp_dmul (d,multiplicand,multiplier); ! 1212: ! 1213: /* Throw off low nbits of d */ ! 1214: #ifndef HIGHFIRST ! 1215: d1 = d + nbitsDivUNITSIZE; ! 1216: #else ! 1217: d1 = d + orig_precision - nbitsDivUNITSIZE; ! 1218: #endif ! 1219: mp_move(dhi, d1); /* Don't screw up d, we need it later */ ! 1220: mp_shift_right_bits(dhi, nbitsModUNITSIZE); ! 1221: ! 1222: mp_dmul (e,dhi,reciprocal); /* Note - reciprocal has nbits+1 bits */ ! 1223: ! 1224: #ifndef HIGHFIRST ! 1225: e += nbitsDivUNITSIZE; ! 1226: #else ! 1227: e += orig_precision - nbitsDivUNITSIZE; ! 1228: #endif ! 1229: mp_shift_right_bits(e, nbitsModUNITSIZE); ! 1230: ! 1231: mp_dmul (f,e,modulus); ! 1232: ! 1233: /* Now for the only double-precision call to mpilib: */ ! 1234: set_precision (orig_precision * 2); ! 1235: mp_sub (d,f); ! 1236: ! 1237: /* d's precision should be <= orig_precision */ ! 1238: rescale (d, orig_precision*2, orig_precision); ! 1239: set_precision (orig_precision); ! 1240: ! 1241: /* Should never have to do this final subtract more than twice: */ ! 1242: while (mp_compare(d,modulus) > 0) ! 1243: mp_sub (d,modulus); ! 1244: ! 1245: mp_move(prod,d); ! 1246: return(0); /* normal return */ ! 1247: } /* upton_modmult */ ! 1248: ! 1249: ! 1250: /* Upton's mp_modmult function leaves some internal arrays in memory, ! 1251: so we have to call modmult_burn() at the end of mp_modexp. ! 1252: This is so that no cryptographically sensitive data is left in memory ! 1253: after the program exits. ! 1254: upton_burn() is aliased to modmult_burn(). ! 1255: */ ! 1256: void upton_burn(void) ! 1257: { ! 1258: unitfill0(modulus,MAX_UNIT_PRECISION); ! 1259: unitfill0(reciprocal,MAX_UNIT_PRECISION); ! 1260: unitfill0(dhi,MAX_UNIT_PRECISION); ! 1261: unitfill0(d_data,MAX_UNIT_PRECISION*2); ! 1262: unitfill0(e_data,MAX_UNIT_PRECISION*2); ! 1263: unitfill0(f_data,MAX_UNIT_PRECISION*2); ! 1264: nbits = nbitsDivUNITSIZE = nbitsModUNITSIZE = 0; ! 1265: } /* upton_burn */ ! 1266: ! 1267: /******* end of Upton's MODMULT stuff. *******/ ! 1268: /*=========================================================================*/ ! 1269: #endif /* UPTON */ ! 1270: ! 1271: #ifdef SMITH /* using Thad Smith's modmult algorithm */ ! 1272: ! 1273: /* Thad Smith's implementation of multiprecision modmult algorithm in C. ! 1274: Performs a multiply combined with a modulo operation. ! 1275: The multiplication is done with mp_dmul, the same as for Upton's ! 1276: modmult. The modulus reduction is done by long division, in ! 1277: which a trial quotient "digit" is determined, then the product of ! 1278: that digit and the divisor are subtracted from the dividend. ! 1279: ! 1280: In this case, the digit is MULTUNIT in size and the subtraction ! 1281: is done by adding the product to the one's complement of the ! 1282: dividend, which allows use of the existing mp_smul routine. ! 1283: ! 1284: The following support functions and data structures ! 1285: are used only by Smith's modmult algorithm... ! 1286: */ ! 1287: ! 1288: /* These scratchpad arrays are used only by smith_modmult (mp_modmult). ! 1289: Some of them could be statically declared inside of mp_modmult, but we ! 1290: put them outside mp_modmult so that they can be wiped clean by ! 1291: modmult_burn(), which is called at the end of mp_modexp. This is ! 1292: so that no sensitive data is left in memory after the program exits. ! 1293: */ ! 1294: ! 1295: static unit ALIGN modulus[MAX_UNIT_PRECISION]; ! 1296: static unit ALIGN ds_data[MAX_UNIT_PRECISION*2+2]; ! 1297: ! 1298: static unit mod_quotient [4]; ! 1299: static unit mod_divisor [4]; /* 2 most signif. MULTUNITs of modulus */ ! 1300: ! 1301: static MULTUNIT *modmpl; /* ptr to modulus least significant ! 1302: ** MULTUNIT */ ! 1303: static int mshift; /* number of bits for ! 1304: ** recip scaling */ ! 1305: static MULTUNIT reciph; /* MSunit of scaled recip */ ! 1306: static MULTUNIT recipl; /* LSunit of scaled recip */ ! 1307: ! 1308: static short modlenMULTUNITS; /* length of modulus in MULTUNITs */ ! 1309: static MULTUNIT mutemp; /* temporary */ ! 1310: ! 1311: /* The routines mp_smul and mp_dmul are the same as for UPTON and ! 1312: should be coded in assembly. Note, however, that the previous ! 1313: Upton's mp_smul version has been modified to compatible with ! 1314: Smith's modmult. The new version also still works for Upton's ! 1315: modmult. ! 1316: */ ! 1317: ! 1318: #ifndef mp_set_recip ! 1319: #define mp_set_recip(rh,rl,m) /* null */ ! 1320: #else ! 1321: /* setup routine for external mp_quo_digit */ ! 1322: void mp_set_recip(MULTUNIT rh, MULTUNIT rl, int m); ! 1323: #endif ! 1324: MULTUNIT mp_quo_digit (MULTUNIT *dividend); ! 1325: ! 1326: #ifdef MULTUNIT_SIZE_SAME ! 1327: #define mp_musubb mp_subb /* use existing routine */ ! 1328: #else /* ! MULTUNIT_SIZE_SAME */ ! 1329: ! 1330: /* This performs the same function as mp_subb, but with MULTUNITs. ! 1331: Note: Processors without alignment requirements may be able to use ! 1332: mp_subb, even though MULTUNITs are smaller than units. In that case, ! 1333: use mp_subb, since it would be faster if coded in assembly. Note that ! 1334: this implementation won't work for MULTUNITs which are long -- use ! 1335: mp_subb in that case. ! 1336: */ ! 1337: #ifndef mp_musubb ! 1338: boolean mp_musubb ! 1339: (register MULTUNIT* r1,register MULTUNIT* r2,register boolean borrow) ! 1340: /* multiprecision subtract of MULTUNITs with borrow, r2 from r1, ! 1341: ** result in r1 */ ! 1342: /* borrow is incoming borrow flag-- value should be 0 or 1 */ ! 1343: { register ulint x; /* won't work if sizeof(MULTUNIT)== ! 1344: sizeof(long) */ ! 1345: short precision; /* number of MULTUNITs to subtract */ ! 1346: precision = global_precision * MULTUNITs_perunit; ! 1347: make_lsbptr(r1,precision); ! 1348: make_lsbptr(r2,precision); ! 1349: while (precision--) ! 1350: { x = (ulint) *r1 - (ulint) *post_higherunit(r2) - (ulint) borrow; ! 1351: *post_higherunit(r1) = x; ! 1352: borrow = (((1L << MULTUNITSIZE) & x) != 0L); ! 1353: } ! 1354: return (borrow); ! 1355: } /* mp_musubb */ ! 1356: #endif /* mp_musubb */ ! 1357: #endif /* MULTUNIT_SIZE_SAME */ ! 1358: ! 1359: /* The function mp_quo_digit is the heart of Smith's modulo reduction, ! 1360: which uses a form of long division. It computes a trial quotient ! 1361: "digit" (MULTUNIT-sized digit) by multiplying the three most ! 1362: significant MULTUNITs of the dividend by the two most significant ! 1363: MULTUNITs of the reciprocal of the modulus. Note that this function ! 1364: requires that MULTUNITSIZE * 2 <= sizeof(unsigned long). ! 1365: ! 1366: An important part of this technique is that the quotient never be ! 1367: too small, although it may occasionally be too large. This was ! 1368: done to eliminate the need to check and correct for a remainder ! 1369: exceeding the divisor. It is easier to check for a negative ! 1370: remainder. The following technique rarely needs correction for ! 1371: MULTUNITs of at least 16 bits. ! 1372: ! 1373: The following routine has two implementations: ! 1374: ! 1375: ASM_PROTOTYPE defined: written to be an executable prototype for ! 1376: an efficient assembly language implementation. Note that several ! 1377: of the following masks and shifts can be done by directly ! 1378: manipulating single precision registers on some architectures. ! 1379: ! 1380: ASM_PROTOTYPE undefined: a slightly more efficient implementation ! 1381: in C. Although this version returns a result larger than the ! 1382: optimum (which is corrected in smith_modmult) more often than the ! 1383: prototype version, the faster execution in C more than makes up ! 1384: for the difference. ! 1385: ! 1386: Parameter: dividend - points to the most significant MULTUNIT ! 1387: of the dividend. Note that dividend actually contains the ! 1388: one's complement of the actual dividend value (see comments for ! 1389: smith_modmult). ! 1390: ! 1391: Return: the trial quotient digit resulting from dividing the first ! 1392: three MULTUNITs at dividend by the upper two MULTUNITs of the ! 1393: modulus. ! 1394: */ ! 1395: ! 1396: /* #define ASM_PROTOTYPE */ /* undefined: use C-optimized version */ ! 1397: ! 1398: #ifndef mp_quo_digit ! 1399: MULTUNIT mp_quo_digit (MULTUNIT *dividend) { ! 1400: unsigned long q, q0, q1, q2; ! 1401: unsigned short lsb_factor; ! 1402: ! 1403: /* Compute the least significant product group. ! 1404: The last terms of q1 and q2 perform upward rounding, which is ! 1405: needed to guarantee that the result not be too small. ! 1406: */ ! 1407: q1 = (dividend[tohigher(-2)] ^ MULTUNIT_mask) * (unsigned long)reciph ! 1408: + reciph; ! 1409: q2 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long)recipl ! 1410: + (1L << MULTUNITSIZE) ; ! 1411: #ifdef ASM_PROTOTYPE ! 1412: lsb_factor = 1 & (q1>>MULTUNITSIZE) & (q2>>MULTUNITSIZE); ! 1413: q = q1 + q2; ! 1414: ! 1415: /* The following statement is equivalent to shifting the sum right ! 1416: one bit while shifting in the carry bit. ! 1417: */ ! 1418: q0 = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1); ! 1419: #else /* optimized C version */ ! 1420: q0 = (q1>>1) + (q2>>1) + 1; ! 1421: #endif ! 1422: ! 1423: /* Compute the middle significant product group. */ ! 1424: ! 1425: q1 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long)reciph; ! 1426: q2 = (dividend[ 0] ^ MULTUNIT_mask) * (unsigned long)recipl; ! 1427: #ifdef ASM_PROTOTYPE ! 1428: q = q1 + q2; ! 1429: q = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1); ! 1430: ! 1431: /* Add in the most significant word of the first group. ! 1432: The last term takes care of the carry from adding the lsb's ! 1433: that were shifted out prior to addition. ! 1434: */ ! 1435: q = (q0 >> MULTUNITSIZE)+ q + (lsb_factor & (q1 ^ q2)); ! 1436: #else /* optimized C version */ ! 1437: q = (q0 >> MULTUNITSIZE)+ (q1>>1) + (q2>>1) + 1; ! 1438: #endif ! 1439: ! 1440: /* Compute the most significant term and add in the others */ ! 1441: ! 1442: q = (q >> (MULTUNITSIZE-2)) + ! 1443: (((dividend[0] ^ MULTUNIT_mask) * (unsigned long)reciph) << 1); ! 1444: q >>= mshift; ! 1445: ! 1446: /* Prevent overflow and then wipe out the intermediate results. */ ! 1447: ! 1448: mutemp = (MULTUNIT)min(q, (1L << MULTUNITSIZE) -1); ! 1449: q= q0 = q1 = q2 = 0; lsb_factor = 0; (void)lsb_factor; ! 1450: return mutemp; ! 1451: } ! 1452: #endif /* mp_quo_digit */ ! 1453: ! 1454: /* stage_smith_modulus() - Prepare for a Smith modmult. ! 1455: ! 1456: Calculate the reciprocal of modulus with a precision of two MULTUNITs. ! 1457: Assumes that global_precision has already been adjusted to the ! 1458: size of the modulus, plus SLOP_BITS. ! 1459: ! 1460: Note: This routine was designed to work with large values and ! 1461: doesn't have the necessary testing or handling to work with a ! 1462: modulus having less than three significant units. For such cases, ! 1463: the separate multiply and modulus routines can be used. ! 1464: ! 1465: stage_smith_modulus() is aliased to stage_modulus(). ! 1466: */ ! 1467: ! 1468: int stage_smith_modulus(unitptr n_modulus) ! 1469: { ! 1470: int original_precision; ! 1471: int sigmod; /* significant units in modulus */ ! 1472: unitptr mp; /* modulus most significant pointer */ ! 1473: MULTUNIT *mpm; /* reciprocal pointer */ ! 1474: int prec; /* precision of reciprocal calc in units */ ! 1475: ! 1476: mp_move(modulus, n_modulus); ! 1477: modmpl = (MULTUNIT*) modulus; ! 1478: modmpl = lsbptr (modmpl, global_precision * MULTUNITs_perunit); ! 1479: nbits = countbits(modulus); ! 1480: modlenMULTUNITS = (nbits+ MULTUNITSIZE-1) / MULTUNITSIZE; ! 1481: ! 1482: original_precision = global_precision; ! 1483: ! 1484: /* The following code copies the three most significant units of ! 1485: * modulus to mod_divisor. ! 1486: */ ! 1487: mp = modulus; ! 1488: sigmod = significance (modulus); ! 1489: rescale (mp, original_precision, sigmod); ! 1490: /* prec is the unit precision required for 3 MULTUNITs */ ! 1491: prec = (3 +(MULTUNITs_perunit-1)) / MULTUNITs_perunit; ! 1492: set_precision (prec); ! 1493: ! 1494: /* set mp = ptr to most significant units of modulus, then move ! 1495: * the most significant units to mp_divisor ! 1496: */ ! 1497: mp = msbptr(mp,sigmod) -tohigher(prec-1); ! 1498: unmake_lsbptr (mp, prec); ! 1499: mp_move (mod_divisor, mp); ! 1500: ! 1501: /* Keep 2*MULTUNITSIZE bits in mod_divisor. ! 1502: * This will (normally) result in a reciprocal of 2*MULTUNITSIZE+1 bits. ! 1503: */ ! 1504: mshift = countbits (mod_divisor) - 2*MULTUNITSIZE; ! 1505: mp_shift_right_bits (mod_divisor, mshift); ! 1506: mp_recip(mod_quotient,mod_divisor); ! 1507: mp_shift_right_bits (mod_quotient,1); ! 1508: ! 1509: /* Reduce to: 0 < mshift <= MULTUNITSIZE */ ! 1510: mshift = ((mshift + (MULTUNITSIZE-1)) % MULTUNITSIZE) +1; ! 1511: /* round up, rescaling if necessary */ ! 1512: mp_inc (mod_quotient); ! 1513: if (countbits (mod_quotient) > 2*MULTUNITSIZE) { ! 1514: mp_shift_right_bits (mod_quotient,1); ! 1515: mshift--; /* now 0 <= mshift <= MULTUNITSIZE */ ! 1516: } ! 1517: mpm = lsbptr ((MULTUNIT*)mod_quotient, prec*MULTUNITs_perunit); ! 1518: recipl = *post_higherunit (mpm); ! 1519: reciph = *mpm; ! 1520: mp_set_recip (reciph, recipl, mshift); ! 1521: set_precision (original_precision); ! 1522: return(0); /* normal return */ ! 1523: } /* stage_smith_modulus */ ! 1524: ! 1525: /* Smith's algorithm performs a multiply combined with a modulo operation. ! 1526: Computes: prod = (multiplicand*multiplier) mod modulus ! 1527: The modulus must first be set by stage_smith_modulus(). ! 1528: smith_modmult() is aliased to mp_modmult(). ! 1529: */ ! 1530: ! 1531: int ! 1532: smith_modmult(unitptr prod, unitptr multiplicand, unitptr multiplier) ! 1533: { ! 1534: unitptr d; /* ptr to product */ ! 1535: MULTUNIT *dmph, *dmpl, *dmp; /* ptrs to dividend (high, low, first) ! 1536: * aligned for subtraction */ ! 1537: /* Note that dmph points one MULTUNIT higher than indicated by ! 1538: global precision. This allows us to zero out a word one higher than ! 1539: the normal precision. ! 1540: */ ! 1541: short orig_precision; ! 1542: short nqd; /* number of quotient digits remaining to ! 1543: * be generated */ ! 1544: short dmi; /* number of significant MULTUNITs in product */ ! 1545: ! 1546: d = ds_data + 1; /* room for leading MSB if HIGHFIRST */ ! 1547: orig_precision = global_precision; ! 1548: mp_dmul(d, multiplicand, multiplier); ! 1549: ! 1550: rescale(d, orig_precision * 2, orig_precision * 2 + 1); ! 1551: set_precision(orig_precision * 2 + 1); ! 1552: *msbptr(d, global_precision) = 0; /* leading 0 unit */ ! 1553: ! 1554: /* We now start working with MULTUNITs. ! 1555: Determine the most significant MULTUNIT of the product so we don't ! 1556: have to process leading zeros in our divide loop. ! 1557: */ ! 1558: dmi = significance(d) * MULTUNITs_perunit; ! 1559: if (dmi >= modlenMULTUNITS) ! 1560: { /* Make dividend negative. This allows the use of mp_smul to ! 1561: * "subtract" the product of the modulus and the trial divisor ! 1562: * by actually adding to a negative dividend. ! 1563: * The one's complement of the dividend is used, since it causes ! 1564: * a zero value to be represented as all ones. This facilitates ! 1565: * testing the result for possible overflow, since a sign bit ! 1566: * indicates that no adjustment is necessary, and we should not ! 1567: * attempt to adjust if the result of the addition is zero. ! 1568: */ ! 1569: mp_inc(d); ! 1570: mp_neg(d); ! 1571: set_precision(orig_precision); ! 1572: munit_prec = global_precision * UNITSIZE / MULTUNITSIZE; ! 1573: ! 1574: /* Set msb, lsb, and normal ptrs of dividend */ ! 1575: dmph = lsbptr((MULTUNIT *) d, (orig_precision * 2 + 1) * ! 1576: MULTUNITs_perunit) + tohigher(dmi); ! 1577: nqd = dmi + 1 - modlenMULTUNITS; ! 1578: dmpl = dmph - tohigher(modlenMULTUNITS); ! 1579: ! 1580: /* Divide loop. ! 1581: Each iteration computes the next quotient MULTUNIT digit, then ! 1582: multiplies the divisor (modulus) by the quotient digit and adds ! 1583: it to the one's complement of the dividend (equivalent to ! 1584: subtracting). If the product was greater than the remaining dividend, ! 1585: we get a non-negative result, in which case we subtract off the ! 1586: modulus to get the proper negative remainder. ! 1587: */ ! 1588: for (; nqd; nqd--) ! 1589: { MULTUNIT q; /* quotient trial digit */ ! 1590: ! 1591: q = mp_quo_digit(dmph); ! 1592: if (q > 0) ! 1593: { mp_smul(dmpl, modmpl, q); ! 1594: ! 1595: /* Perform correction if q too large. ! 1596: * This rarely occurs. ! 1597: */ ! 1598: if (!(*dmph & MULTUNIT_msb)) ! 1599: { dmp = dmpl; ! 1600: unmake_lsbptr(dmp, orig_precision * ! 1601: MULTUNITs_perunit); ! 1602: if (mp_musubb(dmp, ! 1603: (MULTUNIT *) modulus, 0)) ! 1604: (*dmph) --; ! 1605: } ! 1606: } ! 1607: pre_lowerunit(dmph); ! 1608: pre_lowerunit(dmpl); ! 1609: } ! 1610: /* d contains the one's complement of the remainder. */ ! 1611: rescale(d, orig_precision * 2 + 1, orig_precision); ! 1612: set_precision(orig_precision); ! 1613: mp_neg(d); ! 1614: mp_dec(d); ! 1615: } else ! 1616: { /* Product was less than modulus. Return it. */ ! 1617: rescale(d, orig_precision * 2 + 1, orig_precision); ! 1618: set_precision(orig_precision); ! 1619: } ! 1620: mp_move(prod, d); ! 1621: return (0); /* normal return */ ! 1622: } /* smith_modmult */ ! 1623: ! 1624: ! 1625: /* Smith's mp_modmult function leaves some internal arrays in memory, ! 1626: so we have to call modmult_burn() at the end of mp_modexp. ! 1627: This is so that no cryptographically sensitive data is left in memory ! 1628: after the program exits. ! 1629: smith_burn() is aliased to modmult_burn(). ! 1630: */ ! 1631: void smith_burn(void) ! 1632: { ! 1633: empty_array (modulus); ! 1634: empty_array (ds_data); ! 1635: empty_array (mod_quotient); ! 1636: empty_array (mod_divisor); ! 1637: modmpl = 0; ! 1638: mshift =nbits = 0; ! 1639: reciph = recipl = 0; ! 1640: modlenMULTUNITS = mutemp = 0; ! 1641: mp_set_recip (0,0,0); ! 1642: } /* smith_burn */ ! 1643: ! 1644: /* End of Thad Smith's implementation of modmult. */ ! 1645: ! 1646: #endif /* SMITH */ ! 1647: ! 1648: ! 1649: int countbits(unitptr r) ! 1650: /* Returns number of significant bits in r */ ! 1651: { int bits; ! 1652: short prec; ! 1653: register unit bitmask; ! 1654: init_bitsniffer(r,bitmask,prec,bits); ! 1655: return(bits); ! 1656: } /* countbits */ ! 1657: ! 1658: ! 1659: char *copyright_notice(void) ! 1660: /* force linker to include copyright notice in the executable object image. */ ! 1661: { return ("(c)1986 Philip Zimmermann"); } /* copyright_notice */ ! 1662: ! 1663: ! 1664: int mp_modexp(register unitptr expout,register unitptr expin, ! 1665: register unitptr exponent,register unitptr modulus) ! 1666: { /* Russian peasant combined exponentiation/modulo algorithm. ! 1667: Calls modmult instead of mult. ! 1668: Computes: expout = (expin**exponent) mod modulus ! 1669: WARNING: All the arguments must be less than the modulus! ! 1670: */ ! 1671: int bits; ! 1672: short oldprecision; ! 1673: register unit bitmask; ! 1674: unit product[MAX_UNIT_PRECISION]; ! 1675: short eprec; ! 1676: ! 1677: #ifdef COUNTMULTS ! 1678: tally_modmults = 0; /* clear "number of modmults" counter */ ! 1679: tally_modsquares = 0; /* clear "number of modsquares" counter */ ! 1680: #endif /* COUNTMULTS */ ! 1681: mp_init(expout,1); ! 1682: if (testeq(exponent,0)) ! 1683: { if (testeq(expin,0)) ! 1684: return(-1); /* 0 to the 0th power means return error */ ! 1685: return(0); /* otherwise, zero exponent means expout is 1 */ ! 1686: } ! 1687: if (testeq(modulus,0)) ! 1688: return(-2); /* zero modulus means error */ ! 1689: #if SLOP_BITS > 0 /* if there's room for sign bits */ ! 1690: if (mp_tstminus(modulus)) ! 1691: return(-2); /* negative modulus means error */ ! 1692: #endif /* SLOP_BITS > 0 */ ! 1693: if (mp_compare(expin,modulus) >= 0) ! 1694: return(-3); /* if expin >= modulus, return error */ ! 1695: if (mp_compare(exponent,modulus) >= 0) ! 1696: return(-4); /* if exponent >= modulus, return error */ ! 1697: ! 1698: oldprecision = global_precision; /* save global_precision */ ! 1699: /* set smallest optimum precision for this modulus */ ! 1700: set_precision(bits2units(countbits(modulus)+SLOP_BITS)); ! 1701: /* rescale all these registers to global_precision we just defined */ ! 1702: rescale(modulus,oldprecision,global_precision); ! 1703: rescale(expin,oldprecision,global_precision); ! 1704: rescale(exponent,oldprecision,global_precision); ! 1705: rescale(expout,oldprecision,global_precision); ! 1706: ! 1707: if (stage_modulus(modulus)) ! 1708: { set_precision(oldprecision); /* restore original precision */ ! 1709: return(-5); /* unstageable modulus (STEWART algorithm) */ ! 1710: } ! 1711: ! 1712: /* normalize and compute number of bits in exponent first */ ! 1713: init_bitsniffer(exponent,bitmask,eprec,bits); ! 1714: ! 1715: /* We can "optimize out" the first modsquare and modmult: */ ! 1716: bits--; /* We know for sure at this point that bits>0 */ ! 1717: mp_move(expout,expin); /* expout = (1*1)*expin; */ ! 1718: bump_bitsniffer(exponent,bitmask); ! 1719: ! 1720: while (bits--) ! 1721: { ! 1722: poll_for_break(); /* polls keyboard, allows ctrl-C to abort program */ ! 1723: #ifdef COUNTMULTS ! 1724: tally_modsquares++; /* bump "number of modsquares" counter */ ! 1725: #endif /* COUNTMULTS */ ! 1726: mp_modsquare(product,expout); ! 1727: if (sniff_bit(exponent,bitmask)) ! 1728: { mp_modmult(expout,product,expin); ! 1729: #ifdef COUNTMULTS ! 1730: tally_modmults++; /* bump "number of modmults" counter */ ! 1731: #endif /* COUNTMULTS */ ! 1732: } else ! 1733: { ! 1734: mp_move(expout,product); ! 1735: } ! 1736: bump_bitsniffer(exponent,bitmask); ! 1737: } /* while bits-- */ ! 1738: mp_burn(product); /* burn the evidence on the stack */ ! 1739: modmult_burn(); /* ask mp_modmult to also burn its own evidence */ ! 1740: ! 1741: #ifdef COUNTMULTS /* diagnostic analysis */ ! 1742: { long atomic_mults; ! 1743: unsigned int unitcount,totalmults; ! 1744: unitcount = bits2units(countbits(modulus)); ! 1745: /* calculation assumes modsquare takes as long as a modmult: */ ! 1746: atomic_mults = (long) tally_modmults * (unitcount * unitcount); ! 1747: atomic_mults += (long) tally_modsquares * (unitcount * unitcount); ! 1748: printf("%ld atomic mults for ",atomic_mults); ! 1749: printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n", ! 1750: tally_modsquares,tally_modmults, ! 1751: tally_modsquares+tally_modmults, ! 1752: countbits(modulus), unitcount); ! 1753: } ! 1754: #endif /* COUNTMULTS */ ! 1755: ! 1756: set_precision(oldprecision); /* restore original precision */ ! 1757: ! 1758: /* Do an explicit reference to the copyright notice so that the linker ! 1759: will be forced to include it in the executable object image... */ ! 1760: copyright_notice(); /* has no real effect at run time */ ! 1761: ! 1762: return(0); /* normal return */ ! 1763: } /* mp_modexp */ ! 1764: ! 1765: ! 1766: ! 1767: ! 1768: /********************************************************************* ! 1769: RSA-specific routines follow. These are the only functions that ! 1770: are specific to the RSA public key cryptosystem. The other ! 1771: multiprecision integer math functions may be used for non-RSA ! 1772: applications. ! 1773: ! 1774: The RSA public key cryptosystem is patented by the Massachusetts ! 1775: Institute of Technology (U.S. patent #4,405,829). This patent does ! 1776: not apply outside the USA. Public Key Partners (PKP) holds the ! 1777: exclusive commercial license to sell and sub-license the RSA public ! 1778: key cryptosystem. The author of this software implementation of the ! 1779: RSA algorithm is providing this software for educational use only. ! 1780: Licensing this algorithm from PKP is the responsibility of you, the ! 1781: user, not Philip Zimmermann, the author of this software. The author ! 1782: assumes no liability for any breach of patent law resulting from the ! 1783: unlicensed use of this software by the user. ! 1784: *********************************************************************/ ! 1785: ! 1786: ! 1787: int rsa_decrypt(unitptr M, unitptr C, ! 1788: unitptr d, unitptr p, unitptr q, unitptr u) ! 1789: /* Uses Quisquater's Chinese Remainder Theorem shortcut ! 1790: for RSA decryption. ! 1791: M is the output plaintext message. ! 1792: C is the input ciphertext message. ! 1793: d is the secret decryption exponent. ! 1794: p and q are the secret prime factors of n. ! 1795: u is the multiplicative inverse of p, mod q. ! 1796: Note that u is precomputed on the assumption that p<q. ! 1797: n, the common modulus, is not used here because of the ! 1798: Chinese Remainder Theorem shortcut. ! 1799: */ ! 1800: { ! 1801: unit p2[MAX_UNIT_PRECISION]; ! 1802: unit q2[MAX_UNIT_PRECISION]; ! 1803: unit temp1[MAX_UNIT_PRECISION]; ! 1804: unit temp2[MAX_UNIT_PRECISION]; ! 1805: int status; ! 1806: ! 1807: mp_init(M,1); /* initialize result in case of error */ ! 1808: ! 1809: if (mp_compare(p,q) >= 0) /* ensure that p<q */ ! 1810: { /* swap the pointers p and q */ ! 1811: unitptr t; ! 1812: t = p; p = q; q = t; ! 1813: } ! 1814: ! 1815: /* Rather than decrypting by computing modexp with full mod n ! 1816: precision, compute a shorter modexp with mod p precision... */ ! 1817: ! 1818: /* p2 = [ (C mod p)**( d mod (p-1) ) ] mod p */ ! 1819: mp_move(temp1,p); ! 1820: mp_dec(temp1); /* temp1 = p-1 */ ! 1821: mp_mod(temp2,d,temp1); /* temp2 = d mod (p-1) ) */ ! 1822: mp_mod(temp1,C,p); /* temp1 = C mod p */ ! 1823: status = mp_modexp(p2,temp1,temp2,p); ! 1824: if (status < 0) /* mp_modexp returned an error. */ ! 1825: return(status); /* error return */ ! 1826: ! 1827: /* Now compute a short modexp with mod q precision... */ ! 1828: ! 1829: /* q2 = [ (C mod q)**( d mod (q-1) ) ] mod q */ ! 1830: mp_move(temp1,q); ! 1831: mp_dec(temp1); /* temp1 = q-1 */ ! 1832: mp_mod(temp2,d,temp1); /* temp2 = d mod (q-1) */ ! 1833: mp_mod(temp1,C,q); /* temp1 = C mod q */ ! 1834: status = mp_modexp(q2,temp1,temp2,q); ! 1835: if (status < 0) /* mp_modexp returned an error. */ ! 1836: return(status); /* error return */ ! 1837: ! 1838: /* Now use the multiplicative inverse u to glue together the ! 1839: two halves, saving a lot of time by avoiding a full mod n ! 1840: exponentiation. At key generation time, u was computed ! 1841: with the secret key as the multiplicative inverse of ! 1842: p, mod q such that: (p*u) mod q = 1, assuming p<q. ! 1843: */ ! 1844: if (mp_compare(p2,q2) == 0) /* only happens if C<p */ ! 1845: mp_move(M,p2); ! 1846: else ! 1847: { /* q2 = q2 - p2; if q2<0 then q2 = q2 + q */ ! 1848: if (mp_sub(q2,p2)) /* if the result went negative... */ ! 1849: mp_add(q2,q); /* add q to q2 */ ! 1850: ! 1851: /* M = p2 + ( p * [(q2*u) mod q] ) */ ! 1852: mp_mult(temp1,q2,u); /* temp1 = q2*u */ ! 1853: mp_mod(temp2,temp1,q); /* temp2 = temp1 mod q */ ! 1854: mp_mult(temp1,p,temp2); /* temp1 = p * temp2 */ ! 1855: mp_add(temp1,p2); /* temp1 = temp1 + p2 */ ! 1856: mp_move(M,temp1); /* M = temp1 */ ! 1857: } ! 1858: ! 1859: mp_burn(p2); /* burn the evidence on the stack...*/ ! 1860: mp_burn(q2); ! 1861: mp_burn(temp1); ! 1862: mp_burn(temp2); ! 1863: return(0); /* normal return */ ! 1864: } /* rsa_decrypt */ ! 1865: ! 1866: ! 1867: /****************** end of MPI library ****************************/ ! 1868:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.