|
|
1.1 ! root 1: /* C source code for multiprecision arithmetic library routines. ! 2: Implemented Nov 86 by Philip Zimmermann ! 3: Last revised 27 Nov 91 by PRZ ! 4: ! 5: Boulder Software Engineering ! 6: 3021 Eleventh Street ! 7: Boulder, CO 80304 ! 8: (303) 541-0140 ! 9: ! 10: (c) Copyright 1986-92 by Philip Zimmermann. All rights reserved. ! 11: The author assumes no liability for damages resulting from the use ! 12: of this software, even if the damage results from defects in this ! 13: software. No warranty is expressed or implied. The use of this ! 14: cryptographic software for developing weapon systems is expressly ! 15: forbidden. ! 16: ! 17: These routines implement all of the multiprecision arithmetic ! 18: necessary for number-theoretic cryptographic algorithms such as ! 19: ElGamal, Diffie-Hellman, Rabin, or factoring studies for large ! 20: composite numbers, as well as Rivest-Shamir-Adleman (RSA) public ! 21: key cryptography. ! 22: ! 23: Although originally developed in Microsoft C for the IBM PC, this code ! 24: contains few machine dependancies. It assumes 2's complement ! 25: arithmetic. It can be adapted to 8-bit, 16-bit, or 32-bit machines, ! 26: lowbyte-highbyte order or highbyte-lowbyte order. This version ! 27: has been converted to ANSI C. ! 28: ! 29: ! 30: The internal representation for these extended precision integer ! 31: "registers" is an array of "units". A unit is a machine word, which ! 32: is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit ! 33: unsigned integer, depending on the machine's word size. For example, ! 34: an IBM PC or AT uses a unit size of 16 bits. To perform arithmetic ! 35: on these huge precision integers, we pass pointers to these unit ! 36: arrays to various subroutines. A pointer to an array of units is of ! 37: type unitptr. This is a pointer to a huge integer "register". ! 38: ! 39: When calling a subroutine, we always pass a pointer to the BEGINNING ! 40: of the array of units, regardless of the byte order of the machine. ! 41: On a lowbyte-first machine, such as the Intel 80x86, this unitptr ! 42: points to the LEAST significant unit, and the array of units increases ! 43: significance to the right. On a highbyte-first machine, such as the ! 44: Motorola 680x0, this unitptr points to the MOST significant unit, and ! 45: the array of units decreases significance to the right. ! 46: ! 47: Modified 8 Apr 92 - HAJK ! 48: Implement new VAX/VMS primitive support. ! 49: ! 50: */ ! 51: ! 52: /* #define COUNTMULTS */ /* count modmults for performance studies */ ! 53: ! 54: #ifdef DEBUG ! 55: #ifdef MSDOS ! 56: #include <conio.h> ! 57: #define poll_for_break() {while (kbhit()) getch();} ! 58: #endif /* MSDOS */ ! 59: #endif /* DEBUG */ ! 60: ! 61: #ifndef poll_for_break ! 62: #define poll_for_break() /* stub */ ! 63: #endif ! 64: ! 65: #include "mpilib.h" ! 66: ! 67: short global_precision = 0; /* units of precision for all routines */ ! 68: /* global_precision is the unit precision last set by set_precision. ! 69: Initially, set_precision() should be called to define global_precision ! 70: before using any of these other multiprecision library routines. ! 71: i.e.: set_precision(MAX_UNIT_PRECISION); ! 72: */ ! 73: ! 74: #ifdef PORTABLE ! 75: /*************** multiprecision library primitives ****************/ ! 76: /* The following portable C primitives should be recoded in assembly. ! 77: The equivalent assembly primitives are externally defined under ! 78: different names, unless PORTABLE is defined. See the header file ! 79: "mpilib.h" for further details. ! 80: ! 81: The carry bit mechanism we use for these portable primitives ! 82: will only work if the size of unit is smaller than the size of ! 83: a long integer. In most cases, this means UNITSIZE must be ! 84: less than 32 bits -- if you use the C portable primitives. ! 85: */ ! 86: ! 87: typedef unsigned long int ulint; ! 88: #define carrybit ((ulint) 1 << UNITSIZE) ! 89: /* ...assumes sizeof(unit) < sizeof(unsigned long) */ ! 90: ! 91: boolean mp_addc ! 92: (register unitptr r1,register unitptr r2,register boolean carry) ! 93: /* multiprecision add with carry r2 to r1, result in r1 */ ! 94: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 95: { register ulint x; /* won't work if sizeof(unit)==sizeof(long) */ ! 96: short precision; /* number of units to add */ ! 97: precision = global_precision; ! 98: make_lsbptr(r1,precision); ! 99: make_lsbptr(r2,precision); ! 100: while (precision--) ! 101: { x = (ulint) *r1 + (ulint) *post_higherunit(r2) + (ulint) carry; ! 102: *post_higherunit(r1) = x; ! 103: carry = ((x & carrybit) != 0L); ! 104: } ! 105: return(carry); /* return the final carry flag bit */ ! 106: } /* mp_addc */ ! 107: ! 108: ! 109: boolean mp_subb ! 110: (register unitptr r1,register unitptr r2,register boolean borrow) ! 111: /* multiprecision subtract with borrow, r2 from r1, result in r1 */ ! 112: /* borrow is incoming borrow flag-- value should be 0 or 1 */ ! 113: { register ulint x; /* won't work if sizeof(unit)==sizeof(long) */ ! 114: short precision; /* number of units to subtract */ ! 115: precision = global_precision; ! 116: make_lsbptr(r1,precision); ! 117: make_lsbptr(r2,precision); ! 118: while (precision--) ! 119: { x = (ulint) *r1 - (ulint) *post_higherunit(r2) - (ulint) borrow; ! 120: *post_higherunit(r1) = x; ! 121: borrow = ((x & carrybit) != 0L); ! 122: } ! 123: return(borrow); /* return the final carry/borrow flag bit */ ! 124: } /* mp_subb */ ! 125: ! 126: #undef carrybit ! 127: ! 128: ! 129: boolean mp_rotate_left(register unitptr r1,register boolean carry) ! 130: /* multiprecision rotate left 1 bit with carry, result in r1. */ ! 131: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 132: { register short precision; /* number of units to rotate */ ! 133: register boolean nextcarry; ! 134: precision = global_precision; ! 135: make_lsbptr(r1,precision); ! 136: while (precision--) ! 137: { ! 138: nextcarry = (((signedunit) *r1) < 0); ! 139: /* nextcarry = ((*r1 & uppermostbit) != 0); */ ! 140: *r1 <<= 1 ; ! 141: if (carry) *r1 |= 1; ! 142: carry = nextcarry; ! 143: pre_higherunit(r1); ! 144: } ! 145: return(nextcarry); /* return the final carry flag bit */ ! 146: } /* mp_rotate_left */ ! 147: ! 148: /************** end of primitives that should be in assembly *************/ ! 149: #endif /* PORTABLE */ ! 150: ! 151: ! 152: /* The mp_shift_right_bits function is not called in any time-critical ! 153: situations in public-key cryptographic functions, so it doesn't ! 154: need to be coded in assembly language. ! 155: */ ! 156: void mp_shift_right_bits(register unitptr r1,register short bits) ! 157: /* multiprecision shift right bits, result in r1. ! 158: bits is how many bits to shift, must be < UNITSIZE. ! 159: */ ! 160: { register short precision; /* number of units to shift */ ! 161: register unit carry,nextcarry,bitmask; ! 162: register short unbits; ! 163: if (bits==0) return; /* shift zero bits is a no-op */ ! 164: carry = 0; ! 165: bitmask = power_of_2(bits)-1; ! 166: unbits = UNITSIZE-bits; /* shift bits must be < UNITSIZE */ ! 167: precision = global_precision; ! 168: make_msbptr(r1,precision); ! 169: while (precision--) ! 170: { nextcarry = *r1 & bitmask; ! 171: *r1 >>= bits; ! 172: *r1 |= carry << unbits; ! 173: carry = nextcarry; ! 174: pre_lowerunit(r1); ! 175: } ! 176: } /* mp_shift_right_bits */ ! 177: ! 178: ! 179: #ifndef VMS ! 180: ! 181: short mp_compare(register unitptr r1,register unitptr r2) ! 182: /* Compares multiprecision integers *r1, *r2, and returns: ! 183: -1 iff *r1 < *r2 ! 184: 0 iff *r1 == *r2 ! 185: +1 iff *r1 > *r2 ! 186: */ ! 187: { register short precision; /* number of units to compare */ ! 188: ! 189: precision = global_precision; ! 190: make_msbptr(r1,precision); ! 191: make_msbptr(r2,precision); ! 192: do ! 193: { if (*r1 < *r2) ! 194: return(-1); ! 195: if (*post_lowerunit(r1) > *post_lowerunit(r2)) ! 196: return(1); ! 197: } while (--precision); ! 198: return(0); /* *r1 == *r2 */ ! 199: } /* mp_compare */ ! 200: ! 201: #endif /* VMS */ ! 202: ! 203: boolean mp_inc(register unitptr r) ! 204: /* Increment multiprecision integer r. */ ! 205: { register short precision; ! 206: precision = global_precision; ! 207: make_lsbptr(r,precision); ! 208: do ! 209: { if ( ++(*r) ) return(0); /* no carry */ ! 210: post_higherunit(r); ! 211: } while (--precision); ! 212: return(1); /* return carry set */ ! 213: } /* mp_inc */ ! 214: ! 215: ! 216: boolean mp_dec(register unitptr r) ! 217: /* Decrement multiprecision integer r. */ ! 218: { register short precision; ! 219: precision = global_precision; ! 220: make_lsbptr(r,precision); ! 221: do ! 222: { if ( (signedunit) (--(*r)) != (signedunit) -1 ) ! 223: return(0); /* no borrow */ ! 224: post_higherunit(r); ! 225: } while (--precision); ! 226: return(1); /* return borrow set */ ! 227: } /* mp_dec */ ! 228: ! 229: ! 230: void mp_neg(register unitptr r) ! 231: /* Compute 2's complement, the arithmetic negative, of r */ ! 232: { register short precision; /* number of units to negate */ ! 233: precision = global_precision; ! 234: mp_dec(r); /* 2's complement is 1's complement plus 1 */ ! 235: do /* now do 1's complement */ ! 236: { *r = ~(*r); ! 237: r++; ! 238: } while (--precision); ! 239: } /* mp_neg */ ! 240: ! 241: #ifndef VAXC /* Replaced by a macro under VAX C */ ! 242: ! 243: void mp_move(register unitptr dst,register unitptr src) ! 244: { register short precision; /* number of units to move */ ! 245: precision = global_precision; ! 246: do { *dst++ = *src++; } while (--precision); ! 247: } /* mp_move */ ! 248: ! 249: #endif /* VAXC */ ! 250: void mp_init(register unitptr r, word16 value) ! 251: /* Init multiprecision register r with short value. */ ! 252: { /* Note that mp_init doesn't extend sign bit for >32767 */ ! 253: register short precision; /* number of units to init */ ! 254: precision = global_precision; ! 255: make_lsbptr(r,precision); ! 256: *post_higherunit(r) = value; precision--; ! 257: #ifdef UNIT8 ! 258: *post_higherunit(r) = value >> UNITSIZE; precision--; ! 259: #endif ! 260: #ifdef VAXC ! 261: ! 262: unitfill0( r, precision); /* Use character insts. on a VAX */ ! 263: ! 264: #else /* VAXC */ ! 265: ! 266: while (precision--) ! 267: *post_higherunit(r) = 0; ! 268: ! 269: #endif /* VAXC */ ! 270: ! 271: } /* mp_init */ ! 272: ! 273: ! 274: short significance(register unitptr r) ! 275: /* Returns number of significant units in r */ ! 276: { register short precision; ! 277: precision = global_precision; ! 278: make_msbptr(r,precision); ! 279: do ! 280: { if (*post_lowerunit(r)) ! 281: return(precision); ! 282: } while (--precision); ! 283: return(precision); ! 284: } /* significance */ ! 285: ! 286: ! 287: #ifndef VAXC /* Replaced by a macro under VAX C */ ! 288: ! 289: void unitfill0(unitptr r,word16 unitcount) ! 290: /* Zero-fill the unit buffer r. */ ! 291: { while (unitcount--) *r++ = 0; ! 292: } /* unitfill0 */ ! 293: ! 294: #endif /* VAXC */ ! 295: ! 296: /* The macro normalize(r,precision) "normalizes" a multiprecision integer ! 297: by adjusting r and precision to new values. For Motorola-style processors ! 298: (MSB-first), r is a pointer to the MSB of the register, and must ! 299: be adjusted to point to the first nonzero unit. For Intel/VAX-style ! 300: (LSB-first) processors, r is a pointer to the LSB of the register, ! 301: and must be left unchanged. The precision counter is always adjusted, ! 302: regardless of processor type. In the case of precision = 0, ! 303: r becomes undefined. ! 304: */ ! 305: ! 306: ! 307: /* The macro rescale(r,current_precision,new_precision) rescales ! 308: a multiprecision integer by adjusting r and its precision to new values. ! 309: It can be used to reverse the effects of the normalize ! 310: routine given above. See the comments in normalize concerning ! 311: Intel vs. Motorola LSB/MSB conventions. ! 312: WARNING: You can only safely call rescale on registers that ! 313: you have previously normalized with the above normalize routine, ! 314: or are known to be big enough for the new precision. You may ! 315: specify a new precision that is smaller than the current precision. ! 316: You must be careful not to specify a new_precision value that is ! 317: too big, or which adjusts the r pointer out of range. ! 318: */ ! 319: ! 320: ! 321: /* The "bit sniffer" macros all begin sniffing at the MSB. ! 322: They are used internally by all the various multiply, divide, ! 323: modulo, exponentiation, and square root functions. ! 324: */ ! 325: ! 326: ! 327: int mp_udiv(register unitptr remainder,register unitptr quotient, ! 328: register unitptr dividend,register unitptr divisor) ! 329: /* Unsigned divide, treats both operands as positive. */ ! 330: { int bits; ! 331: short dprec; ! 332: register unit bitmask; ! 333: if (testeq(divisor,0)) ! 334: return(-1); /* zero divisor means divide error */ ! 335: mp_init0(remainder); ! 336: mp_init0(quotient); ! 337: /* normalize and compute number of bits in dividend first */ ! 338: init_bitsniffer(dividend,bitmask,dprec,bits); ! 339: /* rescale quotient to same precision (dprec) as dividend */ ! 340: rescale(quotient,global_precision,dprec); ! 341: make_msbptr(quotient,dprec); ! 342: ! 343: while (bits--) ! 344: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0)); ! 345: if (mp_compare(remainder,divisor) >= 0) ! 346: { mp_sub(remainder,divisor); ! 347: stuff_bit(quotient,bitmask); ! 348: } ! 349: bump_2bitsniffers(dividend,quotient,bitmask); ! 350: } ! 351: return(0); ! 352: } /* mp_udiv */ ! 353: ! 354: ! 355: #define RECIPMARGIN 0 /* extra margin bits used by mp_recip() */ ! 356: ! 357: int mp_recip(register unitptr quotient,register unitptr divisor) ! 358: /* Compute reciprocal (quotient) as 1/divisor. Used by faster modmult. */ ! 359: { int bits; ! 360: short qprec; ! 361: register unit bitmask; ! 362: unit remainder[MAX_UNIT_PRECISION]; ! 363: if (testeq(divisor,0)) ! 364: return(-1); /* zero divisor means divide error */ ! 365: mp_init0(remainder); ! 366: mp_init0(quotient); ! 367: ! 368: /* normalize and compute number of bits in quotient first */ ! 369: bits = countbits(divisor) + RECIPMARGIN; ! 370: bitmask = bitmsk(bits); /* bitmask within a single unit */ ! 371: qprec = bits2units(bits+1); ! 372: mp_setbit(remainder,(bits-RECIPMARGIN)-1); ! 373: /* rescale quotient to precision of divisor + RECIPMARGIN bits */ ! 374: rescale(quotient,global_precision,qprec); ! 375: make_msbptr(quotient,qprec); ! 376: ! 377: while (bits--) ! 378: { mp_shift_left(remainder); ! 379: if (mp_compare(remainder,divisor) >= 0) ! 380: { mp_sub(remainder,divisor); ! 381: stuff_bit(quotient,bitmask); ! 382: } ! 383: bump_bitsniffer(quotient,bitmask); ! 384: } ! 385: mp_init0(remainder); /* burn sensitive data left on stack */ ! 386: return(0); ! 387: } /* mp_recip */ ! 388: ! 389: ! 390: int mp_div(register unitptr remainder,register unitptr quotient, ! 391: register unitptr dividend,register unitptr divisor) ! 392: /* Signed divide, either or both operands may be negative. */ ! 393: { boolean dvdsign,dsign; ! 394: int status; ! 395: dvdsign = (mp_tstminus(dividend)!=0); ! 396: dsign = (mp_tstminus(divisor)!=0); ! 397: if (dvdsign) mp_neg(dividend); ! 398: if (dsign) mp_neg(divisor); ! 399: status = mp_udiv(remainder,quotient,dividend,divisor); ! 400: if (dvdsign) mp_neg(dividend); /* repair caller's dividend */ ! 401: if (dsign) mp_neg(divisor); /* repair caller's divisor */ ! 402: if (status<0) return(status); /* divide error? */ ! 403: if (dvdsign) mp_neg(remainder); ! 404: if (dvdsign ^ dsign) mp_neg(quotient); ! 405: return(status); ! 406: } /* mp_div */ ! 407: ! 408: ! 409: word16 mp_shortdiv(register unitptr quotient, ! 410: register unitptr dividend,register word16 divisor) ! 411: /* This function does a fast divide and mod on a multiprecision dividend ! 412: using a short integer divisor returning a short integer remainder. ! 413: This is an unsigned divide. It treats both operands as positive. ! 414: It is used mainly for faster printing of large numbers in base 10. ! 415: */ ! 416: { int bits; ! 417: short dprec; ! 418: register unit bitmask; ! 419: register word16 remainder; ! 420: if (!divisor) /* if divisor == 0 */ ! 421: return(-1); /* zero divisor means divide error */ ! 422: remainder=0; ! 423: mp_init0(quotient); ! 424: /* normalize and compute number of bits in dividend first */ ! 425: init_bitsniffer(dividend,bitmask,dprec,bits); ! 426: /* rescale quotient to same precision (dprec) as dividend */ ! 427: rescale(quotient,global_precision,dprec); ! 428: make_msbptr(quotient,dprec); ! 429: ! 430: while (bits--) ! 431: { remainder <<= 1; ! 432: if (sniff_bit(dividend,bitmask)) ! 433: remainder++; ! 434: if (remainder >= divisor) ! 435: { remainder -= divisor; ! 436: stuff_bit(quotient,bitmask); ! 437: } ! 438: bump_2bitsniffers(dividend,quotient,bitmask); ! 439: } ! 440: return(remainder); ! 441: } /* mp_shortdiv */ ! 442: ! 443: ! 444: int mp_mod(register unitptr remainder, ! 445: register unitptr dividend,register unitptr divisor) ! 446: /* Unsigned divide, treats both operands as positive. */ ! 447: { int bits; ! 448: short dprec; ! 449: register unit bitmask; ! 450: if (testeq(divisor,0)) ! 451: return(-1); /* zero divisor means divide error */ ! 452: mp_init0(remainder); ! 453: /* normalize and compute number of bits in dividend first */ ! 454: init_bitsniffer(dividend,bitmask,dprec,bits); ! 455: ! 456: while (bits--) ! 457: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0)); ! 458: msub(remainder,divisor); ! 459: bump_bitsniffer(dividend,bitmask); ! 460: } ! 461: return(0); ! 462: } /* mp_mod */ ! 463: ! 464: ! 465: word16 mp_shortmod(register unitptr dividend,register word16 divisor) ! 466: /* This function does a fast mod operation on a multprecision dividend ! 467: using a short integer modulus returning a short integer remainder. ! 468: This is an unsigned divide. It treats both operands as positive. ! 469: It is used mainly for fast sieve searches for large primes. ! 470: */ ! 471: { int bits; ! 472: short dprec; ! 473: register unit bitmask; ! 474: register word16 remainder; ! 475: if (!divisor) /* if divisor == 0 */ ! 476: return(-1); /* zero divisor means divide error */ ! 477: remainder=0; ! 478: /* normalize and compute number of bits in dividend first */ ! 479: init_bitsniffer(dividend,bitmask,dprec,bits); ! 480: ! 481: while (bits--) ! 482: { remainder <<= 1; ! 483: if (sniff_bit(dividend,bitmask)) ! 484: remainder++; ! 485: if (remainder >= divisor) remainder -= divisor; ! 486: bump_bitsniffer(dividend,bitmask); ! 487: } ! 488: return(remainder); ! 489: } /* mp_shortmod */ ! 490: ! 491: ! 492: ! 493: #ifdef COMB_MULT /* use faster "comb" multiply algorithm */ ! 494: /* We are skipping this code because it has a bug... */ ! 495: ! 496: int mp_mult(register unitptr prod, ! 497: register unitptr multiplicand, register unitptr multiplier) ! 498: /* Computes multiprecision prod = multiplicand * multiplier */ ! 499: { /* Uses interleaved comb multiply algorithm. ! 500: This improved multiply more than twice as fast as a Russian ! 501: peasant multiply, because it does a lot fewer shifts. ! 502: Must have global_precision set to the size of the multiplicand ! 503: plus UNITSIZE-1 SLOP_BITS. Produces a product that is the sum ! 504: of the lengths of the multiplier and multiplicand. ! 505: ! 506: BUG ALERT: Unfortunately, this code has a bug. It fails for ! 507: some numbers. One such example: ! 508: x= 59DE 60CE 2345 8091 A02B 2A1C DBC3 8BE5 ! 509: x*x= 59DE 60CE 2345 26B3 993B 67A5 2499 0B7D ! 510: 52C8 CDC7 AFB3 61C8 243C 741B ! 511: --which is obviously wrong. The answer should be: ! 512: x*x= 1F8C 607B 5EA6 C061 2714 04A9 A0C6 A17A ! 513: C9AB 6095 C62F 3756 3843 E4D0 3950 7AD9 ! 514: We'll have to fix this some day. In the meantime, we'll ! 515: just have the compiler skip over this code. ! 516: */ ! 517: int bits; ! 518: register unit bitmask; ! 519: unitptr product, mplier, temp; ! 520: short mprec,mprec2; ! 521: unit mplicand[MAX_UNIT_PRECISION]; ! 522: mp_init(prod,0); /* better clear full width--double precision */ ! 523: if (testeq(multiplicand,0)) ! 524: return(0); /* zero multiplicand means zero product */ ! 525: ! 526: mp_move(mplicand,multiplicand); /* save it from damage */ ! 527: ! 528: normalize(multiplier,mprec); ! 529: if (!mprec) ! 530: return(0); ! 531: ! 532: make_lsbptr(multiplier,mprec); ! 533: bitmask = 1; /* start scan at LSB of multiplier */ ! 534: ! 535: do /* UNITSIZE times */ ! 536: { /* do for bits 0-15 */ ! 537: product = prod; ! 538: mplier = multiplier; ! 539: mprec2 = mprec; ! 540: while (mprec2--) /* do for each word in multiplier */ ! 541: { ! 542: if (sniff_bit(mplier,bitmask)) ! 543: { if (mp_addc(product,multiplicand,0)) /* ripple carry */ ! 544: { /* After 1st time thru, this is rarely encountered. */ ! 545: temp = msbptr(product,global_precision); ! 546: pre_higherunit(temp); ! 547: /* temp now points to LSU of carry region. */ ! 548: unmake_lsbptr(temp,global_precision); ! 549: mp_inc(temp); ! 550: } /* ripple carry */ ! 551: } ! 552: pre_higherunit(mplier); ! 553: pre_higherunit(product); ! 554: } ! 555: if (!(bitmask <<= 1)) ! 556: break; ! 557: mp_shift_left(multiplicand); ! 558: ! 559: } while (TRUE); ! 560: ! 561: mp_move(multiplicand,mplicand); /* recover */ ! 562: ! 563: return(0); /* normal return */ ! 564: } /* mp_mult */ ! 565: ! 566: #endif /* COMB_MULT */ ! 567: ! 568: ! 569: /* Because the "comb" multiply has a bug, we will use the slower ! 570: Russian peasant multiply instead. Fortunately, the mp_mult ! 571: function is not called from any time-critical code. ! 572: */ ! 573: ! 574: int mp_mult(register unitptr prod, ! 575: register unitptr multiplicand,register unitptr multiplier) ! 576: /* Computes multiprecision prod = multiplicand * multiplier */ ! 577: { /* Uses "Russian peasant" multiply algorithm. */ ! 578: int bits; ! 579: register unit bitmask; ! 580: short mprec; ! 581: mp_init(prod,0); ! 582: if (testeq(multiplicand,0)) ! 583: return(0); /* zero multiplicand means zero product */ ! 584: /* normalize and compute number of bits in multiplier first */ ! 585: init_bitsniffer(multiplier,bitmask,mprec,bits); ! 586: ! 587: while (bits--) ! 588: { mp_shift_left(prod); ! 589: if (sniff_bit(multiplier,bitmask)) ! 590: mp_add(prod,multiplicand); ! 591: bump_bitsniffer(multiplier,bitmask); ! 592: } ! 593: return(0); ! 594: } /* mp_mult */ ! 595: ! 596: ! 597: ! 598: /* mp_modmult computes a multiprecision multiply combined with a ! 599: modulo operation. This is the most time-critical function in ! 600: this multiprecision arithmetic library for performing modulo ! 601: exponentiation. We experimented with different versions of modmult, ! 602: depending on the machine architecture and performance requirements. ! 603: We will either use a Russian Peasant modmult (peasant_modmult), ! 604: Charlie Merritt's modmult (merritt_modmult) or Jimmy Upton's ! 605: modmult (upton_modmult). On machines with a hardware atomic ! 606: multiply instruction, Upton's modmult is fastest, which depends ! 607: on an assembly subroutine to speed up the hardware multiply logic. ! 608: If the machine lacks a fast hardware multiply, Merritt's modmult ! 609: is preferred, which doesn't call any assembly multiply routine. ! 610: We use the alias names mp_modmult, stage_modulus, and modmult_burn ! 611: for the corresponding true names, which depend on what flavor of ! 612: modmult we are using. ! 613: ! 614: Before making the first call to mp_modmult, you must set up the ! 615: modulus-dependant precomputated tables by calling stage_modulus. ! 616: After making all the calls to mp_modmult, you call modmult_burn to ! 617: erase the tables created by stage_modulus that were left in memory. ! 618: */ ! 619: ! 620: #ifdef COUNTMULTS ! 621: /* "number of modmults" counters, used for performance studies. */ ! 622: static unsigned int tally_modmults = 0; ! 623: static unsigned int tally_modsquares = 0; ! 624: #endif /* COUNTMULTS */ ! 625: ! 626: ! 627: #ifdef PEASANT ! 628: /* Conventional Russian peasant multiply with modulo algorithm. */ ! 629: ! 630: static unitptr modulus = 0; /* used only by mp_modmult */ ! 631: ! 632: int stage_peasant_modulus(unitptr n) ! 633: /* Must pass modulus to stage_modulus before calling modmult. ! 634: Assumes that global_precision has already been adjusted to the ! 635: size of the modulus, plus SLOP_BITS. ! 636: */ ! 637: { /* For this simple version of modmult, just copy unit pointer. */ ! 638: modulus = n; ! 639: return(0); /* normal return */ ! 640: } /* stage_peasant_modulus */ ! 641: ! 642: ! 643: int peasant_modmult(register unitptr prod, ! 644: unitptr multiplicand,register unitptr multiplier) ! 645: { /* "Russian peasant" multiply algorithm, combined with a modulo ! 646: operation. This is a simple naive replacement for Merritt's ! 647: faster modmult algorithm. References global unitptr "modulus". ! 648: Computes: prod = (multiplicand*multiplier) mod modulus ! 649: WARNING: All the arguments must be less than the modulus! ! 650: */ ! 651: int bits; ! 652: register unit bitmask; ! 653: short mprec; ! 654: mp_init(prod,0); ! 655: /* if (testeq(multiplicand,0)) ! 656: return(0); */ /* zero multiplicand means zero product */ ! 657: /* normalize and compute number of bits in multiplier first */ ! 658: init_bitsniffer(multiplier,bitmask,mprec,bits); ! 659: ! 660: while (bits--) ! 661: { mp_shift_left(prod); ! 662: msub(prod,modulus); /* turns mult into modmult */ ! 663: if (sniff_bit(multiplier,bitmask)) ! 664: { mp_add(prod,multiplicand); ! 665: msub(prod,modulus); /* turns mult into modmult */ ! 666: } ! 667: bump_bitsniffer(multiplier,bitmask); ! 668: } ! 669: return(0); ! 670: } /* peasant_modmult */ ! 671: ! 672: ! 673: /* If we are using a version of mp_modmult that uses internal tables ! 674: in memory, we have to call modmult_burn() at the end of mp_modexp. ! 675: This is so that no sensitive data is left in memory after the program ! 676: exits. The Russian peasant method doesn't use any such tables. ! 677: */ ! 678: static void peasant_burn(void) ! 679: /* Alias for modmult_burn, called only from mp_modexp(). Destroys ! 680: internal modmult tables. This version does nothing because no ! 681: tables are used by the Russian peasant modmult. */ ! 682: { } /* peasant_burn */ ! 683: ! 684: #endif /* PEASANT */ ! 685: ! 686: ! 687: #ifdef MERRITT ! 688: /*=========================================================================*/ ! 689: /* ! 690: This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ. ! 691: Also refined by Zhahai Stewart to reduce the number of subtracts. ! 692: It performs a multiply combined with a modulo operation, without ! 693: going into "double precision". It is faster than the Russian peasant ! 694: method, and still works well on machines that lack a fast hardware ! 695: multiply instruction. ! 696: */ ! 697: ! 698: /* The following support functions, macros, and data structures ! 699: are used only by Merritt's modmult algorithm... */ ! 700: ! 701: static void mp_lshift_unit(register unitptr r1) ! 702: /* Shift r1 1 whole unit to the left. Used only by modmult function. */ ! 703: { register short precision; ! 704: register unitptr r2; ! 705: precision = global_precision; ! 706: make_msbptr(r1,precision); ! 707: r2 = r1; ! 708: while (--precision) ! 709: *post_lowerunit(r1) = *pre_lowerunit(r2); ! 710: *r1 = 0; ! 711: } /* mp_lshift_unit */ ! 712: ! 713: ! 714: /* moduli_buf contains shifted images of the modulus, set by stage_modulus */ ! 715: static unit moduli_buf[UNITSIZE][MAX_UNIT_PRECISION] = {0}; ! 716: static unitptr moduli[UNITSIZE+1] = /* contains pointers into moduli_buf */ ! 717: { 0 ! 718: ,&moduli_buf[ 0][0], &moduli_buf[ 1][0], &moduli_buf[ 2][0], &moduli_buf[ 3][0], ! 719: &moduli_buf[ 4][0], &moduli_buf[ 5][0], &moduli_buf[ 6][0], &moduli_buf[ 7][0] ! 720: #ifndef UNIT8 ! 721: ,&moduli_buf[ 8][0], &moduli_buf[ 9][0], &moduli_buf[10][0], &moduli_buf[11][0], ! 722: &moduli_buf[12][0], &moduli_buf[13][0], &moduli_buf[14][0], &moduli_buf[15][0] ! 723: #ifndef UNIT16 /* and not UNIT8 */ ! 724: ,&moduli_buf[16][0], &moduli_buf[17][0], &moduli_buf[18][0], &moduli_buf[19][0], ! 725: &moduli_buf[20][0], &moduli_buf[21][0], &moduli_buf[22][0], &moduli_buf[23][0], ! 726: &moduli_buf[24][0], &moduli_buf[25][0], &moduli_buf[26][0], &moduli_buf[27][0], ! 727: &moduli_buf[28][0], &moduli_buf[29][0], &moduli_buf[30][0], &moduli_buf[31][0] ! 728: #endif /* UNIT16 and UNIT8 not defined */ ! 729: #endif /* UNIT8 not defined */ ! 730: }; ! 731: ! 732: /* To optimize msubs, need following 2 unit arrays, each filled ! 733: with the most significant unit and next-to-most significant unit ! 734: of the preshifted images of the modulus. */ ! 735: static unit msu_moduli[UNITSIZE+1] = {0}; /* most signif. unit */ ! 736: static unit nmsu_moduli[UNITSIZE+1] = {0}; /* next-most signif. unit */ ! 737: ! 738: /* mpdbuf contains preshifted images of the multiplicand, mod n. ! 739: It is used only by mp_modmult. It could be staticly declared ! 740: inside of mp_modmult, but we put it outside mp_modmult so that ! 741: it can be wiped clean by modmult_burn(), which is called at the ! 742: end of mp_modexp. This is so that no sensitive data is left in ! 743: memory after the program exits. ! 744: */ ! 745: static unit mpdbuf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0}; ! 746: ! 747: ! 748: static void stage_mp_images(unitptr images[UNITSIZE],unitptr r) ! 749: /* Computes UNITSIZE images of r, each shifted left 1 more bit. ! 750: Used only by modmult function. ! 751: */ ! 752: { short int i; ! 753: images[0] = r; /* no need to move the first image, just copy ptr */ ! 754: for (i=1; i<UNITSIZE; i++) ! 755: { mp_move(images[i],images[i-1]); ! 756: mp_shift_left(images[i]); ! 757: } ! 758: } /* stage_mp_images */ ! 759: ! 760: ! 761: int stage_merritt_modulus(unitptr n) ! 762: /* Computes UNITSIZE+1 images of modulus, each shifted left 1 more bit. ! 763: Before calling mp_modmult, you must first stage the moduli images by ! 764: calling stage_modulus. n is the pointer to the modulus. ! 765: Assumes that global_precision has already been adjusted to the ! 766: size of the modulus, plus SLOP_BITS. ! 767: */ ! 768: { short int i; ! 769: unitptr msu; /* ptr to most significant unit, for faster msubs */ ! 770: moduli[0] = n; /* no need to move the first image, just copy ptr */ ! 771: ! 772: /* used by optimized msubs macro... */ ! 773: msu = msbptr(n,global_precision); /* needed by msubs */ ! 774: msu_moduli[0] = *post_lowerunit(msu); ! 775: nmsu_moduli[0] = *msu; ! 776: ! 777: for (i=1; i<UNITSIZE+1; i++) ! 778: { mp_move(moduli[i],moduli[i-1]); ! 779: mp_shift_left(moduli[i]); ! 780: ! 781: /* used by optimized msubs macro... */ ! 782: msu = msbptr(moduli[i],global_precision); /* needed by msubs */ ! 783: msu_moduli[i] = *post_lowerunit(msu); /* for faster msubs */ ! 784: nmsu_moduli[i] = *msu; ! 785: } ! 786: return(0); /* normal return */ ! 787: } /* stage_merritt_modulus */ ! 788: ! 789: ! 790: /* The following macros, sniffadd and msubs, are used by modmult... */ ! 791: #define sniffadd(i) if (*multiplier & power_of_2(i)) mp_add(prod,mpd[i]) ! 792: ! 793: /* Unoptimized msubs macro (msubs0) follows... */ ! 794: /* #define msubs0(i) msub(prod,moduli[i]) ! 795: */ ! 796: ! 797: /* To optimize msubs, compare the most significant units of the ! 798: product and the shifted modulus before deciding to call the full ! 799: mp_compare routine. Better still, compare the upper two units ! 800: before deciding to call mp_compare. ! 801: Optimization of msubs relies heavily on standard C left-to-right ! 802: parsimonious evaluation of logical expressions. ! 803: */ ! 804: ! 805: /* Partially-optimized msubs macro (msubs1) follows... */ ! 806: /* #define msubs1(i) if ( \ ! 807: ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \ ! 808: (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \ ! 809: ) mp_sub(prod,moduli[i]) ! 810: */ ! 811: ! 812: /* Fully-optimized msubs macro (msubs2) follows... */ ! 813: #define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \ ! 814: (p_m==0) && ( (*nmsu_prod>nmsu_moduli[i]) || ( \ ! 815: (*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \ ! 816: mp_sub(prod,moduli[i]) ! 817: ! 818: ! 819: int merritt_modmult(register unitptr prod, ! 820: unitptr multiplicand,register unitptr multiplier) ! 821: /* Performs combined multiply/modulo operation. ! 822: Computes: prod = (multiplicand*multiplier) mod modulus ! 823: WARNING: All the arguments must be less than the modulus! ! 824: Assumes the modulus has been predefined by first calling ! 825: stage_modulus. ! 826: */ ! 827: { ! 828: /* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro...*/ ! 829: register signedunit p_m; ! 830: register unitptr msu_prod; /* ptr to most significant unit of product */ ! 831: register unitptr nmsu_prod; /* next-most signif. unit of product */ ! 832: short mprec; /* precision of multiplier, in units */ ! 833: /* Array mpd contains a list of pointers to preshifted images of ! 834: the multiplicand: */ ! 835: static unitptr mpd[UNITSIZE] = ! 836: { 0, &mpdbuf[ 0][0], &mpdbuf[ 1][0], &mpdbuf[ 2][0], ! 837: &mpdbuf[ 3][0], &mpdbuf[ 4][0], &mpdbuf[ 5][0], &mpdbuf[ 6][0] ! 838: #ifndef UNIT8 ! 839: ,&mpdbuf[ 7][0], &mpdbuf[ 8][0], &mpdbuf[ 9][0], &mpdbuf[10][0], ! 840: &mpdbuf[11][0], &mpdbuf[12][0], &mpdbuf[13][0], &mpdbuf[14][0] ! 841: #ifndef UNIT16 /* and not UNIT8 */ ! 842: ,&mpdbuf[15][0], &mpdbuf[16][0], &mpdbuf[17][0], &mpdbuf[18][0], ! 843: &mpdbuf[19][0], &mpdbuf[20][0], &mpdbuf[21][0], &mpdbuf[22][0], ! 844: &mpdbuf[23][0], &mpdbuf[24][0], &mpdbuf[25][0], &mpdbuf[26][0], ! 845: &mpdbuf[27][0], &mpdbuf[28][0], &mpdbuf[29][0], &mpdbuf[30][0] ! 846: #endif /* UNIT16 and UNIT8 not defined */ ! 847: #endif /* UNIT8 not defined */ ! 848: }; ! 849: ! 850: /* Compute preshifted images of multiplicand, mod n: */ ! 851: stage_mp_images(mpd,multiplicand); ! 852: ! 853: /* To optimize msubs, set up msu_prod and nmsu_prod: */ ! 854: msu_prod = msbptr(prod,global_precision); /* Get ptr to MSU of prod */ ! 855: nmsu_prod = msu_prod; ! 856: post_lowerunit(nmsu_prod); /* Get next-MSU of prod */ ! 857: ! 858: /* To understand this algorithm, it would be helpful to first ! 859: study the conventional Russian peasant modmult algorithm. ! 860: This one does about the same thing as Russian peasant, but ! 861: is organized differently to save some steps. It loops ! 862: through the multiplier a word (unit) at a time, instead of ! 863: a bit at a time. It word-shifts the product instead of ! 864: bit-shifting it, so it should be faster. It also does about ! 865: half as many subtracts as Russian peasant. ! 866: */ ! 867: ! 868: mp_init(prod,0); /* Initialize product to 0. */ ! 869: ! 870: /* The way mp_modmult is actually used in cryptographic ! 871: applications, there will NEVER be a zero multiplier or ! 872: multiplicand. So there is no need to optimize for that ! 873: condition. ! 874: */ ! 875: /* if (testeq(multiplicand,0)) ! 876: return(0); */ /* zero multiplicand means zero product */ ! 877: /* Normalize and compute number of units in multiplier first: */ ! 878: normalize(multiplier,mprec); ! 879: if (mprec==0) /* if precision of multiplier is 0 */ ! 880: return(0); /* zero multiplier means zero product */ ! 881: make_msbptr(multiplier,mprec); /* start at MSU of multiplier */ ! 882: ! 883: while (mprec--) /* Loop for the number of units in the multiplier */ ! 884: { ! 885: /* Shift the product one whole unit to the left. ! 886: This is part of the multiply phase of modmult. ! 887: */ ! 888: ! 889: mp_lshift_unit(prod); ! 890: ! 891: /* Sniff each bit in the current unit of the multiplier, ! 892: and conditionally add the the corresponding preshifted ! 893: image of the multiplicand to the product. ! 894: This is also part of the multiply phase of modmult. ! 895: ! 896: The following loop is unrolled for speed, using macros... ! 897: ! 898: for (i=UNITSIZE-1; i>=0; i--) ! 899: if (*multiplier & power_of_2(i)) ! 900: mp_add(prod,mpd[i]); ! 901: */ ! 902: #ifndef UNIT8 ! 903: #ifndef UNIT16 /* and not UNIT8 */ ! 904: sniffadd(31); ! 905: sniffadd(30); ! 906: sniffadd(29); ! 907: sniffadd(28); ! 908: sniffadd(27); ! 909: sniffadd(26); ! 910: sniffadd(25); ! 911: sniffadd(24); ! 912: sniffadd(23); ! 913: sniffadd(22); ! 914: sniffadd(21); ! 915: sniffadd(20); ! 916: sniffadd(19); ! 917: sniffadd(18); ! 918: sniffadd(17); ! 919: sniffadd(16); ! 920: #endif /* not UNIT16 and not UNIT8 */ ! 921: sniffadd(15); ! 922: sniffadd(14); ! 923: sniffadd(13); ! 924: sniffadd(12); ! 925: sniffadd(11); ! 926: sniffadd(10); ! 927: sniffadd(9); ! 928: sniffadd(8); ! 929: #endif /* not UNIT8 */ ! 930: sniffadd(7); ! 931: sniffadd(6); ! 932: sniffadd(5); ! 933: sniffadd(4); ! 934: sniffadd(3); ! 935: sniffadd(2); ! 936: sniffadd(1); ! 937: sniffadd(0); ! 938: ! 939: /* The product may have grown by as many as UNITSIZE+1 ! 940: bits. That's why we have global_precision set to the ! 941: size of the modulus plus UNITSIZE+1 slop bits. ! 942: Now reduce the product back down by conditionally ! 943: subtracting the UNITSIZE+1 preshifted images of the ! 944: modulus. This is the modulo reduction phase of modmult. ! 945: ! 946: The following loop is unrolled for speed, using macros... ! 947: ! 948: for (i=UNITSIZE; i>=0; i--) ! 949: if (mp_compare(prod,moduli[i]) >= 0) ! 950: mp_sub(prod,moduli[i]); ! 951: */ ! 952: #ifndef UNIT8 ! 953: #ifndef UNIT16 /* and not UNIT8 */ ! 954: msubs(32); ! 955: msubs(31); ! 956: msubs(30); ! 957: msubs(29); ! 958: msubs(28); ! 959: msubs(27); ! 960: msubs(26); ! 961: msubs(25); ! 962: msubs(24); ! 963: msubs(23); ! 964: msubs(22); ! 965: msubs(21); ! 966: msubs(20); ! 967: msubs(19); ! 968: msubs(18); ! 969: msubs(17); ! 970: #endif /* not UNIT16 and not UNIT8 */ ! 971: msubs(16); ! 972: msubs(15); ! 973: msubs(14); ! 974: msubs(13); ! 975: msubs(12); ! 976: msubs(11); ! 977: msubs(10); ! 978: msubs(9); ! 979: #endif /* not UNIT8 */ ! 980: msubs(8); ! 981: msubs(7); ! 982: msubs(6); ! 983: msubs(5); ! 984: msubs(4); ! 985: msubs(3); ! 986: msubs(2); ! 987: msubs(1); ! 988: msubs(0); ! 989: ! 990: /* Bump pointer to next lower unit of multiplier: */ ! 991: post_lowerunit(multiplier); ! 992: ! 993: } /* Loop for the number of units in the multiplier */ ! 994: ! 995: return(0); /* normal return */ ! 996: ! 997: } /* merritt_modmult */ ! 998: ! 999: ! 1000: #undef msubs ! 1001: #undef sniffadd ! 1002: ! 1003: ! 1004: /* Merritt's mp_modmult function leaves some internal tables in memory, ! 1005: so we have to call modmult_burn() at the end of mp_modexp. ! 1006: This is so that no cryptographically sensitive data is left in memory ! 1007: after the program exits. ! 1008: */ ! 1009: static void merritt_burn(void) ! 1010: /* Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */ ! 1011: { unitfill0(&(mpdbuf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION); ! 1012: unitfill0(&(moduli_buf[0][0]),(UNITSIZE)*MAX_UNIT_PRECISION); ! 1013: unitfill0(msu_moduli,UNITSIZE+1); ! 1014: unitfill0(nmsu_moduli,UNITSIZE+1); ! 1015: } /* merritt_burn() */ ! 1016: ! 1017: /******* end of Merritt's MODMULT stuff. *******/ ! 1018: /*=========================================================================*/ ! 1019: #endif /* MERRITT */ ! 1020: ! 1021: ! 1022: #ifdef UPTON /* using Jimmy Upton's modmult algorithm */ ! 1023: ! 1024: /* Jimmy Upton's multiprecision modmult algorithm in C. ! 1025: Performs a multiply combined with a modulo operation. ! 1026: ! 1027: The following support functions and data structures ! 1028: are used only by Upton's modmult algorithm... ! 1029: */ ! 1030: ! 1031: /* The MULTUNIT word is the biggest word size for the native atomic ! 1032: multiply instruction. It may or may not be smaller than UNITSIZE. ! 1033: Many CPU's have 16-by-16-bit multiplies, yielding a 32-bit product. ! 1034: In that case, MULTUINIT should be a 16-bit word, even if the rest of ! 1035: the multiprecision arithmetic functions assume a 32-bit UNIT word. ! 1036: */ ! 1037: typedef unsigned short MULTUNIT; ! 1038: #define MULTUNITSIZE (8*sizeof(MULTUNIT)) /* size in bits */ ! 1039: ! 1040: static short unit_prec; /* global_precision expressed in MULTUNITs */ ! 1041: ! 1042: ! 1043: /* Define MPORTABLE if there is no assembly version of the mp_smul ! 1044: function available. An assembly version is much faster. ! 1045: Note that since the SPARC CPU has no hardware integer multiply ! 1046: instruction, there is not that much advantage in having an ! 1047: assembly version of mp_smul on that machine. It might be faster ! 1048: to use Merritt's modmult instead of Upton's modmult on the SPARC. ! 1049: */ ! 1050: #ifdef MSDOS /* we do have an assembly version available on the 8086 */ ! 1051: #undef MPORTABLE ! 1052: #endif ! 1053: ! 1054: #ifdef MPORTABLE /* use slow portable C version instead of assembly */ ! 1055: ! 1056: /* ! 1057: Multiply the single-word multiplier times the multiprecision integer ! 1058: in multiplicand, accumulating result in prod. The resulting ! 1059: multiprecision prod will be 1 word longer than the multiplicand. ! 1060: multiplicand is unit_prec words long. We add into prod, so caller ! 1061: should zero it out first. For best results, this time-critical ! 1062: function should be implemented in assembly. ! 1063: NOTE: Unlike other functions in the multiprecision arithmetic ! 1064: library, both multiplicand and prod are pointing at the LSB, ! 1065: regardless of byte order of the machine. On an 80x86, this makes ! 1066: no difference. But if this assembly function is implemented ! 1067: on a 680x0, it becomes important. ! 1068: */ ! 1069: void mp_smul (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier) ! 1070: { ! 1071: short i; ! 1072: unsigned long p, carry; ! 1073: ! 1074: carry = 0; ! 1075: for (i=0; i<unit_prec; ++i) ! 1076: { p = (unsigned long)multiplier * *post_higherunit(multiplicand); ! 1077: p += *prod + carry; ! 1078: *post_higherunit(prod) = (MULTUNIT)p; ! 1079: carry = p >> MULTUNITSIZE; ! 1080: } ! 1081: /* We know that the high-order word of prod will always be 0 */ ! 1082: *prod = (MULTUNIT)carry; /* copy carry to empty high word of prod */ ! 1083: } /* mp_smul */ ! 1084: ! 1085: #else /* not MPORTABLE-- use assembly routine */ ! 1086: ! 1087: #define mp_smul P_SMUL /* use external assembly routine */ ! 1088: ! 1089: #endif /* MPORTABLE */ ! 1090: ! 1091: #ifdef VMS ! 1092: ! 1093: #define mp_dmul p_dmul /* use external assembly routine */ ! 1094: ! 1095: void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier); ! 1096: ! 1097: #else /* VMS */ ! 1098: ! 1099: /* mp_dmul is a double-precision multiply multiplicand times multiplier, ! 1100: result into prod. prod must be pointing at a "double precision" ! 1101: buffer. E.g. If global_precision is 10 words, prod must be ! 1102: pointing at a 20-word buffer. ! 1103: */ ! 1104: static void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier) ! 1105: { ! 1106: register int i; ! 1107: register MULTUNIT *p_multiplicand, *p_multiplier; ! 1108: register MULTUNIT *prodp; ! 1109: ! 1110: unitfill0(prod,global_precision*2); /* Pre-zero prod */ ! 1111: /* Calculate precision in units of MULTUNIT */ ! 1112: unit_prec = global_precision * UNITSIZE / MULTUNITSIZE; ! 1113: p_multiplicand = (MULTUNIT *)multiplicand; ! 1114: p_multiplier = (MULTUNIT *)multiplier; ! 1115: prodp = (MULTUNIT *)prod; ! 1116: make_lsbptr(p_multiplicand,unit_prec); ! 1117: make_lsbptr(p_multiplier,unit_prec); ! 1118: make_lsbptr(prodp,unit_prec*2); ! 1119: /* Multiply multiplicand by each word in multiplier, accumulating prod: */ ! 1120: for (i=0; i<unit_prec; ++i) ! 1121: mp_smul (post_higherunit(prodp), ! 1122: p_multiplicand, *post_higherunit(p_multiplier)); ! 1123: } /* mp_dmul */ ! 1124: ! 1125: #endif /* VMS */ ! 1126: ! 1127: /* These scratchpad arrays are used only by upton_modmult (mp_modmult). ! 1128: Some of them could be staticly declared inside of mp_modmult, but we ! 1129: put them outside mp_modmult so that they can be wiped clean by ! 1130: modmult_burn(), which is called at the end of mp_modexp. This is ! 1131: so that no sensitive data is left in memory after the program exits. ! 1132: */ ! 1133: ! 1134: #ifdef VAXC ! 1135: /* ! 1136: * Operations on arrays are sped up on a VAX if the data is quadword (64-bit) ! 1137: * aligned. This is because many VAXen use a minimum of 64-bit data paths to ! 1138: * cache and memory. This technique is VAX C private, hence the conditional. ! 1139: */ ! 1140: static unit _align( quadword ) modulus[MAX_UNIT_PRECISION]; ! 1141: static unit _align( quadword ) reciprocal[MAX_UNIT_PRECISION]; ! 1142: static unit _align( quadword ) dhi[MAX_UNIT_PRECISION]; ! 1143: static unit _align( quadword ) d_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1144: static unit _align( quadword ) e_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1145: static unit _align( quadword ) f_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1146: ! 1147: #else /* VAXC */ ! 1148: ! 1149: static unit modulus[MAX_UNIT_PRECISION]; ! 1150: static unit reciprocal[MAX_UNIT_PRECISION]; ! 1151: static unit dhi[MAX_UNIT_PRECISION]; ! 1152: static unit d_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1153: static unit e_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1154: static unit f_data[MAX_UNIT_PRECISION*2]; /* double-wide register */ ! 1155: ! 1156: #endif /* VAXC */ ! 1157: ! 1158: static short nbits; ! 1159: static short nbitsDivUNITSIZE; ! 1160: static short nbitsModUNITSIZE; ! 1161: ! 1162: /* stage_upton_modulus() is aliased to stage_modulus(). ! 1163: Prepare for an Upton modmult. Calculate the reciprocal of modulus, ! 1164: and save both. Note that reciprocal will have one more bit than ! 1165: modulus. ! 1166: Assumes that global_precision has already been adjusted to the ! 1167: size of the modulus, plus SLOP_BITS. ! 1168: */ ! 1169: int stage_upton_modulus(unitptr n) ! 1170: { ! 1171: mp_move(modulus, n); ! 1172: mp_recip(reciprocal, modulus); ! 1173: nbits = countbits(modulus); ! 1174: nbitsDivUNITSIZE = nbits / UNITSIZE; ! 1175: nbitsModUNITSIZE = nbits % UNITSIZE; ! 1176: return(0); /* normal return */ ! 1177: } /* stage_upton_modulus */ ! 1178: ! 1179: ! 1180: /* Upton's algorithm performs a multiply combined with a modulo operation. ! 1181: Computes: prod = (multiplicand*multiplier) mod modulus ! 1182: WARNING: All the arguments must be less than the modulus! ! 1183: References global unitptr modulus and reciprocal. ! 1184: The reciprocal of modulus is 1 bit longer than the modulus. ! 1185: upton_modmult() is aliased to mp_modmult(). ! 1186: */ ! 1187: int upton_modmult (unitptr prod, unitptr multiplicand, unitptr multiplier) ! 1188: { ! 1189: unitptr d = d_data; ! 1190: unitptr d1 = d_data; ! 1191: unitptr e = e_data; ! 1192: unitptr f = f_data; ! 1193: short orig_precision, dprec; ! 1194: short i; ! 1195: ! 1196: orig_precision = global_precision; ! 1197: mp_dmul (d,multiplicand,multiplier); ! 1198: ! 1199: /* Throw off low nbits of d */ ! 1200: #ifndef HIGHFIRST ! 1201: d1 = d + nbitsDivUNITSIZE; ! 1202: #else ! 1203: d1 = d + orig_precision - nbitsDivUNITSIZE; ! 1204: #endif ! 1205: mp_move(dhi, d1); /* Don't screw up d, we need it later */ ! 1206: mp_shift_right_bits(dhi, nbitsModUNITSIZE); ! 1207: ! 1208: mp_dmul (e,dhi,reciprocal); /* Note - reciprocal has nbits+1 bits */ ! 1209: ! 1210: #ifndef HIGHFIRST ! 1211: e += nbitsDivUNITSIZE; ! 1212: #else ! 1213: e += orig_precision - nbitsDivUNITSIZE; ! 1214: #endif ! 1215: mp_shift_right_bits(e, nbitsModUNITSIZE); ! 1216: ! 1217: mp_dmul (f,e,modulus); ! 1218: ! 1219: /* Now for the only double-precision call to mpilib: */ ! 1220: set_precision (orig_precision * 2); ! 1221: mp_sub (d,f); ! 1222: ! 1223: /* d's precision should be <= orig_precision */ ! 1224: rescale (d, orig_precision*2, orig_precision); ! 1225: set_precision (orig_precision); ! 1226: ! 1227: /* Should never have to do this final subtract more than twice: */ ! 1228: while (mp_compare(d,modulus) > 0) ! 1229: mp_sub (d,modulus); ! 1230: ! 1231: mp_move(prod,d); ! 1232: return(0); /* normal return */ ! 1233: } /* upton_modmult */ ! 1234: ! 1235: ! 1236: /* Upton's mp_modmult function leaves some internal arrays in memory, ! 1237: so we have to call modmult_burn() at the end of mp_modexp. ! 1238: This is so that no cryptographically sensitive data is left in memory ! 1239: after the program exits. ! 1240: upton_burn() is aliased to modmult_burn(). ! 1241: */ ! 1242: void upton_burn(void) ! 1243: { ! 1244: unitfill0(modulus,MAX_UNIT_PRECISION); ! 1245: unitfill0(reciprocal,MAX_UNIT_PRECISION); ! 1246: unitfill0(dhi,MAX_UNIT_PRECISION); ! 1247: unitfill0(d_data,MAX_UNIT_PRECISION*2); ! 1248: unitfill0(e_data,MAX_UNIT_PRECISION*2); ! 1249: unitfill0(f_data,MAX_UNIT_PRECISION*2); ! 1250: nbits = nbitsDivUNITSIZE = nbitsModUNITSIZE = 0; ! 1251: } /* upton_burn */ ! 1252: ! 1253: #endif /* UPTON */ ! 1254: ! 1255: ! 1256: int countbits(unitptr r) ! 1257: /* Returns number of significant bits in r */ ! 1258: { int bits; ! 1259: short prec; ! 1260: register unit bitmask; ! 1261: init_bitsniffer(r,bitmask,prec,bits); ! 1262: return(bits); ! 1263: } /* countbits */ ! 1264: ! 1265: ! 1266: char *copyright_notice(void) ! 1267: /* force linker to include copyright notice in the executable object image. */ ! 1268: { return ("(c)1986 Philip Zimmermann"); } /* copyright_notice */ ! 1269: ! 1270: ! 1271: int mp_modexp(register unitptr expout,register unitptr expin, ! 1272: register unitptr exponent,register unitptr modulus) ! 1273: { /* Russian peasant combined exponentiation/modulo algorithm. ! 1274: Calls modmult instead of mult. ! 1275: Computes: expout = (expin**exponent) mod modulus ! 1276: WARNING: All the arguments must be less than the modulus! ! 1277: */ ! 1278: int bits; ! 1279: short oldprecision; ! 1280: register unit bitmask; ! 1281: unit product[MAX_UNIT_PRECISION]; ! 1282: short eprec; ! 1283: ! 1284: #ifdef COUNTMULTS ! 1285: tally_modmults = 0; /* clear "number of modmults" counter */ ! 1286: tally_modsquares = 0; /* clear "number of modsquares" counter */ ! 1287: #endif /* COUNTMULTS */ ! 1288: mp_init(expout,1); ! 1289: if (testeq(exponent,0)) ! 1290: { if (testeq(expin,0)) ! 1291: return(-1); /* 0 to the 0th power means return error */ ! 1292: return(0); /* otherwise, zero exponent means expout is 1 */ ! 1293: } ! 1294: if (testeq(modulus,0)) ! 1295: return(-2); /* zero modulus means error */ ! 1296: #if SLOP_BITS > 0 /* if there's room for sign bits */ ! 1297: if (mp_tstminus(modulus)) ! 1298: return(-2); /* negative modulus means error */ ! 1299: #endif /* SLOP_BITS > 0 */ ! 1300: if (mp_compare(expin,modulus) >= 0) ! 1301: return(-3); /* if expin >= modulus, return error */ ! 1302: if (mp_compare(exponent,modulus) >= 0) ! 1303: return(-4); /* if exponent >= modulus, return error */ ! 1304: ! 1305: oldprecision = global_precision; /* save global_precision */ ! 1306: /* set smallest optimum precision for this modulus */ ! 1307: set_precision(bits2units(countbits(modulus)+SLOP_BITS)); ! 1308: /* rescale all these registers to global_precision we just defined */ ! 1309: rescale(modulus,oldprecision,global_precision); ! 1310: rescale(expin,oldprecision,global_precision); ! 1311: rescale(exponent,oldprecision,global_precision); ! 1312: rescale(expout,oldprecision,global_precision); ! 1313: ! 1314: if (stage_modulus(modulus)) ! 1315: { set_precision(oldprecision); /* restore original precision */ ! 1316: return(-5); /* unstageable modulus (STEWART algorithm) */ ! 1317: } ! 1318: ! 1319: /* normalize and compute number of bits in exponent first */ ! 1320: init_bitsniffer(exponent,bitmask,eprec,bits); ! 1321: ! 1322: /* We can "optimize out" the first modsquare and modmult: */ ! 1323: bits--; /* We know for sure at this point that bits>0 */ ! 1324: mp_move(expout,expin); /* expout = (1*1)*expin; */ ! 1325: bump_bitsniffer(exponent,bitmask); ! 1326: ! 1327: while (bits--) ! 1328: { ! 1329: poll_for_break(); /* polls keyboard, allows ctrl-C to abort program */ ! 1330: #ifdef COUNTMULTS ! 1331: tally_modsquares++; /* bump "number of modsquares" counter */ ! 1332: #endif /* COUNTMULTS */ ! 1333: mp_modsquare(product,expout); ! 1334: mp_move(expout,product); ! 1335: if (sniff_bit(exponent,bitmask)) ! 1336: { mp_modmult(product,expout,expin); ! 1337: mp_move(expout,product); ! 1338: #ifdef COUNTMULTS ! 1339: tally_modmults++; /* bump "number of modmults" counter */ ! 1340: #endif /* COUNTMULTS */ ! 1341: } ! 1342: bump_bitsniffer(exponent,bitmask); ! 1343: } /* while bits-- */ ! 1344: mp_burn(product); /* burn the evidence on the stack */ ! 1345: modmult_burn(); /* ask mp_modmult to also burn its own evidence */ ! 1346: ! 1347: #ifdef COUNTMULTS /* diagnostic analysis */ ! 1348: { long atomic_mults; ! 1349: unsigned int unitcount,totalmults; ! 1350: unitcount = bits2units(countbits(modulus)); ! 1351: /* calculation assumes modsquare takes as long as a modmult: */ ! 1352: atomic_mults = (long) tally_modmults * (unitcount * unitcount); ! 1353: atomic_mults += (long) tally_modsquares * (unitcount * unitcount); ! 1354: printf("%ld atomic mults for ",atomic_mults); ! 1355: printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n", ! 1356: tally_modsquares,tally_modmults, ! 1357: tally_modsquares+tally_modmults, ! 1358: countbits(modulus), unitcount); ! 1359: } ! 1360: #endif /* COUNTMULTS */ ! 1361: ! 1362: set_precision(oldprecision); /* restore original precision */ ! 1363: ! 1364: /* Do an explicit reference to the copyright notice so that the linker ! 1365: will be forced to include it in the executable object image... */ ! 1366: copyright_notice(); /* has no real effect at run time */ ! 1367: ! 1368: return(0); /* normal return */ ! 1369: } /* mp_modexp */ ! 1370: ! 1371: ! 1372: ! 1373: ! 1374: /********************************************************************* ! 1375: RSA-specific routines follow. These are the only functions that ! 1376: are specific to the RSA public key cryptosystem. The other ! 1377: multiprecision integer math functions may be used for non-RSA ! 1378: applications. ! 1379: ! 1380: The RSA public key cryptosystem is patented by the Massachusetts ! 1381: Institute of Technology (U.S. patent #4,405,829). This patent does ! 1382: not apply outside the USA. Public Key Partners (PKP) holds the ! 1383: exclusive commercial license to sell and sub-license the RSA public ! 1384: key cryptosystem. The author of this software implementation of the ! 1385: RSA algorithm is providing this software for educational use only. ! 1386: Licensing this algorithm from PKP is the responsibility of you, the ! 1387: user, not Philip Zimmermann, the author of this software. The author ! 1388: assumes no liability for any breach of patent law resulting from the ! 1389: unlicensed use of this software by the user. ! 1390: *********************************************************************/ ! 1391: ! 1392: ! 1393: int rsa_decrypt(unitptr M, unitptr C, ! 1394: unitptr d, unitptr p, unitptr q, unitptr u) ! 1395: /* Uses Quisquater's Chinese Remainder Theorem shortcut ! 1396: for RSA decryption. ! 1397: M is the output plaintext message. ! 1398: C is the input ciphertext message. ! 1399: d is the secret decryption exponent. ! 1400: p and q are the secret prime factors of n. ! 1401: u is the multiplicative inverse of p, mod q. ! 1402: Note that u is precomputed on the assumption that p<q. ! 1403: n, the common modulus, is not used here because of the ! 1404: Chinese Remainder Theorem shortcut. ! 1405: */ ! 1406: { ! 1407: unit p2[MAX_UNIT_PRECISION]; ! 1408: unit q2[MAX_UNIT_PRECISION]; ! 1409: unit temp1[MAX_UNIT_PRECISION]; ! 1410: unit temp2[MAX_UNIT_PRECISION]; ! 1411: int status; ! 1412: ! 1413: mp_init(M,1); /* initialize result in case of error */ ! 1414: ! 1415: if (mp_compare(p,q) >= 0) /* ensure that p<q */ ! 1416: { /* swap the pointers p and q */ ! 1417: unitptr t; ! 1418: t = p; p = q; q = t; ! 1419: } ! 1420: ! 1421: /* Rather than decrypting by computing modexp with full mod n ! 1422: precision, compute a shorter modexp with mod p precision... */ ! 1423: ! 1424: /* p2 = [ (C mod p)**( d mod (p-1) ) ] mod p */ ! 1425: mp_move(temp1,p); ! 1426: mp_dec(temp1); /* temp1 = p-1 */ ! 1427: mp_mod(temp2,d,temp1); /* temp2 = d mod (p-1) ) */ ! 1428: mp_mod(temp1,C,p); /* temp1 = C mod p */ ! 1429: status = mp_modexp(p2,temp1,temp2,p); ! 1430: if (status < 0) /* mp_modexp returned an error. */ ! 1431: return(status); /* error return */ ! 1432: ! 1433: /* Now compute a short modexp with mod q precision... */ ! 1434: ! 1435: /* q2 = [ (C mod q)**( d mod (q-1) ) ] mod q */ ! 1436: mp_move(temp1,q); ! 1437: mp_dec(temp1); /* temp1 = q-1 */ ! 1438: mp_mod(temp2,d,temp1); /* temp2 = d mod (q-1) */ ! 1439: mp_mod(temp1,C,q); /* temp1 = C mod q */ ! 1440: status = mp_modexp(q2,temp1,temp2,q); ! 1441: if (status < 0) /* mp_modexp returned an error. */ ! 1442: return(status); /* error return */ ! 1443: ! 1444: /* Now use the multiplicative inverse u to glue together the ! 1445: two halves, saving a lot of time by avoiding a full mod n ! 1446: exponentiation. At key generation time, u was computed ! 1447: with the secret key as the multiplicative inverse of ! 1448: p, mod q such that: (p*u) mod q = 1, assuming p<q. ! 1449: */ ! 1450: if (mp_compare(p2,q2) == 0) /* only happens if C<p */ ! 1451: mp_move(M,p2); ! 1452: else ! 1453: { /* q2 = q2 - p2; if q2<0 then q2 = q2 + q */ ! 1454: if (mp_sub(q2,p2)) /* if the result went negative... */ ! 1455: mp_add(q2,q); /* add q to q2 */ ! 1456: ! 1457: /* M = p2 + ( p * [(q2*u) mod q] ) */ ! 1458: mp_mult(temp1,q2,u); /* temp1 = q2*u */ ! 1459: mp_mod(temp2,temp1,q); /* temp2 = temp1 mod q */ ! 1460: mp_mult(temp1,p,temp2); /* temp1 = p * temp2 */ ! 1461: mp_add(temp1,p2); /* temp1 = temp1 + p2 */ ! 1462: mp_move(M,temp1); /* M = temp1 */ ! 1463: } ! 1464: ! 1465: mp_burn(p2); /* burn the evidence on the stack...*/ ! 1466: mp_burn(q2); ! 1467: mp_burn(temp1); ! 1468: mp_burn(temp2); ! 1469: return(0); /* normal return */ ! 1470: } /* rsa_decrypt */ ! 1471: ! 1472: ! 1473: /****************** end of MPI library ****************************/
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.