|
|
1.1 ! root 1: /* C source code for RSA multiprecision arithmetic library routines. ! 2: Implemented Nov 86 by Philip Zimmermann ! 3: Last revised 10 May 91 by PRZ ! 4: ! 5: Boulder Software Engineering ! 6: 3021 Eleventh Street ! 7: Boulder, CO 80304 ! 8: (303) 444-4541 ! 9: ! 10: (c) Copyright 1986 by Philip Zimmermann. All rights reserved. ! 11: The author assumes no liability for damages resulting from the use ! 12: of this software, even if the damage results from defects in this ! 13: software. No warranty is expressed or implied. The use of this ! 14: cryptographic software for developing weapon systems is expressly ! 15: forbidden. ! 16: ! 17: The RSA public key cryptosystem is patented by the Massachusetts ! 18: Institute of Technology (U.S. patent #4,405,829). Public Key ! 19: Partners (PKP) holds the exclusive commercial license to sell and ! 20: sub-license the RSA public key cryptosystem. The author of this ! 21: software implementation of the RSA algorithm is providing this ! 22: implementation for educational use only. Licensing this algorithm ! 23: from PKP is the responsibility of you, the user, not Philip ! 24: Zimmermann, the author of this implementation. The author assumes ! 25: no liability for any breach of patent law resulting from the ! 26: unlicensed use of this software by the user. ! 27: ! 28: These routines implement all of the multiprecision arithmetic necessary ! 29: for Rivest-Shamir-Adleman (RSA) public key cryptography. ! 30: ! 31: Although originally developed in Microsoft C for the IBM PC, this code ! 32: contains few machine dependancies. It assumes 2's complement ! 33: arithmetic. It can be adapted to 8-bit, 16-bit, or 32-bit machines, ! 34: lowbyte-highbyte order or highbyte-lowbyte order. This version ! 35: has been converted to ANSI C. ! 36: ! 37: ! 38: The internal representation for these extended precision integer ! 39: "registers" is an array of "units". A unit is a machine word, which ! 40: is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit ! 41: unsigned integer, depending on the machine's word size. For example, ! 42: an IBM PC or AT uses a unit size of 16 bits. To perform arithmetic ! 43: on these huge precision integers, we pass pointers to these unit ! 44: arrays to various subroutines. A pointer to an array of units is of ! 45: type unitptr. This is a pointer to a huge integer "register". ! 46: ! 47: When calling a subroutine, we always pass a pointer to the BEGINNING ! 48: of the array of units, regardless of the byte order of the machine. ! 49: On a lowbyte-first machine, such as the Intel 80x86, this unitptr ! 50: points to the LEAST significant unit, and the array of units increases ! 51: significance to the right. On a highbyte-first machine, such as the ! 52: Motorola 680x0, this unitptr points to the MOST significant unit, and ! 53: the array of units decreases significance to the right. ! 54: */ ! 55: ! 56: /* #define COUNTMULTS */ /* count modmults for performance studies */ ! 57: ! 58: #ifdef DEBUG ! 59: #ifndef EMBEDDED /* not EMBEDDED - not compiling for embedded target */ ! 60: #include <stdio.h> /* for printf, etc. */ ! 61: #include <conio.h> /* DEBUG mode: for kbhit() */ ! 62: #define poll_for_break() {while (kbhit()) getch();} ! 63: #endif /* not EMBEDDED */ ! 64: #else ! 65: #define poll_for_break() /* stub */ ! 66: #endif /* not DEBUG */ ! 67: ! 68: #define RSALIB ! 69: #include "rsalib.h" ! 70: ! 71: short global_precision = 0; /* units of precision for all routines */ ! 72: /* global_precision is the unit precision last set by set_precision. ! 73: Initially, set_precision() should be called to define global_precision ! 74: before using any of these other RSA library routines. ! 75: i.e.: set_precision(MAX_UNIT_PRECISION); ! 76: */ ! 77: ! 78: #ifdef PORTABLE ! 79: /*************** multiprecision library primitives ****************/ ! 80: /* The following portable C primitives should be recoded in assembly. ! 81: The equivalent assembly primitives are externally defined under ! 82: different names, unless PORTABLE is defined. See the header file ! 83: "rsalib.h" for further details. ! 84: ! 85: The carry bit mechanism we use for these portable primitives ! 86: will only work if the size of unit is smaller than the size of ! 87: a long integer. In most cases, this means UNITSIZE must be ! 88: less than 32 bits -- if you use the C portable primitives. ! 89: */ ! 90: ! 91: typedef unsigned long int ulint; ! 92: #define carrybit ((ulint) 1 << UNITSIZE) ! 93: /* ...assumes sizeof(unit) < sizeof(unsigned long) */ ! 94: ! 95: boolean mp_addc ! 96: (register unitptr r1,register unitptr r2,register boolean carry) ! 97: /* multiprecision add with carry r2 to r1, result in r1 */ ! 98: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 99: { register ulint x; /* won't work if sizeof(unit)==sizeof(long) */ ! 100: short precision; /* number of units to add */ ! 101: precision = global_precision; ! 102: make_lsbptr(r1,precision); ! 103: make_lsbptr(r2,precision); ! 104: while (precision--) ! 105: { x = (ulint) *r1 + (ulint) *post_higherunit(r2) + (ulint) carry; ! 106: *post_higherunit(r1) = x; ! 107: carry = ((x & carrybit) != 0L); ! 108: } ! 109: return(carry); /* return the final carry flag bit */ ! 110: } /* mp_addc */ ! 111: ! 112: ! 113: boolean mp_subb ! 114: (register unitptr r1,register unitptr r2,register boolean borrow) ! 115: /* multiprecision subtract with borrow, r2 from r1, result in r1 */ ! 116: /* borrow is incoming borrow flag-- value should be 0 or 1 */ ! 117: { register ulint x; /* won't work if sizeof(unit)==sizeof(long) */ ! 118: short precision; /* number of units to subtract */ ! 119: precision = global_precision; ! 120: make_lsbptr(r1,precision); ! 121: make_lsbptr(r2,precision); ! 122: while (precision--) ! 123: { x = (ulint) *r1 - (ulint) *post_higherunit(r2) - (ulint) borrow; ! 124: *post_higherunit(r1) = x; ! 125: borrow = ((x & carrybit) != 0L); ! 126: } ! 127: return(borrow); /* return the final carry/borrow flag bit */ ! 128: } /* mp_subb */ ! 129: ! 130: #undef carrybit ! 131: ! 132: ! 133: boolean mp_rotate_left(register unitptr r1,register boolean carry) ! 134: /* multiprecision rotate left 1 bit with carry, result in r1. */ ! 135: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 136: { register short precision; /* number of units to rotate */ ! 137: register boolean nextcarry; ! 138: precision = global_precision; ! 139: make_lsbptr(r1,precision); ! 140: while (precision--) ! 141: { ! 142: nextcarry = (((signedunit) *r1) < 0); ! 143: /* nextcarry = ((*r1 & uppermostbit) != 0); */ ! 144: *r1 <<= 1 ; ! 145: if (carry) *r1 |= 1; ! 146: carry = nextcarry; ! 147: pre_higherunit(r1); ! 148: } ! 149: return(nextcarry); /* return the final carry flag bit */ ! 150: } /* mp_rotate_left */ ! 151: ! 152: /************** end of primitives that should be in assembly *************/ ! 153: #endif /* PORTABLE */ ! 154: ! 155: ! 156: /* The mp_rotate_right function is not called in any time-critical ! 157: situations in RSA functions, so it doesn't need to be coded ! 158: in assembly language. ! 159: */ ! 160: boolean mp_rotate_right(register unitptr r1,register boolean carry) ! 161: /* multiprecision rotate right 1 bit with carry, result in r1. */ ! 162: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 163: { register short precision; /* number of units to rotate */ ! 164: register boolean nextcarry; ! 165: precision = global_precision; ! 166: make_msbptr(r1,precision); ! 167: while (precision--) ! 168: { nextcarry = *r1 & 1; ! 169: *r1 >>= 1 ; ! 170: if (carry) *r1 |= uppermostbit; ! 171: carry = nextcarry; ! 172: pre_lowerunit(r1); ! 173: } ! 174: return(nextcarry); /* return the final carry flag bit */ ! 175: } /* mp_rotate_right */ ! 176: ! 177: ! 178: short mp_compare(register unitptr r1,register unitptr r2) ! 179: /* Compares multiprecision integers *r1, *r2, and returns: ! 180: -1 iff *r1 < *r2 ! 181: 0 iff *r1 == *r2 ! 182: +1 iff *r1 > *r2 ! 183: */ ! 184: { register short precision; /* number of units to compare */ ! 185: precision = global_precision; ! 186: make_msbptr(r1,precision); ! 187: make_msbptr(r2,precision); ! 188: do ! 189: { if (*r1 < *r2) ! 190: return(-1); ! 191: if (*post_lowerunit(r1) > *post_lowerunit(r2)) ! 192: return(1); ! 193: } while (--precision); ! 194: return(0); /* *r1 == *r2 */ ! 195: } /* mp_compare */ ! 196: ! 197: ! 198: boolean mp_inc(register unitptr r) ! 199: /* Increment multiprecision integer r. */ ! 200: { register short precision; ! 201: precision = global_precision; ! 202: make_lsbptr(r,precision); ! 203: do ! 204: { if ( ++(*r) ) return(0); /* no carry */ ! 205: post_higherunit(r); ! 206: } while (--precision); ! 207: return(1); /* return carry set */ ! 208: } /* mp_inc */ ! 209: ! 210: ! 211: boolean mp_dec(register unitptr r) ! 212: /* Decrement multiprecision integer r. */ ! 213: { register short precision; ! 214: precision = global_precision; ! 215: make_lsbptr(r,precision); ! 216: do ! 217: { if ( (signedunit) (--(*r)) != (signedunit) -1 ) ! 218: return(0); /* no borrow */ ! 219: post_higherunit(r); ! 220: } while (--precision); ! 221: return(1); /* return borrow set */ ! 222: } /* mp_dec */ ! 223: ! 224: ! 225: void mp_neg(register unitptr r) ! 226: /* Compute 2's complement, the arithmetic negative, of r */ ! 227: { register short precision; /* number of units to negate */ ! 228: precision = global_precision; ! 229: mp_dec(r); /* 2's complement is 1's complement plus 1 */ ! 230: do /* now do 1's complement */ ! 231: { *r = ~(*r); ! 232: r++; ! 233: } while (--precision); ! 234: } /* mp_neg */ ! 235: ! 236: ! 237: void mp_move(register unitptr dst,register unitptr src) ! 238: { register short precision; /* number of units to move */ ! 239: precision = global_precision; ! 240: do { *dst++ = *src++; } while (--precision); ! 241: } /* mp_move */ ! 242: ! 243: ! 244: void mp_init(register unitptr r, word16 value) ! 245: /* Init multiprecision register r with short value. */ ! 246: { /* Note that mp_init doesn't extend sign bit for >32767 */ ! 247: register short precision; /* number of units to init */ ! 248: precision = global_precision; ! 249: make_lsbptr(r,precision); ! 250: *post_higherunit(r) = value; precision--; ! 251: #ifdef UNIT8 ! 252: *post_higherunit(r) = value >> UNITSIZE; precision--; ! 253: #endif ! 254: while (precision--) ! 255: *post_higherunit(r) = 0; ! 256: } /* mp_init */ ! 257: ! 258: ! 259: short significance(register unitptr r) ! 260: /* Returns number of significant units in r */ ! 261: { register short precision; ! 262: precision = global_precision; ! 263: make_msbptr(r,precision); ! 264: do ! 265: { if (*post_lowerunit(r)) ! 266: return(precision); ! 267: } while (--precision); ! 268: return(precision); ! 269: } /* significance */ ! 270: ! 271: ! 272: void unitfill0(unitptr r,word16 unitcount) ! 273: /* Zero-fill the unit buffer r. */ ! 274: { while (unitcount--) *r++ = 0; ! 275: } /* unitfill0 */ ! 276: ! 277: ! 278: /* The macro normalize(r,precision) "normalizes" a multiprecision integer ! 279: by adjusting r and precision to new values. For Motorola-style processors ! 280: (MSB-first), r is a pointer to the MSB of the register, and must ! 281: be adjusted to point to the first nonzero unit. For Intel/VAX-style ! 282: (LSB-first) processors, r is a pointer to the LSB of the register, ! 283: and must be left unchanged. The precision counter is always adjusted, ! 284: regardless of processor type. In the case of precision = 0, ! 285: r becomes undefined. ! 286: */ ! 287: ! 288: ! 289: /* The macro rescale(r,current_precision,new_precision) rescales ! 290: a multiprecision integer by adjusting r and its precision to new values. ! 291: It can be used to reverse the effects of the normalize ! 292: routine given above. See the comments in normalize concerning ! 293: Intel vs. Motorola LSB/MSB conventions. ! 294: WARNING: You can only safely call rescale on registers that ! 295: you have previously normalized with the above normalize routine, ! 296: or are known to be big enough for the new precision. You may ! 297: specify a new precision that is smaller than the current precision. ! 298: You must be careful not to specify a new_precision value that is ! 299: too big, or which adjusts the r pointer out of range. ! 300: */ ! 301: ! 302: ! 303: /* These bit sniffer definitions begin sniffing at the MSB */ ! 304: #define sniff_bit(bptr,bitmask) (*(bptr) & bitmask) ! 305: ! 306: #define init_bitsniffer(bptr,bitmask,prec,bits) \ ! 307: { normalize(bptr,prec); \ ! 308: if (!prec) \ ! 309: return(0); \ ! 310: bits = units2bits(prec); \ ! 311: make_msbptr(bptr,prec); bitmask = uppermostbit; \ ! 312: while (!sniff_bit(bptr,bitmask)) \ ! 313: { bitmask >>= 1; bits--; \ ! 314: } \ ! 315: } ! 316: ! 317: #define bump_bitsniffer(bptr,bitmask) \ ! 318: { if (!(bitmask >>= 1)) \ ! 319: { bitmask = uppermostbit; \ ! 320: post_lowerunit(bptr); \ ! 321: } \ ! 322: } ! 323: ! 324: /* the following two macros are used by mp_udiv */ ! 325: #define bump_2bitsniffers(bptr,bptr2,bitmask) \ ! 326: { if (!(bitmask >>= 1)) \ ! 327: { bitmask = uppermostbit; \ ! 328: post_lowerunit(bptr); \ ! 329: post_lowerunit(bptr2); \ ! 330: } \ ! 331: } ! 332: ! 333: #define stuff_bit(bptr,bitmask) *(bptr) |= bitmask ! 334: ! 335: ! 336: int mp_udiv(register unitptr remainder,register unitptr quotient, ! 337: register unitptr dividend,register unitptr divisor) ! 338: /* Unsigned divide, treats both operands as positive. */ ! 339: { int bits; ! 340: short dprec; ! 341: register unit bitmask; ! 342: if (testeq(divisor,0)) ! 343: return(-1); /* zero divisor means divide error */ ! 344: mp_init(remainder,0); ! 345: mp_init(quotient,0); ! 346: /* normalize and compute number of bits in dividend first */ ! 347: init_bitsniffer(dividend,bitmask,dprec,bits); ! 348: /* rescale quotient to same precision (dprec) as dividend */ ! 349: rescale(quotient,global_precision,dprec); ! 350: make_msbptr(quotient,dprec); ! 351: ! 352: while (bits--) ! 353: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0)); ! 354: if (mp_compare(remainder,divisor) >= 0) ! 355: { mp_sub(remainder,divisor); ! 356: stuff_bit(quotient,bitmask); ! 357: } ! 358: bump_2bitsniffers(dividend,quotient,bitmask); ! 359: } ! 360: return(0); ! 361: } /* mp_udiv */ ! 362: ! 363: ! 364: int mp_div(register unitptr remainder,register unitptr quotient, ! 365: register unitptr dividend,register unitptr divisor) ! 366: /* Signed divide, either or both operands may be negative. */ ! 367: { boolean dvdsign,dsign; ! 368: int status; ! 369: dvdsign = (mp_tstminus(dividend)!=0); ! 370: dsign = (mp_tstminus(divisor)!=0); ! 371: if (dvdsign) mp_neg(dividend); ! 372: if (dsign) mp_neg(divisor); ! 373: status = mp_udiv(remainder,quotient,dividend,divisor); ! 374: if (dvdsign) mp_neg(dividend); /* repair caller's dividend */ ! 375: if (dsign) mp_neg(divisor); /* repair caller's divisor */ ! 376: if (status<0) return(status); /* divide error? */ ! 377: if (dvdsign) mp_neg(remainder); ! 378: if (dvdsign ^ dsign) mp_neg(quotient); ! 379: return(status); ! 380: } /* mp_div */ ! 381: ! 382: ! 383: word16 mp_shortdiv(register unitptr quotient, ! 384: register unitptr dividend,register word16 divisor) ! 385: /* This function does a fast divide and mod on a multiprecision dividend ! 386: using a short integer divisor returning a short integer remainder. ! 387: This is an unsigned divide. It treats both operands as positive. ! 388: It is used mainly for faster printing of large numbers in base 10. ! 389: */ ! 390: { int bits; ! 391: short dprec; ! 392: register unit bitmask; ! 393: register word16 remainder; ! 394: if (!divisor) /* if divisor == 0 */ ! 395: return(-1); /* zero divisor means divide error */ ! 396: remainder=0; ! 397: mp_init(quotient,0); ! 398: /* normalize and compute number of bits in dividend first */ ! 399: init_bitsniffer(dividend,bitmask,dprec,bits); ! 400: /* rescale quotient to same precision (dprec) as dividend */ ! 401: rescale(quotient,global_precision,dprec); ! 402: make_msbptr(quotient,dprec); ! 403: ! 404: while (bits--) ! 405: { remainder <<= 1; ! 406: if (sniff_bit(dividend,bitmask)) ! 407: remainder++; ! 408: if (remainder >= divisor) ! 409: { remainder -= divisor; ! 410: stuff_bit(quotient,bitmask); ! 411: } ! 412: bump_2bitsniffers(dividend,quotient,bitmask); ! 413: } ! 414: return(remainder); ! 415: } /* mp_shortdiv */ ! 416: ! 417: ! 418: int mp_mod(register unitptr remainder, ! 419: register unitptr dividend,register unitptr divisor) ! 420: /* Unsigned divide, treats both operands as positive. */ ! 421: { int bits; ! 422: short dprec; ! 423: register unit bitmask; ! 424: if (testeq(divisor,0)) ! 425: return(-1); /* zero divisor means divide error */ ! 426: mp_init(remainder,0); ! 427: /* normalize and compute number of bits in dividend first */ ! 428: init_bitsniffer(dividend,bitmask,dprec,bits); ! 429: ! 430: while (bits--) ! 431: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0)); ! 432: msub(remainder,divisor); ! 433: bump_bitsniffer(dividend,bitmask); ! 434: } ! 435: return(0); ! 436: } /* mp_mod */ ! 437: ! 438: ! 439: word16 mp_shortmod(register unitptr dividend,register word16 divisor) ! 440: /* This function does a fast mod operation on a multprecision dividend ! 441: using a short integer modulus returning a short integer remainder. ! 442: This is an unsigned divide. It treats both operands as positive. ! 443: It is used mainly for fast sieve searches for large primes. ! 444: */ ! 445: { int bits; ! 446: short dprec; ! 447: register unit bitmask; ! 448: register word16 remainder; ! 449: if (!divisor) /* if divisor == 0 */ ! 450: return(-1); /* zero divisor means divide error */ ! 451: remainder=0; ! 452: /* normalize and compute number of bits in dividend first */ ! 453: init_bitsniffer(dividend,bitmask,dprec,bits); ! 454: ! 455: while (bits--) ! 456: { remainder <<= 1; ! 457: if (sniff_bit(dividend,bitmask)) ! 458: remainder++; ! 459: if (remainder >= divisor) remainder -= divisor; ! 460: bump_bitsniffer(dividend,bitmask); ! 461: } ! 462: return(remainder); ! 463: } /* mp_shortmod */ ! 464: ! 465: ! 466: ! 467: #ifndef COMB_MULT /* if not "comb" multiply, use peasant multiply */ ! 468: ! 469: int mp_mult(register unitptr prod, ! 470: register unitptr multiplicand,register unitptr multiplier) ! 471: /* Computes multiprecision prod = multiplicand * multiplier */ ! 472: { /* Uses "Russian peasant" multiply algorithm. */ ! 473: int bits; ! 474: register unit bitmask; ! 475: short mprec; ! 476: mp_init(prod,0); ! 477: if (testeq(multiplicand,0)) ! 478: return(0); /* zero multiplicand means zero product */ ! 479: /* normalize and compute number of bits in multiplier first */ ! 480: init_bitsniffer(multiplier,bitmask,mprec,bits); ! 481: ! 482: while (bits--) ! 483: { mp_shift_left(prod); ! 484: if (sniff_bit(multiplier,bitmask)) ! 485: { mp_add(prod,multiplicand); ! 486: } ! 487: bump_bitsniffer(multiplier,bitmask); ! 488: } ! 489: return(0); ! 490: } /* mp_mult */ ! 491: ! 492: #else /* use "comb" multiply algorithm */ ! 493: ! 494: int mp_mult(register unitptr prod, ! 495: register unitptr multiplicand, register unitptr multiplier) ! 496: /* Computes multiprecision prod = multiplicand * multiplier */ ! 497: { /* Uses interleaved comb multiply algorithm. ! 498: This improved multiply more than twice as fast as a Russian ! 499: peasant multiply, because it does a lot fewer shifts. ! 500: Must have global_precision set to the size of the multiplicand ! 501: plus UNITSIZE-1 SLOP_BITS. Produces a product that is the sum ! 502: of the lengths of the multiplier and multiplicand. ! 503: */ ! 504: int bits; ! 505: register unit bitmask; ! 506: unitptr product, mplier, temp; ! 507: short mprec,mprec2; ! 508: unit mplicand[MAX_UNIT_PRECISION]; ! 509: mp_init(prod,0); /* better clear full width--double precision */ ! 510: if (testeq(multiplicand,0)) ! 511: return(0); /* zero multiplicand means zero product */ ! 512: ! 513: mp_move(mplicand,multiplicand); /* save it from damage */ ! 514: ! 515: normalize(multiplier,mprec); ! 516: if (!mprec) ! 517: return(0); ! 518: ! 519: make_lsbptr(multiplier,mprec); ! 520: bitmask = 1; /* start scan at LSB of multiplier */ ! 521: ! 522: do /* UNITSIZE times */ ! 523: { /* do for bits 0-15 */ ! 524: product = prod; ! 525: mplier = multiplier; ! 526: mprec2 = mprec; ! 527: while (mprec2--) /* do for each word in multiplier */ ! 528: { ! 529: if (sniff_bit(mplier,bitmask)) ! 530: { if (mp_addc(product,multiplicand,0)) /* ripple carry */ ! 531: { /* After 1st time thru, this is rarely encountered. */ ! 532: temp = msbptr(product,global_precision); ! 533: pre_higherunit(temp); ! 534: /* temp now points to LSU of carry region. */ ! 535: unmake_lsbptr(temp,global_precision); ! 536: mp_inc(temp); ! 537: } /* ripple carry */ ! 538: } ! 539: pre_higherunit(mplier); ! 540: pre_higherunit(product); ! 541: } ! 542: if (!(bitmask <<= 1)) ! 543: break; ! 544: mp_shift_left(multiplicand); ! 545: ! 546: } while (TRUE); ! 547: ! 548: mp_move(multiplicand,mplicand); /* recover */ ! 549: ! 550: return(0); /* normal return */ ! 551: } /* mp_mult */ ! 552: ! 553: #endif /* COMB_MULT */ ! 554: ! 555: ! 556: /* mp_modmult computes a multiply combined with a modulo operation. ! 557: Before calling mp_modmult, you must first call ! 558: stage_modulus(the_modulus) ! 559: */ ! 560: ! 561: #ifdef COUNTMULTS ! 562: /* "number of modmults" counters, used for performance studies. */ ! 563: static unsigned int tally_modmults = 0; ! 564: static unsigned int tally_modsquares = 0; ! 565: #endif /* COUNTMULTS */ ! 566: ! 567: #ifndef ASM_MODMULT /* not assembly primitive modmult */ ! 568: ! 569: #ifdef PEASANT ! 570: /* Conventional Russian peasant multiply with modulo algorithm. */ ! 571: ! 572: static unitptr modulus = 0; /* used only by mp_modmult */ ! 573: ! 574: int stage_peasant_modulus(unitptr n) ! 575: /* Must pass modulus to stage_modulus before calling modmult. ! 576: Assumes that global_precision has already been adjusted to the ! 577: size of the modulus, plus SLOP_BITS. ! 578: */ ! 579: { /* For this simple version of modmult, just copy unit pointer. */ ! 580: modulus = n; ! 581: return(0); /* normal return */ ! 582: } /* stage_peasant_modulus */ ! 583: ! 584: ! 585: int peasant_modmult(register unitptr prod, ! 586: unitptr multiplicand,register unitptr multiplier) ! 587: { /* "Russian peasant" multiply algorithm, combined with a modulo ! 588: operation. This is a simple naive replacement for Merritt's ! 589: faster modmult algorithm. References global unitptr "modulus". ! 590: Computes: prod = (multiplicand*multiplier) mod modulus ! 591: WARNING: All the arguments must be less than the modulus! ! 592: */ ! 593: int bits; ! 594: register unit bitmask; ! 595: short mprec; ! 596: mp_init(prod,0); ! 597: /* if (testeq(multiplicand,0)) ! 598: return(0); */ /* zero multiplicand means zero product */ ! 599: /* normalize and compute number of bits in multiplier first */ ! 600: init_bitsniffer(multiplier,bitmask,mprec,bits); ! 601: ! 602: while (bits--) ! 603: { mp_shift_left(prod); ! 604: msub(prod,modulus); /* turns mult into modmult */ ! 605: if (sniff_bit(multiplier,bitmask)) ! 606: { mp_add(prod,multiplicand); ! 607: msub(prod,modulus); /* turns mult into modmult */ ! 608: } ! 609: bump_bitsniffer(multiplier,bitmask); ! 610: } ! 611: return(0); ! 612: } /* peasant_modmult */ ! 613: ! 614: ! 615: /* If we are using a version of mp_modmult that uses internal tables ! 616: in memory, we have to call modmult_burn() at the end of mp_modexp. ! 617: This is so that no sensitive data is left in memory after the program ! 618: exits. The Russian peasant method doesn't use any such tables. ! 619: */ ! 620: static void peasant_burn(void) ! 621: /* Alias for modmult_burn, called only from mp_modexp(). Destroys ! 622: internal modmult tables. This version does nothing because no ! 623: tables are used by the Russian peasant modmult. */ ! 624: { } /* peasant_burn */ ! 625: ! 626: #endif /* PEASANT */ ! 627: ! 628: ! 629: #ifdef MERRITT ! 630: /*=========================================================================*/ ! 631: /* ! 632: This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ. ! 633: Also refined by Zhahai Stewart to reduce the number of subtracts. ! 634: It performs a multiply combined with a modulo operation, without ! 635: going into "double precision". It is faster than the Russian peasant ! 636: method, and still works well on machines that lack a fast hardware ! 637: multiply instruction. ! 638: */ ! 639: ! 640: /* The following support functions, macros, and data structures ! 641: are used only by Merritt's modmult algorithm... */ ! 642: ! 643: static void mp_lshift_unit(register unitptr r1) ! 644: /* Shift r1 1 whole unit to the left. Used only by modmult function. */ ! 645: { register short precision; ! 646: register unitptr r2; ! 647: precision = global_precision; ! 648: make_msbptr(r1,precision); ! 649: r2 = r1; ! 650: while (--precision) ! 651: *post_lowerunit(r1) = *pre_lowerunit(r2); ! 652: *r1 = 0; ! 653: } /* mp_lshift_unit */ ! 654: ! 655: ! 656: /* moduli_buf contains shifted images of the modulus, set by stage_modulus */ ! 657: static unit moduli_buf[UNITSIZE][MAX_UNIT_PRECISION] = {0}; ! 658: static unitptr moduli[UNITSIZE+1] = /* contains pointers into moduli_buf */ ! 659: { 0 ! 660: ,&moduli_buf[ 0][0], &moduli_buf[ 1][0], &moduli_buf[ 2][0], &moduli_buf[ 3][0], ! 661: &moduli_buf[ 4][0], &moduli_buf[ 5][0], &moduli_buf[ 6][0], &moduli_buf[ 7][0] ! 662: #ifndef UNIT8 ! 663: ,&moduli_buf[ 8][0], &moduli_buf[ 9][0], &moduli_buf[10][0], &moduli_buf[11][0], ! 664: &moduli_buf[12][0], &moduli_buf[13][0], &moduli_buf[14][0], &moduli_buf[15][0] ! 665: #ifndef UNIT16 /* and not UNIT8 */ ! 666: ,&moduli_buf[16][0], &moduli_buf[17][0], &moduli_buf[18][0], &moduli_buf[19][0], ! 667: &moduli_buf[20][0], &moduli_buf[21][0], &moduli_buf[22][0], &moduli_buf[23][0], ! 668: &moduli_buf[24][0], &moduli_buf[25][0], &moduli_buf[26][0], &moduli_buf[27][0], ! 669: &moduli_buf[28][0], &moduli_buf[29][0], &moduli_buf[30][0], &moduli_buf[31][0] ! 670: #endif /* UNIT16 and UNIT8 not defined */ ! 671: #endif /* UNIT8 not defined */ ! 672: }; ! 673: ! 674: /* To optimize msubs, need following 2 unit arrays, each filled ! 675: with the most significant unit and next-to-most significant unit ! 676: of the preshifted images of the RSA modulus. */ ! 677: static unit msu_moduli[UNITSIZE+1] = {0}; /* most signif. unit */ ! 678: static unit nmsu_moduli[UNITSIZE+1] = {0}; /* next-most signif. unit */ ! 679: ! 680: /* mpdbuf contains preshifted images of the multiplicand, mod n. ! 681: It is used only by mp_modmult. It could be staticly declared ! 682: inside of mp_modmult, but we put it outside mp_modmult so that ! 683: it can be wiped clean by modmult_burn(), which is called at the ! 684: end of mp_modexp. This is so that no sensitive data is left in ! 685: memory after the program exits. ! 686: */ ! 687: static unit mpdbuf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0}; ! 688: ! 689: ! 690: static void stage_mp_images(unitptr images[UNITSIZE],unitptr r) ! 691: /* Computes UNITSIZE images of r, each shifted left 1 more bit. ! 692: Used only by modmult function. ! 693: */ ! 694: { short int i; ! 695: images[0] = r; /* no need to move the first image, just copy ptr */ ! 696: for (i=1; i<UNITSIZE; i++) ! 697: { mp_move(images[i],images[i-1]); ! 698: mp_shift_left(images[i]); ! 699: } ! 700: } /* stage_mp_images */ ! 701: ! 702: ! 703: int stage_merritt_modulus(unitptr n) ! 704: /* Computes UNITSIZE+1 images of modulus, each shifted left 1 more bit. ! 705: Before calling mp_modmult, you must first stage the moduli images by ! 706: calling stage_modulus. n is the pointer to the modulus. ! 707: Assumes that global_precision has already been adjusted to the ! 708: size of the modulus, plus SLOP_BITS. ! 709: */ ! 710: { short int i; ! 711: unitptr msu; /* ptr to most significant unit, for faster msubs */ ! 712: moduli[0] = n; /* no need to move the first image, just copy ptr */ ! 713: ! 714: /* used by optimized msubs macro... */ ! 715: msu = msbptr(n,global_precision); /* needed by msubs */ ! 716: msu_moduli[0] = *post_lowerunit(msu); ! 717: nmsu_moduli[0] = *msu; ! 718: ! 719: for (i=1; i<UNITSIZE+1; i++) ! 720: { mp_move(moduli[i],moduli[i-1]); ! 721: mp_shift_left(moduli[i]); ! 722: ! 723: /* used by optimized msubs macro... */ ! 724: msu = msbptr(moduli[i],global_precision); /* needed by msubs */ ! 725: msu_moduli[i] = *post_lowerunit(msu); /* for faster msubs */ ! 726: nmsu_moduli[i] = *msu; ! 727: } ! 728: return(0); /* normal return */ ! 729: } /* stage_merritt_modulus */ ! 730: ! 731: ! 732: /* The following macros, sniffadd and msubs, are used by modmult... */ ! 733: #define sniffadd(i) if (*multiplier & power_of_2(i)) mp_add(prod,mpd[i]) ! 734: ! 735: /* Unoptimized msubs macro (msubs0) follows... */ ! 736: /* #define msubs0(i) msub(prod,moduli[i]) ! 737: */ ! 738: ! 739: /* To optimize msubs, compare the most significant units of the ! 740: product and the shifted modulus before deciding to call the full ! 741: mp_compare routine. Better still, compare the upper two units ! 742: before deciding to call mp_compare. ! 743: Optimization of msubs relies heavily on standard C left-to-right ! 744: parsimonious evaluation of logical expressions. ! 745: */ ! 746: ! 747: /* Partially-optimized msubs macro (msubs1) follows... */ ! 748: /* #define msubs1(i) if ( \ ! 749: ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \ ! 750: (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \ ! 751: ) mp_sub(prod,moduli[i]) ! 752: */ ! 753: ! 754: /* Fully-optimized msubs macro (msubs2) follows... */ ! 755: #define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \ ! 756: (p_m==0) && ( (*nmsu_prod>nmsu_moduli[i]) || ( \ ! 757: (*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \ ! 758: mp_sub(prod,moduli[i]) ! 759: ! 760: ! 761: int merritt_modmult(register unitptr prod, ! 762: unitptr multiplicand,register unitptr multiplier) ! 763: /* Performs combined multiply/modulo operation. ! 764: Computes: prod = (multiplicand*multiplier) mod modulus ! 765: WARNING: All the arguments must be less than the modulus! ! 766: Assumes the modulus has been predefined by first calling ! 767: stage_modulus. ! 768: */ ! 769: { ! 770: /* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro...*/ ! 771: register signedunit p_m; ! 772: register unitptr msu_prod; /* ptr to most significant unit of product */ ! 773: register unitptr nmsu_prod; /* next-most signif. unit of product */ ! 774: short mprec; /* precision of multiplier, in units */ ! 775: /* Array mpd contains a list of pointers to preshifted images of ! 776: the multiplicand: */ ! 777: static unitptr mpd[UNITSIZE] = ! 778: { 0, &mpdbuf[ 0][0], &mpdbuf[ 1][0], &mpdbuf[ 2][0], ! 779: &mpdbuf[ 3][0], &mpdbuf[ 4][0], &mpdbuf[ 5][0], &mpdbuf[ 6][0] ! 780: #ifndef UNIT8 ! 781: ,&mpdbuf[ 7][0], &mpdbuf[ 8][0], &mpdbuf[ 9][0], &mpdbuf[10][0], ! 782: &mpdbuf[11][0], &mpdbuf[12][0], &mpdbuf[13][0], &mpdbuf[14][0] ! 783: #ifndef UNIT16 /* and not UNIT8 */ ! 784: ,&mpdbuf[15][0], &mpdbuf[16][0], &mpdbuf[17][0], &mpdbuf[18][0], ! 785: &mpdbuf[19][0], &mpdbuf[20][0], &mpdbuf[21][0], &mpdbuf[22][0], ! 786: &mpdbuf[23][0], &mpdbuf[24][0], &mpdbuf[25][0], &mpdbuf[26][0], ! 787: &mpdbuf[27][0], &mpdbuf[28][0], &mpdbuf[29][0], &mpdbuf[30][0] ! 788: #endif /* UNIT16 and UNIT8 not defined */ ! 789: #endif /* UNIT8 not defined */ ! 790: }; ! 791: ! 792: /* Compute preshifted images of multiplicand, mod n: */ ! 793: stage_mp_images(mpd,multiplicand); ! 794: ! 795: /* To optimize msubs, set up msu_prod and nmsu_prod: */ ! 796: msu_prod = msbptr(prod,global_precision); /* Get ptr to MSU of prod */ ! 797: nmsu_prod = msu_prod; ! 798: post_lowerunit(nmsu_prod); /* Get next-MSU of prod */ ! 799: ! 800: /* To understand this algorithm, it would be helpful to first ! 801: study the conventional Russian peasant modmult algorithm. ! 802: This one does about the same thing as Russian peasant, but ! 803: is organized differently to save some steps. It loops ! 804: through the multiplier a word (unit) at a time, instead of ! 805: a bit at a time. It word-shifts the product instead of ! 806: bit-shifting it, so it should be faster. It also does about ! 807: half as many subtracts as Russian peasant. ! 808: */ ! 809: ! 810: mp_init(prod,0); /* Initialize product to 0. */ ! 811: ! 812: /* The way mp_modmult is actually used in cryptographic ! 813: applications, there will NEVER be a zero multiplier or ! 814: multiplicand. So there is no need to optimize for that ! 815: condition. ! 816: */ ! 817: /* if (testeq(multiplicand,0)) ! 818: return(0); */ /* zero multiplicand means zero product */ ! 819: /* Normalize and compute number of units in multiplier first: */ ! 820: normalize(multiplier,mprec); ! 821: if (mprec==0) /* if precision of multiplier is 0 */ ! 822: return(0); /* zero multiplier means zero product */ ! 823: make_msbptr(multiplier,mprec); /* start at MSU of multiplier */ ! 824: ! 825: while (mprec--) /* Loop for the number of units in the multiplier */ ! 826: { ! 827: /* Shift the product one whole unit to the left. ! 828: This is part of the multiply phase of modmult. ! 829: */ ! 830: ! 831: mp_lshift_unit(prod); ! 832: ! 833: /* Sniff each bit in the current unit of the multiplier, ! 834: and conditionally add the the corresponding preshifted ! 835: image of the multiplicand to the product. ! 836: This is also part of the multiply phase of modmult. ! 837: ! 838: The following loop is unrolled for speed, using macros... ! 839: ! 840: for (i=UNITSIZE-1; i>=0; i--) ! 841: if (*multiplier & power_of_2(i)) ! 842: mp_add(prod,mpd[i]); ! 843: */ ! 844: #ifndef UNIT8 ! 845: #ifndef UNIT16 /* and not UNIT8 */ ! 846: sniffadd(31); ! 847: sniffadd(30); ! 848: sniffadd(29); ! 849: sniffadd(28); ! 850: sniffadd(27); ! 851: sniffadd(26); ! 852: sniffadd(25); ! 853: sniffadd(24); ! 854: sniffadd(23); ! 855: sniffadd(22); ! 856: sniffadd(21); ! 857: sniffadd(20); ! 858: sniffadd(19); ! 859: sniffadd(18); ! 860: sniffadd(17); ! 861: sniffadd(16); ! 862: #endif /* not UNIT16 and not UNIT8 */ ! 863: sniffadd(15); ! 864: sniffadd(14); ! 865: sniffadd(13); ! 866: sniffadd(12); ! 867: sniffadd(11); ! 868: sniffadd(10); ! 869: sniffadd(9); ! 870: sniffadd(8); ! 871: #endif /* not UNIT8 */ ! 872: sniffadd(7); ! 873: sniffadd(6); ! 874: sniffadd(5); ! 875: sniffadd(4); ! 876: sniffadd(3); ! 877: sniffadd(2); ! 878: sniffadd(1); ! 879: sniffadd(0); ! 880: ! 881: /* The product may have grown by as many as UNITSIZE+1 ! 882: bits. That's why we have global_precision set to the ! 883: size of the modulus plus UNITSIZE+1 slop bits. ! 884: Now reduce the product back down by conditionally ! 885: subtracting the UNITSIZE+1 preshifted images of the ! 886: modulus. This is the modulo reduction phase of modmult. ! 887: ! 888: The following loop is unrolled for speed, using macros... ! 889: ! 890: for (i=UNITSIZE; i>=0; i--) ! 891: if (mp_compare(prod,moduli[i]) >= 0) ! 892: mp_sub(prod,moduli[i]); ! 893: */ ! 894: #ifndef UNIT8 ! 895: #ifndef UNIT16 /* and not UNIT8 */ ! 896: msubs(32); ! 897: msubs(31); ! 898: msubs(30); ! 899: msubs(29); ! 900: msubs(28); ! 901: msubs(27); ! 902: msubs(26); ! 903: msubs(25); ! 904: msubs(24); ! 905: msubs(23); ! 906: msubs(22); ! 907: msubs(21); ! 908: msubs(20); ! 909: msubs(19); ! 910: msubs(18); ! 911: msubs(17); ! 912: #endif /* not UNIT16 and not UNIT8 */ ! 913: msubs(16); ! 914: msubs(15); ! 915: msubs(14); ! 916: msubs(13); ! 917: msubs(12); ! 918: msubs(11); ! 919: msubs(10); ! 920: msubs(9); ! 921: #endif /* not UNIT8 */ ! 922: msubs(8); ! 923: msubs(7); ! 924: msubs(6); ! 925: msubs(5); ! 926: msubs(4); ! 927: msubs(3); ! 928: msubs(2); ! 929: msubs(1); ! 930: msubs(0); ! 931: ! 932: /* Bump pointer to next lower unit of multiplier: */ ! 933: post_lowerunit(multiplier); ! 934: ! 935: } /* Loop for the number of units in the multiplier */ ! 936: ! 937: return(0); /* normal return */ ! 938: ! 939: } /* merritt_modmult */ ! 940: ! 941: ! 942: #undef msubs ! 943: #undef sniffadd ! 944: ! 945: ! 946: /* Merritt's mp_modmult function leaves some internal tables in memory, ! 947: so we have to call modmult_burn() at the end of mp_modexp. ! 948: This is so that no cryptographically sensitive data is left in memory ! 949: after the program exits. ! 950: */ ! 951: static void merritt_burn(void) ! 952: /* Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */ ! 953: { unitfill0(&(mpdbuf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION); ! 954: unitfill0(&(moduli_buf[0][0]),(UNITSIZE)*MAX_UNIT_PRECISION); ! 955: unitfill0(msu_moduli,UNITSIZE+1); ! 956: unitfill0(nmsu_moduli,UNITSIZE+1); ! 957: } /* merritt_burn() */ ! 958: ! 959: /******* end of Merritt's MODMULT stuff. *******/ ! 960: /*=========================================================================*/ ! 961: #endif /* MERRITT */ ! 962: ! 963: ! 964: #ifdef STEWART /* using Zhahai Stewart's modmult algorithm */ ! 965: #include "stewart.c" ! 966: #endif /* STEWART */ ! 967: ! 968: #endif /* not ASM_MODMULT */ ! 969: ! 970: ! 971: int countbits(unitptr r) ! 972: /* Returns number of significant bits in r */ ! 973: { int bits; ! 974: short prec; ! 975: register unit bitmask; ! 976: init_bitsniffer(r,bitmask,prec,bits); ! 977: return(bits); ! 978: } /* countbits */ ! 979: ! 980: ! 981: int mp_modexp(register unitptr expout,register unitptr expin, ! 982: register unitptr exponent,register unitptr modulus) ! 983: { /* Russian peasant combined exponentiation/modulo algorithm. ! 984: Calls modmult instead of mult. ! 985: Computes: expout = (expin**exponent) mod modulus ! 986: WARNING: All the arguments must be less than the modulus! ! 987: */ ! 988: int bits; ! 989: short oldprecision; ! 990: register unit bitmask; ! 991: unit product[MAX_UNIT_PRECISION]; ! 992: short eprec; ! 993: ! 994: #ifdef COUNTMULTS ! 995: tally_modmults = 0; /* clear "number of modmults" counter */ ! 996: tally_modsquares = 0; /* clear "number of modsquares" counter */ ! 997: #endif /* COUNTMULTS */ ! 998: mp_init(expout,1); ! 999: if (testeq(exponent,0)) ! 1000: { if (testeq(expin,0)) ! 1001: return(-1); /* 0 to the 0th power means return error */ ! 1002: return(0); /* otherwise, zero exponent means expout is 1 */ ! 1003: } ! 1004: if (testeq(modulus,0)) ! 1005: return(-2); /* zero modulus means error */ ! 1006: #if SLOP_BITS > 0 /* if there's room for sign bits */ ! 1007: if (mp_tstminus(modulus)) ! 1008: return(-2); /* negative modulus means error */ ! 1009: #endif /* SLOP_BITS > 0 */ ! 1010: if (mp_compare(expin,modulus) >= 0) ! 1011: return(-3); /* if expin >= modulus, return error */ ! 1012: if (mp_compare(exponent,modulus) >= 0) ! 1013: return(-4); /* if exponent >= modulus, return error */ ! 1014: ! 1015: oldprecision = global_precision; /* save global_precision */ ! 1016: /* set smallest optimum precision for this modulus */ ! 1017: set_precision(bits2units(countbits(modulus)+SLOP_BITS)); ! 1018: /* rescale all these registers to global_precision we just defined */ ! 1019: rescale(modulus,oldprecision,global_precision); ! 1020: rescale(expin,oldprecision,global_precision); ! 1021: rescale(exponent,oldprecision,global_precision); ! 1022: rescale(expout,oldprecision,global_precision); ! 1023: ! 1024: if (stage_modulus(modulus)) ! 1025: { set_precision(oldprecision); /* restore original precision */ ! 1026: return(-5); /* unstageable modulus (STEWART algorithm) */ ! 1027: } ! 1028: ! 1029: /* normalize and compute number of bits in exponent first */ ! 1030: init_bitsniffer(exponent,bitmask,eprec,bits); ! 1031: ! 1032: /* We can "optimize out" the first modsquare and modmult: */ ! 1033: bits--; /* We know for sure at this point that bits>0 */ ! 1034: mp_move(expout,expin); /* expout = (1*1)*expin; */ ! 1035: bump_bitsniffer(exponent,bitmask); ! 1036: ! 1037: while (bits--) ! 1038: { ! 1039: poll_for_break(); /* polls keyboard, allows ctrl-C to abort program */ ! 1040: #ifdef COUNTMULTS ! 1041: tally_modsquares++; /* bump "number of modsquares" counter */ ! 1042: #endif /* COUNTMULTS */ ! 1043: mp_modsquare(product,expout); ! 1044: mp_move(expout,product); ! 1045: if (sniff_bit(exponent,bitmask)) ! 1046: { mp_modmult(product,expout,expin); ! 1047: mp_move(expout,product); ! 1048: #ifdef COUNTMULTS ! 1049: tally_modmults++; /* bump "number of modmults" counter */ ! 1050: #endif /* COUNTMULTS */ ! 1051: } ! 1052: bump_bitsniffer(exponent,bitmask); ! 1053: } /* while bits-- */ ! 1054: mp_burn(product); /* burn the evidence on the stack */ ! 1055: modmult_burn(); /* ask mp_modmult to also burn its own evidence */ ! 1056: ! 1057: #ifdef COUNTMULTS /* diagnostic analysis */ ! 1058: { long atomic_mults; ! 1059: unsigned int unitcount,totalmults; ! 1060: unitcount = bits2units(countbits(modulus)); ! 1061: /* calculation assumes modsquare takes as long as a modmult: */ ! 1062: atomic_mults = (long) tally_modmults * (unitcount * unitcount); ! 1063: atomic_mults += (long) tally_modsquares * (unitcount * unitcount); ! 1064: printf("%ld atomic mults for ",atomic_mults); ! 1065: printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n", ! 1066: tally_modsquares,tally_modmults, ! 1067: tally_modsquares+tally_modmults, ! 1068: countbits(modulus), unitcount); ! 1069: } ! 1070: #endif /* COUNTMULTS */ ! 1071: ! 1072: set_precision(oldprecision); /* restore original precision */ ! 1073: return(0); /* normal return */ ! 1074: } /* mp_modexp */ ! 1075: ! 1076: ! 1077: char *copyright_notice(void) ! 1078: /* force linker to include copyright notice in the executable object image. */ ! 1079: { return ("(c)1986 Philip Zimmermann"); } /* copyright_notice */ ! 1080: ! 1081: ! 1082: int rsa_decrypt(unitptr M, unitptr C, ! 1083: unitptr d, unitptr p, unitptr q, unitptr u) ! 1084: /* Uses Chinese Remainder Theorem shortcut for RSA decryption. ! 1085: M is the output plaintext message. ! 1086: C is the input ciphertext message. ! 1087: d is the secret decryption exponent. ! 1088: p and q are the prime factors of n. ! 1089: u is the multiplicative inverse of p, mod q. ! 1090: Note that u is precomputed on the assumption that p<q. ! 1091: n, the common modulus, is not used here because of the ! 1092: Chinese Remainder Theorem shortcut. ! 1093: */ ! 1094: { ! 1095: unit p2[MAX_UNIT_PRECISION]; ! 1096: unit q2[MAX_UNIT_PRECISION]; ! 1097: unit temp1[MAX_UNIT_PRECISION]; ! 1098: unit temp2[MAX_UNIT_PRECISION]; ! 1099: int status; ! 1100: ! 1101: mp_init(M,1); /* initialize result in case of error */ ! 1102: ! 1103: if (mp_compare(p,q) >= 0) /* ensure that p<q */ ! 1104: { /* swap the pointers p and q */ ! 1105: unitptr t; ! 1106: t = p; p = q; q = t; ! 1107: } ! 1108: ! 1109: /* Rather than decrypting by computing modexp with full mod n ! 1110: precision, compute a shorter modexp with mod p precision... */ ! 1111: ! 1112: /* p2 = [ (C mod p)**( d mod (p-1) ) ] mod p */ ! 1113: mp_move(temp1,p); ! 1114: mp_dec(temp1); /* temp1 = p-1 */ ! 1115: mp_mod(temp2,d,temp1); /* temp2 = d mod (p-1) ) */ ! 1116: mp_mod(temp1,C,p); /* temp1 = C mod p */ ! 1117: status = mp_modexp(p2,temp1,temp2,p); ! 1118: if (status < 0) /* mp_modexp returned an error. */ ! 1119: return(status); /* error return */ ! 1120: ! 1121: /* Now compute a short modexp with mod q precision... */ ! 1122: ! 1123: /* q2 = [ (C mod q)**( d mod (q-1) ) ] mod q */ ! 1124: mp_move(temp1,q); ! 1125: mp_dec(temp1); /* temp1 = q-1 */ ! 1126: mp_mod(temp2,d,temp1); /* temp2 = d mod (q-1) */ ! 1127: mp_mod(temp1,C,q); /* temp1 = C mod q */ ! 1128: status = mp_modexp(q2,temp1,temp2,q); ! 1129: if (status < 0) /* mp_modexp returned an error. */ ! 1130: return(status); /* error return */ ! 1131: ! 1132: /* Now use the multiplicative inverse u to glue together the ! 1133: two halves, saving a lot of time by avoiding a full mod n ! 1134: exponentiation. At key generation time, u was computed ! 1135: with the secret key as the multiplicative inverse of ! 1136: p, mod q such that: (p*u) mod q = 1, assuming p<q. ! 1137: */ ! 1138: if (mp_compare(p2,q2) == 0) /* only happens if C<p */ ! 1139: mp_move(M,p2); ! 1140: else ! 1141: { /* q2 = q2 - p2; if q2<0 then q2 = q2 + q */ ! 1142: if (mp_sub(q2,p2)) /* if the result went negative... */ ! 1143: mp_add(q2,q); /* add q to q2 */ ! 1144: ! 1145: /* M = p2 + ( p * [(q2*u) mod q] ) */ ! 1146: mp_mult(temp1,q2,u); /* temp1 = q2*u */ ! 1147: mp_mod(temp2,temp1,q); /* temp2 = temp1 mod q */ ! 1148: mp_mult(temp1,p,temp2); /* temp1 = p * temp2 */ ! 1149: mp_add(temp1,p2); /* temp1 = temp1 + p2 */ ! 1150: mp_move(M,temp1); /* M = temp1 */ ! 1151: } ! 1152: ! 1153: mp_burn(p2); /* burn the evidence on the stack...*/ ! 1154: mp_burn(q2); ! 1155: mp_burn(temp1); ! 1156: mp_burn(temp2); ! 1157: /* Do an explicit reference to the copyright notice so that the linker ! 1158: will be forced to include it in the executable object image... */ ! 1159: copyright_notice(); /* has no real effect at run time */ ! 1160: return(0); /* normal return */ ! 1161: } /* rsa_decrypt */ ! 1162: ! 1163: ! 1164: /* ! 1165: ** mp_sqrt - returns square root of a number. ! 1166: ** returns -1 for error, 0 for perfect square, 1 for not perfect square. ! 1167: ** Not used by any RSA encrypt or decrypt functions. ! 1168: ** Used by some factoring algorithms. This version needs optimization. ! 1169: ** by Charles W. Merritt July 15, 1989, refined by PRZ. ! 1170: */ ! 1171: int mp_sqrt(unitptr quotient,unitptr dividend) ! 1172: /* Quotient is returned as the square root of dividend. */ ! 1173: { ! 1174: register char next2bits; /* "period", or group of 2 bits of dividend */ ! 1175: register unit dvdbitmask,qbitmask; ! 1176: unit remainder[MAX_UNIT_PRECISION],rjq[MAX_UNIT_PRECISION], ! 1177: divisor[MAX_UNIT_PRECISION]; ! 1178: unsigned int qbits,qprec,dvdbits,dprec,oldprecision; ! 1179: int notperfect; ! 1180: ! 1181: mp_init(quotient,0); ! 1182: if (mp_tstminus(dividend)) /* if dividend<0, return error */ ! 1183: { mp_dec(quotient); /* quotient = -1 */ ! 1184: return(-1); ! 1185: } ! 1186: ! 1187: /* normalize and compute number of bits in dividend first */ ! 1188: init_bitsniffer(dividend,dvdbitmask,dprec,dvdbits); ! 1189: /* init_bitsniffer returns a 0 if dvdbits is 0 */ ! 1190: if (dvdbits==1) ! 1191: { mp_init(quotient,1); /* square root of 1 is 1 */ ! 1192: return(0); ! 1193: } ! 1194: ! 1195: /* rescale quotient to half the precision of dividend */ ! 1196: qbits = (dvdbits+1) >> 1; ! 1197: qprec = bits2units(qbits); ! 1198: rescale(quotient,global_precision,qprec); ! 1199: make_msbptr(quotient,qprec); ! 1200: qbitmask = power_of_2( (qbits-1) & (UNITSIZE-1)) ; ! 1201: ! 1202: /* Set smallest optimum precision for this square root. ! 1203: The low-level primitives are affected by the call to set_precision. ! 1204: Even though the dividend precision is bigger than the precision ! 1205: we will use, no low-level primitives will be used on the dividend. ! 1206: They will be used on the quotient, remainder, and rjq, which are ! 1207: smaller precision. ! 1208: */ ! 1209: oldprecision = global_precision; /* save global_precision */ ! 1210: set_precision(bits2units(qbits+3)); /* 3 bits of precision slop */ ! 1211: ! 1212: /* special case: sqrt of 1st 2 (binary) digits of dividend ! 1213: is 1st (binary) digit of quotient. This is always 1. */ ! 1214: stuff_bit(quotient,qbitmask); ! 1215: bump_bitsniffer(quotient,qbitmask); ! 1216: mp_init(rjq,1); /* rjq is Right Justified Quotient */ ! 1217: ! 1218: if (!(dvdbits & 1)) ! 1219: { /* even number of bits in dividend */ ! 1220: next2bits = 2; ! 1221: bump_bitsniffer(dividend,dvdbitmask); dvdbits--; ! 1222: if (sniff_bit(dividend,dvdbitmask)) next2bits++; ! 1223: bump_bitsniffer(dividend,dvdbitmask); dvdbits--; ! 1224: } ! 1225: else ! 1226: { /* odd number of bits in dividend */ ! 1227: next2bits = 1; ! 1228: bump_bitsniffer(dividend,dvdbitmask); dvdbits--; ! 1229: } ! 1230: ! 1231: mp_init(remainder,next2bits-1); ! 1232: ! 1233: /* dvdbits is guaranteed to be even at this point */ ! 1234: ! 1235: while (dvdbits) ! 1236: { next2bits=0; ! 1237: if (sniff_bit(dividend,dvdbitmask)) next2bits=2; ! 1238: bump_bitsniffer(dividend,dvdbitmask); dvdbits--; ! 1239: if (sniff_bit(dividend,dvdbitmask)) next2bits++; ! 1240: bump_bitsniffer(dividend,dvdbitmask); dvdbits--; ! 1241: mp_rotate_left(remainder,(boolean)((next2bits&2)!=0)); ! 1242: mp_rotate_left(remainder,(boolean)((next2bits&1)!=0)); ! 1243: ! 1244: /* "divisor" is trial divisor, complete divisor is 4*rjq ! 1245: or 4*rjq+1. ! 1246: Subtract divisor times its last digit from remainder. ! 1247: If divisor ends in 1, remainder -= divisor*1, ! 1248: or if divisor ends in 0, remainder -= divisor*0 (do nothing). ! 1249: Last digit of divisor inflates divisor as large as possible ! 1250: yet still subtractable from remainder. ! 1251: */ ! 1252: mp_move(divisor,rjq); /* divisor = 4*rjq+1 */ ! 1253: mp_rotate_left(divisor,0); ! 1254: mp_rotate_left(divisor,1); ! 1255: if (mp_compare(remainder,divisor) >= 0) ! 1256: { mp_sub(remainder,divisor); ! 1257: stuff_bit(quotient,qbitmask); ! 1258: mp_rotate_left(rjq,1); ! 1259: } ! 1260: else ! 1261: mp_rotate_left(rjq,0); ! 1262: bump_bitsniffer(quotient,qbitmask); ! 1263: } ! 1264: notperfect = testne(remainder,0); /* not a perfect square? */ ! 1265: set_precision(oldprecision); /* restore original precision */ ! 1266: return(notperfect); /* normal return */ ! 1267: ! 1268: } /* mp_sqrt */ ! 1269: ! 1270: ! 1271: /* These are notes on computing the square root the manual old-fashioned ! 1272: way. This is the basis of the above fast sqrt algorthm: ! 1273: ! 1274: 1) Separate the number into groups (periods) of two digits each, ! 1275: beginning with units or at the decimal point. ! 1276: 2) Find the greatest perfect square in the left hand period & write ! 1277: its square root as the first figure of the required root. Subtract ! 1278: the square of this number from the left hand period. Annex to the ! 1279: remainder the next group so as to form a dividend. ! 1280: 3) Double the root already found and write it as a partial divisor at ! 1281: the left of the new dividend. Annex one zero digit, making a trial ! 1282: divisor, and divide the new dividend by the trial divisor. ! 1283: 4) Write the quotient in the root as the trial term and also substitute ! 1284: this quotient for the annexed zero digit in the partial divisor, ! 1285: making the latter complete. ! 1286: 5) Multiply the complete divisor by the figure just obtained and, ! 1287: if possible, subtract the product from the last remainder. ! 1288: If this product is too large, the trial term of the quotient ! 1289: must be replaced by the next smaller number and the operations ! 1290: preformed as before. ! 1291: (IN BINARY, OUR TRIAL TERM IS ALWAYS 1 AND WE USE IT OR DO NOTHING.) ! 1292: 6) Proceed in this manner until all periods are used. ! 1293: If there is still a remainder, it's not a perfect square. ! 1294: */ ! 1295: ! 1296: ! 1297: /****************** end of RSA library ****************************/ ! 1298: ! 1299:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.