|
|
1.1.1.2 root 1: /* C source code for multiprecision arithmetic library routines.
2: Implemented Nov 86 by Philip Zimmermann
3: Last revised 27 Nov 91 by PRZ
4:
5: Boulder Software Engineering
6: 3021 Eleventh Street
7: Boulder, CO 80304
8: (303) 541-0140
9:
10: (c) Copyright 1986-92 by Philip Zimmermann. All rights reserved.
11: The author assumes no liability for damages resulting from the use
12: of this software, even if the damage results from defects in this
13: software. No warranty is expressed or implied. The use of this
14: cryptographic software for developing weapon systems is expressly
15: forbidden.
16:
17: These routines implement all of the multiprecision arithmetic
18: necessary for number-theoretic cryptographic algorithms such as
19: ElGamal, Diffie-Hellman, Rabin, or factoring studies for large
20: composite numbers, as well as Rivest-Shamir-Adleman (RSA) public
21: key cryptography.
22:
23: Although originally developed in Microsoft C for the IBM PC, this code
24: contains few machine dependencies. It assumes 2's complement
25: arithmetic. It can be adapted to 8-bit, 16-bit, or 32-bit machines,
26: lowbyte-highbyte order or highbyte-lowbyte order. This version
27: has been converted to ANSI C.
28:
29:
30: The internal representation for these extended precision integer
31: "registers" is an array of "units". A unit is a machine word, which
32: is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit
33: unsigned integer, depending on the machine's word size. For example,
34: an IBM PC or AT uses a unit size of 16 bits. To perform arithmetic
35: on these huge precision integers, we pass pointers to these unit
36: arrays to various subroutines. A pointer to an array of units is of
37: type unitptr. This is a pointer to a huge integer "register".
38:
39: When calling a subroutine, we always pass a pointer to the BEGINNING
40: of the array of units, regardless of the byte order of the machine.
41: On a lowbyte-first machine, such as the Intel 80x86, this unitptr
42: points to the LEAST significant unit, and the array of units increases
43: significance to the right. On a highbyte-first machine, such as the
44: Motorola 680x0, this unitptr points to the MOST significant unit, and
45: the array of units decreases significance to the right.
46:
47: Modified 8 Apr 92 - HAJK
48: Implement new VAX/VMS primitive support.
49:
50: Modified 30 Sep 92 -Castor Fu <[email protected]>
51: Upgraded PORTABLE support to allow sizeof(unit) == sizeof(long)
52:
53: Modified 28 Nov 92 - Thad Smith
54: Added Smith modmult, generalized non-portable support.
55: */
56:
57: /* #define COUNTMULTS */ /* count modmults for performance studies */
58:
59: #ifdef DEBUG
60: #ifdef MSDOS
1.1.1.3 ! root 61: #ifdef __GO32__ /* DJGPP */
! 62: #include <pc.h>
! 63: #else
1.1.1.2 root 64: #include <conio.h>
1.1.1.3 ! root 65: #endif /* __GO32__ */
1.1.1.2 root 66: #define poll_for_break() {while (kbhit()) getch();}
67: #endif /* MSDOS */
68: #endif /* DEBUG */
69:
70: #ifndef poll_for_break
71: #define poll_for_break() /* stub */
72: #endif
73:
74: #include "mpilib.h"
75:
76: #ifdef mp_smula
77: #ifdef mp_smul
78: Error: Both mp_smula and mp_smul cannot be defined.
79: #else
80: #define mp_smul mp_smula
81: #endif
82: #endif
83:
84: /* set macros for MULTUNIT */
85: #ifdef MUNIT8
86: #define MULTUNITSIZE 8
87: typedef unsigned char MULTUNIT;
88: #ifdef UNIT8
89: #define MULTUNIT_SIZE_SAME
90: #endif
91: #else /* not MUNIT8 */
92: #ifdef MUNIT32
93: #define MULTUNITSIZE 32
94: typedef unsigned long MULTUNIT;
95: #ifdef UNIT32
96: #define MULTUNIT_SIZE_SAME
97: #else
98: /* #error is not portable, this has the same effect */
99: #include "UNITSIZE cannot be smaller than MULTUNITSIZE"
100: #endif
101: #else /* assume MUNIT16 */
102: #define MULTUNITSIZE 16
103: typedef unsigned short MULTUNIT;
104: #ifdef UNIT16
105: #define MULTUNIT_SIZE_SAME
106: #endif /* UNIT16 */
107: #ifdef UNIT8
108: #include "UNITSIZE cannot be smaller than MULTUNITSIZE"
109: #endif /* UNIT8 */
110: #endif /* MUNIT32 */
111: #endif /* MUNIT8 */
112:
113: #define MULTUNIT_msb ((MULTUNIT)1 << (MULTUNITSIZE-1)) /* msb of MULTUNIT */
114: #define DMULTUNIT_msb (1L << (2*MULTUNITSIZE-1))
115: #define MULTUNIT_mask ((MULTUNIT)((1L << MULTUNITSIZE)-1))
116: #define MULTUNITs_perunit (UNITSIZE/MULTUNITSIZE)
117:
118:
119: void mp_smul (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier);
120: void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier);
121:
122: short global_precision = 0; /* units of precision for all routines */
123: /* global_precision is the unit precision last set by set_precision.
124: Initially, set_precision() should be called to define global_precision
125: before using any of these other multiprecision library routines.
126: i.e.: set_precision(MAX_UNIT_PRECISION);
127: */
128:
129: /*************** multiprecision library primitives ****************/
130: /* The following portable C primitives should be recoded in assembly.
131: The entry point name should be defined, in "mpilib.h" to the external
132: entry point name. If undefined, the C version will be used.
133: */
134:
135: typedef unsigned long int ulint;
136:
137: #ifndef mp_addc
138: boolean mp_addc
139: (register unitptr r1,register unitptr r2,register boolean carry)
140: /* multiprecision add with carry r2 to r1, result in r1 */
141: /* carry is incoming carry flag-- value should be 0 or 1 */
142: { register unit x;
143: short precision; /* number of units to add */
144: precision = global_precision;
145: make_lsbptr(r1,precision);
146: make_lsbptr(r2,precision);
147: while (precision--)
148: {
149: if (carry)
150: { x = *r1 + *r2 + 1;
151: carry = (*r2 >= (unit)(~ *r1));
152: } else
153: { x = *r1 + *r2;
154: carry = (x < *r1) ;
155: }
156: post_higherunit(r2);
157: *post_higherunit(r1) = x;
158: }
159: return(carry); /* return the final carry flag bit */
160: } /* mp_addc */
161: #endif /* mp_addc */
162:
163: #ifndef mp_subb
164: boolean mp_subb
165: (register unitptr r1,register unitptr r2,register boolean borrow)
166: /* multiprecision subtract with borrow, r2 from r1, result in r1 */
167: /* borrow is incoming borrow flag-- value should be 0 or 1 */
168: { register unit x;
169: short precision; /* number of units to subtract */
170: precision = global_precision;
171: make_lsbptr(r1,precision);
172: make_lsbptr(r2,precision);
173: while (precision--)
174: { if (borrow)
175: { x = *r1 - *r2 - 1;
176: borrow = (*r1 <= *r2);
177: } else
178: { x = *r1 - *r2;
179: borrow = (*r1 < *r2);
180: }
181: post_higherunit(r2);
182: *post_higherunit(r1) = x;
183: }
184: return(borrow); /* return the final carry/borrow flag bit */
185: } /* mp_subb */
186: #endif /* mp_subb */
187:
188: #ifndef mp_rotate_left
189: boolean mp_rotate_left(register unitptr r1,register boolean carry)
190: /* multiprecision rotate left 1 bit with carry, result in r1. */
191: /* carry is incoming carry flag-- value should be 0 or 1 */
192: { register int precision; /* number of units to rotate */
193: unsigned int mcarry = carry, nextcarry; /* int is supposed to be
194: * the efficient size for ops*/
195: precision = global_precision;
196: make_lsbptr(r1,precision);
197: while (precision--)
198: {
199: nextcarry = (((signedunit) *r1) < 0);
200: *r1 = (*r1 << 1) | mcarry;
201: mcarry = nextcarry;
202: pre_higherunit(r1);
203: }
204: return(nextcarry); /* return the final carry flag bit */
205: } /* mp_rotate_left */
206: #endif /* mp_rotate_left */
207:
208: /************** end of primitives that should be in assembly *************/
209:
210:
211: /* The mp_shift_right_bits function is not called in any time-critical
212: situations in public-key cryptographic functions, so it doesn't
213: need to be coded in assembly language.
214: */
215: void mp_shift_right_bits(register unitptr r1,register short bits)
216: /* multiprecision shift right bits, result in r1.
217: bits is how many bits to shift, must be <= UNITSIZE.
218: */
219: { register short precision; /* number of units to shift */
220: register unit carry,nextcarry,bitmask;
221: register short unbits;
222: if (bits==0) return; /* shift zero bits is a no-op */
223: carry = 0;
224: bitmask = power_of_2(bits)-1;
225: unbits = UNITSIZE-bits; /* shift bits must be <= UNITSIZE */
226: precision = global_precision;
227: make_msbptr(r1,precision);
228: if (bits == UNITSIZE) {
229: while (precision--) {
230: nextcarry = *r1;
231: *r1 = carry;
232: carry = nextcarry;
233: pre_lowerunit(r1);
234: }
235: } else {
236: while (precision--)
237: { nextcarry = *r1 & bitmask;
238: *r1 >>= bits;
239: *r1 |= carry << unbits;
240: carry = nextcarry;
241: pre_lowerunit(r1);
242: }
243: }
244: } /* mp_shift_right_bits */
245:
246:
247: #ifndef mp_compare
248: short mp_compare(register unitptr r1,register unitptr r2)
249: /* Compares multiprecision integers *r1, *r2, and returns:
250: -1 iff *r1 < *r2
251: 0 iff *r1 == *r2
252: +1 iff *r1 > *r2
253: */
254: { register short precision; /* number of units to compare */
255:
256: precision = global_precision;
257: make_msbptr(r1,precision);
258: make_msbptr(r2,precision);
259: do
260: { if (*r1 < *r2)
261: return(-1);
262: if (*post_lowerunit(r1) > *post_lowerunit(r2))
263: return(1);
264: } while (--precision);
265: return(0); /* *r1 == *r2 */
266: } /* mp_compare */
267: #endif /* mp_compare */
268:
269:
270: boolean mp_inc(register unitptr r)
271: /* Increment multiprecision integer r. */
272: { register short precision;
273: precision = global_precision;
274: make_lsbptr(r,precision);
275: do
276: { if ( ++(*r) ) return(0); /* no carry */
277: post_higherunit(r);
278: } while (--precision);
279: return(1); /* return carry set */
280: } /* mp_inc */
281:
282:
283: boolean mp_dec(register unitptr r)
284: /* Decrement multiprecision integer r. */
285: { register short precision;
286: precision = global_precision;
287: make_lsbptr(r,precision);
288: do
289: { if ( (signedunit) (--(*r)) != (signedunit) -1 )
290: return(0); /* no borrow */
291: post_higherunit(r);
292: } while (--precision);
293: return(1); /* return borrow set */
294: } /* mp_dec */
295:
296:
297: void mp_neg(register unitptr r)
298: /* Compute 2's complement, the arithmetic negative, of r */
299: { register short precision; /* number of units to negate */
300: precision = global_precision;
301: mp_dec(r); /* 2's complement is 1's complement plus 1 */
302: do /* now do 1's complement */
303: { *r = ~(*r);
304: r++;
305: } while (--precision);
306: } /* mp_neg */
307:
308: #ifndef mp_move
309: void mp_move(register unitptr dst,register unitptr src)
310: { register short precision; /* number of units to move */
311: precision = global_precision;
312: do { *dst++ = *src++; } while (--precision);
313: } /* mp_move */
314: #endif /* mp_move */
315:
316: void mp_init(register unitptr r, word16 value)
317: /* Init multiprecision register r with short value. */
318: { /* Note that mp_init doesn't extend sign bit for >32767 */
319:
320: unitfill0( r, global_precision);
321: make_lsbptr(r,global_precision);
322: *post_higherunit(r) = value;
323: #ifdef UNIT8
324: *post_higherunit(r) = value >> UNITSIZE;
325: #endif
326: } /* mp_init */
327:
328:
329: short significance(register unitptr r)
330: /* Returns number of significant units in r */
331: { register short precision;
332: precision = global_precision;
333: make_msbptr(r,precision);
334: do
335: { if (*post_lowerunit(r))
336: return(precision);
337: } while (--precision);
338: return(precision);
339: } /* significance */
340:
341:
342: #ifndef unitfill0
343: void unitfill0(unitptr r,word16 unitcount)
344: /* Zero-fill the unit buffer r. */
345: { while (unitcount--) *r++ = 0;
346: } /* unitfill0 */
347: #endif /* unitfill0 */
348:
349:
350: int mp_udiv(register unitptr remainder,register unitptr quotient,
351: register unitptr dividend,register unitptr divisor)
352: /* Unsigned divide, treats both operands as positive. */
353: { int bits;
354: short dprec;
355: register unit bitmask;
356: if (testeq(divisor,0))
357: return(-1); /* zero divisor means divide error */
358: mp_init0(remainder);
359: mp_init0(quotient);
360: /* normalize and compute number of bits in dividend first */
361: init_bitsniffer(dividend,bitmask,dprec,bits);
362: /* rescale quotient to same precision (dprec) as dividend */
363: rescale(quotient,global_precision,dprec);
364: make_msbptr(quotient,dprec);
365:
366: while (bits--)
367: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
368: if (mp_compare(remainder,divisor) >= 0)
369: { mp_sub(remainder,divisor);
370: stuff_bit(quotient,bitmask);
371: }
372: bump_2bitsniffers(dividend,quotient,bitmask);
373: }
374: return(0);
375: } /* mp_udiv */
376:
377:
378: #ifdef UPTON_OR_SMITH
379: #define RECIPMARGIN 0 /* extra margin bits used by mp_recip() */
380:
381: int mp_recip(register unitptr quotient,register unitptr divisor)
382: /* Compute reciprocal (quotient) as 1/divisor. Used by faster modmult. */
383: { int bits;
384: short qprec;
385: register unit bitmask;
386: unit remainder[MAX_UNIT_PRECISION];
387: if (testeq(divisor,0))
388: return(-1); /* zero divisor means divide error */
389: mp_init0(remainder);
390: mp_init0(quotient);
391:
392: /* normalize and compute number of bits in quotient first */
393: bits = countbits(divisor) + RECIPMARGIN;
394: bitmask = bitmsk(bits); /* bitmask within a single unit */
395: qprec = bits2units(bits+1);
396: mp_setbit(remainder,(bits-RECIPMARGIN)-1);
397: /* rescale quotient to precision of divisor + RECIPMARGIN bits */
398: rescale(quotient,global_precision,qprec);
399: make_msbptr(quotient,qprec);
400:
401: while (bits--)
402: { mp_shift_left(remainder);
403: if (mp_compare(remainder,divisor) >= 0)
404: { mp_sub(remainder,divisor);
405: stuff_bit(quotient,bitmask);
406: }
407: bump_bitsniffer(quotient,bitmask);
408: }
409: mp_init0(remainder); /* burn sensitive data left on stack */
410: return(0);
411: } /* mp_recip */
412: #endif /* UPTON_OR_SMITH */
413:
414:
415: int mp_div(register unitptr remainder,register unitptr quotient,
416: register unitptr dividend,register unitptr divisor)
417: /* Signed divide, either or both operands may be negative. */
418: { boolean dvdsign,dsign;
419: int status;
420: dvdsign = (boolean)(mp_tstminus(dividend)!=0);
421: dsign = (boolean)(mp_tstminus(divisor)!=0);
422: if (dvdsign) mp_neg(dividend);
423: if (dsign) mp_neg(divisor);
424: status = mp_udiv(remainder,quotient,dividend,divisor);
425: if (dvdsign) mp_neg(dividend); /* repair caller's dividend */
426: if (dsign) mp_neg(divisor); /* repair caller's divisor */
427: if (status<0) return(status); /* divide error? */
428: if (dvdsign) mp_neg(remainder);
429: if (dvdsign ^ dsign) mp_neg(quotient);
430: return(status);
431: } /* mp_div */
432:
433:
434: word16 mp_shortdiv(register unitptr quotient,
435: register unitptr dividend,register word16 divisor)
436: /* This function does a fast divide and mod on a multiprecision dividend
437: using a short integer divisor returning a short integer remainder.
438: This is an unsigned divide. It treats both operands as positive.
439: It is used mainly for faster printing of large numbers in base 10.
440: */
441: { int bits;
442: short dprec;
443: register unit bitmask;
444: register word16 remainder;
445: if (!divisor) /* if divisor == 0 */
446: return(-1); /* zero divisor means divide error */
447: remainder=0;
448: mp_init0(quotient);
449: /* normalize and compute number of bits in dividend first */
450: init_bitsniffer(dividend,bitmask,dprec,bits);
451: /* rescale quotient to same precision (dprec) as dividend */
452: rescale(quotient,global_precision,dprec);
453: make_msbptr(quotient,dprec);
454:
455: while (bits--)
456: { remainder <<= 1;
457: if (sniff_bit(dividend,bitmask))
458: remainder++;
459: if (remainder >= divisor)
460: { remainder -= divisor;
461: stuff_bit(quotient,bitmask);
462: }
463: bump_2bitsniffers(dividend,quotient,bitmask);
464: }
465: return(remainder);
466: } /* mp_shortdiv */
467:
468:
469: int mp_mod(register unitptr remainder,
470: register unitptr dividend,register unitptr divisor)
471: /* Unsigned divide, treats both operands as positive. */
472: { int bits;
473: short dprec;
474: register unit bitmask;
475: if (testeq(divisor,0))
476: return(-1); /* zero divisor means divide error */
477: mp_init0(remainder);
478: /* normalize and compute number of bits in dividend first */
479: init_bitsniffer(dividend,bitmask,dprec,bits);
480:
481: while (bits--)
482: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
483: msub(remainder,divisor);
484: bump_bitsniffer(dividend,bitmask);
485: }
486: return(0);
487: } /* mp_mod */
488:
489:
490: word16 mp_shortmod(register unitptr dividend,register word16 divisor)
491: /* This function does a fast mod operation on a multiprecision dividend
492: using a short integer modulus returning a short integer remainder.
493: This is an unsigned divide. It treats both operands as positive.
494: It is used mainly for fast sieve searches for large primes.
495: */
496: { int bits;
497: short dprec;
498: register unit bitmask;
499: register word16 remainder;
500: if (!divisor) /* if divisor == 0 */
501: return(-1); /* zero divisor means divide error */
502: remainder=0;
503: /* normalize and compute number of bits in dividend first */
504: init_bitsniffer(dividend,bitmask,dprec,bits);
505:
506: while (bits--)
507: { remainder <<= 1;
508: if (sniff_bit(dividend,bitmask))
509: remainder++;
510: if (remainder >= divisor) remainder -= divisor;
511: bump_bitsniffer(dividend,bitmask);
512: }
513: return(remainder);
514: } /* mp_shortmod */
515:
516:
517:
518: #ifdef COMB_MULT /* use faster "comb" multiply algorithm */
519: /* We are skipping this code because it has a bug... */
520:
521: int mp_mult(register unitptr prod,
522: register unitptr multiplicand, register unitptr multiplier)
523: /* Computes multiprecision prod = multiplicand * multiplier */
524: { /* Uses interleaved comb multiply algorithm.
525: This improved multiply more than twice as fast as a Russian
526: peasant multiply, because it does a lot fewer shifts.
527: Must have global_precision set to the size of the multiplicand
528: plus UNITSIZE-1 SLOP_BITS. Produces a product that is the sum
529: of the lengths of the multiplier and multiplicand.
530:
531: BUG ALERT: Unfortunately, this code has a bug. It fails for
532: some numbers. One such example:
533: x= 59DE 60CE 2345 8091 A02B 2A1C DBC3 8BE5
534: x*x= 59DE 60CE 2345 26B3 993B 67A5 2499 0B7D
535: 52C8 CDC7 AFB3 61C8 243C 741B
536: --which is obviously wrong. The answer should be:
537: x*x= 1F8C 607B 5EA6 C061 2714 04A9 A0C6 A17A
538: C9AB 6095 C62F 3756 3843 E4D0 3950 7AD9
539: We'll have to fix this some day. In the meantime, we'll
540: just have the compiler skip over this code.
541: */
542: int bits;
543: register unit bitmask;
544: unitptr product, mplier, temp;
545: short mprec,mprec2;
546: unit mplicand[MAX_UNIT_PRECISION];
547: mp_init(prod,0); /* better clear full width--double precision */
548: if (testeq(multiplicand,0))
549: return(0); /* zero multiplicand means zero product */
550:
551: mp_move(mplicand,multiplicand); /* save it from damage */
552:
553: normalize(multiplier,mprec);
554: if (!mprec)
555: return(0);
556:
557: make_lsbptr(multiplier,mprec);
558: bitmask = 1; /* start scan at LSB of multiplier */
559:
560: do /* UNITSIZE times */
561: { /* do for bits 0-15 */
562: product = prod;
563: mplier = multiplier;
564: mprec2 = mprec;
565: while (mprec2--) /* do for each word in multiplier */
566: {
567: if (sniff_bit(mplier,bitmask))
568: { if (mp_addc(product,multiplicand,0)) /* ripple carry */
569: { /* After 1st time thru, this is rarely encountered. */
570: temp = msbptr(product,global_precision);
571: pre_higherunit(temp);
572: /* temp now points to LSU of carry region. */
573: unmake_lsbptr(temp,global_precision);
574: mp_inc(temp);
575: } /* ripple carry */
576: }
577: pre_higherunit(mplier);
578: pre_higherunit(product);
579: }
580: if (!(bitmask <<= 1))
581: break;
582: mp_shift_left(multiplicand);
583:
584: } while (TRUE);
585:
586: mp_move(multiplicand,mplicand); /* recover */
587:
588: return(0); /* normal return */
589: } /* mp_mult */
590:
591: #endif /* COMB_MULT */
592:
593:
594: /* Because the "comb" multiply has a bug, we will use the slower
595: Russian peasant multiply instead. Fortunately, the mp_mult
596: function is not called from any time-critical code.
597: */
598:
599: int mp_mult(register unitptr prod,
600: register unitptr multiplicand,register unitptr multiplier)
601: /* Computes multiprecision prod = multiplicand * multiplier */
602: { /* Uses "Russian peasant" multiply algorithm. */
603: int bits;
604: register unit bitmask;
605: short mprec;
606: mp_init(prod,0);
607: if (testeq(multiplicand,0))
608: return(0); /* zero multiplicand means zero product */
609: /* normalize and compute number of bits in multiplier first */
610: init_bitsniffer(multiplier,bitmask,mprec,bits);
611:
612: while (bits--)
613: { mp_shift_left(prod);
614: if (sniff_bit(multiplier,bitmask))
615: mp_add(prod,multiplicand);
616: bump_bitsniffer(multiplier,bitmask);
617: }
618: return(0);
619: } /* mp_mult */
620:
621:
622:
623: /* mp_modmult computes a multiprecision multiply combined with a
624: modulo operation. This is the most time-critical function in
625: this multiprecision arithmetic library for performing modulo
626: exponentiation. We experimented with different versions of modmult,
627: depending on the machine architecture and performance requirements.
628: We will either use a Russian Peasant modmult (peasant_modmult),
629: Charlie Merritt's modmult (merritt_modmult), Jimmy Upton's
630: modmult (upton_modmult), or Thad Smith's modmult (smith_modmult).
631: On machines with a hardware atomic multiply instruction,
632: Smith's modmult is fastest. It can utilize assembly subroutines to
633: speed up the hardware multiply logic and trial quotient calculation.
634: If the machine lacks a fast hardware multiply, Merritt's modmult
635: is preferred, which doesn't call any assembly multiply routine.
636: We use the alias names mp_modmult, stage_modulus, and modmult_burn
637: for the corresponding true names, which depend on what flavor of
638: modmult we are using.
639:
640: Before making the first call to mp_modmult, you must set up the
641: modulus-dependant precomputated tables by calling stage_modulus.
642: After making all the calls to mp_modmult, you call modmult_burn to
643: erase the tables created by stage_modulus that were left in memory.
644: */
645:
646: #ifdef COUNTMULTS
647: /* "number of modmults" counters, used for performance studies. */
648: static unsigned int tally_modmults = 0;
649: static unsigned int tally_modsquares = 0;
650: #endif /* COUNTMULTS */
651:
652:
653: #ifdef PEASANT
654: /* Conventional Russian peasant multiply with modulo algorithm. */
655:
656: static unitptr pmodulus = 0; /* used only by mp_modmult */
657:
658: int stage_peasant_modulus(unitptr n)
659: /* Must pass modulus to stage_modulus before calling modmult.
660: Assumes that global_precision has already been adjusted to the
661: size of the modulus, plus SLOP_BITS.
662: */
663: { /* For this simple version of modmult, just copy unit pointer. */
664: pmodulus = n;
665: return(0); /* normal return */
666: } /* stage_peasant_modulus */
667:
668:
669: int peasant_modmult(register unitptr prod,
670: unitptr multiplicand,register unitptr multiplier)
671: { /* "Russian peasant" multiply algorithm, combined with a modulo
672: operation. This is a simple naive replacement for Merritt's
673: faster modmult algorithm. References global unitptr "modulus".
674: Computes: prod = (multiplicand*multiplier) mod modulus
675: WARNING: All the arguments must be less than the modulus!
676: */
677: int bits;
678: register unit bitmask;
679: short mprec;
680: mp_init(prod,0);
681: /* if (testeq(multiplicand,0))
682: return(0); */ /* zero multiplicand means zero product */
683: /* normalize and compute number of bits in multiplier first */
684: init_bitsniffer(multiplier,bitmask,mprec,bits);
685:
686: while (bits--)
687: { mp_shift_left(prod);
688: msub(prod,pmodulus); /* turns mult into modmult */
689: if (sniff_bit(multiplier,bitmask))
690: { mp_add(prod,multiplicand);
691: msub(prod,pmodulus); /* turns mult into modmult */
692: }
693: bump_bitsniffer(multiplier,bitmask);
694: }
695: return(0);
696: } /* peasant_modmult */
697:
698:
699: /* If we are using a version of mp_modmult that uses internal tables
700: in memory, we have to call modmult_burn() at the end of mp_modexp.
701: This is so that no sensitive data is left in memory after the program
702: exits. The Russian peasant method doesn't use any such tables.
703: */
704: void peasant_burn(void)
705: /* Alias for modmult_burn, called only from mp_modexp(). Destroys
706: internal modmult tables. This version does nothing because no
707: tables are used by the Russian peasant modmult. */
708: { } /* peasant_burn */
709:
710: #endif /* PEASANT */
711:
712:
713: #ifdef MERRITT
714: /*=========================================================================*/
715: /*
716: This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ.
717: Also refined by Zhahai Stewart to reduce the number of subtracts.
718: Modified by Raymond Brand to reduce the number of SLOP_BITS by 1.
719: It performs a multiply combined with a modulo operation, without
720: going into "double precision". It is faster than the Russian peasant
721: method, and still works well on machines that lack a fast hardware
722: multiply instruction.
723: */
724:
725: /* The following support functions, macros, and data structures
726: are used only by Merritt's modmult algorithm... */
727:
728: static void mp_lshift_unit(register unitptr r1)
729: /* Shift r1 1 whole unit to the left. Used only by modmult function. */
730: { register short precision;
731: register unitptr r2;
732: precision = global_precision;
733: make_msbptr(r1,precision);
734: r2 = r1;
735: while (--precision)
736: *post_lowerunit(r1) = *pre_lowerunit(r2);
737: *r1 = 0;
738: } /* mp_lshift_unit */
739:
740:
741: /* moduli_buf contains shifted images of the modulus, set by stage_modulus */
742: static unit moduli_buf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0};
743: static unitptr moduli[UNITSIZE] = /* contains pointers into moduli_buf */
744: { 0
745: ,&moduli_buf[ 0][0], &moduli_buf[ 1][0], &moduli_buf[ 2][0], &moduli_buf[ 3][0],
746: &moduli_buf[ 4][0], &moduli_buf[ 5][0], &moduli_buf[ 6][0]
747: #ifndef UNIT8
748: ,&moduli_buf[ 7][0]
749: ,&moduli_buf[ 8][0], &moduli_buf[ 9][0], &moduli_buf[10][0], &moduli_buf[11][0],
750: &moduli_buf[12][0], &moduli_buf[13][0], &moduli_buf[14][0]
751: #ifndef UNIT16 /* and not UNIT8 */
752: ,&moduli_buf[15][0]
753: ,&moduli_buf[16][0], &moduli_buf[17][0], &moduli_buf[18][0], &moduli_buf[19][0],
754: &moduli_buf[20][0], &moduli_buf[21][0], &moduli_buf[22][0], &moduli_buf[23][0],
755: &moduli_buf[24][0], &moduli_buf[25][0], &moduli_buf[26][0], &moduli_buf[27][0],
756: &moduli_buf[28][0], &moduli_buf[29][0], &moduli_buf[30][0]
757: #endif /* UNIT16 and UNIT8 not defined */
758: #endif /* UNIT8 not defined */
759: };
760:
761: /* To optimize msubs, need following 2 unit arrays, each filled
762: with the most significant unit and next-to-most significant unit
763: of the preshifted images of the modulus. */
764: static unit msu_moduli[UNITSIZE] = {0}; /* most signif. unit */
765: static unit nmsu_moduli[UNITSIZE] = {0}; /* next-most signif. unit */
766:
767: /* mpdbuf contains preshifted images of the multiplicand, mod n.
768: It is used only by mp_modmult. It could be staticly declared
769: inside of mp_modmult, but we put it outside mp_modmult so that
770: it can be wiped clean by modmult_burn(), which is called at the
771: end of mp_modexp. This is so that no sensitive data is left in
772: memory after the program exits.
773: */
774: static unit mpdbuf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0};
775:
776:
777: static void stage_mp_images(unitptr images[UNITSIZE],unitptr r)
778: /* Computes UNITSIZE images of r, each shifted left 1 more bit.
779: Used only by modmult function.
780: */
781: { short int i;
782: images[0] = r; /* no need to move the first image, just copy ptr */
783: for (i=1; i<UNITSIZE; i++)
784: { mp_move(images[i],images[i-1]);
785: mp_shift_left(images[i]);
786: msub(images[i],moduli[0]);
787: }
788: } /* stage_mp_images */
789:
790:
791: int stage_merritt_modulus(unitptr n)
792: /* Computes UNITSIZE images of modulus, each shifted left 1 more bit.
793: Before calling mp_modmult, you must first stage the moduli images by
794: calling stage_modulus. n is the pointer to the modulus.
795: Assumes that global_precision has already been adjusted to the
796: size of the modulus, plus SLOP_BITS.
797: */
798: { short int i;
799: unitptr msu; /* ptr to most significant unit, for faster msubs */
800: moduli[0] = n; /* no need to move the first image, just copy ptr */
801:
802: /* used by optimized msubs macro... */
803: msu = msbptr(n,global_precision); /* needed by msubs */
804: msu_moduli[0] = *post_lowerunit(msu);
805: nmsu_moduli[0] = *msu;
806:
807: for (i=1; i<UNITSIZE; i++)
808: { mp_move(moduli[i],moduli[i-1]);
809: mp_shift_left(moduli[i]);
810:
811: /* used by optimized msubs macro... */
812: msu = msbptr(moduli[i],global_precision); /* needed by msubs */
813: msu_moduli[i] = *post_lowerunit(msu); /* for faster msubs */
814: nmsu_moduli[i] = *msu;
815: }
816: return(0); /* normal return */
817: } /* stage_merritt_modulus */
818:
819:
820: /* The following macros, sniffadd and msubs, are used by modmult... */
821: #define sniffadd(i) if (*multiplier & power_of_2(i)) mp_add(prod,mpd[i])
822:
823: /* Unoptimized msubs macro (msubs0) follows... */
824: /* #define msubs0(i) msub(prod,moduli[i])
825: */
826:
827: /* To optimize msubs, compare the most significant units of the
828: product and the shifted modulus before deciding to call the full
829: mp_compare routine. Better still, compare the upper two units
830: before deciding to call mp_compare.
831: Optimization of msubs relies heavily on standard C left-to-right
832: parsimonious evaluation of logical expressions.
833: */
834:
835: /* Partially-optimized msubs macro (msubs1) follows... */
836: /* #define msubs1(i) if ( \
837: ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \
838: (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \
839: ) mp_sub(prod,moduli[i])
840: */
841:
842: /* Fully-optimized msubs macro (msubs2) follows... */
843: #define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \
844: (p_m==0) && ( (*nmsu_prod>nmsu_moduli[i]) || ( \
845: (*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \
846: mp_sub(prod,moduli[i])
847:
848:
849: int merritt_modmult(register unitptr prod,
850: unitptr multiplicand,register unitptr multiplier)
851: /* Performs combined multiply/modulo operation.
852: Computes: prod = (multiplicand*multiplier) mod modulus
853: WARNING: All the arguments must be less than the modulus!
854: Assumes the modulus has been predefined by first calling
855: stage_modulus.
856: */
857: {
858: /* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro...*/
859: register signedunit p_m;
860: register unitptr msu_prod; /* ptr to most significant unit of product */
861: register unitptr nmsu_prod; /* next-most signif. unit of product */
862: short mprec; /* precision of multiplier, in units */
863: /* Array mpd contains a list of pointers to preshifted images of
864: the multiplicand: */
865: static unitptr mpd[UNITSIZE] =
866: { 0, &mpdbuf[ 0][0], &mpdbuf[ 1][0], &mpdbuf[ 2][0],
867: &mpdbuf[ 3][0], &mpdbuf[ 4][0], &mpdbuf[ 5][0], &mpdbuf[ 6][0]
868: #ifndef UNIT8
869: ,&mpdbuf[ 7][0], &mpdbuf[ 8][0], &mpdbuf[ 9][0], &mpdbuf[10][0],
870: &mpdbuf[11][0], &mpdbuf[12][0], &mpdbuf[13][0], &mpdbuf[14][0]
871: #ifndef UNIT16 /* and not UNIT8 */
872: ,&mpdbuf[15][0], &mpdbuf[16][0], &mpdbuf[17][0], &mpdbuf[18][0],
873: &mpdbuf[19][0], &mpdbuf[20][0], &mpdbuf[21][0], &mpdbuf[22][0],
874: &mpdbuf[23][0], &mpdbuf[24][0], &mpdbuf[25][0], &mpdbuf[26][0],
875: &mpdbuf[27][0], &mpdbuf[28][0], &mpdbuf[29][0], &mpdbuf[30][0]
876: #endif /* UNIT16 and UNIT8 not defined */
877: #endif /* UNIT8 not defined */
878: };
879:
880: /* Compute preshifted images of multiplicand, mod n: */
881: stage_mp_images(mpd,multiplicand);
882:
883: /* To optimize msubs, set up msu_prod and nmsu_prod: */
884: msu_prod = msbptr(prod,global_precision); /* Get ptr to MSU of prod */
885: nmsu_prod = msu_prod;
886: post_lowerunit(nmsu_prod); /* Get next-MSU of prod */
887:
888: /* To understand this algorithm, it would be helpful to first
889: study the conventional Russian peasant modmult algorithm.
890: This one does about the same thing as Russian peasant, but
891: is organized differently to save some steps. It loops
892: through the multiplier a word (unit) at a time, instead of
893: a bit at a time. It word-shifts the product instead of
894: bit-shifting it, so it should be faster. It also does about
895: half as many subtracts as Russian peasant.
896: */
897:
898: mp_init(prod,0); /* Initialize product to 0. */
899:
900: /* The way mp_modmult is actually used in cryptographic
901: applications, there will NEVER be a zero multiplier or
902: multiplicand. So there is no need to optimize for that
903: condition.
904: */
905: /* if (testeq(multiplicand,0))
906: return(0); */ /* zero multiplicand means zero product */
907: /* Normalize and compute number of units in multiplier first: */
908: normalize(multiplier,mprec);
909: if (mprec==0) /* if precision of multiplier is 0 */
910: return(0); /* zero multiplier means zero product */
911: make_msbptr(multiplier,mprec); /* start at MSU of multiplier */
912:
913: while (mprec--) /* Loop for the number of units in the multiplier */
914: {
915: /* Shift the product one whole unit to the left.
916: This is part of the multiply phase of modmult.
917: */
918:
919: mp_lshift_unit(prod);
920:
921: /* The product may have grown by as many as UNITSIZE
922: bits. That's why we have global_precision set to the
923: size of the modulus plus UNITSIZE slop bits.
924: Now reduce the product back down by conditionally
925: subtracting the preshifted images of the modulus.
926: This is part of the modulo reduction phase of modmult.
927: The following loop is unrolled for speed, using macros...
928:
929: for (i=UNITSIZE-1; i>=LOG_UNITSIZE; i--)
930: if (mp_compare(prod,moduli[i]) >= 0)
931: mp_sub(prod,moduli[i]);
932: */
933:
934: #ifndef UNIT8
935: #ifndef UNIT16 /* and not UNIT8 */
936: msubs(31);
937: msubs(30);
938: msubs(29);
939: msubs(28);
940: msubs(27);
941: msubs(26);
942: msubs(25);
943: msubs(24);
944: msubs(23);
945: msubs(22);
946: msubs(21);
947: msubs(20);
948: msubs(19);
949: msubs(18);
950: msubs(17);
951: msubs(16);
952: #endif /* not UNIT16 and not UNIT8 */
953: msubs(15);
954: msubs(14);
955: msubs(13);
956: msubs(12);
957: msubs(11);
958: msubs(10);
959: msubs(9);
960: msubs(8);
961: #endif /* not UNIT8 */
962: msubs(7);
963: msubs(6);
964: msubs(5);
965: #ifndef UNIT32
966: msubs(4);
967: #ifndef UNIT16
968: msubs(3);
969: #endif
970: #endif
971:
972: /* Sniff each bit in the current unit of the multiplier,
973: and conditionally add the corresponding preshifted
974: image of the multiplicand to the product.
975: This is also part of the multiply phase of modmult.
976:
977: The following loop is unrolled for speed, using macros...
978:
979: for (i=UNITSIZE-1; i>=0; i--)
980: if (*multiplier & power_of_2(i))
981: mp_add(prod,mpd[i]);
982: */
983: #ifndef UNIT8
984: #ifndef UNIT16 /* and not UNIT8 */
985: sniffadd(31);
986: sniffadd(30);
987: sniffadd(29);
988: sniffadd(28);
989: sniffadd(27);
990: sniffadd(26);
991: sniffadd(25);
992: sniffadd(24);
993: sniffadd(23);
994: sniffadd(22);
995: sniffadd(21);
996: sniffadd(20);
997: sniffadd(19);
998: sniffadd(18);
999: sniffadd(17);
1000: sniffadd(16);
1001: #endif /* not UNIT16 and not UNIT8 */
1002: sniffadd(15);
1003: sniffadd(14);
1004: sniffadd(13);
1005: sniffadd(12);
1006: sniffadd(11);
1007: sniffadd(10);
1008: sniffadd(9);
1009: sniffadd(8);
1010: #endif /* not UNIT8 */
1011: sniffadd(7);
1012: sniffadd(6);
1013: sniffadd(5);
1014: sniffadd(4);
1015: sniffadd(3);
1016: sniffadd(2);
1017: sniffadd(1);
1018: sniffadd(0);
1019:
1020: /* The product may have grown by as many as LOG_UNITSIZE+1
1021: bits.
1022: Now reduce the product back down by conditionally
1023: subtracting LOG_UNITSIZE+1 preshifted images of the
1024: modulus. This is the modulo reduction phase of modmult.
1025:
1026: The following loop is unrolled for speed, using macros...
1027:
1028: for (i=LOG_UNITSIZE; i>=0; i--)
1029: if (mp_compare(prod,moduli[i]) >= 0)
1030: mp_sub(prod,moduli[i]);
1031: */
1032:
1033: #ifndef UNIT8
1034: #ifndef UNIT16
1035: msubs(5);
1036: #endif
1037: msubs(4);
1038: #endif
1039: msubs(3);
1040: msubs(2);
1041: msubs(1);
1042: msubs(0);
1043:
1044: /* Bump pointer to next lower unit of multiplier: */
1045: post_lowerunit(multiplier);
1046:
1047: } /* Loop for the number of units in the multiplier */
1048:
1049: return(0); /* normal return */
1050:
1051: } /* merritt_modmult */
1052:
1053:
1054: #undef msubs
1055: #undef sniffadd
1056:
1057:
1058: /* Merritt's mp_modmult function leaves some internal tables in memory,
1059: so we have to call modmult_burn() at the end of mp_modexp.
1060: This is so that no cryptographically sensitive data is left in memory
1061: after the program exits.
1062: */
1063: void merritt_burn(void)
1064: /* Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */
1065: { unitfill0(&(mpdbuf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION);
1066: unitfill0(&(moduli_buf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION);
1067: unitfill0(msu_moduli,UNITSIZE);
1068: unitfill0(nmsu_moduli,UNITSIZE);
1069: } /* merritt_burn() */
1070:
1071: /******* end of Merritt's MODMULT stuff. *******/
1072: /*=========================================================================*/
1073: #endif /* MERRITT */
1074:
1075:
1076: #ifdef UPTON_OR_SMITH /* Used by Upton's and Smith's modmult algorithms */
1077:
1078: /* Jimmy Upton's multiprecision modmult algorithm in C.
1079: Performs a multiply combined with a modulo operation.
1080:
1081: The following support functions and data structures
1082: are used only by Upton's modmult algorithm...
1083: */
1084:
1085: short munit_prec; /* global_precision expressed in MULTUNITs */
1086:
1087: /* Note that since the SPARC CPU has no hardware integer multiply
1088: instruction, there is not that much advantage in having an
1089: assembly version of mp_smul on that machine. It might be faster
1090: to use Merritt's modmult instead of Upton's modmult on the SPARC.
1091: */
1092:
1093: /*
1094: Multiply the single-word multiplier times the multiprecision integer
1095: in multiplicand, accumulating result in prod. The resulting
1096: multiprecision prod will be 1 word longer than the multiplicand.
1097: multiplicand is munit_prec words long. We add into prod, so caller
1098: should zero it out first. For best results, this time-critical
1099: function should be implemented in assembly.
1100: NOTE: Unlike other functions in the multiprecision arithmetic
1101: library, both multiplicand and prod are pointing at the LSB,
1102: regardless of byte order of the machine. On an 80x86, this makes
1103: no difference. But if this assembly function is implemented
1104: on a 680x0, it becomes important.
1105: */
1106: /* Note that this has been modified from the previous version to allow
1107: better support for Smith's modmult:
1108: The final carry bit is added to the existing product
1109: array, rather than simply stored.
1110: */
1111:
1112: #ifndef mp_smul
1113: void mp_smul (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier)
1114: {
1115: short i;
1116: unsigned long p, carry;
1117:
1118: carry = 0;
1119: for (i=0; i<munit_prec; ++i)
1120: { p = (unsigned long)multiplier * *post_higherunit(multiplicand);
1121: p += *prod + carry;
1122: *post_higherunit(prod) = (MULTUNIT)p;
1123: carry = p >> MULTUNITSIZE;
1124: }
1125: /* Add carry to the next higher word of product / dividend */
1126: *prod += (MULTUNIT)carry;
1127: } /* mp_smul */
1128: #endif
1129:
1130: /* mp_dmul is a double-precision multiply multiplicand times multiplier,
1131: result into prod. prod must be pointing at a "double precision"
1132: buffer. E.g. If global_precision is 10 words, prod must be
1133: pointing at a 20-word buffer.
1134: */
1135: #ifndef mp_dmul
1136: void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier)
1137: {
1138: register int i;
1139: register MULTUNIT *p_multiplicand, *p_multiplier;
1140: register MULTUNIT *prodp;
1141:
1142:
1143: unitfill0(prod,global_precision*2); /* Pre-zero prod */
1144: /* Calculate precision in units of MULTUNIT */
1145: munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
1146: p_multiplicand = (MULTUNIT *)multiplicand;
1147: p_multiplier = (MULTUNIT *)multiplier;
1148: prodp = (MULTUNIT *)prod;
1149: make_lsbptr(p_multiplicand,munit_prec);
1150: make_lsbptr(p_multiplier,munit_prec);
1151: make_lsbptr(prodp,munit_prec*2);
1152: /* Multiply multiplicand by each word in multiplier, accumulating prod: */
1153: for (i=0; i<munit_prec; ++i)
1154: mp_smul (post_higherunit(prodp),
1155: p_multiplicand, *post_higherunit(p_multiplier));
1156: } /* mp_dmul */
1157: #endif /* mp_dmul */
1158:
1159: static unit ALIGN modulus[MAX_UNIT_PRECISION];
1160: static short nbits; /* number of modulus significant bits */
1161: #endif /* UPTON_OR_SMITH */
1162:
1163: #ifdef UPTON
1164:
1165: /* These scratchpad arrays are used only by upton_modmult (mp_modmult).
1166: Some of them could be staticly declared inside of mp_modmult, but we
1167: put them outside mp_modmult so that they can be wiped clean by
1168: modmult_burn(), which is called at the end of mp_modexp. This is
1169: so that no sensitive data is left in memory after the program exits.
1170: */
1171:
1172: static unit ALIGN reciprocal[MAX_UNIT_PRECISION];
1173: static unit ALIGN dhi[MAX_UNIT_PRECISION];
1174: static unit ALIGN d_data[MAX_UNIT_PRECISION*2]; /* double-wide register */
1175: static unit ALIGN e_data[MAX_UNIT_PRECISION*2]; /* double-wide register */
1176: static unit ALIGN f_data[MAX_UNIT_PRECISION*2]; /* double-wide register */
1177:
1178: static short nbitsDivUNITSIZE;
1179: static short nbitsModUNITSIZE;
1180:
1181: /* stage_upton_modulus() is aliased to stage_modulus().
1182: Prepare for an Upton modmult. Calculate the reciprocal of modulus,
1183: and save both. Note that reciprocal will have one more bit than
1184: modulus.
1185: Assumes that global_precision has already been adjusted to the
1186: size of the modulus, plus SLOP_BITS.
1187: */
1188: int stage_upton_modulus(unitptr n)
1189: {
1190: mp_move(modulus, n);
1191: mp_recip(reciprocal, modulus);
1192: nbits = countbits(modulus);
1193: nbitsDivUNITSIZE = nbits / UNITSIZE;
1194: nbitsModUNITSIZE = nbits % UNITSIZE;
1195: return(0); /* normal return */
1196: } /* stage_upton_modulus */
1197:
1198:
1199: /* Upton's algorithm performs a multiply combined with a modulo operation.
1200: Computes: prod = (multiplicand*multiplier) mod modulus
1201: WARNING: All the arguments must be less than the modulus!
1202: References global unitptr modulus and reciprocal.
1203: The reciprocal of modulus is 1 bit longer than the modulus.
1204: upton_modmult() is aliased to mp_modmult().
1205: */
1206: int upton_modmult (unitptr prod, unitptr multiplicand, unitptr multiplier)
1207: {
1208: unitptr d = d_data;
1209: unitptr d1 = d_data;
1210: unitptr e = e_data;
1211: unitptr f = f_data;
1212: short orig_precision;
1213:
1214: orig_precision = global_precision;
1215: mp_dmul (d,multiplicand,multiplier);
1216:
1217: /* Throw off low nbits of d */
1218: #ifndef HIGHFIRST
1219: d1 = d + nbitsDivUNITSIZE;
1220: #else
1221: d1 = d + orig_precision - nbitsDivUNITSIZE;
1222: #endif
1223: mp_move(dhi, d1); /* Don't screw up d, we need it later */
1224: mp_shift_right_bits(dhi, nbitsModUNITSIZE);
1225:
1226: mp_dmul (e,dhi,reciprocal); /* Note - reciprocal has nbits+1 bits */
1227:
1228: #ifndef HIGHFIRST
1229: e += nbitsDivUNITSIZE;
1230: #else
1231: e += orig_precision - nbitsDivUNITSIZE;
1232: #endif
1233: mp_shift_right_bits(e, nbitsModUNITSIZE);
1234:
1235: mp_dmul (f,e,modulus);
1236:
1237: /* Now for the only double-precision call to mpilib: */
1238: set_precision (orig_precision * 2);
1239: mp_sub (d,f);
1240:
1241: /* d's precision should be <= orig_precision */
1242: rescale (d, orig_precision*2, orig_precision);
1243: set_precision (orig_precision);
1244:
1245: /* Should never have to do this final subtract more than twice: */
1246: while (mp_compare(d,modulus) > 0)
1247: mp_sub (d,modulus);
1248:
1249: mp_move(prod,d);
1250: return(0); /* normal return */
1251: } /* upton_modmult */
1252:
1253:
1254: /* Upton's mp_modmult function leaves some internal arrays in memory,
1255: so we have to call modmult_burn() at the end of mp_modexp.
1256: This is so that no cryptographically sensitive data is left in memory
1257: after the program exits.
1258: upton_burn() is aliased to modmult_burn().
1259: */
1260: void upton_burn(void)
1261: {
1262: unitfill0(modulus,MAX_UNIT_PRECISION);
1263: unitfill0(reciprocal,MAX_UNIT_PRECISION);
1264: unitfill0(dhi,MAX_UNIT_PRECISION);
1265: unitfill0(d_data,MAX_UNIT_PRECISION*2);
1266: unitfill0(e_data,MAX_UNIT_PRECISION*2);
1267: unitfill0(f_data,MAX_UNIT_PRECISION*2);
1268: nbits = nbitsDivUNITSIZE = nbitsModUNITSIZE = 0;
1269: } /* upton_burn */
1270:
1271: /******* end of Upton's MODMULT stuff. *******/
1272: /*=========================================================================*/
1273: #endif /* UPTON */
1274:
1275: #ifdef SMITH /* using Thad Smith's modmult algorithm */
1276:
1277: /* Thad Smith's implementation of multiprecision modmult algorithm in C.
1278: Performs a multiply combined with a modulo operation.
1279: The multiplication is done with mp_dmul, the same as for Upton's
1280: modmult. The modulus reduction is done by long division, in
1281: which a trial quotient "digit" is determined, then the product of
1282: that digit and the divisor are subtracted from the dividend.
1283:
1284: In this case, the digit is MULTUNIT in size and the subtraction
1285: is done by adding the product to the one's complement of the
1286: dividend, which allows use of the existing mp_smul routine.
1287:
1288: The following support functions and data structures
1289: are used only by Smith's modmult algorithm...
1290: */
1291:
1292: /* These scratchpad arrays are used only by smith_modmult (mp_modmult).
1293: Some of them could be statically declared inside of mp_modmult, but we
1294: put them outside mp_modmult so that they can be wiped clean by
1295: modmult_burn(), which is called at the end of mp_modexp. This is
1296: so that no sensitive data is left in memory after the program exits.
1297: */
1298:
1299: static unit ALIGN ds_data[MAX_UNIT_PRECISION*2+2];
1300:
1301: static unit mod_quotient [4];
1302: static unit mod_divisor [4]; /* 2 most signif. MULTUNITs of modulus */
1303:
1304: static MULTUNIT *modmpl; /* ptr to modulus least significant
1305: ** MULTUNIT */
1306: static int mshift; /* number of bits for
1307: ** recip scaling */
1308: static MULTUNIT reciph; /* MSunit of scaled recip */
1309: static MULTUNIT recipl; /* LSunit of scaled recip */
1310:
1311: static short modlenMULTUNITS; /* length of modulus in MULTUNITs */
1312: static MULTUNIT mutemp; /* temporary */
1313:
1314: /* The routines mp_smul and mp_dmul are the same as for UPTON and
1315: should be coded in assembly. Note, however, that the previous
1316: Upton's mp_smul version has been modified to compatible with
1317: Smith's modmult. The new version also still works for Upton's
1318: modmult.
1319: */
1320:
1321: #ifndef mp_set_recip
1322: #define mp_set_recip(rh,rl,m) /* null */
1323: #else
1324: /* setup routine for external mp_quo_digit */
1325: void mp_set_recip(MULTUNIT rh, MULTUNIT rl, int m);
1326: #endif
1327: MULTUNIT mp_quo_digit (MULTUNIT *dividend);
1328:
1329: #ifdef MULTUNIT_SIZE_SAME
1330: #define mp_musubb mp_subb /* use existing routine */
1331: #else /* ! MULTUNIT_SIZE_SAME */
1332:
1333: /* This performs the same function as mp_subb, but with MULTUNITs.
1334: Note: Processors without alignment requirements may be able to use
1335: mp_subb, even though MULTUNITs are smaller than units. In that case,
1336: use mp_subb, since it would be faster if coded in assembly. Note that
1337: this implementation won't work for MULTUNITs which are long -- use
1338: mp_subb in that case.
1339: */
1340: #ifndef mp_musubb
1341: boolean mp_musubb
1342: (register MULTUNIT* r1,register MULTUNIT* r2,register boolean borrow)
1343: /* multiprecision subtract of MULTUNITs with borrow, r2 from r1,
1344: ** result in r1 */
1345: /* borrow is incoming borrow flag-- value should be 0 or 1 */
1346: { register ulint x; /* won't work if sizeof(MULTUNIT)==
1347: sizeof(long) */
1348: short precision; /* number of MULTUNITs to subtract */
1349: precision = global_precision * MULTUNITs_perunit;
1350: make_lsbptr(r1,precision);
1351: make_lsbptr(r2,precision);
1352: while (precision--)
1353: { x = (ulint) *r1 - (ulint) *post_higherunit(r2) - (ulint) borrow;
1354: *post_higherunit(r1) = x;
1355: borrow = (((1L << MULTUNITSIZE) & x) != 0L);
1356: }
1357: return (borrow);
1358: } /* mp_musubb */
1359: #endif /* mp_musubb */
1360: #endif /* MULTUNIT_SIZE_SAME */
1361:
1362: /* The function mp_quo_digit is the heart of Smith's modulo reduction,
1363: which uses a form of long division. It computes a trial quotient
1364: "digit" (MULTUNIT-sized digit) by multiplying the three most
1365: significant MULTUNITs of the dividend by the two most significant
1366: MULTUNITs of the reciprocal of the modulus. Note that this function
1367: requires that MULTUNITSIZE * 2 <= sizeof(unsigned long).
1368:
1369: An important part of this technique is that the quotient never be
1370: too small, although it may occasionally be too large. This was
1371: done to eliminate the need to check and correct for a remainder
1372: exceeding the divisor. It is easier to check for a negative
1373: remainder. The following technique rarely needs correction for
1374: MULTUNITs of at least 16 bits.
1375:
1376: The following routine has two implementations:
1377:
1378: ASM_PROTOTYPE defined: written to be an executable prototype for
1379: an efficient assembly language implementation. Note that several
1380: of the following masks and shifts can be done by directly
1381: manipulating single precision registers on some architectures.
1382:
1383: ASM_PROTOTYPE undefined: a slightly more efficient implementation
1384: in C. Although this version returns a result larger than the
1385: optimum (which is corrected in smith_modmult) more often than the
1386: prototype version, the faster execution in C more than makes up
1387: for the difference.
1388:
1389: Parameter: dividend - points to the most significant MULTUNIT
1390: of the dividend. Note that dividend actually contains the
1391: one's complement of the actual dividend value (see comments for
1392: smith_modmult).
1393:
1394: Return: the trial quotient digit resulting from dividing the first
1395: three MULTUNITs at dividend by the upper two MULTUNITs of the
1396: modulus.
1397: */
1398:
1399: /* #define ASM_PROTOTYPE */ /* undefined: use C-optimized version */
1400:
1401: #ifndef mp_quo_digit
1402: MULTUNIT mp_quo_digit (MULTUNIT *dividend) {
1403: unsigned long q, q0, q1, q2;
1404: unsigned short lsb_factor;
1405:
1406: /* Compute the least significant product group.
1407: The last terms of q1 and q2 perform upward rounding, which is
1408: needed to guarantee that the result not be too small.
1409: */
1410: q1 = (dividend[tohigher(-2)] ^ MULTUNIT_mask) * (unsigned long)reciph
1411: + reciph;
1412: q2 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long)recipl
1413: + (1L << MULTUNITSIZE) ;
1414: #ifdef ASM_PROTOTYPE
1415: lsb_factor = 1 & (q1>>MULTUNITSIZE) & (q2>>MULTUNITSIZE);
1416: q = q1 + q2;
1417:
1418: /* The following statement is equivalent to shifting the sum right
1419: one bit while shifting in the carry bit.
1420: */
1421: q0 = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
1422: #else /* optimized C version */
1423: q0 = (q1>>1) + (q2>>1) + 1;
1424: #endif
1425:
1426: /* Compute the middle significant product group. */
1427:
1428: q1 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long)reciph;
1429: q2 = (dividend[ 0] ^ MULTUNIT_mask) * (unsigned long)recipl;
1430: #ifdef ASM_PROTOTYPE
1431: q = q1 + q2;
1432: q = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
1433:
1434: /* Add in the most significant word of the first group.
1435: The last term takes care of the carry from adding the lsb's
1436: that were shifted out prior to addition.
1437: */
1438: q = (q0 >> MULTUNITSIZE)+ q + (lsb_factor & (q1 ^ q2));
1439: #else /* optimized C version */
1440: q = (q0 >> MULTUNITSIZE)+ (q1>>1) + (q2>>1) + 1;
1441: #endif
1442:
1443: /* Compute the most significant term and add in the others */
1444:
1445: q = (q >> (MULTUNITSIZE-2)) +
1446: (((dividend[0] ^ MULTUNIT_mask) * (unsigned long)reciph) << 1);
1447: q >>= mshift;
1448:
1449: /* Prevent overflow and then wipe out the intermediate results. */
1450:
1451: mutemp = (MULTUNIT)min(q, (1L << MULTUNITSIZE) -1);
1452: q= q0 = q1 = q2 = 0; lsb_factor = 0; (void)lsb_factor;
1453: return mutemp;
1454: }
1455: #endif /* mp_quo_digit */
1456:
1457: /* stage_smith_modulus() - Prepare for a Smith modmult.
1458:
1459: Calculate the reciprocal of modulus with a precision of two MULTUNITs.
1460: Assumes that global_precision has already been adjusted to the
1461: size of the modulus, plus SLOP_BITS.
1462:
1463: Note: This routine was designed to work with large values and
1464: doesn't have the necessary testing or handling to work with a
1465: modulus having less than three significant units. For such cases,
1466: the separate multiply and modulus routines can be used.
1467:
1468: stage_smith_modulus() is aliased to stage_modulus().
1469: */
1470:
1471: int stage_smith_modulus(unitptr n_modulus)
1472: {
1473: int original_precision;
1474: int sigmod; /* significant units in modulus */
1475: unitptr mp; /* modulus most significant pointer */
1476: MULTUNIT *mpm; /* reciprocal pointer */
1477: int prec; /* precision of reciprocal calc in units */
1478:
1479: mp_move(modulus, n_modulus);
1480: modmpl = (MULTUNIT*) modulus;
1481: modmpl = lsbptr (modmpl, global_precision * MULTUNITs_perunit);
1482: nbits = countbits(modulus);
1483: modlenMULTUNITS = (nbits+ MULTUNITSIZE-1) / MULTUNITSIZE;
1484:
1485: original_precision = global_precision;
1486:
1487: /* The following code copies the three most significant units of
1488: * modulus to mod_divisor.
1489: */
1490: mp = modulus;
1491: sigmod = significance (modulus);
1492: rescale (mp, original_precision, sigmod);
1493: /* prec is the unit precision required for 3 MULTUNITs */
1494: prec = (3 +(MULTUNITs_perunit-1)) / MULTUNITs_perunit;
1495: set_precision (prec);
1496:
1497: /* set mp = ptr to most significant units of modulus, then move
1498: * the most significant units to mp_divisor
1499: */
1500: mp = msbptr(mp,sigmod) -tohigher(prec-1);
1501: unmake_lsbptr (mp, prec);
1502: mp_move (mod_divisor, mp);
1503:
1504: /* Keep 2*MULTUNITSIZE bits in mod_divisor.
1505: * This will (normally) result in a reciprocal of 2*MULTUNITSIZE+1 bits.
1506: */
1507: mshift = countbits (mod_divisor) - 2*MULTUNITSIZE;
1508: mp_shift_right_bits (mod_divisor, mshift);
1509: mp_recip(mod_quotient,mod_divisor);
1510: mp_shift_right_bits (mod_quotient,1);
1511:
1512: /* Reduce to: 0 < mshift <= MULTUNITSIZE */
1513: mshift = ((mshift + (MULTUNITSIZE-1)) % MULTUNITSIZE) +1;
1514: /* round up, rescaling if necessary */
1515: mp_inc (mod_quotient);
1516: if (countbits (mod_quotient) > 2*MULTUNITSIZE) {
1517: mp_shift_right_bits (mod_quotient,1);
1518: mshift--; /* now 0 <= mshift <= MULTUNITSIZE */
1519: }
1520: mpm = lsbptr ((MULTUNIT*)mod_quotient, prec*MULTUNITs_perunit);
1521: recipl = *post_higherunit (mpm);
1522: reciph = *mpm;
1523: mp_set_recip (reciph, recipl, mshift);
1524: set_precision (original_precision);
1525: return(0); /* normal return */
1526: } /* stage_smith_modulus */
1527:
1528: /* Smith's algorithm performs a multiply combined with a modulo operation.
1529: Computes: prod = (multiplicand*multiplier) mod modulus
1530: The modulus must first be set by stage_smith_modulus().
1531: smith_modmult() is aliased to mp_modmult().
1532: */
1533:
1534: int
1535: smith_modmult(unitptr prod, unitptr multiplicand, unitptr multiplier)
1536: {
1537: unitptr d; /* ptr to product */
1538: MULTUNIT *dmph, *dmpl, *dmp; /* ptrs to dividend (high, low, first)
1539: * aligned for subtraction */
1540: /* Note that dmph points one MULTUNIT higher than indicated by
1541: global precision. This allows us to zero out a word one higher than
1542: the normal precision.
1543: */
1544: short orig_precision;
1545: short nqd; /* number of quotient digits remaining to
1546: * be generated */
1547: short dmi; /* number of significant MULTUNITs in product */
1548:
1549: d = ds_data + 1; /* room for leading MSB if HIGHFIRST */
1550: orig_precision = global_precision;
1551: mp_dmul(d, multiplicand, multiplier);
1552:
1553: rescale(d, orig_precision * 2, orig_precision * 2 + 1);
1554: set_precision(orig_precision * 2 + 1);
1555: *msbptr(d, global_precision) = 0; /* leading 0 unit */
1556:
1557: /* We now start working with MULTUNITs.
1558: Determine the most significant MULTUNIT of the product so we don't
1559: have to process leading zeros in our divide loop.
1560: */
1561: dmi = significance(d) * MULTUNITs_perunit;
1562: if (dmi >= modlenMULTUNITS)
1563: { /* Make dividend negative. This allows the use of mp_smul to
1564: * "subtract" the product of the modulus and the trial divisor
1565: * by actually adding to a negative dividend.
1566: * The one's complement of the dividend is used, since it causes
1567: * a zero value to be represented as all ones. This facilitates
1568: * testing the result for possible overflow, since a sign bit
1569: * indicates that no adjustment is necessary, and we should not
1570: * attempt to adjust if the result of the addition is zero.
1571: */
1572: mp_inc(d);
1573: mp_neg(d);
1574: set_precision(orig_precision);
1575: munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
1576:
1577: /* Set msb, lsb, and normal ptrs of dividend */
1578: dmph = lsbptr((MULTUNIT *) d, (orig_precision * 2 + 1) *
1579: MULTUNITs_perunit) + tohigher(dmi);
1580: nqd = dmi + 1 - modlenMULTUNITS;
1581: dmpl = dmph - tohigher(modlenMULTUNITS);
1582:
1583: /* Divide loop.
1584: Each iteration computes the next quotient MULTUNIT digit, then
1585: multiplies the divisor (modulus) by the quotient digit and adds
1586: it to the one's complement of the dividend (equivalent to
1587: subtracting). If the product was greater than the remaining dividend,
1588: we get a non-negative result, in which case we subtract off the
1589: modulus to get the proper negative remainder.
1590: */
1591: for (; nqd; nqd--)
1592: { MULTUNIT q; /* quotient trial digit */
1593:
1594: q = mp_quo_digit(dmph);
1595: if (q > 0)
1596: { mp_smul(dmpl, modmpl, q);
1597:
1598: /* Perform correction if q too large.
1599: * This rarely occurs.
1600: */
1601: if (!(*dmph & MULTUNIT_msb))
1602: { dmp = dmpl;
1603: unmake_lsbptr(dmp, orig_precision *
1604: MULTUNITs_perunit);
1605: if (mp_musubb(dmp,
1606: (MULTUNIT *) modulus, 0))
1607: (*dmph) --;
1608: }
1609: }
1610: pre_lowerunit(dmph);
1611: pre_lowerunit(dmpl);
1612: }
1613: /* d contains the one's complement of the remainder. */
1614: rescale(d, orig_precision * 2 + 1, orig_precision);
1615: set_precision(orig_precision);
1616: mp_neg(d);
1617: mp_dec(d);
1618: } else
1619: { /* Product was less than modulus. Return it. */
1620: rescale(d, orig_precision * 2 + 1, orig_precision);
1621: set_precision(orig_precision);
1622: }
1623: mp_move(prod, d);
1624: return (0); /* normal return */
1625: } /* smith_modmult */
1626:
1627:
1628: /* Smith's mp_modmult function leaves some internal arrays in memory,
1629: so we have to call modmult_burn() at the end of mp_modexp.
1630: This is so that no cryptographically sensitive data is left in memory
1631: after the program exits.
1632: smith_burn() is aliased to modmult_burn().
1633: */
1634: void smith_burn(void)
1635: {
1636: empty_array (modulus);
1637: empty_array (ds_data);
1638: empty_array (mod_quotient);
1639: empty_array (mod_divisor);
1640: modmpl = 0;
1641: mshift =nbits = 0;
1642: reciph = recipl = 0;
1643: modlenMULTUNITS = mutemp = 0;
1644: mp_set_recip (0,0,0);
1645: } /* smith_burn */
1646:
1647: /* End of Thad Smith's implementation of modmult. */
1648:
1649: #endif /* SMITH */
1650:
1651:
1652: int countbits(unitptr r)
1653: /* Returns number of significant bits in r */
1654: { int bits;
1655: short prec;
1656: register unit bitmask;
1657: init_bitsniffer(r,bitmask,prec,bits);
1658: return(bits);
1659: } /* countbits */
1660:
1661:
1662: char *copyright_notice(void)
1663: /* force linker to include copyright notice in the executable object image. */
1664: { return ("(c)1986 Philip Zimmermann"); } /* copyright_notice */
1665:
1666:
1667: int mp_modexp(register unitptr expout,register unitptr expin,
1668: register unitptr exponent,register unitptr modulus)
1669: { /* Russian peasant combined exponentiation/modulo algorithm.
1670: Calls modmult instead of mult.
1671: Computes: expout = (expin**exponent) mod modulus
1672: WARNING: All the arguments must be less than the modulus!
1673: */
1674: int bits;
1675: short oldprecision;
1676: register unit bitmask;
1677: unit product[MAX_UNIT_PRECISION];
1678: short eprec;
1679:
1680: #ifdef COUNTMULTS
1681: tally_modmults = 0; /* clear "number of modmults" counter */
1682: tally_modsquares = 0; /* clear "number of modsquares" counter */
1683: #endif /* COUNTMULTS */
1684: mp_init(expout,1);
1685: if (testeq(exponent,0))
1686: { if (testeq(expin,0))
1687: return(-1); /* 0 to the 0th power means return error */
1688: return(0); /* otherwise, zero exponent means expout is 1 */
1689: }
1690: if (testeq(modulus,0))
1691: return(-2); /* zero modulus means error */
1692: #if SLOP_BITS > 0 /* if there's room for sign bits */
1693: if (mp_tstminus(modulus))
1694: return(-2); /* negative modulus means error */
1695: #endif /* SLOP_BITS > 0 */
1696: if (mp_compare(expin,modulus) >= 0)
1697: return(-3); /* if expin >= modulus, return error */
1698: if (mp_compare(exponent,modulus) >= 0)
1699: return(-4); /* if exponent >= modulus, return error */
1700:
1701: oldprecision = global_precision; /* save global_precision */
1702: /* set smallest optimum precision for this modulus */
1703: set_precision(bits2units(countbits(modulus)+SLOP_BITS));
1704: /* rescale all these registers to global_precision we just defined */
1705: rescale(modulus,oldprecision,global_precision);
1706: rescale(expin,oldprecision,global_precision);
1707: rescale(exponent,oldprecision,global_precision);
1708: rescale(expout,oldprecision,global_precision);
1709:
1710: if (stage_modulus(modulus))
1711: { set_precision(oldprecision); /* restore original precision */
1712: return(-5); /* unstageable modulus (STEWART algorithm) */
1713: }
1714:
1715: /* normalize and compute number of bits in exponent first */
1716: init_bitsniffer(exponent,bitmask,eprec,bits);
1717:
1718: /* We can "optimize out" the first modsquare and modmult: */
1719: bits--; /* We know for sure at this point that bits>0 */
1720: mp_move(expout,expin); /* expout = (1*1)*expin; */
1721: bump_bitsniffer(exponent,bitmask);
1722:
1723: while (bits--)
1724: {
1725: poll_for_break(); /* polls keyboard, allows ctrl-C to abort program */
1726: #ifdef COUNTMULTS
1727: tally_modsquares++; /* bump "number of modsquares" counter */
1728: #endif /* COUNTMULTS */
1729: mp_modsquare(product,expout);
1730: if (sniff_bit(exponent,bitmask))
1731: { mp_modmult(expout,product,expin);
1732: #ifdef COUNTMULTS
1733: tally_modmults++; /* bump "number of modmults" counter */
1734: #endif /* COUNTMULTS */
1735: } else
1736: {
1737: mp_move(expout,product);
1738: }
1739: bump_bitsniffer(exponent,bitmask);
1740: } /* while bits-- */
1741: mp_burn(product); /* burn the evidence on the stack */
1742: modmult_burn(); /* ask mp_modmult to also burn its own evidence */
1743:
1744: #ifdef COUNTMULTS /* diagnostic analysis */
1745: { long atomic_mults;
1746: unsigned int unitcount,totalmults;
1747: unitcount = bits2units(countbits(modulus));
1748: /* calculation assumes modsquare takes as long as a modmult: */
1749: atomic_mults = (long) tally_modmults * (unitcount * unitcount);
1750: atomic_mults += (long) tally_modsquares * (unitcount * unitcount);
1751: printf("%ld atomic mults for ",atomic_mults);
1752: printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n",
1753: tally_modsquares,tally_modmults,
1754: tally_modsquares+tally_modmults,
1755: countbits(modulus), unitcount);
1756: }
1757: #endif /* COUNTMULTS */
1758:
1759: set_precision(oldprecision); /* restore original precision */
1760:
1761: /* Do an explicit reference to the copyright notice so that the linker
1762: will be forced to include it in the executable object image... */
1763: copyright_notice(); /* has no real effect at run time */
1764:
1765: return(0); /* normal return */
1766: } /* mp_modexp */
1767:
1768:
1769:
1770:
1771: /*********************************************************************
1772: RSA-specific routines follow. These are the only functions that
1773: are specific to the RSA public key cryptosystem. The other
1774: multiprecision integer math functions may be used for non-RSA
1775: applications.
1776:
1777: The RSA public key cryptosystem is patented by the Massachusetts
1778: Institute of Technology (U.S. patent #4,405,829). This patent does
1779: not apply outside the USA. Public Key Partners (PKP) holds the
1780: exclusive commercial license to sell and sub-license the RSA public
1781: key cryptosystem. The author of this software implementation of the
1782: RSA algorithm is providing this software for educational use only.
1783: Licensing this algorithm from PKP is the responsibility of you, the
1784: user, not Philip Zimmermann, the author of this software. The author
1785: assumes no liability for any breach of patent law resulting from the
1786: unlicensed use of this software by the user.
1787: *********************************************************************/
1788:
1789:
1790: int rsa_decrypt(unitptr M, unitptr C,
1791: unitptr d, unitptr p, unitptr q, unitptr u)
1792: /* Uses Quisquater's Chinese Remainder Theorem shortcut
1793: for RSA decryption.
1794: M is the output plaintext message.
1795: C is the input ciphertext message.
1796: d is the secret decryption exponent.
1797: p and q are the secret prime factors of n.
1798: u is the multiplicative inverse of p, mod q.
1799: Note that u is precomputed on the assumption that p<q.
1800: n, the common modulus, is not used here because of the
1801: Chinese Remainder Theorem shortcut.
1802: */
1803: {
1804: unit p2[MAX_UNIT_PRECISION];
1805: unit q2[MAX_UNIT_PRECISION];
1806: unit temp1[MAX_UNIT_PRECISION];
1807: unit temp2[MAX_UNIT_PRECISION];
1808: int status;
1809:
1810: mp_init(M,1); /* initialize result in case of error */
1811:
1812: if (mp_compare(p,q) >= 0) /* ensure that p<q */
1813: { /* swap the pointers p and q */
1814: unitptr t;
1815: t = p; p = q; q = t;
1816: }
1817:
1818: /* Rather than decrypting by computing modexp with full mod n
1819: precision, compute a shorter modexp with mod p precision... */
1820:
1821: /* p2 = [ (C mod p)**( d mod (p-1) ) ] mod p */
1822: mp_move(temp1,p);
1823: mp_dec(temp1); /* temp1 = p-1 */
1824: mp_mod(temp2,d,temp1); /* temp2 = d mod (p-1) ) */
1825: mp_mod(temp1,C,p); /* temp1 = C mod p */
1826: status = mp_modexp(p2,temp1,temp2,p);
1827: if (status < 0) /* mp_modexp returned an error. */
1828: return(status); /* error return */
1829:
1830: /* Now compute a short modexp with mod q precision... */
1831:
1832: /* q2 = [ (C mod q)**( d mod (q-1) ) ] mod q */
1833: mp_move(temp1,q);
1834: mp_dec(temp1); /* temp1 = q-1 */
1835: mp_mod(temp2,d,temp1); /* temp2 = d mod (q-1) */
1836: mp_mod(temp1,C,q); /* temp1 = C mod q */
1837: status = mp_modexp(q2,temp1,temp2,q);
1838: if (status < 0) /* mp_modexp returned an error. */
1839: return(status); /* error return */
1840:
1841: /* Now use the multiplicative inverse u to glue together the
1842: two halves, saving a lot of time by avoiding a full mod n
1843: exponentiation. At key generation time, u was computed
1844: with the secret key as the multiplicative inverse of
1845: p, mod q such that: (p*u) mod q = 1, assuming p<q.
1846: */
1847: if (mp_compare(p2,q2) == 0) /* only happens if C<p */
1848: mp_move(M,p2);
1849: else
1850: { /* q2 = q2 - p2; if q2<0 then q2 = q2 + q */
1851: if (mp_sub(q2,p2)) /* if the result went negative... */
1852: mp_add(q2,q); /* add q to q2 */
1853:
1854: /* M = p2 + ( p * [(q2*u) mod q] ) */
1855: mp_mult(temp1,q2,u); /* temp1 = q2*u */
1856: mp_mod(temp2,temp1,q); /* temp2 = temp1 mod q */
1857: mp_mult(temp1,p,temp2); /* temp1 = p * temp2 */
1858: mp_add(temp1,p2); /* temp1 = temp1 + p2 */
1859: mp_move(M,temp1); /* M = temp1 */
1860: }
1861:
1862: mp_burn(p2); /* burn the evidence on the stack...*/
1863: mp_burn(q2);
1864: mp_burn(temp1);
1865: mp_burn(temp2);
1866: return(0); /* normal return */
1867: } /* rsa_decrypt */
1868:
1869:
1870: /****************** end of MPI library ****************************/
1871:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.