|
|
1.1 root 1: /* C source code for RSA multiprecision arithmetic library routines.
2: Implemented Nov 86 by Philip Zimmermann
3: Last revised 10 May 91 by PRZ
4:
5: Boulder Software Engineering
6: 3021 Eleventh Street
7: Boulder, CO 80304
8: (303) 444-4541
9:
10: (c) Copyright 1986 by Philip Zimmermann. All rights reserved.
11: The author assumes no liability for damages resulting from the use
12: of this software, even if the damage results from defects in this
13: software. No warranty is expressed or implied. The use of this
14: cryptographic software for developing weapon systems is expressly
15: forbidden.
16:
17: The RSA public key cryptosystem is patented by the Massachusetts
18: Institute of Technology (U.S. patent #4,405,829). Public Key
19: Partners (PKP) holds the exclusive commercial license to sell and
20: sub-license the RSA public key cryptosystem. The author of this
21: software implementation of the RSA algorithm is providing this
22: implementation for educational use only. Licensing this algorithm
23: from PKP is the responsibility of you, the user, not Philip
24: Zimmermann, the author of this implementation. The author assumes
25: no liability for any breach of patent law resulting from the
26: unlicensed use of this software by the user.
27:
28: These routines implement all of the multiprecision arithmetic necessary
29: for Rivest-Shamir-Adleman (RSA) public key cryptography.
30:
31: Although originally developed in Microsoft C for the IBM PC, this code
32: contains few machine dependancies. It assumes 2's complement
33: arithmetic. It can be adapted to 8-bit, 16-bit, or 32-bit machines,
34: lowbyte-highbyte order or highbyte-lowbyte order. This version
35: has been converted to ANSI C.
36:
37:
38: The internal representation for these extended precision integer
39: "registers" is an array of "units". A unit is a machine word, which
40: is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit
41: unsigned integer, depending on the machine's word size. For example,
42: an IBM PC or AT uses a unit size of 16 bits. To perform arithmetic
43: on these huge precision integers, we pass pointers to these unit
44: arrays to various subroutines. A pointer to an array of units is of
45: type unitptr. This is a pointer to a huge integer "register".
46:
47: When calling a subroutine, we always pass a pointer to the BEGINNING
48: of the array of units, regardless of the byte order of the machine.
49: On a lowbyte-first machine, such as the Intel 80x86, this unitptr
50: points to the LEAST significant unit, and the array of units increases
51: significance to the right. On a highbyte-first machine, such as the
52: Motorola 680x0, this unitptr points to the MOST significant unit, and
53: the array of units decreases significance to the right.
54: */
55:
56: /* #define COUNTMULTS */ /* count modmults for performance studies */
57:
58: #ifdef DEBUG
59: #ifndef EMBEDDED /* not EMBEDDED - not compiling for embedded target */
60: #include <stdio.h> /* for printf, etc. */
61: #include <conio.h> /* DEBUG mode: for kbhit() */
62: #define poll_for_break() {while (kbhit()) getch();}
63: #endif /* not EMBEDDED */
64: #else
65: #define poll_for_break() /* stub */
66: #endif /* not DEBUG */
67:
68: #define RSALIB
69: #include "rsalib.h"
70:
71: short global_precision = 0; /* units of precision for all routines */
72: /* global_precision is the unit precision last set by set_precision.
73: Initially, set_precision() should be called to define global_precision
74: before using any of these other RSA library routines.
75: i.e.: set_precision(MAX_UNIT_PRECISION);
76: */
77:
78: #ifdef PORTABLE
79: /*************** multiprecision library primitives ****************/
80: /* The following portable C primitives should be recoded in assembly.
81: The equivalent assembly primitives are externally defined under
82: different names, unless PORTABLE is defined. See the header file
83: "rsalib.h" for further details.
84:
85: The carry bit mechanism we use for these portable primitives
86: will only work if the size of unit is smaller than the size of
87: a long integer. In most cases, this means UNITSIZE must be
88: less than 32 bits -- if you use the C portable primitives.
89: */
90:
91: typedef unsigned long int ulint;
92: #define carrybit ((ulint) 1 << UNITSIZE)
93: /* ...assumes sizeof(unit) < sizeof(unsigned long) */
94:
95: boolean mp_addc
96: (register unitptr r1,register unitptr r2,register boolean carry)
97: /* multiprecision add with carry r2 to r1, result in r1 */
98: /* carry is incoming carry flag-- value should be 0 or 1 */
99: { register ulint x; /* won't work if sizeof(unit)==sizeof(long) */
100: short precision; /* number of units to add */
101: precision = global_precision;
102: make_lsbptr(r1,precision);
103: make_lsbptr(r2,precision);
104: while (precision--)
105: { x = (ulint) *r1 + (ulint) *post_higherunit(r2) + (ulint) carry;
106: *post_higherunit(r1) = x;
107: carry = ((x & carrybit) != 0L);
108: }
109: return(carry); /* return the final carry flag bit */
110: } /* mp_addc */
111:
112:
113: boolean mp_subb
114: (register unitptr r1,register unitptr r2,register boolean borrow)
115: /* multiprecision subtract with borrow, r2 from r1, result in r1 */
116: /* borrow is incoming borrow flag-- value should be 0 or 1 */
117: { register ulint x; /* won't work if sizeof(unit)==sizeof(long) */
118: short precision; /* number of units to subtract */
119: precision = global_precision;
120: make_lsbptr(r1,precision);
121: make_lsbptr(r2,precision);
122: while (precision--)
123: { x = (ulint) *r1 - (ulint) *post_higherunit(r2) - (ulint) borrow;
124: *post_higherunit(r1) = x;
125: borrow = ((x & carrybit) != 0L);
126: }
127: return(borrow); /* return the final carry/borrow flag bit */
128: } /* mp_subb */
129:
130: #undef carrybit
131:
132:
133: boolean mp_rotate_left(register unitptr r1,register boolean carry)
134: /* multiprecision rotate left 1 bit with carry, result in r1. */
135: /* carry is incoming carry flag-- value should be 0 or 1 */
136: { register short precision; /* number of units to rotate */
137: register boolean nextcarry;
138: precision = global_precision;
139: make_lsbptr(r1,precision);
140: while (precision--)
141: {
142: nextcarry = (((signedunit) *r1) < 0);
143: /* nextcarry = ((*r1 & uppermostbit) != 0); */
144: *r1 <<= 1 ;
145: if (carry) *r1 |= 1;
146: carry = nextcarry;
147: pre_higherunit(r1);
148: }
149: return(nextcarry); /* return the final carry flag bit */
150: } /* mp_rotate_left */
151:
152: /************** end of primitives that should be in assembly *************/
153: #endif /* PORTABLE */
154:
155:
156: /* The mp_rotate_right function is not called in any time-critical
157: situations in RSA functions, so it doesn't need to be coded
158: in assembly language.
159: */
160: boolean mp_rotate_right(register unitptr r1,register boolean carry)
161: /* multiprecision rotate right 1 bit with carry, result in r1. */
162: /* carry is incoming carry flag-- value should be 0 or 1 */
163: { register short precision; /* number of units to rotate */
164: register boolean nextcarry;
165: precision = global_precision;
166: make_msbptr(r1,precision);
167: while (precision--)
168: { nextcarry = *r1 & 1;
169: *r1 >>= 1 ;
170: if (carry) *r1 |= uppermostbit;
171: carry = nextcarry;
172: pre_lowerunit(r1);
173: }
174: return(nextcarry); /* return the final carry flag bit */
175: } /* mp_rotate_right */
176:
177:
178: short mp_compare(register unitptr r1,register unitptr r2)
179: /* Compares multiprecision integers *r1, *r2, and returns:
180: -1 iff *r1 < *r2
181: 0 iff *r1 == *r2
182: +1 iff *r1 > *r2
183: */
184: { register short precision; /* number of units to compare */
185: precision = global_precision;
186: make_msbptr(r1,precision);
187: make_msbptr(r2,precision);
188: do
189: { if (*r1 < *r2)
190: return(-1);
191: if (*post_lowerunit(r1) > *post_lowerunit(r2))
192: return(1);
193: } while (--precision);
194: return(0); /* *r1 == *r2 */
195: } /* mp_compare */
196:
197:
198: boolean mp_inc(register unitptr r)
199: /* Increment multiprecision integer r. */
200: { register short precision;
201: precision = global_precision;
202: make_lsbptr(r,precision);
203: do
204: { if ( ++(*r) ) return(0); /* no carry */
205: post_higherunit(r);
206: } while (--precision);
207: return(1); /* return carry set */
208: } /* mp_inc */
209:
210:
211: boolean mp_dec(register unitptr r)
212: /* Decrement multiprecision integer r. */
213: { register short precision;
214: precision = global_precision;
215: make_lsbptr(r,precision);
216: do
217: { if ( (signedunit) (--(*r)) != (signedunit) -1 )
218: return(0); /* no borrow */
219: post_higherunit(r);
220: } while (--precision);
221: return(1); /* return borrow set */
222: } /* mp_dec */
223:
224:
225: void mp_neg(register unitptr r)
226: /* Compute 2's complement, the arithmetic negative, of r */
227: { register short precision; /* number of units to negate */
228: precision = global_precision;
229: mp_dec(r); /* 2's complement is 1's complement plus 1 */
230: do /* now do 1's complement */
231: { *r = ~(*r);
232: r++;
233: } while (--precision);
234: } /* mp_neg */
235:
236:
237: void mp_move(register unitptr dst,register unitptr src)
238: { register short precision; /* number of units to move */
239: precision = global_precision;
240: do { *dst++ = *src++; } while (--precision);
241: } /* mp_move */
242:
243:
244: void mp_init(register unitptr r, word16 value)
245: /* Init multiprecision register r with short value. */
246: { /* Note that mp_init doesn't extend sign bit for >32767 */
247: register short precision; /* number of units to init */
248: precision = global_precision;
249: make_lsbptr(r,precision);
250: *post_higherunit(r) = value; precision--;
251: #ifdef UNIT8
252: *post_higherunit(r) = value >> UNITSIZE; precision--;
253: #endif
254: while (precision--)
255: *post_higherunit(r) = 0;
256: } /* mp_init */
257:
258:
259: short significance(register unitptr r)
260: /* Returns number of significant units in r */
261: { register short precision;
262: precision = global_precision;
263: make_msbptr(r,precision);
264: do
265: { if (*post_lowerunit(r))
266: return(precision);
267: } while (--precision);
268: return(precision);
269: } /* significance */
270:
271:
272: void unitfill0(unitptr r,word16 unitcount)
273: /* Zero-fill the unit buffer r. */
274: { while (unitcount--) *r++ = 0;
275: } /* unitfill0 */
276:
277:
278: /* The macro normalize(r,precision) "normalizes" a multiprecision integer
279: by adjusting r and precision to new values. For Motorola-style processors
280: (MSB-first), r is a pointer to the MSB of the register, and must
281: be adjusted to point to the first nonzero unit. For Intel/VAX-style
282: (LSB-first) processors, r is a pointer to the LSB of the register,
283: and must be left unchanged. The precision counter is always adjusted,
284: regardless of processor type. In the case of precision = 0,
285: r becomes undefined.
286: */
287:
288:
289: /* The macro rescale(r,current_precision,new_precision) rescales
290: a multiprecision integer by adjusting r and its precision to new values.
291: It can be used to reverse the effects of the normalize
292: routine given above. See the comments in normalize concerning
293: Intel vs. Motorola LSB/MSB conventions.
294: WARNING: You can only safely call rescale on registers that
295: you have previously normalized with the above normalize routine,
296: or are known to be big enough for the new precision. You may
297: specify a new precision that is smaller than the current precision.
298: You must be careful not to specify a new_precision value that is
299: too big, or which adjusts the r pointer out of range.
300: */
301:
302:
303: /* These bit sniffer definitions begin sniffing at the MSB */
304: #define sniff_bit(bptr,bitmask) (*(bptr) & bitmask)
305:
306: #define init_bitsniffer(bptr,bitmask,prec,bits) \
307: { normalize(bptr,prec); \
308: if (!prec) \
309: return(0); \
310: bits = units2bits(prec); \
311: make_msbptr(bptr,prec); bitmask = uppermostbit; \
312: while (!sniff_bit(bptr,bitmask)) \
313: { bitmask >>= 1; bits--; \
314: } \
315: }
316:
317: #define bump_bitsniffer(bptr,bitmask) \
318: { if (!(bitmask >>= 1)) \
319: { bitmask = uppermostbit; \
320: post_lowerunit(bptr); \
321: } \
322: }
323:
324: /* the following two macros are used by mp_udiv */
325: #define bump_2bitsniffers(bptr,bptr2,bitmask) \
326: { if (!(bitmask >>= 1)) \
327: { bitmask = uppermostbit; \
328: post_lowerunit(bptr); \
329: post_lowerunit(bptr2); \
330: } \
331: }
332:
333: #define stuff_bit(bptr,bitmask) *(bptr) |= bitmask
334:
335:
336: int mp_udiv(register unitptr remainder,register unitptr quotient,
337: register unitptr dividend,register unitptr divisor)
338: /* Unsigned divide, treats both operands as positive. */
339: { int bits;
340: short dprec;
341: register unit bitmask;
342: if (testeq(divisor,0))
343: return(-1); /* zero divisor means divide error */
344: mp_init(remainder,0);
345: mp_init(quotient,0);
346: /* normalize and compute number of bits in dividend first */
347: init_bitsniffer(dividend,bitmask,dprec,bits);
348: /* rescale quotient to same precision (dprec) as dividend */
349: rescale(quotient,global_precision,dprec);
350: make_msbptr(quotient,dprec);
351:
352: while (bits--)
353: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
354: if (mp_compare(remainder,divisor) >= 0)
355: { mp_sub(remainder,divisor);
356: stuff_bit(quotient,bitmask);
357: }
358: bump_2bitsniffers(dividend,quotient,bitmask);
359: }
360: return(0);
361: } /* mp_udiv */
362:
363:
364: int mp_div(register unitptr remainder,register unitptr quotient,
365: register unitptr dividend,register unitptr divisor)
366: /* Signed divide, either or both operands may be negative. */
367: { boolean dvdsign,dsign;
368: int status;
369: dvdsign = (mp_tstminus(dividend)!=0);
370: dsign = (mp_tstminus(divisor)!=0);
371: if (dvdsign) mp_neg(dividend);
372: if (dsign) mp_neg(divisor);
373: status = mp_udiv(remainder,quotient,dividend,divisor);
374: if (dvdsign) mp_neg(dividend); /* repair caller's dividend */
375: if (dsign) mp_neg(divisor); /* repair caller's divisor */
376: if (status<0) return(status); /* divide error? */
377: if (dvdsign) mp_neg(remainder);
378: if (dvdsign ^ dsign) mp_neg(quotient);
379: return(status);
380: } /* mp_div */
381:
382:
383: word16 mp_shortdiv(register unitptr quotient,
384: register unitptr dividend,register word16 divisor)
385: /* This function does a fast divide and mod on a multiprecision dividend
386: using a short integer divisor returning a short integer remainder.
387: This is an unsigned divide. It treats both operands as positive.
388: It is used mainly for faster printing of large numbers in base 10.
389: */
390: { int bits;
391: short dprec;
392: register unit bitmask;
393: register word16 remainder;
394: if (!divisor) /* if divisor == 0 */
395: return(-1); /* zero divisor means divide error */
396: remainder=0;
397: mp_init(quotient,0);
398: /* normalize and compute number of bits in dividend first */
399: init_bitsniffer(dividend,bitmask,dprec,bits);
400: /* rescale quotient to same precision (dprec) as dividend */
401: rescale(quotient,global_precision,dprec);
402: make_msbptr(quotient,dprec);
403:
404: while (bits--)
405: { remainder <<= 1;
406: if (sniff_bit(dividend,bitmask))
407: remainder++;
408: if (remainder >= divisor)
409: { remainder -= divisor;
410: stuff_bit(quotient,bitmask);
411: }
412: bump_2bitsniffers(dividend,quotient,bitmask);
413: }
414: return(remainder);
415: } /* mp_shortdiv */
416:
417:
418: int mp_mod(register unitptr remainder,
419: register unitptr dividend,register unitptr divisor)
420: /* Unsigned divide, treats both operands as positive. */
421: { int bits;
422: short dprec;
423: register unit bitmask;
424: if (testeq(divisor,0))
425: return(-1); /* zero divisor means divide error */
426: mp_init(remainder,0);
427: /* normalize and compute number of bits in dividend first */
428: init_bitsniffer(dividend,bitmask,dprec,bits);
429:
430: while (bits--)
431: { mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
432: msub(remainder,divisor);
433: bump_bitsniffer(dividend,bitmask);
434: }
435: return(0);
436: } /* mp_mod */
437:
438:
439: word16 mp_shortmod(register unitptr dividend,register word16 divisor)
440: /* This function does a fast mod operation on a multprecision dividend
441: using a short integer modulus returning a short integer remainder.
442: This is an unsigned divide. It treats both operands as positive.
443: It is used mainly for fast sieve searches for large primes.
444: */
445: { int bits;
446: short dprec;
447: register unit bitmask;
448: register word16 remainder;
449: if (!divisor) /* if divisor == 0 */
450: return(-1); /* zero divisor means divide error */
451: remainder=0;
452: /* normalize and compute number of bits in dividend first */
453: init_bitsniffer(dividend,bitmask,dprec,bits);
454:
455: while (bits--)
456: { remainder <<= 1;
457: if (sniff_bit(dividend,bitmask))
458: remainder++;
459: if (remainder >= divisor) remainder -= divisor;
460: bump_bitsniffer(dividend,bitmask);
461: }
462: return(remainder);
463: } /* mp_shortmod */
464:
465:
466:
467: #ifndef COMB_MULT /* if not "comb" multiply, use peasant multiply */
468:
469: int mp_mult(register unitptr prod,
470: register unitptr multiplicand,register unitptr multiplier)
471: /* Computes multiprecision prod = multiplicand * multiplier */
472: { /* Uses "Russian peasant" multiply algorithm. */
473: int bits;
474: register unit bitmask;
475: short mprec;
476: mp_init(prod,0);
477: if (testeq(multiplicand,0))
478: return(0); /* zero multiplicand means zero product */
479: /* normalize and compute number of bits in multiplier first */
480: init_bitsniffer(multiplier,bitmask,mprec,bits);
481:
482: while (bits--)
483: { mp_shift_left(prod);
484: if (sniff_bit(multiplier,bitmask))
485: { mp_add(prod,multiplicand);
486: }
487: bump_bitsniffer(multiplier,bitmask);
488: }
489: return(0);
490: } /* mp_mult */
491:
492: #else /* use "comb" multiply algorithm */
493:
494: int mp_mult(register unitptr prod,
495: register unitptr multiplicand, register unitptr multiplier)
496: /* Computes multiprecision prod = multiplicand * multiplier */
497: { /* Uses interleaved comb multiply algorithm.
498: This improved multiply more than twice as fast as a Russian
499: peasant multiply, because it does a lot fewer shifts.
500: Must have global_precision set to the size of the multiplicand
501: plus UNITSIZE-1 SLOP_BITS. Produces a product that is the sum
502: of the lengths of the multiplier and multiplicand.
503: */
504: int bits;
505: register unit bitmask;
506: unitptr product, mplier, temp;
507: short mprec,mprec2;
508: unit mplicand[MAX_UNIT_PRECISION];
509: mp_init(prod,0); /* better clear full width--double precision */
510: if (testeq(multiplicand,0))
511: return(0); /* zero multiplicand means zero product */
512:
513: mp_move(mplicand,multiplicand); /* save it from damage */
514:
515: normalize(multiplier,mprec);
516: if (!mprec)
517: return(0);
518:
519: make_lsbptr(multiplier,mprec);
520: bitmask = 1; /* start scan at LSB of multiplier */
521:
522: do /* UNITSIZE times */
523: { /* do for bits 0-15 */
524: product = prod;
525: mplier = multiplier;
526: mprec2 = mprec;
527: while (mprec2--) /* do for each word in multiplier */
528: {
529: if (sniff_bit(mplier,bitmask))
530: { if (mp_addc(product,multiplicand,0)) /* ripple carry */
531: { /* After 1st time thru, this is rarely encountered. */
532: temp = msbptr(product,global_precision);
533: pre_higherunit(temp);
534: /* temp now points to LSU of carry region. */
535: unmake_lsbptr(temp,global_precision);
536: mp_inc(temp);
537: } /* ripple carry */
538: }
539: pre_higherunit(mplier);
540: pre_higherunit(product);
541: }
542: if (!(bitmask <<= 1))
543: break;
544: mp_shift_left(multiplicand);
545:
546: } while (TRUE);
547:
548: mp_move(multiplicand,mplicand); /* recover */
549:
550: return(0); /* normal return */
551: } /* mp_mult */
552:
553: #endif /* COMB_MULT */
554:
555:
556: /* mp_modmult computes a multiply combined with a modulo operation.
557: Before calling mp_modmult, you must first call
558: stage_modulus(the_modulus)
559: */
560:
561: #ifdef COUNTMULTS
562: /* "number of modmults" counters, used for performance studies. */
563: static unsigned int tally_modmults = 0;
564: static unsigned int tally_modsquares = 0;
565: #endif /* COUNTMULTS */
566:
567: #ifndef ASM_MODMULT /* not assembly primitive modmult */
568:
569: #ifdef PEASANT
570: /* Conventional Russian peasant multiply with modulo algorithm. */
571:
572: static unitptr modulus = 0; /* used only by mp_modmult */
573:
574: int stage_peasant_modulus(unitptr n)
575: /* Must pass modulus to stage_modulus before calling modmult.
576: Assumes that global_precision has already been adjusted to the
577: size of the modulus, plus SLOP_BITS.
578: */
579: { /* For this simple version of modmult, just copy unit pointer. */
580: modulus = n;
581: return(0); /* normal return */
582: } /* stage_peasant_modulus */
583:
584:
585: int peasant_modmult(register unitptr prod,
586: unitptr multiplicand,register unitptr multiplier)
587: { /* "Russian peasant" multiply algorithm, combined with a modulo
588: operation. This is a simple naive replacement for Merritt's
589: faster modmult algorithm. References global unitptr "modulus".
590: Computes: prod = (multiplicand*multiplier) mod modulus
591: WARNING: All the arguments must be less than the modulus!
592: */
593: int bits;
594: register unit bitmask;
595: short mprec;
596: mp_init(prod,0);
597: /* if (testeq(multiplicand,0))
598: return(0); */ /* zero multiplicand means zero product */
599: /* normalize and compute number of bits in multiplier first */
600: init_bitsniffer(multiplier,bitmask,mprec,bits);
601:
602: while (bits--)
603: { mp_shift_left(prod);
604: msub(prod,modulus); /* turns mult into modmult */
605: if (sniff_bit(multiplier,bitmask))
606: { mp_add(prod,multiplicand);
607: msub(prod,modulus); /* turns mult into modmult */
608: }
609: bump_bitsniffer(multiplier,bitmask);
610: }
611: return(0);
612: } /* peasant_modmult */
613:
614:
615: /* If we are using a version of mp_modmult that uses internal tables
616: in memory, we have to call modmult_burn() at the end of mp_modexp.
617: This is so that no sensitive data is left in memory after the program
618: exits. The Russian peasant method doesn't use any such tables.
619: */
620: static void peasant_burn(void)
621: /* Alias for modmult_burn, called only from mp_modexp(). Destroys
622: internal modmult tables. This version does nothing because no
623: tables are used by the Russian peasant modmult. */
624: { } /* peasant_burn */
625:
626: #endif /* PEASANT */
627:
628:
629: #ifdef MERRITT
630: /*=========================================================================*/
631: /*
632: This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ.
633: Also refined by Zhahai Stewart to reduce the number of subtracts.
634: It performs a multiply combined with a modulo operation, without
635: going into "double precision". It is faster than the Russian peasant
636: method, and still works well on machines that lack a fast hardware
637: multiply instruction.
638: */
639:
640: /* The following support functions, macros, and data structures
641: are used only by Merritt's modmult algorithm... */
642:
643: static void mp_lshift_unit(register unitptr r1)
644: /* Shift r1 1 whole unit to the left. Used only by modmult function. */
645: { register short precision;
646: register unitptr r2;
647: precision = global_precision;
648: make_msbptr(r1,precision);
649: r2 = r1;
650: while (--precision)
651: *post_lowerunit(r1) = *pre_lowerunit(r2);
652: *r1 = 0;
653: } /* mp_lshift_unit */
654:
655:
656: /* moduli_buf contains shifted images of the modulus, set by stage_modulus */
657: static unit moduli_buf[UNITSIZE][MAX_UNIT_PRECISION] = {0};
658: static unitptr moduli[UNITSIZE+1] = /* contains pointers into moduli_buf */
659: { 0
660: ,&moduli_buf[ 0][0], &moduli_buf[ 1][0], &moduli_buf[ 2][0], &moduli_buf[ 3][0],
661: &moduli_buf[ 4][0], &moduli_buf[ 5][0], &moduli_buf[ 6][0], &moduli_buf[ 7][0]
662: #ifndef UNIT8
663: ,&moduli_buf[ 8][0], &moduli_buf[ 9][0], &moduli_buf[10][0], &moduli_buf[11][0],
664: &moduli_buf[12][0], &moduli_buf[13][0], &moduli_buf[14][0], &moduli_buf[15][0]
665: #ifndef UNIT16 /* and not UNIT8 */
666: ,&moduli_buf[16][0], &moduli_buf[17][0], &moduli_buf[18][0], &moduli_buf[19][0],
667: &moduli_buf[20][0], &moduli_buf[21][0], &moduli_buf[22][0], &moduli_buf[23][0],
668: &moduli_buf[24][0], &moduli_buf[25][0], &moduli_buf[26][0], &moduli_buf[27][0],
669: &moduli_buf[28][0], &moduli_buf[29][0], &moduli_buf[30][0], &moduli_buf[31][0]
670: #endif /* UNIT16 and UNIT8 not defined */
671: #endif /* UNIT8 not defined */
672: };
673:
674: /* To optimize msubs, need following 2 unit arrays, each filled
675: with the most significant unit and next-to-most significant unit
676: of the preshifted images of the RSA modulus. */
677: static unit msu_moduli[UNITSIZE+1] = {0}; /* most signif. unit */
678: static unit nmsu_moduli[UNITSIZE+1] = {0}; /* next-most signif. unit */
679:
680: /* mpdbuf contains preshifted images of the multiplicand, mod n.
681: It is used only by mp_modmult. It could be staticly declared
682: inside of mp_modmult, but we put it outside mp_modmult so that
683: it can be wiped clean by modmult_burn(), which is called at the
684: end of mp_modexp. This is so that no sensitive data is left in
685: memory after the program exits.
686: */
687: static unit mpdbuf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0};
688:
689:
690: static void stage_mp_images(unitptr images[UNITSIZE],unitptr r)
691: /* Computes UNITSIZE images of r, each shifted left 1 more bit.
692: Used only by modmult function.
693: */
694: { short int i;
695: images[0] = r; /* no need to move the first image, just copy ptr */
696: for (i=1; i<UNITSIZE; i++)
697: { mp_move(images[i],images[i-1]);
698: mp_shift_left(images[i]);
699: }
700: } /* stage_mp_images */
701:
702:
703: int stage_merritt_modulus(unitptr n)
704: /* Computes UNITSIZE+1 images of modulus, each shifted left 1 more bit.
705: Before calling mp_modmult, you must first stage the moduli images by
706: calling stage_modulus. n is the pointer to the modulus.
707: Assumes that global_precision has already been adjusted to the
708: size of the modulus, plus SLOP_BITS.
709: */
710: { short int i;
711: unitptr msu; /* ptr to most significant unit, for faster msubs */
712: moduli[0] = n; /* no need to move the first image, just copy ptr */
713:
714: /* used by optimized msubs macro... */
715: msu = msbptr(n,global_precision); /* needed by msubs */
716: msu_moduli[0] = *post_lowerunit(msu);
717: nmsu_moduli[0] = *msu;
718:
719: for (i=1; i<UNITSIZE+1; i++)
720: { mp_move(moduli[i],moduli[i-1]);
721: mp_shift_left(moduli[i]);
722:
723: /* used by optimized msubs macro... */
724: msu = msbptr(moduli[i],global_precision); /* needed by msubs */
725: msu_moduli[i] = *post_lowerunit(msu); /* for faster msubs */
726: nmsu_moduli[i] = *msu;
727: }
728: return(0); /* normal return */
729: } /* stage_merritt_modulus */
730:
731:
732: /* The following macros, sniffadd and msubs, are used by modmult... */
733: #define sniffadd(i) if (*multiplier & power_of_2(i)) mp_add(prod,mpd[i])
734:
735: /* Unoptimized msubs macro (msubs0) follows... */
736: /* #define msubs0(i) msub(prod,moduli[i])
737: */
738:
739: /* To optimize msubs, compare the most significant units of the
740: product and the shifted modulus before deciding to call the full
741: mp_compare routine. Better still, compare the upper two units
742: before deciding to call mp_compare.
743: Optimization of msubs relies heavily on standard C left-to-right
744: parsimonious evaluation of logical expressions.
745: */
746:
747: /* Partially-optimized msubs macro (msubs1) follows... */
748: /* #define msubs1(i) if ( \
749: ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \
750: (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \
751: ) mp_sub(prod,moduli[i])
752: */
753:
754: /* Fully-optimized msubs macro (msubs2) follows... */
755: #define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \
756: (p_m==0) && ( (*nmsu_prod>nmsu_moduli[i]) || ( \
757: (*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \
758: mp_sub(prod,moduli[i])
759:
760:
761: int merritt_modmult(register unitptr prod,
762: unitptr multiplicand,register unitptr multiplier)
763: /* Performs combined multiply/modulo operation.
764: Computes: prod = (multiplicand*multiplier) mod modulus
765: WARNING: All the arguments must be less than the modulus!
766: Assumes the modulus has been predefined by first calling
767: stage_modulus.
768: */
769: {
770: /* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro...*/
771: register signedunit p_m;
772: register unitptr msu_prod; /* ptr to most significant unit of product */
773: register unitptr nmsu_prod; /* next-most signif. unit of product */
774: short mprec; /* precision of multiplier, in units */
775: /* Array mpd contains a list of pointers to preshifted images of
776: the multiplicand: */
777: static unitptr mpd[UNITSIZE] =
778: { 0, &mpdbuf[ 0][0], &mpdbuf[ 1][0], &mpdbuf[ 2][0],
779: &mpdbuf[ 3][0], &mpdbuf[ 4][0], &mpdbuf[ 5][0], &mpdbuf[ 6][0]
780: #ifndef UNIT8
781: ,&mpdbuf[ 7][0], &mpdbuf[ 8][0], &mpdbuf[ 9][0], &mpdbuf[10][0],
782: &mpdbuf[11][0], &mpdbuf[12][0], &mpdbuf[13][0], &mpdbuf[14][0]
783: #ifndef UNIT16 /* and not UNIT8 */
784: ,&mpdbuf[15][0], &mpdbuf[16][0], &mpdbuf[17][0], &mpdbuf[18][0],
785: &mpdbuf[19][0], &mpdbuf[20][0], &mpdbuf[21][0], &mpdbuf[22][0],
786: &mpdbuf[23][0], &mpdbuf[24][0], &mpdbuf[25][0], &mpdbuf[26][0],
787: &mpdbuf[27][0], &mpdbuf[28][0], &mpdbuf[29][0], &mpdbuf[30][0]
788: #endif /* UNIT16 and UNIT8 not defined */
789: #endif /* UNIT8 not defined */
790: };
791:
792: /* Compute preshifted images of multiplicand, mod n: */
793: stage_mp_images(mpd,multiplicand);
794:
795: /* To optimize msubs, set up msu_prod and nmsu_prod: */
796: msu_prod = msbptr(prod,global_precision); /* Get ptr to MSU of prod */
797: nmsu_prod = msu_prod;
798: post_lowerunit(nmsu_prod); /* Get next-MSU of prod */
799:
800: /* To understand this algorithm, it would be helpful to first
801: study the conventional Russian peasant modmult algorithm.
802: This one does about the same thing as Russian peasant, but
803: is organized differently to save some steps. It loops
804: through the multiplier a word (unit) at a time, instead of
805: a bit at a time. It word-shifts the product instead of
806: bit-shifting it, so it should be faster. It also does about
807: half as many subtracts as Russian peasant.
808: */
809:
810: mp_init(prod,0); /* Initialize product to 0. */
811:
812: /* The way mp_modmult is actually used in cryptographic
813: applications, there will NEVER be a zero multiplier or
814: multiplicand. So there is no need to optimize for that
815: condition.
816: */
817: /* if (testeq(multiplicand,0))
818: return(0); */ /* zero multiplicand means zero product */
819: /* Normalize and compute number of units in multiplier first: */
820: normalize(multiplier,mprec);
821: if (mprec==0) /* if precision of multiplier is 0 */
822: return(0); /* zero multiplier means zero product */
823: make_msbptr(multiplier,mprec); /* start at MSU of multiplier */
824:
825: while (mprec--) /* Loop for the number of units in the multiplier */
826: {
827: /* Shift the product one whole unit to the left.
828: This is part of the multiply phase of modmult.
829: */
830:
831: mp_lshift_unit(prod);
832:
833: /* Sniff each bit in the current unit of the multiplier,
834: and conditionally add the the corresponding preshifted
835: image of the multiplicand to the product.
836: This is also part of the multiply phase of modmult.
837:
838: The following loop is unrolled for speed, using macros...
839:
840: for (i=UNITSIZE-1; i>=0; i--)
841: if (*multiplier & power_of_2(i))
842: mp_add(prod,mpd[i]);
843: */
844: #ifndef UNIT8
845: #ifndef UNIT16 /* and not UNIT8 */
846: sniffadd(31);
847: sniffadd(30);
848: sniffadd(29);
849: sniffadd(28);
850: sniffadd(27);
851: sniffadd(26);
852: sniffadd(25);
853: sniffadd(24);
854: sniffadd(23);
855: sniffadd(22);
856: sniffadd(21);
857: sniffadd(20);
858: sniffadd(19);
859: sniffadd(18);
860: sniffadd(17);
861: sniffadd(16);
862: #endif /* not UNIT16 and not UNIT8 */
863: sniffadd(15);
864: sniffadd(14);
865: sniffadd(13);
866: sniffadd(12);
867: sniffadd(11);
868: sniffadd(10);
869: sniffadd(9);
870: sniffadd(8);
871: #endif /* not UNIT8 */
872: sniffadd(7);
873: sniffadd(6);
874: sniffadd(5);
875: sniffadd(4);
876: sniffadd(3);
877: sniffadd(2);
878: sniffadd(1);
879: sniffadd(0);
880:
881: /* The product may have grown by as many as UNITSIZE+1
882: bits. That's why we have global_precision set to the
883: size of the modulus plus UNITSIZE+1 slop bits.
884: Now reduce the product back down by conditionally
885: subtracting the UNITSIZE+1 preshifted images of the
886: modulus. This is the modulo reduction phase of modmult.
887:
888: The following loop is unrolled for speed, using macros...
889:
890: for (i=UNITSIZE; i>=0; i--)
891: if (mp_compare(prod,moduli[i]) >= 0)
892: mp_sub(prod,moduli[i]);
893: */
894: #ifndef UNIT8
895: #ifndef UNIT16 /* and not UNIT8 */
896: msubs(32);
897: msubs(31);
898: msubs(30);
899: msubs(29);
900: msubs(28);
901: msubs(27);
902: msubs(26);
903: msubs(25);
904: msubs(24);
905: msubs(23);
906: msubs(22);
907: msubs(21);
908: msubs(20);
909: msubs(19);
910: msubs(18);
911: msubs(17);
912: #endif /* not UNIT16 and not UNIT8 */
913: msubs(16);
914: msubs(15);
915: msubs(14);
916: msubs(13);
917: msubs(12);
918: msubs(11);
919: msubs(10);
920: msubs(9);
921: #endif /* not UNIT8 */
922: msubs(8);
923: msubs(7);
924: msubs(6);
925: msubs(5);
926: msubs(4);
927: msubs(3);
928: msubs(2);
929: msubs(1);
930: msubs(0);
931:
932: /* Bump pointer to next lower unit of multiplier: */
933: post_lowerunit(multiplier);
934:
935: } /* Loop for the number of units in the multiplier */
936:
937: return(0); /* normal return */
938:
939: } /* merritt_modmult */
940:
941:
942: #undef msubs
943: #undef sniffadd
944:
945:
946: /* Merritt's mp_modmult function leaves some internal tables in memory,
947: so we have to call modmult_burn() at the end of mp_modexp.
948: This is so that no cryptographically sensitive data is left in memory
949: after the program exits.
950: */
951: static void merritt_burn(void)
952: /* Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */
953: { unitfill0(&(mpdbuf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION);
954: unitfill0(&(moduli_buf[0][0]),(UNITSIZE)*MAX_UNIT_PRECISION);
955: unitfill0(msu_moduli,UNITSIZE+1);
956: unitfill0(nmsu_moduli,UNITSIZE+1);
957: } /* merritt_burn() */
958:
959: /******* end of Merritt's MODMULT stuff. *******/
960: /*=========================================================================*/
961: #endif /* MERRITT */
962:
963:
964: #ifdef STEWART /* using Zhahai Stewart's modmult algorithm */
965: #include "stewart.c"
966: #endif /* STEWART */
967:
968: #endif /* not ASM_MODMULT */
969:
970:
971: int countbits(unitptr r)
972: /* Returns number of significant bits in r */
973: { int bits;
974: short prec;
975: register unit bitmask;
976: init_bitsniffer(r,bitmask,prec,bits);
977: return(bits);
978: } /* countbits */
979:
980:
981: int mp_modexp(register unitptr expout,register unitptr expin,
982: register unitptr exponent,register unitptr modulus)
983: { /* Russian peasant combined exponentiation/modulo algorithm.
984: Calls modmult instead of mult.
985: Computes: expout = (expin**exponent) mod modulus
986: WARNING: All the arguments must be less than the modulus!
987: */
988: int bits;
989: short oldprecision;
990: register unit bitmask;
991: unit product[MAX_UNIT_PRECISION];
992: short eprec;
993:
994: #ifdef COUNTMULTS
995: tally_modmults = 0; /* clear "number of modmults" counter */
996: tally_modsquares = 0; /* clear "number of modsquares" counter */
997: #endif /* COUNTMULTS */
998: mp_init(expout,1);
999: if (testeq(exponent,0))
1000: { if (testeq(expin,0))
1001: return(-1); /* 0 to the 0th power means return error */
1002: return(0); /* otherwise, zero exponent means expout is 1 */
1003: }
1004: if (testeq(modulus,0))
1005: return(-2); /* zero modulus means error */
1006: #if SLOP_BITS > 0 /* if there's room for sign bits */
1007: if (mp_tstminus(modulus))
1008: return(-2); /* negative modulus means error */
1009: #endif /* SLOP_BITS > 0 */
1010: if (mp_compare(expin,modulus) >= 0)
1011: return(-3); /* if expin >= modulus, return error */
1012: if (mp_compare(exponent,modulus) >= 0)
1013: return(-4); /* if exponent >= modulus, return error */
1014:
1015: oldprecision = global_precision; /* save global_precision */
1016: /* set smallest optimum precision for this modulus */
1017: set_precision(bits2units(countbits(modulus)+SLOP_BITS));
1018: /* rescale all these registers to global_precision we just defined */
1019: rescale(modulus,oldprecision,global_precision);
1020: rescale(expin,oldprecision,global_precision);
1021: rescale(exponent,oldprecision,global_precision);
1022: rescale(expout,oldprecision,global_precision);
1023:
1024: if (stage_modulus(modulus))
1025: { set_precision(oldprecision); /* restore original precision */
1026: return(-5); /* unstageable modulus (STEWART algorithm) */
1027: }
1028:
1029: /* normalize and compute number of bits in exponent first */
1030: init_bitsniffer(exponent,bitmask,eprec,bits);
1031:
1032: /* We can "optimize out" the first modsquare and modmult: */
1033: bits--; /* We know for sure at this point that bits>0 */
1034: mp_move(expout,expin); /* expout = (1*1)*expin; */
1035: bump_bitsniffer(exponent,bitmask);
1036:
1037: while (bits--)
1038: {
1039: poll_for_break(); /* polls keyboard, allows ctrl-C to abort program */
1040: #ifdef COUNTMULTS
1041: tally_modsquares++; /* bump "number of modsquares" counter */
1042: #endif /* COUNTMULTS */
1043: mp_modsquare(product,expout);
1044: mp_move(expout,product);
1045: if (sniff_bit(exponent,bitmask))
1046: { mp_modmult(product,expout,expin);
1047: mp_move(expout,product);
1048: #ifdef COUNTMULTS
1049: tally_modmults++; /* bump "number of modmults" counter */
1050: #endif /* COUNTMULTS */
1051: }
1052: bump_bitsniffer(exponent,bitmask);
1053: } /* while bits-- */
1054: mp_burn(product); /* burn the evidence on the stack */
1055: modmult_burn(); /* ask mp_modmult to also burn its own evidence */
1056:
1057: #ifdef COUNTMULTS /* diagnostic analysis */
1058: { long atomic_mults;
1059: unsigned int unitcount,totalmults;
1060: unitcount = bits2units(countbits(modulus));
1061: /* calculation assumes modsquare takes as long as a modmult: */
1062: atomic_mults = (long) tally_modmults * (unitcount * unitcount);
1063: atomic_mults += (long) tally_modsquares * (unitcount * unitcount);
1064: printf("%ld atomic mults for ",atomic_mults);
1065: printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n",
1066: tally_modsquares,tally_modmults,
1067: tally_modsquares+tally_modmults,
1068: countbits(modulus), unitcount);
1069: }
1070: #endif /* COUNTMULTS */
1071:
1072: set_precision(oldprecision); /* restore original precision */
1073: return(0); /* normal return */
1074: } /* mp_modexp */
1075:
1076:
1077: char *copyright_notice(void)
1078: /* force linker to include copyright notice in the executable object image. */
1079: { return ("(c)1986 Philip Zimmermann"); } /* copyright_notice */
1080:
1081:
1082: int rsa_decrypt(unitptr M, unitptr C,
1083: unitptr d, unitptr p, unitptr q, unitptr u)
1084: /* Uses Chinese Remainder Theorem shortcut for RSA decryption.
1085: M is the output plaintext message.
1086: C is the input ciphertext message.
1087: d is the secret decryption exponent.
1088: p and q are the prime factors of n.
1089: u is the multiplicative inverse of p, mod q.
1090: Note that u is precomputed on the assumption that p<q.
1091: n, the common modulus, is not used here because of the
1092: Chinese Remainder Theorem shortcut.
1093: */
1094: {
1095: unit p2[MAX_UNIT_PRECISION];
1096: unit q2[MAX_UNIT_PRECISION];
1097: unit temp1[MAX_UNIT_PRECISION];
1098: unit temp2[MAX_UNIT_PRECISION];
1099: int status;
1100:
1101: mp_init(M,1); /* initialize result in case of error */
1102:
1103: if (mp_compare(p,q) >= 0) /* ensure that p<q */
1104: { /* swap the pointers p and q */
1105: unitptr t;
1106: t = p; p = q; q = t;
1107: }
1108:
1109: /* Rather than decrypting by computing modexp with full mod n
1110: precision, compute a shorter modexp with mod p precision... */
1111:
1112: /* p2 = [ (C mod p)**( d mod (p-1) ) ] mod p */
1113: mp_move(temp1,p);
1114: mp_dec(temp1); /* temp1 = p-1 */
1115: mp_mod(temp2,d,temp1); /* temp2 = d mod (p-1) ) */
1116: mp_mod(temp1,C,p); /* temp1 = C mod p */
1117: status = mp_modexp(p2,temp1,temp2,p);
1118: if (status < 0) /* mp_modexp returned an error. */
1119: return(status); /* error return */
1120:
1121: /* Now compute a short modexp with mod q precision... */
1122:
1123: /* q2 = [ (C mod q)**( d mod (q-1) ) ] mod q */
1124: mp_move(temp1,q);
1125: mp_dec(temp1); /* temp1 = q-1 */
1126: mp_mod(temp2,d,temp1); /* temp2 = d mod (q-1) */
1127: mp_mod(temp1,C,q); /* temp1 = C mod q */
1128: status = mp_modexp(q2,temp1,temp2,q);
1129: if (status < 0) /* mp_modexp returned an error. */
1130: return(status); /* error return */
1131:
1132: /* Now use the multiplicative inverse u to glue together the
1133: two halves, saving a lot of time by avoiding a full mod n
1134: exponentiation. At key generation time, u was computed
1135: with the secret key as the multiplicative inverse of
1136: p, mod q such that: (p*u) mod q = 1, assuming p<q.
1137: */
1138: if (mp_compare(p2,q2) == 0) /* only happens if C<p */
1139: mp_move(M,p2);
1140: else
1141: { /* q2 = q2 - p2; if q2<0 then q2 = q2 + q */
1142: if (mp_sub(q2,p2)) /* if the result went negative... */
1143: mp_add(q2,q); /* add q to q2 */
1144:
1145: /* M = p2 + ( p * [(q2*u) mod q] ) */
1146: mp_mult(temp1,q2,u); /* temp1 = q2*u */
1147: mp_mod(temp2,temp1,q); /* temp2 = temp1 mod q */
1148: mp_mult(temp1,p,temp2); /* temp1 = p * temp2 */
1149: mp_add(temp1,p2); /* temp1 = temp1 + p2 */
1150: mp_move(M,temp1); /* M = temp1 */
1151: }
1152:
1153: mp_burn(p2); /* burn the evidence on the stack...*/
1154: mp_burn(q2);
1155: mp_burn(temp1);
1156: mp_burn(temp2);
1157: /* Do an explicit reference to the copyright notice so that the linker
1158: will be forced to include it in the executable object image... */
1159: copyright_notice(); /* has no real effect at run time */
1160: return(0); /* normal return */
1161: } /* rsa_decrypt */
1162:
1163:
1164: /*
1165: ** mp_sqrt - returns square root of a number.
1166: ** returns -1 for error, 0 for perfect square, 1 for not perfect square.
1167: ** Not used by any RSA encrypt or decrypt functions.
1168: ** Used by some factoring algorithms. This version needs optimization.
1169: ** by Charles W. Merritt July 15, 1989, refined by PRZ.
1170: */
1171: int mp_sqrt(unitptr quotient,unitptr dividend)
1172: /* Quotient is returned as the square root of dividend. */
1173: {
1174: register char next2bits; /* "period", or group of 2 bits of dividend */
1175: register unit dvdbitmask,qbitmask;
1176: unit remainder[MAX_UNIT_PRECISION],rjq[MAX_UNIT_PRECISION],
1177: divisor[MAX_UNIT_PRECISION];
1178: unsigned int qbits,qprec,dvdbits,dprec,oldprecision;
1179: int notperfect;
1180:
1181: mp_init(quotient,0);
1182: if (mp_tstminus(dividend)) /* if dividend<0, return error */
1183: { mp_dec(quotient); /* quotient = -1 */
1184: return(-1);
1185: }
1186:
1187: /* normalize and compute number of bits in dividend first */
1188: init_bitsniffer(dividend,dvdbitmask,dprec,dvdbits);
1189: /* init_bitsniffer returns a 0 if dvdbits is 0 */
1190: if (dvdbits==1)
1191: { mp_init(quotient,1); /* square root of 1 is 1 */
1192: return(0);
1193: }
1194:
1195: /* rescale quotient to half the precision of dividend */
1196: qbits = (dvdbits+1) >> 1;
1197: qprec = bits2units(qbits);
1198: rescale(quotient,global_precision,qprec);
1199: make_msbptr(quotient,qprec);
1200: qbitmask = power_of_2( (qbits-1) & (UNITSIZE-1)) ;
1201:
1202: /* Set smallest optimum precision for this square root.
1203: The low-level primitives are affected by the call to set_precision.
1204: Even though the dividend precision is bigger than the precision
1205: we will use, no low-level primitives will be used on the dividend.
1206: They will be used on the quotient, remainder, and rjq, which are
1207: smaller precision.
1208: */
1209: oldprecision = global_precision; /* save global_precision */
1210: set_precision(bits2units(qbits+3)); /* 3 bits of precision slop */
1211:
1212: /* special case: sqrt of 1st 2 (binary) digits of dividend
1213: is 1st (binary) digit of quotient. This is always 1. */
1214: stuff_bit(quotient,qbitmask);
1215: bump_bitsniffer(quotient,qbitmask);
1216: mp_init(rjq,1); /* rjq is Right Justified Quotient */
1217:
1218: if (!(dvdbits & 1))
1219: { /* even number of bits in dividend */
1220: next2bits = 2;
1221: bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
1222: if (sniff_bit(dividend,dvdbitmask)) next2bits++;
1223: bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
1224: }
1225: else
1226: { /* odd number of bits in dividend */
1227: next2bits = 1;
1228: bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
1229: }
1230:
1231: mp_init(remainder,next2bits-1);
1232:
1233: /* dvdbits is guaranteed to be even at this point */
1234:
1235: while (dvdbits)
1236: { next2bits=0;
1237: if (sniff_bit(dividend,dvdbitmask)) next2bits=2;
1238: bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
1239: if (sniff_bit(dividend,dvdbitmask)) next2bits++;
1240: bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
1241: mp_rotate_left(remainder,(boolean)((next2bits&2)!=0));
1242: mp_rotate_left(remainder,(boolean)((next2bits&1)!=0));
1243:
1244: /* "divisor" is trial divisor, complete divisor is 4*rjq
1245: or 4*rjq+1.
1246: Subtract divisor times its last digit from remainder.
1247: If divisor ends in 1, remainder -= divisor*1,
1248: or if divisor ends in 0, remainder -= divisor*0 (do nothing).
1249: Last digit of divisor inflates divisor as large as possible
1250: yet still subtractable from remainder.
1251: */
1252: mp_move(divisor,rjq); /* divisor = 4*rjq+1 */
1253: mp_rotate_left(divisor,0);
1254: mp_rotate_left(divisor,1);
1255: if (mp_compare(remainder,divisor) >= 0)
1256: { mp_sub(remainder,divisor);
1257: stuff_bit(quotient,qbitmask);
1258: mp_rotate_left(rjq,1);
1259: }
1260: else
1261: mp_rotate_left(rjq,0);
1262: bump_bitsniffer(quotient,qbitmask);
1263: }
1264: notperfect = testne(remainder,0); /* not a perfect square? */
1265: set_precision(oldprecision); /* restore original precision */
1266: return(notperfect); /* normal return */
1267:
1268: } /* mp_sqrt */
1269:
1270:
1271: /* These are notes on computing the square root the manual old-fashioned
1272: way. This is the basis of the above fast sqrt algorthm:
1273:
1274: 1) Separate the number into groups (periods) of two digits each,
1275: beginning with units or at the decimal point.
1276: 2) Find the greatest perfect square in the left hand period & write
1277: its square root as the first figure of the required root. Subtract
1278: the square of this number from the left hand period. Annex to the
1279: remainder the next group so as to form a dividend.
1280: 3) Double the root already found and write it as a partial divisor at
1281: the left of the new dividend. Annex one zero digit, making a trial
1282: divisor, and divide the new dividend by the trial divisor.
1283: 4) Write the quotient in the root as the trial term and also substitute
1284: this quotient for the annexed zero digit in the partial divisor,
1285: making the latter complete.
1286: 5) Multiply the complete divisor by the figure just obtained and,
1287: if possible, subtract the product from the last remainder.
1288: If this product is too large, the trial term of the quotient
1289: must be replaced by the next smaller number and the operations
1290: preformed as before.
1291: (IN BINARY, OUR TRIAL TERM IS ALWAYS 1 AND WE USE IT OR DO NOTHING.)
1292: 6) Proceed in this manner until all periods are used.
1293: If there is still a remainder, it's not a perfect square.
1294: */
1295:
1296:
1297: /****************** end of RSA library ****************************/
1298:
1299:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.