--- pgp/src/mpilib.c	2018/04/24 16:42:05	1.1.1.6
+++ pgp/src/mpilib.c	2018/04/24 16:43:27	1.1.1.7
@@ -1,82 +1,82 @@
-/*	C source code for multiprecision arithmetic library routines.
-	Implemented Nov 86 by Philip Zimmermann
-	Last revised 27 Nov 91 by PRZ
-
-	Boulder Software Engineering
-	3021 Eleventh Street
-	Boulder, CO 80304
-	(303) 541-0140
-
-	(c) Copyright 1986-1994 by Philip Zimmermann.  All rights reserved.
-	The author assumes no liability for damages resulting from the use
-	of this software, even if the damage results from defects in this
-	software.  No warranty is expressed or implied.  The use of this
-	cryptographic software for developing weapon systems is expressly
-	forbidden.
-
-
-	These routines implement all of the multiprecision arithmetic
-	necessary for number-theoretic cryptographic algorithms such as
-	ElGamal, Diffie-Hellman, Rabin, or factoring studies for large
-	composite numbers, as well as Rivest-Shamir-Adleman (RSA) public
-	key cryptography.
-
-	Although originally developed in Microsoft C for the IBM PC, this code
-	contains few machine dependencies.  It assumes 2's complement
-	arithmetic.  It can be adapted to 8-bit, 16-bit, or 32-bit machines,
-	lowbyte-highbyte order or highbyte-lowbyte order.  This version
-	has been converted to ANSI C.
-
-
-	The internal representation for these extended precision integer
-	"registers" is an array of "units".  A unit is a machine word, which
-	is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit
-	unsigned integer, depending on the machine's word size.  For example,
-	an IBM PC or AT uses a unit size of 16 bits.  To perform arithmetic
-	on these huge precision integers, we pass pointers to these unit
-	arrays to various subroutines.  A pointer to an array of units is of
-	type unitptr.  This is a pointer to a huge integer "register".
-
-	When calling a subroutine, we always pass a pointer to the BEGINNING
-	of the array of units, regardless of the byte order of the machine.
-	On a lowbyte-first machine, such as the Intel 80x86, this unitptr
-	points to the LEAST significant unit, and the array of units increases
-	significance to the right.  On a highbyte-first machine, such as the
-	Motorola 680x0, this unitptr points to the MOST significant unit, and
-	the array of units decreases significance to the right.
+/* C source code for multiprecision arithmetic library routines.
+   Implemented Nov 86 by Philip Zimmermann
+   Last revised 27 Nov 91 by PRZ
+
+   Boulder Software Engineering
+   3021 Eleventh Street
+   Boulder, CO 80304
+   (303) 541-0140
+
+   (c) Copyright 1986-1994 by Philip Zimmermann.  All rights reserved.
+   The author assumes no liability for damages resulting from the use
+   of this software, even if the damage results from defects in this
+   software.  No warranty is expressed or implied.  The use of this
+   cryptographic software for developing weapon systems is expressly
+   forbidden.
+
+
+   These routines implement all of the multiprecision arithmetic
+   necessary for number-theoretic cryptographic algorithms such as
+   ElGamal, Diffie-Hellman, Rabin, or factoring studies for large
+   composite numbers, as well as Rivest-Shamir-Adleman (RSA) public
+   key cryptography.
+
+   Although originally developed in Microsoft C for the IBM PC, this code
+   contains few machine dependencies.  It assumes 2's complement
+   arithmetic.  It can be adapted to 8-bit, 16-bit, or 32-bit machines,
+   lowbyte-highbyte order or highbyte-lowbyte order.  This version
+   has been converted to ANSI C.
+
+
+   The internal representation for these extended precision integer
+   "registers" is an array of "units".  A unit is a machine word, which
+   is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit
+   unsigned integer, depending on the machine's word size.  For example,
+   an IBM PC or AT uses a unit size of 16 bits.  To perform arithmetic
+   on these huge precision integers, we pass pointers to these unit
+   arrays to various subroutines.  A pointer to an array of units is of
+   type unitptr.  This is a pointer to a huge integer "register".
+
+   When calling a subroutine, we always pass a pointer to the BEGINNING
+   of the array of units, regardless of the byte order of the machine.
+   On a lowbyte-first machine, such as the Intel 80x86, this unitptr
+   points to the LEAST significant unit, and the array of units increases
+   significance to the right.  On a highbyte-first machine, such as the
+   Motorola 680x0, this unitptr points to the MOST significant unit, and
+   the array of units decreases significance to the right.
+
+   Modified 8 Apr 92 - HAJK
+   Implement new VAX/VMS primitive support.
 
-	Modified 8 Apr 92 - HAJK
-	Implement new VAX/VMS primitive support.
+   Modified 30 Sep 92 -Castor Fu <castor@drizzle.stanford.edu>
+   Upgraded PORTABLE support to allow sizeof(unit) == sizeof(long)
 
-	Modified 30 Sep 92 -Castor Fu <castor@drizzle.stanford.edu>
-	Upgraded PORTABLE support to allow sizeof(unit) == sizeof(long)
-
-	Modified 28 Nov 92 - Thad Smith
-	Added Smith modmult, generalized non-portable support.
-*/
+   Modified 28 Nov 92 - Thad Smith
+   Added Smith modmult, generalized non-portable support.
+ */
 
-/* #define COUNTMULTS */ /* count modmults for performance studies */
+/* #define COUNTMULTS *//* count modmults for performance studies */
 
 #ifdef DEBUG
 #ifdef MSDOS
-#ifdef __GO32__ /* DJGPP */
+#ifdef __GO32__			/* DJGPP */
 #include <pc.h>
 #else
 #include <conio.h>
-#endif  /* __GO32__ */
+#endif				/* __GO32__ */
 #define poll_for_break() {while (kbhit()) getch();}
-#endif	/* MSDOS */
-#endif	/* DEBUG */
+#endif				/* MSDOS */
+#endif				/* DEBUG */
 
 #ifndef poll_for_break
-#define poll_for_break()  /* stub */
+#define poll_for_break()	/* stub */
 #endif
 
 #include "mpilib.h"
 
 #ifdef mp_smula
 #ifdef mp_smul
-	Error: Both mp_smula and mp_smul cannot be defined.
+Error:Both mp_smula and mp_smul cannot be defined.
 #else
 #define mp_smul	mp_smula
 #endif
@@ -89,7 +89,7 @@ typedef unsigned char MULTUNIT;
 #ifdef UNIT8
 #define MULTUNIT_SIZE_SAME
 #endif
-#else	/* not MUNIT8 */
+#else				/* not MUNIT8 */
 #ifdef MUNIT32
 #define MULTUNITSIZE   32
 typedef unsigned long MULTUNIT;
@@ -99,446 +99,463 @@ typedef unsigned long MULTUNIT;
 /* #error is not portable, this has the same effect */
 #include "UNITSIZE cannot be smaller than MULTUNITSIZE"
 #endif
-#else   /* assume MUNIT16 */
+#else				/* assume MUNIT16 */
 #define MULTUNITSIZE   16
 typedef unsigned short MULTUNIT;
 #ifdef UNIT16
 #define MULTUNIT_SIZE_SAME
-#endif	/* UNIT16 */
+#endif				/* UNIT16 */
 #ifdef UNIT8
 #include "UNITSIZE cannot be smaller than MULTUNITSIZE"
-#endif	/* UNIT8 */
-#endif	/* MUNIT32 */
-#endif	/* MUNIT8 */
+#endif				/* UNIT8 */
+#endif				/* MUNIT32 */
+#endif				/* MUNIT8 */
 
-#define MULTUNIT_msb    ((MULTUNIT)1 << (MULTUNITSIZE-1)) /* msb of MULTUNIT */
+#define MULTUNIT_msb    ((MULTUNIT)1 << (MULTUNITSIZE-1))	/* msb of
+								   MULTUNIT */
 #define DMULTUNIT_msb   (1L << (2*MULTUNITSIZE-1))
 #define MULTUNIT_mask   ((MULTUNIT)((1L << MULTUNITSIZE)-1))
 #define MULTUNITs_perunit   (UNITSIZE/MULTUNITSIZE)
 
 
-void mp_smul (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier);
-void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier);
+void mp_smul(MULTUNIT * prod, MULTUNIT * multiplicand, MULTUNIT multiplier);
+void mp_dmul(unitptr prod, unitptr multiplicand, unitptr multiplier);
 
-short global_precision = 0; /* units of precision for all routines */
-/*	global_precision is the unit precision last set by set_precision.
-	Initially, set_precision() should be called to define global_precision
-	before using any of these other multiprecision library routines.
-	i.e.:   set_precision(MAX_UNIT_PRECISION);
-*/
+short global_precision = 0;	/* units of precision for all routines */
+/*      global_precision is the unit precision last set by set_precision.
+   Initially, set_precision() should be called to define global_precision
+   before using any of these other multiprecision library routines.
+   i.e.:   set_precision(MAX_UNIT_PRECISION);
+ */
 
 /*************** multiprecision library primitives ****************/
-/*	The following portable C primitives should be recoded in assembly.
-	The entry point name should be defined, in "mpilib.h" to the external
-	entry point name.  If undefined, the C version will be used.
-*/
+/*      The following portable C primitives should be recoded in assembly.
+   The entry point name should be defined, in "mpilib.h" to the external
+   entry point name.  If undefined, the C version will be used.
+ */
 
 typedef unsigned long int ulint;
 
 #ifndef mp_addc
+/* 
+   multiprecision add with carry r2 to r1, result in r1
+   carry is incoming carry flag-- value should be 0 or 1
+*/
 boolean mp_addc
-	(register unitptr r1,register unitptr r2,register boolean carry)
-	/* multiprecision add with carry r2 to r1, result in r1 */
-	/* carry is incoming carry flag-- value should be 0 or 1 */
-{
-	register unit x;
-	short precision;	/* number of units to add */
-	precision = global_precision;
-	make_lsbptr(r1,precision);
-	make_lsbptr(r2,precision);
-	while (precision--) {
-		if (carry) {
-			x = *r1 + *r2 + 1;
-			carry = (*r2  >= (unit)(~ *r1));
-		} else {
-			x = *r1 + *r2;
-			carry = (x < *r1) ;
-		}
-		post_higherunit(r2);
-		*post_higherunit(r1) = x;
+ (register unitptr r1, register unitptr r2, register boolean carry)
+{
+    register unit x;
+    short precision;		/* number of units to add */
+    precision = global_precision;
+    make_lsbptr(r1, precision);
+    make_lsbptr(r2, precision);
+    while (precision--) {
+	if (carry) {
+	    x = *r1 + *r2 + 1;
+	    carry = (*r2 >= (unit) (~*r1));
+	} else {
+	    x = *r1 + *r2;
+	    carry = (x < *r1);
 	}
-	return carry;		/* return the final carry flag bit */
-} /* mp_addc */
-#endif  /* mp_addc */
+	post_higherunit(r2);
+	*post_higherunit(r1) = x;
+    }
+    return carry;		/* return the final carry flag bit */
+}				/* mp_addc */
+#endif				/* mp_addc */
 
 #ifndef mp_subb
+
+/* 
+   multiprecision subtract with borrow, r2 from r1, result in r1
+   borrow is incoming borrow flag-- value should be 0 or 1
+ */
 boolean mp_subb
-	(register unitptr r1,register unitptr r2,register boolean borrow)
-	/* multiprecision subtract with borrow, r2 from r1, result in r1 */
-	/* borrow is incoming borrow flag-- value should be 0 or 1 */
-{
-	register unit x;
-	short precision;	/* number of units to subtract */
-	precision = global_precision;
-	make_lsbptr(r1,precision);
-	make_lsbptr(r2,precision);
-	while (precision--) {
-		if (borrow) {
-			x = *r1 - *r2 - 1;
-			borrow = (*r1 <= *r2);
-		} else {
-			x = *r1 - *r2;
-			borrow = (*r1 < *r2);
-		}
-		post_higherunit(r2);
-		*post_higherunit(r1) = x;
+ (register unitptr r1, register unitptr r2, register boolean borrow)
+{
+    register unit x;
+    short precision;		/* number of units to subtract */
+    precision = global_precision;
+    make_lsbptr(r1, precision);
+    make_lsbptr(r2, precision);
+    while (precision--) {
+	if (borrow) {
+	    x = *r1 - *r2 - 1;
+	    borrow = (*r1 <= *r2);
+	} else {
+	    x = *r1 - *r2;
+	    borrow = (*r1 < *r2);
 	}
-	return borrow;	/* return the final carry/borrow flag bit */
-} /* mp_subb */
-#endif  /* mp_subb */
+	post_higherunit(r2);
+	*post_higherunit(r1) = x;
+    }
+    return borrow;		/* return the final carry/borrow flag bit */
+}				/* mp_subb */
+#endif				/* mp_subb */
 
 #ifndef mp_rotate_left
-boolean mp_rotate_left(register unitptr r1,register boolean carry)
-	/* multiprecision rotate left 1 bit with carry, result in r1. */
-	/* carry is incoming carry flag-- value should be 0 or 1 */
+
+/*
+   multiprecision rotate left 1 bit with carry, result in r1.
+   carry is incoming carry flag-- value should be 0 or 1
+*/
+boolean mp_rotate_left(register unitptr r1, register boolean carry)
 {
-	register int precision;	/* number of units to rotate */
-	unsigned int mcarry = carry, nextcarry; /* int is supposed to be
+    register int precision;	/* number of units to rotate */
+    unsigned int mcarry = carry, nextcarry;	/* int is supposed to be
 						 * the efficient size for ops*/
-	precision = global_precision;
-	make_lsbptr(r1,precision);
-	while (precision--) {
-		nextcarry = (((signedunit) *r1) < 0);
-		*r1 = (*r1 << 1) | mcarry;
-		mcarry = nextcarry;
-		pre_higherunit(r1);
-	}
-	return nextcarry;	/* return the final carry flag bit */
-} /* mp_rotate_left */
-#endif  /* mp_rotate_left */
+    precision = global_precision;
+    make_lsbptr(r1, precision);
+    while (precision--) {
+	nextcarry = (((signedunit) * r1) < 0);
+	*r1 = (*r1 << 1) | mcarry;
+	mcarry = nextcarry;
+	pre_higherunit(r1);
+    }
+    return nextcarry;		/* return the final carry flag bit */
+}				/* mp_rotate_left */
+#endif				/* mp_rotate_left */
 
 /************** end of primitives that should be in assembly *************/
 
+/* The mp_shift_right_bits function is not called in any time-critical
+   situations in public-key cryptographic functions, so it doesn't
+   need to be coded in assembly language.
+ */
 
-/*	The mp_shift_right_bits function is not called in any time-critical
-	situations in public-key cryptographic functions, so it doesn't
-	need to be coded in assembly language.
+/*
+   multiprecision shift right bits, result in r1.
+   bits is how many bits to shift, must be <= UNITSIZE.
 */
-void mp_shift_right_bits(register unitptr r1,register short bits)
-	/*	multiprecision shift right bits, result in r1.
-		bits is how many bits to shift, must be <= UNITSIZE.
-	*/
-{
-	register short precision;	/* number of units to shift */
-	register unit carry,nextcarry,bitmask;
-	register short unbits;
-	if (bits==0) return;	/* shift zero bits is a no-op */
-	carry = 0;
-	bitmask = power_of_2(bits)-1;
-	unbits = UNITSIZE-bits;		/* shift bits must be <= UNITSIZE */
-	precision = global_precision;
-	make_msbptr(r1,precision);
-	if (bits == UNITSIZE) {
-		while (precision--)  {
-			nextcarry = *r1;
-			*r1 = carry;
-			carry = nextcarry;
-			pre_lowerunit(r1);
-		}
-	} else {
-		while (precision--) {
-			nextcarry = *r1 & bitmask;
-			*r1 >>= bits;
-			*r1 |= carry << unbits;
-			carry = nextcarry;
-			pre_lowerunit(r1);
-		}
+void mp_shift_right_bits(register unitptr r1, register short bits)
+{
+    register short precision;	/* number of units to shift */
+    register unit carry, nextcarry, bitmask;
+    register short unbits;
+    if (bits == 0)
+	return;			/* shift zero bits is a no-op */
+    carry = 0;
+    bitmask = power_of_2(bits) - 1;
+    unbits = UNITSIZE - bits;	/* shift bits must be <= UNITSIZE */
+    precision = global_precision;
+    make_msbptr(r1, precision);
+    if (bits == UNITSIZE) {
+	while (precision--) {
+	    nextcarry = *r1;
+	    *r1 = carry;
+	    carry = nextcarry;
+	    pre_lowerunit(r1);
 	}
-} /* mp_shift_right_bits */
-
+    } else {
+	while (precision--) {
+	    nextcarry = *r1 & bitmask;
+	    *r1 >>= bits;
+	    *r1 |= carry << unbits;
+	    carry = nextcarry;
+	    pre_lowerunit(r1);
+	}
+    }
+}				/* mp_shift_right_bits */
 
 #ifndef mp_compare
-short mp_compare(register unitptr r1,register unitptr r2)
+
 /*
  * Compares multiprecision integers *r1, *r2, and returns:
  * -1 iff *r1 < *r2
  *  0 iff *r1 == *r2
  * +1 iff *r1 > *r2
  */
+short mp_compare(register unitptr r1, register unitptr r2)
 {
-	register short precision;	/* number of units to compare */
+    register short precision;	/* number of units to compare */
 
-	precision = global_precision;
-	make_msbptr(r1,precision);
-	make_msbptr(r2,precision);
-	do {
-		if (*r1 < *r2)
-			return -1;
-		if (*post_lowerunit(r1) > *post_lowerunit(r2))
-			return 1;
-	} while (--precision);
-	return 0;	/*  *r1 == *r2  */
-} /* mp_compare */
-#endif /* mp_compare */
+    precision = global_precision;
+    make_msbptr(r1, precision);
+    make_msbptr(r2, precision);
+    do {
+	if (*r1 < *r2)
+	    return -1;
+	if (*post_lowerunit(r1) > *post_lowerunit(r2))
+	    return 1;
+    } while (--precision);
+    return 0;			/*  *r1 == *r2  */
+}				/* mp_compare */
+#endif				/* mp_compare */
 
-
-boolean mp_inc(register unitptr r)
 /* Increment multiprecision integer r. */
+boolean mp_inc(register unitptr r)
 {
-	register short precision;
-	precision = global_precision;
-	make_lsbptr(r,precision);
-	do {
-		if ( ++(*r) ) return 0;	/* no carry */
-		post_higherunit(r);
-	} while (--precision);
-	return 1;		/* return carry set */
-} /* mp_inc */
+    register short precision;
+    precision = global_precision;
+    make_lsbptr(r, precision);
+    do {
+	if (++(*r))
+	    return 0;		/* no carry */
+	post_higherunit(r);
+    } while (--precision);
+    return 1;			/* return carry set */
+}				/* mp_inc */
 
-
-boolean mp_dec(register unitptr r)
 /* Decrement multiprecision integer r. */
+boolean mp_dec(register unitptr r)
 {
-	register short precision;
-	precision = global_precision;
-	make_lsbptr(r,precision);
-	do {
-		if ( (signedunit) (--(*r)) != (signedunit) -1 )
-			return 0;	/* no borrow */
-		post_higherunit(r);
-	} while (--precision);
-	return 1;		/* return borrow set */
-} /* mp_dec */
-
+    register short precision;
+    precision = global_precision;
+    make_lsbptr(r, precision);
+    do {
+	if ((signedunit) (--(*r)) != (signedunit) - 1)
+	    return 0;		/* no borrow */
+	post_higherunit(r);
+    } while (--precision);
+    return 1;			/* return borrow set */
+}				/* mp_dec */
 
+/* Compute 2's complement, the arithmetic negative, of r */
 void mp_neg(register unitptr r)
-	/* Compute 2's complement, the arithmetic negative, of r */
 {
-	register short precision;	/* number of units to negate */
-	precision = global_precision;
-	mp_dec(r);	/* 2's complement is 1's complement plus 1 */
-	do {	/* now do 1's complement */
-		*r = ~(*r);
-		r++;
-	} while (--precision);
-} /* mp_neg */
+    register short precision;	/* number of units to negate */
+    precision = global_precision;
+    mp_dec(r);			/* 2's complement is 1's complement plus 1 */
+    do {			/* now do 1's complement */
+	*r = ~(*r);
+	r++;
+    } while (--precision);
+}				/* mp_neg */
 
 #ifndef mp_move
-void mp_move(register unitptr dst,register unitptr src)
+
+void mp_move(register unitptr dst, register unitptr src)
 {
-	register short precision;	/* number of units to move */
-	precision = global_precision;
-	do { *dst++ = *src++; } while (--precision);
-} /* mp_move */
-#endif /* mp_move */
+    register short precision;	/* number of units to move */
+    precision = global_precision;
+    do {
+	*dst++ = *src++;
+    } while (--precision);
+}				/* mp_move */
+#endif				/* mp_move */
 
+/* Init multiprecision register r with short value. */
 void mp_init(register unitptr r, word16 value)
-	/* Init multiprecision register r with short value. */
 {	/* Note that mp_init doesn't extend sign bit for >32767 */
 
-	unitfill0( r, global_precision);
-	make_lsbptr(r,global_precision);
-	*post_higherunit(r) = value;
+    unitfill0(r, global_precision);
+    make_lsbptr(r, global_precision);
+    *post_higherunit(r) = value;
 #ifdef UNIT8
-	*post_higherunit(r) = value >> UNITSIZE;
+    *post_higherunit(r) = value >> UNITSIZE;
 #endif
-} /* mp_init */
-
+}				/* mp_init */
 
+/* Returns number of significant units in r */
 short significance(register unitptr r)
-	/* Returns number of significant units in r */
 {
-	register short precision;
-	precision = global_precision;
-	make_msbptr(r,precision);
-	do {
-		if (*post_lowerunit(r))
-			return precision;
-	} while (--precision);
-	return precision;
-} /* significance */
+    register short precision;
+    precision = global_precision;
+    make_msbptr(r, precision);
+    do {
+	if (*post_lowerunit(r))
+	    return precision;
+    } while (--precision);
+    return precision;
+}				/* significance */
 
 
 #ifndef unitfill0
-void unitfill0(unitptr r,word16 unitcount)
-	/* Zero-fill the unit buffer r. */
-{
-	while (unitcount--) *r++ = 0;
-} /* unitfill0 */
-#endif  /* unitfill0 */
-
-
-int mp_udiv(register unitptr remainder,register unitptr quotient,
-	register unitptr dividend,register unitptr divisor)
-	/* Unsigned divide, treats both operands as positive. */
-{
-	int bits;
-	short dprec;
-	register unit bitmask;
-
-	if (testeq(divisor,0))
-		return -1;	/* zero divisor means divide error */
-	mp_init0(remainder);
-	mp_init0(quotient);
-	/* normalize and compute number of bits in dividend first */
-	init_bitsniffer(dividend,bitmask,dprec,bits);
-	/* rescale quotient to same precision (dprec) as dividend */
-	rescale(quotient,global_precision,dprec);
-	make_msbptr(quotient,dprec);
-
-	while (bits--) {
-		mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
-		if (mp_compare(remainder,divisor) >= 0) {
-			mp_sub(remainder,divisor);
-			stuff_bit(quotient,bitmask);
-		}
-		bump_2bitsniffers(dividend,quotient,bitmask);
-	}
-	return 0;
-} /* mp_udiv */
-
 
-#ifdef UPTON_OR_SMITH
-#define RECIPMARGIN 0	/* extra margin bits used by mp_recip() */
+/* Zero-fill the unit buffer r. */
+void unitfill0(unitptr r, word16 unitcount)
+{
+    while (unitcount--)
+	*r++ = 0;
+}				/* unitfill0 */
+#endif				/* unitfill0 */
 
-int mp_recip(register unitptr quotient,register unitptr divisor)
-	/* Compute reciprocal (quotient) as 1/divisor.  Used by faster modmult. */
+/* Unsigned divide, treats both operands as positive. */
+int mp_udiv(register unitptr remainder, register unitptr quotient,
+	    register unitptr dividend, register unitptr divisor)
 {
-	int bits;
-	short qprec;
-	register unit bitmask;
-	unit remainder[MAX_UNIT_PRECISION];
-	if (testeq(divisor,0))
-		return -1;	/* zero divisor means divide error */
-	mp_init0(remainder);
-	mp_init0(quotient);
-
-	/* normalize and compute number of bits in quotient first */
-	bits = countbits(divisor) + RECIPMARGIN;
-	bitmask = bitmsk(bits);		/* bitmask within a single unit */
-	qprec = bits2units(bits+1);
-	mp_setbit(remainder,(bits-RECIPMARGIN)-1);
-	/* rescale quotient to precision of divisor + RECIPMARGIN bits */
-	rescale(quotient,global_precision,qprec);
-	make_msbptr(quotient,qprec);
-
-	while (bits--) {
-		mp_shift_left(remainder);
-		if (mp_compare(remainder,divisor) >= 0) {
-			mp_sub(remainder,divisor);
-			stuff_bit(quotient,bitmask);
-		}
-		bump_bitsniffer(quotient,bitmask);
-	}
-	mp_init0(remainder);	/* burn sensitive data left on stack */
-	return 0;
-} /* mp_recip */
-#endif	/* UPTON_OR_SMITH */
+    int bits;
+    short dprec;
+    register unit bitmask;
+
+    if (testeq(divisor, 0))
+	return -1;		/* zero divisor means divide error */
+    mp_init0(remainder);
+    mp_init0(quotient);
+    /* normalize and compute number of bits in dividend first */
+    init_bitsniffer(dividend, bitmask, dprec, bits);
+    /* rescale quotient to same precision (dprec) as dividend */
+    rescale(quotient, global_precision, dprec);
+    make_msbptr(quotient, dprec);
+
+    while (bits--) {
+	mp_rotate_left(remainder,
+		       (boolean) (sniff_bit(dividend, bitmask) != 0));
+	if (mp_compare(remainder, divisor) >= 0) {
+	    mp_sub(remainder, divisor);
+	    stuff_bit(quotient, bitmask);
+	}
+	bump_2bitsniffers(dividend, quotient, bitmask);
+    }
+    return 0;
+}				/* mp_udiv */
 
+#ifdef UPTON_OR_SMITH
 
-int mp_div(register unitptr remainder,register unitptr quotient,
-	register unitptr dividend,register unitptr divisor)
-	/* Signed divide, either or both operands may be negative. */
-{
-	boolean dvdsign,dsign;
-	int status;
-	dvdsign = (boolean)(mp_tstminus(dividend)!=0);
-	dsign = (boolean)(mp_tstminus(divisor)!=0);
-	if (dvdsign) mp_neg(dividend);
-	if (dsign) mp_neg(divisor);
-	status = mp_udiv(remainder,quotient,dividend,divisor);
-	if (dvdsign) mp_neg(dividend);		/* repair caller's dividend */
-	if (dsign) mp_neg(divisor);		/* repair caller's divisor */
-	if (status<0) return status;		/* divide error? */
-	if (dvdsign) mp_neg(remainder);
-	if (dvdsign ^ dsign) mp_neg(quotient);
-	return status;
-} /* mp_div */
+#define RECIPMARGIN 0		/* extra margin bits used by mp_recip() */
 
+/* Compute reciprocal (quotient) as 1/divisor.  Used by faster modmult. */
+int mp_recip(register unitptr quotient, register unitptr divisor)
+{
+    int bits;
+    short qprec;
+    register unit bitmask;
+    unit remainder[MAX_UNIT_PRECISION];
+    if (testeq(divisor, 0))
+	return -1;		/* zero divisor means divide error */
+    mp_init0(remainder);
+    mp_init0(quotient);
+
+    /* normalize and compute number of bits in quotient first */
+    bits = countbits(divisor) + RECIPMARGIN;
+    bitmask = bitmsk(bits);	/* bitmask within a single unit */
+    qprec = bits2units(bits + 1);
+    mp_setbit(remainder, (bits - RECIPMARGIN) - 1);
+    /* rescale quotient to precision of divisor + RECIPMARGIN bits */
+    rescale(quotient, global_precision, qprec);
+    make_msbptr(quotient, qprec);
+
+    while (bits--) {
+	mp_shift_left(remainder);
+	if (mp_compare(remainder, divisor) >= 0) {
+	    mp_sub(remainder, divisor);
+	    stuff_bit(quotient, bitmask);
+	}
+	bump_bitsniffer(quotient, bitmask);
+    }
+    mp_init0(remainder);	/* burn sensitive data left on stack */
+    return 0;
+}				/* mp_recip */
+#endif				/* UPTON_OR_SMITH */
+
+/* Signed divide, either or both operands may be negative. */
+int mp_div(register unitptr remainder, register unitptr quotient,
+	   register unitptr dividend, register unitptr divisor)
+{
+    boolean dvdsign, dsign;
+    int status;
+    dvdsign = (boolean) (mp_tstminus(dividend) != 0);
+    dsign = (boolean) (mp_tstminus(divisor) != 0);
+    if (dvdsign)
+	mp_neg(dividend);
+    if (dsign)
+	mp_neg(divisor);
+    status = mp_udiv(remainder, quotient, dividend, divisor);
+    if (dvdsign)
+	mp_neg(dividend);	/* repair caller's dividend */
+    if (dsign)
+	mp_neg(divisor);	/* repair caller's divisor */
+    if (status < 0)
+	return status;		/* divide error? */
+    if (dvdsign)
+	mp_neg(remainder);
+    if (dvdsign ^ dsign)
+	mp_neg(quotient);
+    return status;
+}				/* mp_div */
 
-word16 mp_shortdiv(register unitptr quotient,
-	register unitptr dividend,register word16 divisor)
 /*
  * This function does a fast divide and mod on a multiprecision dividend
  * using a short integer divisor returning a short integer remainder.
  * This is an unsigned divide.  It treats both operands as positive.
  * It is used mainly for faster printing of large numbers in base 10.
  */
+word16 mp_shortdiv(register unitptr quotient,
+		   register unitptr dividend, register word16 divisor)
 {
-	int bits;
-	short dprec;
-	register unit bitmask;
-	register word16 remainder;
-	if (!divisor)	/* if divisor == 0 */
-		return -1;	/* zero divisor means divide error */
-	remainder=0;
-	mp_init0(quotient);
-	/* normalize and compute number of bits in dividend first */
-	init_bitsniffer(dividend,bitmask,dprec,bits);
-	/* rescale quotient to same precision (dprec) as dividend */
-	rescale(quotient,global_precision,dprec);
-	make_msbptr(quotient,dprec);
-
-	while (bits--) {
-		remainder <<= 1;
-		if (sniff_bit(dividend,bitmask))
-			remainder++;
-		if (remainder >= divisor) {
-			remainder -= divisor;
-			stuff_bit(quotient,bitmask);
-		}
-		bump_2bitsniffers(dividend,quotient,bitmask);
-	}
-	return remainder;
-} /* mp_shortdiv */
-
+    int bits;
+    short dprec;
+    register unit bitmask;
+    register word16 remainder;
+    if (!divisor)		/* if divisor == 0 */
+	return -1;		/* zero divisor means divide error */
+    remainder = 0;
+    mp_init0(quotient);
+    /* normalize and compute number of bits in dividend first */
+    init_bitsniffer(dividend, bitmask, dprec, bits);
+    /* rescale quotient to same precision (dprec) as dividend */
+    rescale(quotient, global_precision, dprec);
+    make_msbptr(quotient, dprec);
+
+    while (bits--) {
+	remainder <<= 1;
+	if (sniff_bit(dividend, bitmask))
+	    remainder++;
+	if (remainder >= divisor) {
+	    remainder -= divisor;
+	    stuff_bit(quotient, bitmask);
+	}
+	bump_2bitsniffers(dividend, quotient, bitmask);
+    }
+    return remainder;
+}				/* mp_shortdiv */
 
-int mp_mod(register unitptr remainder,
-	register unitptr dividend,register unitptr divisor)
 /* Unsigned divide, treats both operands as positive. */
+int mp_mod(register unitptr remainder,
+	   register unitptr dividend, register unitptr divisor)
 {
-	int bits;
-	short dprec;
-	register unit bitmask;
-	if (testeq(divisor,0))
-		return -1;	/* zero divisor means divide error */
-	mp_init0(remainder);
-	/* normalize and compute number of bits in dividend first */
-	init_bitsniffer(dividend,bitmask,dprec,bits);
-
-	while (bits--) {
-		mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
-		msub(remainder,divisor);
-		bump_bitsniffer(dividend,bitmask);
-	}
-	return 0;
-} /* mp_mod */
+    int bits;
+    short dprec;
+    register unit bitmask;
+    if (testeq(divisor, 0))
+	return -1;		/* zero divisor means divide error */
+    mp_init0(remainder);
+    /* normalize and compute number of bits in dividend first */
+    init_bitsniffer(dividend, bitmask, dprec, bits);
+
+    while (bits--) {
+	mp_rotate_left(remainder,
+		       (boolean) (sniff_bit(dividend, bitmask) != 0));
+	msub(remainder, divisor);
+	bump_bitsniffer(dividend, bitmask);
+    }
+    return 0;
+}				/* mp_mod */
 
-
-word16 mp_shortmod(register unitptr dividend,register word16 divisor)
 /*
  * This function does a fast mod operation on a multiprecision dividend
  * using a short integer modulus returning a short integer remainder.
  * This is an unsigned divide.  It treats both operands as positive.
  * It is used mainly for fast sieve searches for large primes.
  */
-{	int bits;
-	short dprec;
-	register unit bitmask;
-	register word16 remainder;
-	if (!divisor)	/* if divisor == 0 */
-		return -1;	/* zero divisor means divide error */
-	remainder=0;
-	/* normalize and compute number of bits in dividend first */
-	init_bitsniffer(dividend,bitmask,dprec,bits);
-
-	while (bits--) {
-		remainder <<= 1;
-		if (sniff_bit(dividend,bitmask))
-			remainder++;
-		if (remainder >= divisor) remainder -= divisor;
-		bump_bitsniffer(dividend,bitmask);
-	}
-	return remainder;
-} /* mp_shortmod */
+word16 mp_shortmod(register unitptr dividend, register word16 divisor)
+{
+    int bits;
+    short dprec;
+    register unit bitmask;
+    register word16 remainder;
+    if (!divisor)		/* if divisor == 0 */
+	return -1;		/* zero divisor means divide error */
+    remainder = 0;
+    /* normalize and compute number of bits in dividend first */
+    init_bitsniffer(dividend, bitmask, dprec, bits);
+
+    while (bits--) {
+	remainder <<= 1;
+	if (sniff_bit(dividend, bitmask))
+	    remainder++;
+	if (remainder >= divisor)
+	    remainder -= divisor;
+	bump_bitsniffer(dividend, bitmask);
+    }
+    return remainder;
+}				/* mp_shortmod */
 
 
 
-#ifdef COMB_MULT	/* use faster "comb" multiply algorithm */
-	/* We are skipping this code because it has a bug... */
+#ifdef COMB_MULT		/* use faster "comb" multiply algorithm */
+/* We are skipping this code because it has a bug... */
 
-int mp_mult(register unitptr prod,
-	register unitptr multiplicand, register unitptr multiplier)
 /*
  * Computes multiprecision prod = multiplicand * multiplier
  *
@@ -562,93 +579,93 @@ int mp_mult(register unitptr prod,
  *
  * BUG NOTE: Possibly fixed.  Needs testing.
  */
+int mp_mult(register unitptr prod,
+	    register unitptr multiplicand, register unitptr multiplier)
 {
-	int bits;
-	register unit bitmask;
-	unitptr product, mplier, temp;
-	short mprec,mprec2;
-	unit mplicand[MAX_UNIT_PRECISION];
-
-	/* better clear full width--double precision */
-	mp_init(prod+tohigher(global_precision),0);
-
-	if (testeq(multiplicand,0))
-		return 0;	/* zero multiplicand means zero product */
-
-	mp_move(mplicand,multiplicand);	/* save it from damage */
-
-	normalize(multiplier,mprec);
-	if (!mprec)
-		return 0;
-
-	make_lsbptr(multiplier,mprec);
-	bitmask = 1;	/* start scan at LSB of multiplier */
-
-	do {	/* UNITSIZE times */
-		/* do for bits 0-15 */
-		product = prod;
-		mplier = multiplier;
-		mprec2 = mprec;
-		while (mprec2--) {	/* do for each word in multiplier */
-		
-			if (sniff_bit(mplier,bitmask)) {
-				if (mp_addc(product,multiplicand,0))	/* ripple carry */
-				{	/* After 1st time thru, this is rarely encountered. */
-					temp = msbptr(product,global_precision);
-					pre_higherunit(temp);
-					/* temp now points to LSU of carry region. */
-					unmake_lsbptr(temp,global_precision);
-					mp_inc(temp);
-				} /* ripple carry */
-			}
-			pre_higherunit(mplier);
-			pre_higherunit(product);
-		}
-		if (!(bitmask <<= 1))
-			break;
-		mp_shift_left(multiplicand);
+    int bits;
+    register unit bitmask;
+    unitptr product, mplier, temp;
+    short mprec, mprec2;
+    unit mplicand[MAX_UNIT_PRECISION];
 
-	} while (TRUE);
+    /* better clear full width--double precision */
+    mp_init(prod + tohigher(global_precision), 0);
 
-	mp_move(multiplicand,mplicand);	/* recover */
+    if (testeq(multiplicand, 0))
+	return 0;		/* zero multiplicand means zero product */
 
-	return 0;	/* normal return */
-} /* mp_mult */
+    mp_move(mplicand, multiplicand);	/* save it from damage */
 
-#endif	/* COMB_MULT */
+    normalize(multiplier, mprec);
+    if (!mprec)
+	return 0;
 
+    make_lsbptr(multiplier, mprec);
+    bitmask = 1;		/* start scan at LSB of multiplier */
 
-/*	Because the "comb" multiply has a bug, we will use the slower
-	Russian peasant multiply instead.  Fortunately, the mp_mult
-	function is not called from any time-critical code.
-*/
+    do {			/* UNITSIZE times */
+	/* do for bits 0-15 */
+	product = prod;
+	mplier = multiplier;
+	mprec2 = mprec;
+	while (mprec2--) {	/* do for each word in multiplier */
+
+	    if (sniff_bit(mplier, bitmask)) {
+		if (mp_addc(product, multiplicand, 0)) {  /* ripple carry */
+		  /* After 1st time thru, this is rarely encountered. */
+		    temp = msbptr(product, global_precision);
+		    pre_higherunit(temp);
+		    /* temp now points to LSU of carry region. */
+		    unmake_lsbptr(temp, global_precision);
+		    mp_inc(temp);
+		}		/* ripple carry */
+	    }
+	    pre_higherunit(mplier);
+	    pre_higherunit(product);
+	}
+	if (!(bitmask <<= 1))
+	    break;
+	mp_shift_left(multiplicand);
+
+    } while (TRUE);
+
+    mp_move(multiplicand, mplicand);	/* recover */
+
+    return 0;			/* normal return */
+}				/* mp_mult */
+
+#endif				/* COMB_MULT */
+
+
+/* Because the "comb" multiply has a bug, we will use the slower
+   Russian peasant multiply instead.  Fortunately, the mp_mult
+   function is not called from any time-critical code.
+ */
 
-int mp_mult(register unitptr prod,
-	register unitptr multiplicand,register unitptr multiplier)
 /*
  * Computes multiprecision prod = multiplicand * multiplier
  * Uses "Russian peasant" multiply algorithm.
  */
+int mp_mult(register unitptr prod,
+	    register unitptr multiplicand, register unitptr multiplier)
 {
-	int bits;
-	register unit bitmask;
-	short mprec;
-	mp_init(prod,0);
-	if (testeq(multiplicand,0))
-		return 0;	/* zero multiplicand means zero product */
-	/* normalize and compute number of bits in multiplier first */
-	init_bitsniffer(multiplier,bitmask,mprec,bits);
-
-	while (bits--) {
-		mp_shift_left(prod);
-		if (sniff_bit(multiplier,bitmask))
-			mp_add(prod,multiplicand);
-		bump_bitsniffer(multiplier,bitmask);
-	}
-	return 0;
-} /* mp_mult */
-
-
+    int bits;
+    register unit bitmask;
+    short mprec;
+    mp_init(prod, 0);
+    if (testeq(multiplicand, 0))
+	return 0;		/* zero multiplicand means zero product */
+    /* normalize and compute number of bits in multiplier first */
+    init_bitsniffer(multiplier, bitmask, mprec, bits);
+
+    while (bits--) {
+	mp_shift_left(prod);
+	if (sniff_bit(multiplier, bitmask))
+	    mp_add(prod, multiplicand);
+	bump_bitsniffer(multiplier, bitmask);
+    }
+    return 0;
+}				/* mp_mult */
 
 /*
  * mp_modmult computes a multiprecision multiply combined with a
@@ -678,7 +695,7 @@ int mp_mult(register unitptr prod,
 /* "number of modmults" counters, used for performance studies. */
 static unsigned int tally_modmults = 0;
 static unsigned int tally_modsquares = 0;
-#endif	/* COUNTMULTS */
+#endif				/* COUNTMULTS */
 
 
 #ifdef PEASANT
@@ -686,48 +703,46 @@ static unsigned int tally_modsquares = 0
 
 static unitptr pmodulus = 0;	/* used only by mp_modmult */
 
-int stage_peasant_modulus(unitptr n)
 /*
  * Must pass modulus to stage_modulus before calling modmult.
  * Assumes that global_precision has already been adjusted to the
  * size of the modulus, plus SLOP_BITS.
  */
+int stage_peasant_modulus(unitptr n)
 {	/* For this simple version of modmult, just copy unit pointer. */
-	pmodulus = n;
-	return 0;	/* normal return */
-} /* stage_peasant_modulus */
-
+    pmodulus = n;
+    return 0;			/* normal return */
+}				/* stage_peasant_modulus */
 
-int peasant_modmult(register unitptr prod,
-	unitptr multiplicand,register unitptr multiplier)
 /* "Russian peasant" multiply algorithm, combined with a modulo
  * operation.  This is a simple naive replacement for Merritt's
  * faster modmult algorithm.  References global unitptr "modulus".
  * Computes:  prod = (multiplicand*multiplier) mod modulus
  * WARNING: All the arguments must be less than the modulus!
  */
+int peasant_modmult(register unitptr prod,
+		    unitptr multiplicand, register unitptr multiplier)
 {
-	int bits;
-	register unit bitmask;
-	short mprec;
-	mp_init(prod,0);
-/*	if (testeq(multiplicand,0))
-		return 0; */	/* zero multiplicand means zero product */
-	/* normalize and compute number of bits in multiplier first */
-	init_bitsniffer(multiplier,bitmask,mprec,bits);
-
-	while (bits--) {
-		mp_shift_left(prod);
-		msub(prod,pmodulus);	/* turns mult into modmult */
-		if (sniff_bit(multiplier,bitmask)) {
-			mp_add(prod,multiplicand);
-			msub(prod,pmodulus);	/* turns mult into modmult */
-		}
-		bump_bitsniffer(multiplier,bitmask);
-	}
-	return 0;
-} /* peasant_modmult */
-
+    int bits;
+    register unit bitmask;
+    short mprec;
+    mp_init(prod, 0);
+/*      if (testeq(multiplicand,0))
+       return 0; *//* zero multiplicand means zero product */
+    /* normalize and compute number of bits in multiplier first */
+    init_bitsniffer(multiplier, bitmask, mprec, bits);
+
+    while (bits--) {
+	mp_shift_left(prod);
+	msub(prod, pmodulus);	/* turns mult into modmult */
+	if (sniff_bit(multiplier, bitmask)) {
+	    mp_add(prod, multiplicand);
+	    msub(prod, pmodulus);	/* turns mult into modmult */
+	}
+	bump_bitsniffer(multiplier, bitmask);
+    }
+    return 0;
+}				/* peasant_modmult */
 
 /*
  * If we are using a version of mp_modmult that uses internal tables
@@ -735,61 +750,63 @@ int peasant_modmult(register unitptr pro
  * This is so that no sensitive data is left in memory after the program
  * exits.  The Russian peasant method doesn't use any such tables.
  */
-void peasant_burn(void)
+
 /*
  * Alias for modmult_burn, called only from mp_modexp().  Destroys
  * internal modmult tables.  This version does nothing because no
  * tables are used by the Russian peasant modmult.
  */
-{ } /* peasant_burn */
+void peasant_burn(void)
+{
+}				/* peasant_burn */
 
-#endif	/* PEASANT */
+#endif				/* PEASANT */
 
 
 #ifdef MERRITT
 /*=========================================================================*/
 /*
-	This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ.
-	Also refined by Zhahai Stewart to reduce the number of subtracts.
-    Modified by Raymond Brand to reduce the number of SLOP_BITS by 1.
-	It performs a multiply combined with a modulo operation, without
-	going into "double precision".  It is faster than the Russian peasant
-	method, and still works well on machines that lack a fast hardware
-	multiply instruction.
-*/
+   This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ.
+   Also refined by Zhahai Stewart to reduce the number of subtracts.
+   Modified by Raymond Brand to reduce the number of SLOP_BITS by 1.
+   It performs a multiply combined with a modulo operation, without
+   going into "double precision".  It is faster than the Russian peasant
+   method, and still works well on machines that lack a fast hardware
+   multiply instruction.
+ */
 
-/*	The following support functions, macros, and data structures
-	are used only by Merritt's modmult algorithm... */
+/* The following support functions, macros, and data structures
+   are used only by Merritt's modmult algorithm... */
 
+/*      Shift r1 1 whole unit to the left.  Used only by modmult function. */
 static void mp_lshift_unit(register unitptr r1)
-/*	Shift r1 1 whole unit to the left.  Used only by modmult function. */
 {
-	register short precision;
-	register unitptr r2;
-	precision = global_precision;
-	make_msbptr(r1,precision);
-	r2 = r1;
-	while (--precision)
-		*post_lowerunit(r1) = *pre_lowerunit(r2);
-	*r1 = 0;
-} /* mp_lshift_unit */
-
+    register short precision;
+    register unitptr r2;
+    precision = global_precision;
+    make_msbptr(r1, precision);
+    r2 = r1;
+    while (--precision)
+	*post_lowerunit(r1) = *pre_lowerunit(r2);
+    *r1 = 0;
+}				/* mp_lshift_unit */
 
 /* moduli_buf contains shifted images of the modulus, set by stage_modulus */
-static unit moduli_buf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0};
-static unitptr moduli[UNITSIZE] = /* contains pointers into moduli_buf */
-{	 0,              moduli_buf[ 0], moduli_buf[ 1], moduli_buf[ 2],
-	 moduli_buf[ 3], moduli_buf[ 4], moduli_buf[ 5], moduli_buf[ 6]
+static unit moduli_buf[UNITSIZE - 1][MAX_UNIT_PRECISION] =
+{0};
+static unitptr moduli[UNITSIZE] =	/* contains pointers into moduli_buf */
+{0, moduli_buf[0], moduli_buf[1], moduli_buf[2],
+ moduli_buf[3], moduli_buf[4], moduli_buf[5], moduli_buf[6]
 #ifndef UNIT8
-	,moduli_buf[ 7] ,moduli_buf[ 8], moduli_buf[ 9], moduli_buf[10],
-	 moduli_buf[11], moduli_buf[12], moduli_buf[13], moduli_buf[14]
-#ifndef UNIT16	/* and not UNIT8 */
-	,moduli_buf[15] ,moduli_buf[16], moduli_buf[17], moduli_buf[18],
-	 moduli_buf[19], moduli_buf[20], moduli_buf[21], moduli_buf[22],
-	 moduli_buf[23], moduli_buf[24], moduli_buf[25], moduli_buf[26],
-	 moduli_buf[27], moduli_buf[28], moduli_buf[29], moduli_buf[30]
-#endif	/* UNIT16 and UNIT8 not defined */
-#endif	/* UNIT8 not defined */
+ ,moduli_buf[7], moduli_buf[8], moduli_buf[9], moduli_buf[10],
+ moduli_buf[11], moduli_buf[12], moduli_buf[13], moduli_buf[14]
+#ifndef UNIT16			/* and not UNIT8 */
+ ,moduli_buf[15], moduli_buf[16], moduli_buf[17], moduli_buf[18],
+ moduli_buf[19], moduli_buf[20], moduli_buf[21], moduli_buf[22],
+ moduli_buf[23], moduli_buf[24], moduli_buf[25], moduli_buf[26],
+ moduli_buf[27], moduli_buf[28], moduli_buf[29], moduli_buf[30]
+#endif				/* UNIT16 and UNIT8 not defined */
+#endif				/* UNIT8 not defined */
 };
 
 /*
@@ -797,8 +814,10 @@ static unitptr moduli[UNITSIZE] = /* con
  * with the most significant unit and next-to-most significant unit
  * of the preshifted images of the modulus.
  */
-static unit msu_moduli[UNITSIZE] = {0}; /* most signif. unit */
-static unit nmsu_moduli[UNITSIZE] = {0}; /* next-most signif. unit */
+static unit msu_moduli[UNITSIZE] =
+{0};				/* most signif. unit */
+static unit nmsu_moduli[UNITSIZE] =
+{0};				/* next-most signif. unit */
 
 /*
  * mpdbuf contains preshifted images of the multiplicand, mod n.
@@ -808,27 +827,25 @@ static unit nmsu_moduli[UNITSIZE] = {0};
  * end of mp_modexp.  This is so that no sensitive data is left in
  * memory after the program exits.
  */
-static unit mpdbuf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0};
-
+static unit mpdbuf[UNITSIZE - 1][MAX_UNIT_PRECISION] =
+{0};
 
-static void stage_mp_images(unitptr images[UNITSIZE],unitptr r)
 /*
  * Computes UNITSIZE images of r, each shifted left 1 more bit.
  * Used only by modmult function.
  */
+static void stage_mp_images(unitptr images[UNITSIZE], unitptr r)
 {
-	short int i;
+    short int i;
 
-	images[0] = r;	/* no need to move the first image, just copy ptr */
-	for (i=1; i<UNITSIZE; i++) {
-		mp_move(images[i],images[i-1]);
-		mp_shift_left(images[i]);
-		msub(images[i],moduli[0]);
-	}
-} /* stage_mp_images */
+    images[0] = r;	/* no need to move the first image, just copy ptr */
+    for (i = 1; i < UNITSIZE; i++) {
+	mp_move(images[i], images[i - 1]);
+	mp_shift_left(images[i]);
+	msub(images[i], moduli[0]);
+    }
+}				/* stage_mp_images */
 
-
-int stage_merritt_modulus(unitptr n)
 /*
  * Computes UNITSIZE images of modulus, each shifted left 1 more bit.
  * Before calling mp_modmult, you must first stage the moduli images by
@@ -836,36 +853,36 @@ int stage_merritt_modulus(unitptr n)
  * Assumes that global_precision has already been adjusted to the
  * size of the modulus, plus SLOP_BITS.
  */
+int stage_merritt_modulus(unitptr n)
 {
-	short int i;
-	unitptr msu;	/* ptr to most significant unit, for faster msubs */
+    short int i;
+    unitptr msu;	/* ptr to most significant unit, for faster msubs */
 
-	moduli[0] = n;	/* no need to move the first image, just copy ptr */
+    moduli[0] = n;	/* no need to move the first image, just copy ptr */
 
-	/* used by optimized msubs macro... */
-	msu = msbptr(n,global_precision);	/* needed by msubs */
-	msu_moduli[0] = *post_lowerunit(msu);
-	nmsu_moduli[0] = *msu;
-
-	for (i=1; i<UNITSIZE; i++) {
-		mp_move(moduli[i],moduli[i-1]);
-		mp_shift_left(moduli[i]);
-
-		/* used by optimized msubs macro... */
-		msu = msbptr(moduli[i],global_precision);	/* needed by msubs */
-		msu_moduli[i] = *post_lowerunit(msu);	/* for faster msubs */
-		nmsu_moduli[i] = *msu;
-	}
-	return 0;	/* normal return */
-} /* stage_merritt_modulus */
+    /* used by optimized msubs macro... */
+    msu = msbptr(n, global_precision);	/* needed by msubs */
+    msu_moduli[0] = *post_lowerunit(msu);
+    nmsu_moduli[0] = *msu;
+
+    for (i = 1; i < UNITSIZE; i++) {
+	mp_move(moduli[i], moduli[i - 1]);
+	mp_shift_left(moduli[i]);
 
+	/* used by optimized msubs macro... */
+	msu = msbptr(moduli[i], global_precision);	/* needed by msubs */
+	msu_moduli[i] = *post_lowerunit(msu);	/* for faster msubs */
+	nmsu_moduli[i] = *msu;
+    }
+    return 0;			/* normal return */
+}				/* stage_merritt_modulus */
 
 /* The following macros, sniffadd and msubs, are used by modmult... */
 #define sniffadd(i) if (*multiplier & power_of_2(i))  mp_add(prod,mpd[i])
 
 /* Unoptimized msubs macro (msubs0) follows... */
 /* #define msubs0(i) msub(prod,moduli[i])
-*/
+ */
 
 /*
  * To optimize msubs, compare the most significant units of the
@@ -878,10 +895,10 @@ int stage_merritt_modulus(unitptr n)
 
 /* Partially-optimized msubs macro (msubs1) follows... */
 /* #define msubs1(i) if ( \
-  ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \
-  (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \
-  ) mp_sub(prod,moduli[i])
-*/
+   ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \
+   (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \
+   ) mp_sub(prod,moduli[i])
+ */
 
 /* Fully-optimized msubs macro (msubs2) follows... */
 #define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \
@@ -889,9 +906,6 @@ int stage_merritt_modulus(unitptr n)
  (*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \
  mp_sub(prod,moduli[i])
 
-
-int merritt_modmult(register unitptr prod,
-	unitptr multiplicand,register unitptr multiplier)
 /*
  * Performs combined multiply/modulo operation.
  * Computes:  prod = (multiplicand*multiplier) mod modulus
@@ -899,233 +913,237 @@ int merritt_modmult(register unitptr pro
  * Assumes the modulus has been predefined by first calling
  * stage_modulus.
  */
+int merritt_modmult(register unitptr prod,
+		    unitptr multiplicand, register unitptr multiplier)
 {
-	/* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro...*/
-	register signedunit p_m;
-	register unitptr msu_prod;	/* ptr to most significant unit of product */
-	register unitptr nmsu_prod;	/* next-most signif. unit of product */
-	short mprec;		/* precision of multiplier, in units */
-	/*	Array mpd contains a list of pointers to preshifted images of
-		the multiplicand: */
-	static unitptr mpd[UNITSIZE] = {
-		 0,          mpdbuf[ 0], mpdbuf[ 1], mpdbuf[ 2],
-		 mpdbuf[ 3], mpdbuf[ 4], mpdbuf[ 5], mpdbuf[ 6]
+    /* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro... */
+    register signedunit p_m;
+    register unitptr msu_prod;	/* ptr to most significant unit of product */
+    register unitptr nmsu_prod;	/* next-most signif. unit of product */
+    short mprec;		/* precision of multiplier, in units */
+    /*      Array mpd contains a list of pointers to preshifted images of
+       the multiplicand: */
+    static unitptr mpd[UNITSIZE] =
+    {
+	0, mpdbuf[0], mpdbuf[1], mpdbuf[2],
+	mpdbuf[3], mpdbuf[4], mpdbuf[5], mpdbuf[6]
 #ifndef UNIT8
-		,mpdbuf[ 7], mpdbuf[ 8], mpdbuf[ 9], mpdbuf[10],
-		 mpdbuf[11], mpdbuf[12], mpdbuf[13], mpdbuf[14]
-#ifndef UNIT16	/* and not UNIT8 */
-		,mpdbuf[15], mpdbuf[16], mpdbuf[17], mpdbuf[18],
-		 mpdbuf[19], mpdbuf[20], mpdbuf[21], mpdbuf[22],
-		 mpdbuf[23], mpdbuf[24], mpdbuf[25], mpdbuf[26],
-		 mpdbuf[27], mpdbuf[28], mpdbuf[29], mpdbuf[30]
-#endif	/* UNIT16 and UNIT8 not defined */
-#endif	/* UNIT8 not defined */
-	};
-
-	/* Compute preshifted images of multiplicand, mod n: */
-	stage_mp_images(mpd,multiplicand);
-
-	/* To optimize msubs, set up msu_prod and nmsu_prod: */
-	msu_prod = msbptr(prod,global_precision); /* Get ptr to MSU of prod */
-	nmsu_prod = msu_prod;
-	post_lowerunit(nmsu_prod); /* Get next-MSU of prod */
+	,mpdbuf[7], mpdbuf[8], mpdbuf[9], mpdbuf[10],
+	mpdbuf[11], mpdbuf[12], mpdbuf[13], mpdbuf[14]
+#ifndef UNIT16			/* and not UNIT8 */
+	,mpdbuf[15], mpdbuf[16], mpdbuf[17], mpdbuf[18],
+	mpdbuf[19], mpdbuf[20], mpdbuf[21], mpdbuf[22],
+	mpdbuf[23], mpdbuf[24], mpdbuf[25], mpdbuf[26],
+	mpdbuf[27], mpdbuf[28], mpdbuf[29], mpdbuf[30]
+#endif				/* UNIT16 and UNIT8 not defined */
+#endif				/* UNIT8 not defined */
+    };
+
+    /* Compute preshifted images of multiplicand, mod n: */
+    stage_mp_images(mpd, multiplicand);
+
+    /* To optimize msubs, set up msu_prod and nmsu_prod: */
+    msu_prod = msbptr(prod, global_precision);	/* Get ptr to MSU of prod */
+    nmsu_prod = msu_prod;
+    post_lowerunit(nmsu_prod);	/* Get next-MSU of prod */
+
+    /*
+     * To understand this algorithm, it would be helpful to first
+     * study the conventional Russian peasant modmult algorithm.
+     * This one does about the same thing as Russian peasant, but
+     * is organized differently to save some steps.  It loops
+     * through the multiplier a word (unit) at a time, instead of
+     * a bit at a time.  It word-shifts the product instead of
+     * bit-shifting it, so it should be faster.  It also does about
+     * half as many subtracts as Russian peasant.
+     */
+
+    mp_init(prod, 0);		/* Initialize product to 0. */
+
+    /*
+     * The way mp_modmult is actually used in cryptographic
+     * applications, there will NEVER be a zero multiplier or
+     * multiplicand.  So there is no need to optimize for that
+     * condition.
+     */
+/*      if (testeq(multiplicand,0))
+       return 0; *//* zero multiplicand means zero product */
+    /* Normalize and compute number of units in multiplier first: */
+    normalize(multiplier, mprec);
+    if (mprec == 0)		/* if precision of multiplier is 0 */
+	return 0;		/* zero multiplier means zero product */
+    make_msbptr(multiplier, mprec);	/* start at MSU of multiplier */
 
+    while (mprec--) { /* Loop for the number of units in the multiplier */
 	/*
-	 * To understand this algorithm, it would be helpful to first
-	 * study the conventional Russian peasant modmult algorithm.
-	 * This one does about the same thing as Russian peasant, but
-	 * is organized differently to save some steps.  It loops
-	 * through the multiplier a word (unit) at a time, instead of
-	 * a bit at a time.  It word-shifts the product instead of
-	 * bit-shifting it, so it should be faster.  It also does about
-	 * half as many subtracts as Russian peasant.
+	 * Shift the product one whole unit to the left.
+	 * This is part of the multiply phase of modmult.
 	 */
 
-	mp_init(prod,0);	/* Initialize product to 0. */
+	mp_lshift_unit(prod);
 
-	/*
-	 * The way mp_modmult is actually used in cryptographic
-	 * applications, there will NEVER be a zero multiplier or
-	 * multiplicand.  So there is no need to optimize for that
-	 * condition.
+	/* 
+	 * The product may have grown by as many as UNITSIZE
+	 * bits.  That's why we have global_precision set to the
+	 * size of the modulus plus UNITSIZE slop bits.
+	 * Now reduce the product back down by conditionally
+	 * subtracting the preshifted images of the modulus.
+	 * This is part of the modulo reduction phase of modmult. 
+	 * The following loop is unrolled for speed, using macros...
+
+	 for (i=UNITSIZE-1; i>=LOG_UNITSIZE; i--)
+	 if (mp_compare(prod,moduli[i]) >= 0)
+	 mp_sub(prod,moduli[i]);
 	 */
-/*	if (testeq(multiplicand,0))
-		return 0; */	/* zero multiplicand means zero product */
-	/* Normalize and compute number of units in multiplier first: */
-	normalize(multiplier,mprec);
-	if (mprec==0)	/* if precision of multiplier is 0 */
-		return 0;	/* zero multiplier means zero product */
-	make_msbptr(multiplier,mprec); /* start at MSU of multiplier */
-
-	while (mprec--) { /* Loop for the number of units in the multiplier */
-		/*
-		 * Shift the product one whole unit to the left.
-		 * This is part of the multiply phase of modmult.
-		 */
-
-		mp_lshift_unit(prod);
-
-		/* 
-		 * The product may have grown by as many as UNITSIZE
-		 * bits.  That's why we have global_precision set to the
-		 * size of the modulus plus UNITSIZE slop bits.
-		 * Now reduce the product back down by conditionally
-		 * subtracting the preshifted images of the modulus.
-		 * This is part of the modulo reduction phase of modmult. 
-		 * The following loop is unrolled for speed, using macros...
-
-		for (i=UNITSIZE-1; i>=LOG_UNITSIZE; i--)
-			if (mp_compare(prod,moduli[i]) >= 0)
-				mp_sub(prod,moduli[i]);
-		 */
 
 #ifndef UNIT8
-#ifndef UNIT16  /* and not UNIT8 */
-		msubs(31);
-		msubs(30);
-		msubs(29);
-		msubs(28);
-		msubs(27);
-		msubs(26);
-		msubs(25);
-		msubs(24);
-		msubs(23);
-		msubs(22);
-		msubs(21);
-		msubs(20);
-		msubs(19);
-		msubs(18);
-		msubs(17);
-		msubs(16);
-#endif  /* not UNIT16 and not UNIT8 */
-		msubs(15);
-		msubs(14);
-		msubs(13);
-		msubs(12);
-		msubs(11);
-		msubs(10);
-		msubs(9);
-		msubs(8);
-#endif  /* not UNIT8 */
-		msubs(7);
-		msubs(6);
-		msubs(5);
+#ifndef UNIT16			/* and not UNIT8 */
+	msubs(31);
+	msubs(30);
+	msubs(29);
+	msubs(28);
+	msubs(27);
+	msubs(26);
+	msubs(25);
+	msubs(24);
+	msubs(23);
+	msubs(22);
+	msubs(21);
+	msubs(20);
+	msubs(19);
+	msubs(18);
+	msubs(17);
+	msubs(16);
+#endif				/* not UNIT16 and not UNIT8 */
+	msubs(15);
+	msubs(14);
+	msubs(13);
+	msubs(12);
+	msubs(11);
+	msubs(10);
+	msubs(9);
+	msubs(8);
+#endif				/* not UNIT8 */
+	msubs(7);
+	msubs(6);
+	msubs(5);
 #ifndef UNIT32
-		msubs(4);
+	msubs(4);
 #ifndef UNIT16
-		msubs(3);
+	msubs(3);
 #endif
 #endif
 
-		/*
-		 * Sniff each bit in the current unit of the multiplier, 
-		 * and conditionally add the corresponding preshifted 
-		 * image of the multiplicand to the product.
-		 * This is also part of the multiply phase of modmult.
-		 *
-		 * The following loop is unrolled for speed, using macros...
-
-		for (i=UNITSIZE-1; i>=0; i--)
-		   if (*multiplier & power_of_2(i))
-				mp_add(prod,mpd[i]);
-		 */
+	/*
+	 * Sniff each bit in the current unit of the multiplier, 
+	 * and conditionally add the corresponding preshifted 
+	 * image of the multiplicand to the product.
+	 * This is also part of the multiply phase of modmult.
+	 *
+	 * The following loop is unrolled for speed, using macros...
+
+	 for (i=UNITSIZE-1; i>=0; i--)
+	 if (*multiplier & power_of_2(i))
+	 mp_add(prod,mpd[i]);
+	 */
 #ifndef UNIT8
-#ifndef UNIT16	/* and not UNIT8 */
-		sniffadd(31);
-		sniffadd(30);
-		sniffadd(29);
-		sniffadd(28);
-		sniffadd(27);
-		sniffadd(26);
-		sniffadd(25);
-		sniffadd(24);
-		sniffadd(23);
-		sniffadd(22);
-		sniffadd(21);
-		sniffadd(20);
-		sniffadd(19);
-		sniffadd(18);
-		sniffadd(17);
-		sniffadd(16);
-#endif	/* not UNIT16 and not UNIT8 */
-		sniffadd(15);
-		sniffadd(14);
-		sniffadd(13);
-		sniffadd(12);
-		sniffadd(11);
-		sniffadd(10);
-		sniffadd(9);
-		sniffadd(8);
-#endif	/* not UNIT8 */
-		sniffadd(7);
-		sniffadd(6);
-		sniffadd(5);
-		sniffadd(4);
-		sniffadd(3);
-		sniffadd(2);
-		sniffadd(1);
-		sniffadd(0);
-
-		/*
-		 * The product may have grown by as many as LOG_UNITSIZE+1
-		 * bits.
-		 * Now reduce the product back down by conditionally 
-		 * subtracting LOG_UNITSIZE+1 preshifted images of the 
-		 * modulus.  This is the modulo reduction phase of modmult.
-		 *
-		 * The following loop is unrolled for speed, using macros...
-
-		for (i=LOG_UNITSIZE; i>=0; i--)
-			if (mp_compare(prod,moduli[i]) >= 0) 
-				mp_sub(prod,moduli[i]); 
-		 */
+#ifndef UNIT16			/* and not UNIT8 */
+	sniffadd(31);
+	sniffadd(30);
+	sniffadd(29);
+	sniffadd(28);
+	sniffadd(27);
+	sniffadd(26);
+	sniffadd(25);
+	sniffadd(24);
+	sniffadd(23);
+	sniffadd(22);
+	sniffadd(21);
+	sniffadd(20);
+	sniffadd(19);
+	sniffadd(18);
+	sniffadd(17);
+	sniffadd(16);
+#endif				/* not UNIT16 and not UNIT8 */
+	sniffadd(15);
+	sniffadd(14);
+	sniffadd(13);
+	sniffadd(12);
+	sniffadd(11);
+	sniffadd(10);
+	sniffadd(9);
+	sniffadd(8);
+#endif				/* not UNIT8 */
+	sniffadd(7);
+	sniffadd(6);
+	sniffadd(5);
+	sniffadd(4);
+	sniffadd(3);
+	sniffadd(2);
+	sniffadd(1);
+	sniffadd(0);
+
+	/*
+	 * The product may have grown by as many as LOG_UNITSIZE+1
+	 * bits.
+	 * Now reduce the product back down by conditionally 
+	 * subtracting LOG_UNITSIZE+1 preshifted images of the 
+	 * modulus.  This is the modulo reduction phase of modmult.
+	 *
+	 * The following loop is unrolled for speed, using macros...
+
+	 for (i=LOG_UNITSIZE; i>=0; i--)
+	 if (mp_compare(prod,moduli[i]) >= 0) 
+	 mp_sub(prod,moduli[i]); 
+	 */
 
 #ifndef UNIT8
 #ifndef UNIT16
-		msubs(5); 
+	msubs(5);
 #endif
-		msubs(4);
+	msubs(4);
 #endif
-		msubs(3); 
-		msubs(2); 
-		msubs(1); 
-		msubs(0);
+	msubs(3);
+	msubs(2);
+	msubs(1);
+	msubs(0);
 
-		/* Bump pointer to next lower unit of multiplier: */
-		post_lowerunit(multiplier);
+	/* Bump pointer to next lower unit of multiplier: */
+	post_lowerunit(multiplier);
 
-	} /* Loop for the number of units in the multiplier */
+    }				/* Loop for the number of units in the
+				   multiplier */
 
-	return 0;	/* normal return */
-
-} /* merritt_modmult */
+    return 0;			/* normal return */
 
+}				/* merritt_modmult */
 
 #undef msubs
 #undef sniffadd
 
-
 /*
  * Merritt's mp_modmult function leaves some internal tables in memory,
  * so we have to call modmult_burn() at the end of mp_modexp.
  * This is so that no cryptographically sensitive data is left in memory
  * after the program exits.
  */
-void merritt_burn(void)
+
 /* Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */
+void merritt_burn(void)
 {
-	unitfill0(&(mpdbuf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION);
-	unitfill0(&(moduli_buf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION);
-	unitfill0(msu_moduli,UNITSIZE);
-	unitfill0(nmsu_moduli,UNITSIZE);
-} /* merritt_burn() */
+    unitfill0(&(mpdbuf[0][0]), (UNITSIZE - 1) * MAX_UNIT_PRECISION);
+    unitfill0(&(moduli_buf[0][0]), (UNITSIZE - 1) * MAX_UNIT_PRECISION);
+    unitfill0(msu_moduli, UNITSIZE);
+    unitfill0(nmsu_moduli, UNITSIZE);
+}				/* merritt_burn() */
 
 /******* end of Merritt's MODMULT stuff. *******/
 /*=========================================================================*/
-#endif	/* MERRITT */
+#endif				/* MERRITT */
 
 
-#ifdef UPTON_OR_SMITH	/* Used by Upton's and Smith's modmult algorithms */
+#ifdef UPTON_OR_SMITH		/* Used by Upton's and Smith's
+				   modmult algorithms */
 
 /*
  * Jimmy Upton's multiprecision modmult algorithm in C.
@@ -1135,7 +1153,7 @@ void merritt_burn(void)
  * are used only by Upton's modmult algorithm...
  */
 
-short munit_prec;	/* global_precision expressed in MULTUNITs */
+short munit_prec;		/* global_precision expressed in MULTUNITs */
 
 /*
  * Note that since the SPARC CPU has no hardware integer multiply
@@ -1161,26 +1179,27 @@ short munit_prec;	/* global_precision ex
 /*
  * Note that this has been modified from the previous version to allow
  * better support for Smith's modmult:
- * 	The final carry bit is added to the existing product
- * 	array, rather than simply stored.
-*/
+ *      The final carry bit is added to the existing product
+ *      array, rather than simply stored.
+ */
 
 #ifndef mp_smul
-void mp_smul (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier)
+void mp_smul(MULTUNIT * prod, MULTUNIT * multiplicand, MULTUNIT multiplier)
 {
-	short i;
-	unsigned long p, carry;
+    short i;
+    unsigned long p, carry;
+
+    carry = 0;
+    for (i = 0; i < munit_prec; ++i) {
+	p = (unsigned long) multiplier **post_higherunit(multiplicand);
+	p += *prod + carry;
+	*post_higherunit(prod) = (MULTUNIT) p;
+	carry = p >> MULTUNITSIZE;
+    }
+    /* Add carry to the next higher word of product / dividend */
+    *prod += (MULTUNIT) carry;
+}				/* mp_smul */
 
-	carry = 0;
-	for (i=0; i<munit_prec; ++i) {
-		p = (unsigned long)multiplier * *post_higherunit(multiplicand);
-		p += *prod + carry;
-		*post_higherunit(prod) = (MULTUNIT)p;
-		carry = p >> MULTUNITSIZE;
-	}
-	/* Add carry to the next higher word of product / dividend */
-	*prod += (MULTUNIT)carry;
-} /* mp_smul */
 #endif
 
 /*
@@ -1190,32 +1209,32 @@ void mp_smul (MULTUNIT *prod, MULTUNIT *
  * pointing at a 20-word buffer.
  */
 #ifndef mp_dmul
-void mp_dmul (unitptr prod, unitptr multiplicand, unitptr multiplier)
+void mp_dmul(unitptr prod, unitptr multiplicand, unitptr multiplier)
 {
-	register	int i;
-	register	MULTUNIT *p_multiplicand, *p_multiplier;
-	register	MULTUNIT *prodp;
-
-
-	unitfill0(prod,global_precision*2);	/* Pre-zero prod */
-	/* Calculate precision in units of MULTUNIT */
-	munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
-	p_multiplicand = (MULTUNIT *)multiplicand;
-	p_multiplier = (MULTUNIT *)multiplier;
-	prodp = (MULTUNIT *)prod;
-	make_lsbptr(p_multiplicand,munit_prec);
-	make_lsbptr(p_multiplier,munit_prec);
-	make_lsbptr(prodp,munit_prec*2);
-	/* Multiply multiplicand by each word in multiplier, accumulating prod: */
-	for (i=0; i<munit_prec; ++i)
-		mp_smul (post_higherunit(prodp),
-			p_multiplicand, *post_higherunit(p_multiplier));
-} /* mp_dmul */
-#endif  /* mp_dmul */
+    register int i;
+    register MULTUNIT *p_multiplicand, *p_multiplier;
+    register MULTUNIT *prodp;
+
+
+    unitfill0(prod, global_precision * 2);	/* Pre-zero prod */
+    /* Calculate precision in units of MULTUNIT */
+    munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
+    p_multiplicand = (MULTUNIT *) multiplicand;
+    p_multiplier = (MULTUNIT *) multiplier;
+    prodp = (MULTUNIT *) prod;
+    make_lsbptr(p_multiplicand, munit_prec);
+    make_lsbptr(p_multiplier, munit_prec);
+    make_lsbptr(prodp, munit_prec * 2);
+    /* Multiply multiplicand by each word in multiplier, accumulating prod: */
+    for (i = 0; i < munit_prec; ++i)
+	mp_smul(post_higherunit(prodp),
+		p_multiplicand, *post_higherunit(p_multiplier));
+}				/* mp_dmul */
+#endif				/* mp_dmul */
 
 static unit ALIGN modulus[MAX_UNIT_PRECISION];
-static short nbits;			/* number of modulus significant bits */
-#endif /* UPTON_OR_SMITH */
+static short nbits;		/* number of modulus significant bits */
+#endif				/* UPTON_OR_SMITH */
 
 #ifdef UPTON
 
@@ -1229,9 +1248,12 @@ static short nbits;			/* number of modul
 
 static unit ALIGN reciprocal[MAX_UNIT_PRECISION];
 static unit ALIGN dhi[MAX_UNIT_PRECISION];
-static unit ALIGN d_data[MAX_UNIT_PRECISION*2];     /* double-wide register */
-static unit ALIGN e_data[MAX_UNIT_PRECISION*2];     /* double-wide register */
-static unit ALIGN f_data[MAX_UNIT_PRECISION*2];     /* double-wide register */
+static unit ALIGN d_data[MAX_UNIT_PRECISION * 2];	/* double-wide
+							   register */
+static unit ALIGN e_data[MAX_UNIT_PRECISION * 2];	/* double-wide
+							   register */
+static unit ALIGN f_data[MAX_UNIT_PRECISION * 2];	/* double-wide
+							   register */
 
 static short nbitsDivUNITSIZE;
 static short nbitsModUNITSIZE;
@@ -1246,13 +1268,13 @@ static short nbitsModUNITSIZE;
  */
 int stage_upton_modulus(unitptr n)
 {
-	mp_move(modulus, n);
-	mp_recip(reciprocal, modulus);
-	nbits = countbits(modulus);
-	nbitsDivUNITSIZE = nbits / UNITSIZE;
-	nbitsModUNITSIZE = nbits % UNITSIZE;
-	return 0;	/* normal return */
-} /* stage_upton_modulus */
+    mp_move(modulus, n);
+    mp_recip(reciprocal, modulus);
+    nbits = countbits(modulus);
+    nbitsDivUNITSIZE = nbits / UNITSIZE;
+    nbitsModUNITSIZE = nbits % UNITSIZE;
+    return 0;			/* normal return */
+}				/* stage_upton_modulus */
 
 
 /*
@@ -1263,76 +1285,75 @@ int stage_upton_modulus(unitptr n)
  * The reciprocal of modulus is 1 bit longer than the modulus.
  * upton_modmult() is aliased to mp_modmult().
  */
-int upton_modmult (unitptr prod, unitptr multiplicand, unitptr multiplier)
+int upton_modmult(unitptr prod, unitptr multiplicand, unitptr multiplier)
 {
-	unitptr d = d_data;
-	unitptr d1 = d_data;
-	unitptr e = e_data;
-	unitptr f = f_data;
-	short orig_precision;
+    unitptr d = d_data;
+    unitptr d1 = d_data;
+    unitptr e = e_data;
+    unitptr f = f_data;
+    short orig_precision;
 
-	orig_precision = global_precision;
-	mp_dmul (d,multiplicand,multiplier);
+    orig_precision = global_precision;
+    mp_dmul(d, multiplicand, multiplier);
 
-	/* Throw off low nbits of d */
+    /* Throw off low nbits of d */
 #ifndef HIGHFIRST
-	d1 = d + nbitsDivUNITSIZE;
+    d1 = d + nbitsDivUNITSIZE;
 #else
-	d1 = d + orig_precision - nbitsDivUNITSIZE;
+    d1 = d + orig_precision - nbitsDivUNITSIZE;
 #endif
-	mp_move(dhi, d1);	/* Don't screw up d, we need it later */
-	mp_shift_right_bits(dhi, nbitsModUNITSIZE);
+    mp_move(dhi, d1);		/* Don't screw up d, we need it later */
+    mp_shift_right_bits(dhi, nbitsModUNITSIZE);
 
-	mp_dmul (e,dhi,reciprocal);	/* Note - reciprocal has nbits+1 bits */
+    mp_dmul(e, dhi, reciprocal); /* Note - reciprocal has nbits+1 bits */
 
 #ifndef HIGHFIRST
-	e += nbitsDivUNITSIZE;
+    e += nbitsDivUNITSIZE;
 #else
-	e += orig_precision - nbitsDivUNITSIZE;
+    e += orig_precision - nbitsDivUNITSIZE;
 #endif
-	mp_shift_right_bits(e, nbitsModUNITSIZE);
+    mp_shift_right_bits(e, nbitsModUNITSIZE);
 
-	mp_dmul (f,e,modulus);
+    mp_dmul(f, e, modulus);
 
-	/* Now for the only double-precision call to mpilib: */
-	set_precision (orig_precision * 2);
-	mp_sub (d,f);
-
-	/* d's precision should be <= orig_precision */
-	rescale (d, orig_precision*2, orig_precision);
-	set_precision (orig_precision);
-
-	/* Should never have to do this final subtract more than twice: */
-	while (mp_compare(d,modulus) > 0)
-		mp_sub (d,modulus);
-
-	mp_move(prod,d);
-	return 0;  /* normal return */
-} /* upton_modmult */
-
-
-/*	Upton's mp_modmult function leaves some internal arrays in memory,
-	so we have to call modmult_burn() at the end of mp_modexp.
-	This is so that no cryptographically sensitive data is left in memory
-	after the program exits.
-	upton_burn() is aliased to modmult_burn().
-*/
+    /* Now for the only double-precision call to mpilib: */
+    set_precision(orig_precision * 2);
+    mp_sub(d, f);
+
+    /* d's precision should be <= orig_precision */
+    rescale(d, orig_precision * 2, orig_precision);
+    set_precision(orig_precision);
+
+    /* Should never have to do this final subtract more than twice: */
+    while (mp_compare(d, modulus) > 0)
+	mp_sub(d, modulus);
+
+    mp_move(prod, d);
+    return 0;			/* normal return */
+}				/* upton_modmult */
+
+/* Upton's mp_modmult function leaves some internal arrays in memory,
+   so we have to call modmult_burn() at the end of mp_modexp.
+   This is so that no cryptographically sensitive data is left in memory
+   after the program exits.
+   upton_burn() is aliased to modmult_burn().
+ */
 void upton_burn(void)
 {
-	unitfill0(modulus,MAX_UNIT_PRECISION);
-	unitfill0(reciprocal,MAX_UNIT_PRECISION);
-	unitfill0(dhi,MAX_UNIT_PRECISION);
-	unitfill0(d_data,MAX_UNIT_PRECISION*2);
-	unitfill0(e_data,MAX_UNIT_PRECISION*2);
-	unitfill0(f_data,MAX_UNIT_PRECISION*2);
-	nbits = nbitsDivUNITSIZE = nbitsModUNITSIZE = 0;
-} /* upton_burn */
+    unitfill0(modulus, MAX_UNIT_PRECISION);
+    unitfill0(reciprocal, MAX_UNIT_PRECISION);
+    unitfill0(dhi, MAX_UNIT_PRECISION);
+    unitfill0(d_data, MAX_UNIT_PRECISION * 2);
+    unitfill0(e_data, MAX_UNIT_PRECISION * 2);
+    unitfill0(f_data, MAX_UNIT_PRECISION * 2);
+    nbits = nbitsDivUNITSIZE = nbitsModUNITSIZE = 0;
+}				/* upton_burn */
 
 /******* end of Upton's MODMULT stuff. *******/
 /*=========================================================================*/
-#endif  /* UPTON */
+#endif				/* UPTON */
 
-#ifdef SMITH	/* using Thad Smith's modmult algorithm */
+#ifdef SMITH			/* using Thad Smith's modmult algorithm */
 
 /*
  * Thad Smith's implementation of multiprecision modmult algorithm in C.
@@ -1358,20 +1379,20 @@ void upton_burn(void)
  * so that no sensitive data is left in memory after the program exits.
  */
 
-static unit ALIGN ds_data[MAX_UNIT_PRECISION*2+2];
+static unit ALIGN ds_data[MAX_UNIT_PRECISION * 2 + 2];
 
-static unit mod_quotient  [4];
-static unit mod_divisor   [4];	/* 2 most signif. MULTUNITs of modulus */
+static unit mod_quotient[4];
+static unit mod_divisor[4];	/* 2 most signif. MULTUNITs of modulus */
 
-static MULTUNIT *modmpl;		/* ptr to modulus least significant
-					** MULTUNIT			    */
-static int  mshift;			/* number of bits for
-					** recip scaling	  */
-static MULTUNIT reciph; 		/* MSunit of scaled recip */
-static MULTUNIT recipl; 		/* LSunit of scaled recip */
+static MULTUNIT *modmpl;	/* ptr to modulus least significant
+				   ** MULTUNIT                      */
+static int mshift;		/* number of bits for
+				   ** recip scaling       */
+static MULTUNIT reciph;		/* MSunit of scaled recip */
+static MULTUNIT recipl;		/* LSunit of scaled recip */
 
-static short modlenMULTUNITS;		/* length of modulus in MULTUNITs */
-static MULTUNIT mutemp; 		/* temporary */
+static short modlenMULTUNITS;	/* length of modulus in MULTUNITs */
+static MULTUNIT mutemp;		/* temporary */
 
 /*
  * The routines mp_smul and mp_dmul are the same as for UPTON and
@@ -1382,16 +1403,16 @@ static MULTUNIT mutemp; 		/* temporary *
  */
 
 #ifndef mp_set_recip
-#define mp_set_recip(rh,rl,m)     /* null */
+#define mp_set_recip(rh,rl,m)	/* null */
 #else
 /* setup routine for external mp_quo_digit */
 void mp_set_recip(MULTUNIT rh, MULTUNIT rl, int m);
 #endif
-MULTUNIT mp_quo_digit (MULTUNIT *dividend);
+MULTUNIT mp_quo_digit(MULTUNIT * dividend);
 
 #ifdef	MULTUNIT_SIZE_SAME
 #define mp_musubb mp_subb	/* use existing routine */
-#else  /* ! MULTUNIT_SIZE_SAME */
+#else				/* ! MULTUNIT_SIZE_SAME */
 
 /*
  * This performs the same function as mp_subb, but with MULTUNITs.
@@ -1402,28 +1423,29 @@ MULTUNIT mp_quo_digit (MULTUNIT *dividen
  * mp_subb in that case.
  */
 #ifndef mp_musubb
-boolean mp_musubb
-	(register MULTUNIT* r1,register MULTUNIT* r2,register boolean borrow)
+
 /*
  * multiprecision subtract of MULTUNITs with borrow, r2 from r1, result in r1
  * borrow is incoming borrow flag-- value should be 0 or 1
  */
+boolean mp_musubb
+ (register MULTUNIT * r1, register MULTUNIT * r2, register boolean borrow)
 {
-	register ulint x;	/* won't work if sizeof(MULTUNIT)==
-						 sizeof(long)	    */
-	short precision;	/* number of MULTUNITs to subtract */
-	precision = global_precision * MULTUNITs_perunit;
-	make_lsbptr(r1,precision);
-	make_lsbptr(r2,precision);
-	while (precision--) {
-		x = (ulint) *r1 - (ulint) *post_higherunit(r2) - (ulint) borrow;
-		*post_higherunit(r1) = x;
-		borrow = (((1L << MULTUNITSIZE) & x) != 0L);
-	}
-	return (borrow);
-} /* mp_musubb */
-#endif	/* mp_musubb */
-#endif	/* MULTUNIT_SIZE_SAME */
+    register ulint x;		/* won't work if sizeof(MULTUNIT)==
+				   sizeof(long)     */
+    short precision;		/* number of MULTUNITs to subtract */
+    precision = global_precision * MULTUNITs_perunit;
+    make_lsbptr(r1, precision);
+    make_lsbptr(r2, precision);
+    while (precision--) {
+	x = (ulint) * r1 - (ulint) * post_higherunit(r2) - (ulint) borrow;
+	*post_higherunit(r1) = x;
+	borrow = (((1L << MULTUNITSIZE) & x) != 0L);
+    }
+    return (borrow);
+}				/* mp_musubb */
+#endif				/* mp_musubb */
+#endif				/* MULTUNIT_SIZE_SAME */
 
 /*
  * The function mp_quo_digit is the heart of Smith's modulo reduction,
@@ -1436,7 +1458,7 @@ boolean mp_musubb
  * An important part of this technique is that the quotient never be
  * too small, although it may occasionally be too large.  This was
  * done to eliminate the need to check and correct for a remainder
- * exceeding the divisor.	It is easier to check for a negative
+ * exceeding the divisor.       It is easier to check for a negative
  * remainder.  The following technique rarely needs correction for
  * MULTUNITs of at least 16 bits.
  *
@@ -1454,73 +1476,76 @@ boolean mp_musubb
  * for the difference.
  * 
  * Parameter: dividend - points to the most significant MULTUNIT
- * 	of the dividend.  Note that dividend actually contains the 
- * 	one's complement of the actual dividend value (see comments for 
- * 	smith_modmult).
+ *      of the dividend.  Note that dividend actually contains the 
+ *      one's complement of the actual dividend value (see comments for 
+ *      smith_modmult).
  * 
  *  Return: the trial quotient digit resulting from dividing the first
- * 	three MULTUNITs at dividend by the upper two MULTUNITs of the 
- * 	modulus.
+ *      three MULTUNITs at dividend by the upper two MULTUNITs of the 
+ *      modulus.
  */
 
-/* #define ASM_PROTOTYPE */ /* undefined: use C-optimized version */
+/* #define ASM_PROTOTYPE *//* undefined: use C-optimized version */
 
 #ifndef mp_quo_digit
-MULTUNIT mp_quo_digit (MULTUNIT *dividend) {
-	unsigned long q, q0, q1, q2;
-	unsigned short lsb_factor;
+MULTUNIT mp_quo_digit(MULTUNIT * dividend)
+{
+    unsigned long q, q0, q1, q2;
+    unsigned short lsb_factor;
 
 /*
  * Compute the least significant product group.
- *The last terms of q1 and q2 perform upward rounding, which is
- *needed to guarantee that the result not be too small.
+ * The last terms of q1 and q2 perform upward rounding, which is
+ * needed to guarantee that the result not be too small.
  */
-	q1 = (dividend[tohigher(-2)] ^ MULTUNIT_mask) * (unsigned long)reciph
-	     + reciph;
-	q2 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long)recipl
-	     + (1L << MULTUNITSIZE) ;
+    q1 = (dividend[tohigher(-2)] ^ MULTUNIT_mask) * (unsigned long) reciph
+	+ reciph;
+    q2 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long) recipl
+	+ (1L << MULTUNITSIZE);
 #ifdef ASM_PROTOTYPE
-	lsb_factor = 1 & (q1>>MULTUNITSIZE) & (q2>>MULTUNITSIZE);
-	q = q1 + q2;
+    lsb_factor = 1 & (q1 >> MULTUNITSIZE) & (q2 >> MULTUNITSIZE);
+    q = q1 + q2;
 
-	/* The following statement is equivalent to shifting the sum right
-	   one bit while shifting in the carry bit.
-	*/
-	q0 = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
-#else 	/* optimized C version */
-	q0 = (q1>>1) + (q2>>1) + 1;
+    /* The following statement is equivalent to shifting the sum right
+       one bit while shifting in the carry bit.
+     */
+    q0 = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
+#else				/* optimized C version */
+    q0 = (q1 >> 1) + (q2 >> 1) + 1;
 #endif
 
-/*	Compute the middle significant product group.	*/
+/*      Compute the middle significant product group.   */
 
-	q1 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long)reciph;
-	q2 = (dividend[ 0] ^ MULTUNIT_mask) * (unsigned long)recipl;
+    q1 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long) reciph;
+    q2 = (dividend[0] ^ MULTUNIT_mask) * (unsigned long) recipl;
 #ifdef ASM_PROTOTYPE
-	q = q1 + q2;
-	q = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
+    q = q1 + q2;
+    q = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
 
-/*	Add in the most significant word of the first group.
-	The last term takes care of the carry from adding the lsb's
-	that were shifted out prior to addition.
-*/
-	q = (q0 >> MULTUNITSIZE)+ q + (lsb_factor & (q1 ^ q2));
-#else 	/* optimized C version */
-	q = (q0 >> MULTUNITSIZE)+ (q1>>1) + (q2>>1) + 1;
+/*      Add in the most significant word of the first group.
+   The last term takes care of the carry from adding the lsb's
+   that were shifted out prior to addition.
+ */
+    q = (q0 >> MULTUNITSIZE) + q + (lsb_factor & (q1 ^ q2));
+#else				/* optimized C version */
+    q = (q0 >> MULTUNITSIZE) + (q1 >> 1) + (q2 >> 1) + 1;
 #endif
 
-/*	Compute the most significant term and add in the others */
-
-	q = (q >> (MULTUNITSIZE-2)) +
-	    (((dividend[0] ^ MULTUNIT_mask) * (unsigned long)reciph) << 1);
-	q >>= mshift;
+/*      Compute the most significant term and add in the others */
 
-/*	Prevent overflow and then wipe out the intermediate results. */
-
-	mutemp = (MULTUNIT)min(q, (1L << MULTUNITSIZE) -1);
-	q= q0 = q1 = q2 = 0;   lsb_factor = 0; (void)lsb_factor;
-	return mutemp;
+    q = (q >> (MULTUNITSIZE - 2)) +
+	(((dividend[0] ^ MULTUNIT_mask) * (unsigned long) reciph) << 1);
+    q >>= mshift;
+
+/*      Prevent overflow and then wipe out the intermediate results. */
+
+    mutemp = (MULTUNIT) min(q, (1L << MULTUNITSIZE) - 1);
+    q = q0 = q1 = q2 = 0;
+    lsb_factor = 0;
+    (void) lsb_factor;
+    return mutemp;
 }
-#endif	/* mp_quo_digit */
+#endif				/* mp_quo_digit */
 
 /*
  * stage_smith_modulus() - Prepare for a Smith modmult.
@@ -1536,120 +1561,118 @@ MULTUNIT mp_quo_digit (MULTUNIT *dividen
  * 
  * stage_smith_modulus() is aliased to stage_modulus().
  */
-
 int stage_smith_modulus(unitptr n_modulus)
 {
-	int   original_precision;
-	int   sigmod;		/* significant units in modulus */
-	unitptr  mp;		/* modulus most significant pointer */
-	MULTUNIT *mpm;		/* reciprocal pointer */
-	int   prec;		/* precision of reciprocal calc in units */
-
-	mp_move(modulus, n_modulus);
-	modmpl = (MULTUNIT*) modulus;
-	modmpl = lsbptr (modmpl, global_precision * MULTUNITs_perunit);
-	nbits = countbits(modulus);
-	modlenMULTUNITS = (nbits+ MULTUNITSIZE-1) / MULTUNITSIZE;
-
-	original_precision = global_precision;
-
-	/* The following code copies the three most significant units of
-	 * modulus to mod_divisor.
-	*/
-	mp = modulus;
-	sigmod = significance (modulus);
-	rescale (mp, original_precision, sigmod);
+    int original_precision;
+    int sigmod;			/* significant units in modulus */
+    unitptr mp;			/* modulus most significant pointer */
+    MULTUNIT *mpm;		/* reciprocal pointer */
+    int prec;			/* precision of reciprocal calc in units */
+
+    mp_move(modulus, n_modulus);
+    modmpl = (MULTUNIT *) modulus;
+    modmpl = lsbptr(modmpl, global_precision * MULTUNITs_perunit);
+    nbits = countbits(modulus);
+    modlenMULTUNITS = (nbits + MULTUNITSIZE - 1) / MULTUNITSIZE;
+
+    original_precision = global_precision;
+
+    /* The following code copies the three most significant units of
+     * modulus to mod_divisor.
+     */
+    mp = modulus;
+    sigmod = significance(modulus);
+    rescale(mp, original_precision, sigmod);
 /* prec is the unit precision required for 3 MULTUNITs */
-	prec = (3 +(MULTUNITs_perunit-1)) / MULTUNITs_perunit;
-	set_precision (prec);
- 
-	/* set mp = ptr to most significant units of modulus, then move
-	 * the most significant units to mp_divisor 
-	*/
-	mp = msbptr(mp,sigmod) -tohigher(prec-1);
-	unmake_lsbptr (mp, prec);
-	mp_move (mod_divisor, mp);
- 
-	/* Keep 2*MULTUNITSIZE bits in mod_divisor.
-	 * This will (normally) result in a reciprocal of 2*MULTUNITSIZE+1 bits.
-	*/
-	mshift = countbits (mod_divisor) - 2*MULTUNITSIZE;
-	mp_shift_right_bits (mod_divisor, mshift);
-	mp_recip(mod_quotient,mod_divisor);
-	mp_shift_right_bits (mod_quotient,1);
-
-	/* Reduce to:   0 < mshift <= MULTUNITSIZE */
-	mshift = ((mshift + (MULTUNITSIZE-1)) % MULTUNITSIZE) +1;
-	/* round up, rescaling if necessary */
-	mp_inc (mod_quotient);
-	if (countbits (mod_quotient) > 2*MULTUNITSIZE) {
-		mp_shift_right_bits (mod_quotient,1);
-		mshift--;	/* now  0 <= mshift <= MULTUNITSIZE */
-	}
-	mpm = lsbptr ((MULTUNIT*)mod_quotient, prec*MULTUNITs_perunit);
-	recipl = *post_higherunit (mpm);
-	reciph = *mpm;
-	mp_set_recip (reciph, recipl, mshift);
-	set_precision (original_precision);
-	return 0;	/* normal return */
-} /* stage_smith_modulus */
-
-/*	Smith's algorithm performs a multiply combined with a modulo operation.
-	Computes:  prod = (multiplicand*multiplier) mod modulus
-	The modulus must first be set by stage_smith_modulus().
-	smith_modmult() is aliased to mp_modmult().
-*/
+    prec = (3 + (MULTUNITs_perunit - 1)) / MULTUNITs_perunit;
+    set_precision(prec);
 
-int
-smith_modmult(unitptr prod, unitptr multiplicand, unitptr multiplier)
-{
-	unitptr    d;	/* ptr to product */ 
-	MULTUNIT   *dmph, *dmpl, *dmp;	/* ptrs to dividend (high, low, first)
-					 * aligned for subtraction	     */
-/*	Note that dmph points one MULTUNIT higher than indicated by
-	global precision.  This allows us to zero out a word one higher than
-	the normal precision.
-*/
-	short	    orig_precision;
-	short	    nqd;	/* number of quotient digits remaining to
-				 * be generated 			     */
-	short	    dmi;	/* number of significant MULTUNITs in product */
-
-	d = ds_data + 1;	/* room for leading MSB if HIGHFIRST */
-	orig_precision = global_precision;
-	mp_dmul(d, multiplicand, multiplier);
-
-	rescale(d, orig_precision * 2, orig_precision * 2 + 1);
-	set_precision(orig_precision * 2 + 1);
-	*msbptr(d, global_precision) = 0;	/* leading 0 unit */
-
-/*	We now start working with MULTUNITs.
-	Determine the most significant MULTUNIT of the product so we don't
-	have to process leading zeros in our divide loop.
-*/
-	dmi = significance(d) * MULTUNITs_perunit;
-	if (dmi >= modlenMULTUNITS) {
-		/* Make dividend negative.  This allows the use of mp_smul to
-		 * "subtract" the product of the modulus and the trial divisor
-		 * by actually adding to a negative dividend.
-		 * The one's complement of the dividend is used, since it causes
-		 * a zero value to be represented as all ones.  This facilitates
-		 * testing the result for possible overflow, since a sign bit
-		 * indicates that no adjustment is necessary, and we should not
-		 * attempt to adjust if the result of the addition is zero.
-		 */
-		mp_inc(d);
-		mp_neg(d);
-		set_precision(orig_precision);
-		munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
-
-		/* Set msb, lsb, and normal ptrs of dividend */
-		dmph = lsbptr((MULTUNIT *) d, (orig_precision * 2 + 1) *
-			    MULTUNITs_perunit) + tohigher(dmi);
-		nqd = dmi + 1 - modlenMULTUNITS;
-		dmpl = dmph - tohigher(modlenMULTUNITS);
+    /* set mp = ptr to most significant units of modulus, then move
+     * the most significant units to mp_divisor 
+     */
+    mp = msbptr(mp, sigmod) - tohigher(prec - 1);
+    unmake_lsbptr(mp, prec);
+    mp_move(mod_divisor, mp);
+
+    /* Keep 2*MULTUNITSIZE bits in mod_divisor.
+     * This will (normally) result in a reciprocal of 2*MULTUNITSIZE+1 bits.
+     */
+    mshift = countbits(mod_divisor) - 2 * MULTUNITSIZE;
+    mp_shift_right_bits(mod_divisor, mshift);
+    mp_recip(mod_quotient, mod_divisor);
+    mp_shift_right_bits(mod_quotient, 1);
+
+    /* Reduce to:   0 < mshift <= MULTUNITSIZE */
+    mshift = ((mshift + (MULTUNITSIZE - 1)) % MULTUNITSIZE) + 1;
+    /* round up, rescaling if necessary */
+    mp_inc(mod_quotient);
+    if (countbits(mod_quotient) > 2 * MULTUNITSIZE) {
+	mp_shift_right_bits(mod_quotient, 1);
+	mshift--;		/* now  0 <= mshift <= MULTUNITSIZE */
+    }
+    mpm = lsbptr((MULTUNIT *) mod_quotient, prec * MULTUNITs_perunit);
+    recipl = *post_higherunit(mpm);
+    reciph = *mpm;
+    mp_set_recip(reciph, recipl, mshift);
+    set_precision(original_precision);
+    return 0;			/* normal return */
+}				/* stage_smith_modulus */
+
+/* Smith's algorithm performs a multiply combined with a modulo operation.
+   Computes:  prod = (multiplicand*multiplier) mod modulus
+   The modulus must first be set by stage_smith_modulus().
+   smith_modmult() is aliased to mp_modmult().
+ */
+int smith_modmult(unitptr prod, unitptr multiplicand, unitptr multiplier)
+{
+    unitptr d;			/* ptr to product */
+    MULTUNIT *dmph, *dmpl, *dmp;	/* ptrs to dividend (high, low, first)
+					 * aligned for subtraction         */
+/* Note that dmph points one MULTUNIT higher than indicated by
+   global precision.  This allows us to zero out a word one higher than
+   the normal precision.
+ */
+    short orig_precision;
+    short nqd;			/* number of quotient digits remaining to
+				 * be generated                            */
+    short dmi;			/* number of significant MULTUNITs in
+				   product */
+
+    d = ds_data + 1;		/* room for leading MSB if HIGHFIRST */
+    orig_precision = global_precision;
+    mp_dmul(d, multiplicand, multiplier);
+
+    rescale(d, orig_precision * 2, orig_precision * 2 + 1);
+    set_precision(orig_precision * 2 + 1);
+    *msbptr(d, global_precision) = 0;	/* leading 0 unit */
+
+/*      We now start working with MULTUNITs.
+   Determine the most significant MULTUNIT of the product so we don't
+   have to process leading zeros in our divide loop.
+ */
+    dmi = significance(d) * MULTUNITs_perunit;
+    if (dmi >= modlenMULTUNITS) {
+	/* Make dividend negative.  This allows the use of mp_smul to
+	 * "subtract" the product of the modulus and the trial divisor
+	 * by actually adding to a negative dividend.
+	 * The one's complement of the dividend is used, since it causes
+	 * a zero value to be represented as all ones.  This facilitates
+	 * testing the result for possible overflow, since a sign bit
+	 * indicates that no adjustment is necessary, and we should not
+	 * attempt to adjust if the result of the addition is zero.
+	 */
+	mp_inc(d);
+	mp_neg(d);
+	set_precision(orig_precision);
+	munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
+
+	/* Set msb, lsb, and normal ptrs of dividend */
+	dmph = lsbptr((MULTUNIT *) d, (orig_precision * 2 + 1) *
+		      MULTUNITs_perunit) + tohigher(dmi);
+	nqd = dmi + 1 - modlenMULTUNITS;
+	dmpl = dmph - tohigher(modlenMULTUNITS);
 
-/*	
+/*      
  * Divide loop.
  * Each iteration computes the next quotient MULTUNIT digit, then
  * multiplies the divisor (modulus) by the quotient digit and adds
@@ -1658,187 +1681,186 @@ smith_modmult(unitptr prod, unitptr mult
  * we get a non-negative result, in which case we subtract off the
  * modulus to get the proper negative remainder.
  */
-		for (; nqd; nqd--) {
-			MULTUNIT    q;	    /* quotient trial digit */
-
-			q = mp_quo_digit(dmph);
-			if (q > 0) {
-				mp_smul(dmpl, modmpl, q);
-
-				/* Perform correction if q too large.
-				*  This rarely occurs.
-				*/
-				if (!(*dmph & MULTUNIT_msb)) {
-					dmp = dmpl;
-					unmake_lsbptr(dmp, orig_precision * 
-						   MULTUNITs_perunit);
-					if (mp_musubb(dmp,
-						   (MULTUNIT *) modulus, 0))
-						(*dmph)  --;
-				}
-			}
-			pre_lowerunit(dmph);
-			pre_lowerunit(dmpl);
-		}
-		/* d contains the one's complement of the remainder. */
-		rescale(d, orig_precision * 2 + 1, orig_precision);
-		set_precision(orig_precision);
-		mp_neg(d);
-		mp_dec(d);
-	} else {
-		/* Product was less than modulus.  Return it. */
-		rescale(d, orig_precision * 2 + 1, orig_precision);
-		set_precision(orig_precision);
-	}
-	mp_move(prod, d);
-	return (0);		/* normal return */
-} /* smith_modmult */
+	for (; nqd; nqd--) {
+	    MULTUNIT q;		/* quotient trial digit */
 
+	    q = mp_quo_digit(dmph);
+	    if (q > 0) {
+		mp_smul(dmpl, modmpl, q);
 
-/*	Smith's mp_modmult function leaves some internal arrays in memory,
-	so we have to call modmult_burn() at the end of mp_modexp.
-	This is so that no cryptographically sensitive data is left in memory
-	after the program exits.
-	smith_burn() is aliased to modmult_burn().
-*/
+		/* Perform correction if q too large.
+		   *  This rarely occurs.
+		 */
+		if (!(*dmph & MULTUNIT_msb)) {
+		    dmp = dmpl;
+		    unmake_lsbptr(dmp, orig_precision *
+				  MULTUNITs_perunit);
+		    if (mp_musubb(dmp,
+				  (MULTUNIT *) modulus, 0))
+			(*dmph)--;
+		}
+	    }
+	    pre_lowerunit(dmph);
+	    pre_lowerunit(dmpl);
+	}
+	/* d contains the one's complement of the remainder. */
+	rescale(d, orig_precision * 2 + 1, orig_precision);
+	set_precision(orig_precision);
+	mp_neg(d);
+	mp_dec(d);
+    } else {
+	/* Product was less than modulus.  Return it. */
+	rescale(d, orig_precision * 2 + 1, orig_precision);
+	set_precision(orig_precision);
+    }
+    mp_move(prod, d);
+    return (0);			/* normal return */
+}				/* smith_modmult */
+
+/* Smith's mp_modmult function leaves some internal arrays in memory,
+   so we have to call modmult_burn() at the end of mp_modexp.
+   This is so that no cryptographically sensitive data is left in memory
+   after the program exits.
+   smith_burn() is aliased to modmult_burn().
+ */
 void smith_burn(void)
 {
-	empty_array (modulus);
-	empty_array (ds_data);
-	empty_array (mod_quotient);
-	empty_array (mod_divisor);
-	modmpl = 0;
-	mshift =nbits = 0;
-	reciph = recipl = 0;
-	modlenMULTUNITS = mutemp = 0;
-	mp_set_recip (0,0,0);
-} /* smith_burn */
-
-/*	End of Thad Smith's implementation of modmult. */
+    empty_array(modulus);
+    empty_array(ds_data);
+    empty_array(mod_quotient);
+    empty_array(mod_divisor);
+    modmpl = 0;
+    mshift = nbits = 0;
+    reciph = recipl = 0;
+    modlenMULTUNITS = mutemp = 0;
+    mp_set_recip(0, 0, 0);
+}				/* smith_burn */
 
-#endif	/* SMITH */
+/*      End of Thad Smith's implementation of modmult. */
 
+#endif				/* SMITH */
 
+/* Returns number of significant bits in r */
 int countbits(unitptr r)
-	/* Returns number of significant bits in r */
 {
-	int bits;
-	short prec;
-	register unit bitmask;
-	init_bitsniffer(r,bitmask,prec,bits);
-	return bits;
-} /* countbits */
-
+    int bits;
+    short prec;
+    register unit bitmask;
+    init_bitsniffer(r, bitmask, prec, bits);
+    return bits;
+}				/* countbits */
 
 char *copyright_notice(void)
 /* force linker to include copyright notice in the executable object image. */
-{ return ("(c)1986 Philip Zimmermann"); } /* copyright_notice */
-
+{
+    return ("(c)1986 Philip Zimmermann");
+}				/* copyright_notice */
 
-int mp_modexp(register unitptr expout,register unitptr expin,
-	register unitptr exponent,register unitptr modulus)
 /*
  * Russian peasant combined exponentiation/modulo algorithm.
  * Calls modmult instead of mult.
  * Computes:  expout = (expin**exponent) mod modulus
  * WARNING: All the arguments must be less than the modulus!
  */
+int mp_modexp(register unitptr expout, register unitptr expin,
+	      register unitptr exponent, register unitptr modulus)
 {
-	int bits;
-	short oldprecision;
-	register unit bitmask;
-	unit product[MAX_UNIT_PRECISION];
-	short eprec;
+    int bits;
+    short oldprecision;
+    register unit bitmask;
+    unit product[MAX_UNIT_PRECISION];
+    short eprec;
 
 #ifdef COUNTMULTS
-	tally_modmults = 0;	/* clear "number of modmults" counter */
-	tally_modsquares = 0;	/* clear "number of modsquares" counter */
-#endif	/* COUNTMULTS */
-	mp_init(expout,1);
-	if (testeq(exponent,0)) {
-		if (testeq(expin,0))
-			return -1; /* 0 to the 0th power means return error */
-		return 0;	/* otherwise, zero exponent means expout is 1 */
-	}
-	if (testeq(modulus,0))
-		return -2;		/* zero modulus means error */
-#if SLOP_BITS > 0	/* if there's room for sign bits */
-	if (mp_tstminus(modulus))
-		return -2;		/* negative modulus means error */
-#endif	/* SLOP_BITS > 0 */
-	if (mp_compare(expin,modulus) >= 0)
-		return -3; /* if expin >= modulus, return error */
-	if (mp_compare(exponent,modulus) >= 0)
-		return -4; /* if exponent >= modulus, return error */
-
-	oldprecision = global_precision;	/* save global_precision */
-	/* set smallest optimum precision for this modulus */
-	set_precision(bits2units(countbits(modulus)+SLOP_BITS));
-	/* rescale all these registers to global_precision we just defined */
-	rescale(modulus,oldprecision,global_precision);
-	rescale(expin,oldprecision,global_precision);
-	rescale(exponent,oldprecision,global_precision);
-	rescale(expout,oldprecision,global_precision);
-
-	if (stage_modulus(modulus)) {
-		set_precision(oldprecision);	/* restore original precision */
-		return -5;		/* unstageable modulus (STEWART algorithm) */
-	}
-
-	/* normalize and compute number of bits in exponent first */
-	init_bitsniffer(exponent,bitmask,eprec,bits);
-
-	/* We can "optimize out" the first modsquare and modmult: */
-	bits--;		/* We know for sure at this point that bits>0 */
-	mp_move(expout,expin);		/*  expout = (1*1)*expin; */
-	bump_bitsniffer(exponent,bitmask);
+    tally_modmults = 0;		/* clear "number of modmults" counter */
+    tally_modsquares = 0;	/* clear "number of modsquares" counter */
+#endif				/* COUNTMULTS */
+    mp_init(expout, 1);
+    if (testeq(exponent, 0)) {
+	if (testeq(expin, 0))
+	    return -1;		/* 0 to the 0th power means return error */
+	return 0;		/* otherwise, zero exponent means expout
+				   is 1 */
+    }
+    if (testeq(modulus, 0))
+	return -2;		/* zero modulus means error */
+#if SLOP_BITS > 0		/* if there's room for sign bits */
+    if (mp_tstminus(modulus))
+	return -2;		/* negative modulus means error */
+#endif				/* SLOP_BITS > 0 */
+    if (mp_compare(expin, modulus) >= 0)
+	return -3;		/* if expin >= modulus, return error */
+    if (mp_compare(exponent, modulus) >= 0)
+	return -4;		/* if exponent >= modulus, return error */
+
+    oldprecision = global_precision;	/* save global_precision */
+    /* set smallest optimum precision for this modulus */
+    set_precision(bits2units(countbits(modulus) + SLOP_BITS));
+    /* rescale all these registers to global_precision we just defined */
+    rescale(modulus, oldprecision, global_precision);
+    rescale(expin, oldprecision, global_precision);
+    rescale(exponent, oldprecision, global_precision);
+    rescale(expout, oldprecision, global_precision);
 
-	while (bits--) {
-		poll_for_break(); /* polls keyboard, allows ctrl-C to abort program */
+    if (stage_modulus(modulus)) {
+	set_precision(oldprecision);	/* restore original precision */
+	return -5;		/* unstageable modulus (STEWART algorithm) */
+    }
+    /* normalize and compute number of bits in exponent first */
+    init_bitsniffer(exponent, bitmask, eprec, bits);
+
+    /* We can "optimize out" the first modsquare and modmult: */
+    bits--;			/* We know for sure at this point that
+				   bits>0 */
+    mp_move(expout, expin);	/*  expout = (1*1)*expin; */
+    bump_bitsniffer(exponent, bitmask);
+
+    while (bits--) {
+	poll_for_break();	/* polls keyboard, allows ctrl-C to
+				   abort program */
 #ifdef COUNTMULTS
-		tally_modsquares++;	/* bump "number of modsquares" counter */
-#endif	/* COUNTMULTS */
-		mp_modsquare(product,expout);
-		if (sniff_bit(exponent,bitmask)) {
-			mp_modmult(expout,product,expin);
+	tally_modsquares++;	/* bump "number of modsquares" counter */
+#endif				/* COUNTMULTS */
+	mp_modsquare(product, expout);
+	if (sniff_bit(exponent, bitmask)) {
+	    mp_modmult(expout, product, expin);
 #ifdef COUNTMULTS
-			tally_modmults++;	/* bump "number of modmults" counter */
-#endif	/* COUNTMULTS */
-		} else {
-			mp_move(expout,product);
-		}
-		bump_bitsniffer(exponent,bitmask);
-	} /* while bits-- */
-	mp_burn(product);	/* burn the evidence on the stack */
-	modmult_burn(); /* ask mp_modmult to also burn its own evidence */
-
-#ifdef COUNTMULTS	/* diagnostic analysis */
-	{
-		long atomic_mults;
-		unsigned int unitcount,totalmults;
-		unitcount = bits2units(countbits(modulus));
-		/* calculation assumes modsquare takes as long as a modmult: */
-		atomic_mults = (long) tally_modmults * (unitcount * unitcount);
-		atomic_mults += (long) tally_modsquares * (unitcount * unitcount);
-		printf("%ld atomic mults for ",atomic_mults);
-		printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n",
-			tally_modsquares,tally_modmults,
-			tally_modsquares+tally_modmults,
-			countbits(modulus), unitcount);
+	    tally_modmults++;	/* bump "number of modmults" counter */
+#endif				/* COUNTMULTS */
+	} else {
+	    mp_move(expout, product);
 	}
-#endif	/* COUNTMULTS */
-
-	set_precision(oldprecision);	/* restore original precision */
-
-	/* Do an explicit reference to the copyright notice so that the linker
-	   will be forced to include it in the executable object image... */
-	copyright_notice();	/* has no real effect at run time */
+	bump_bitsniffer(exponent, bitmask);
+    }				/* while bits-- */
+    mp_burn(product);		/* burn the evidence on the stack */
+    modmult_burn();		/* ask mp_modmult to also burn its
+				   own evidence */
+
+#ifdef COUNTMULTS		/* diagnostic analysis */
+    {
+	long atomic_mults;
+	unsigned int unitcount, totalmults;
+	unitcount = bits2units(countbits(modulus));
+	/* calculation assumes modsquare takes as long as a modmult: */
+	atomic_mults = (long) tally_modmults *(unitcount * unitcount);
+	atomic_mults += (long) tally_modsquares *(unitcount * unitcount);
+	printf("%ld atomic mults for ", atomic_mults);
+	printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n",
+	       tally_modsquares, tally_modmults,
+	       tally_modsquares + tally_modmults,
+	       countbits(modulus), unitcount);
+    }
+#endif				/* COUNTMULTS */
+
+    set_precision(oldprecision);	/* restore original precision */
+
+    /* Do an explicit reference to the copyright notice so that the linker
+       will be forced to include it in the executable object image... */
+    copyright_notice();		/* has no real effect at run time */
 
-	return 0;		/* normal return */
-} /* mp_modexp */
+    return 0;			/* normal return */
+}				/* mp_modexp */
 
-int mp_modexp_crt(unitptr expout, unitptr expin,
-	unitptr p, unitptr q, unitptr ep, unitptr eq, unitptr u)
 /*
  * This is a faster modexp for moduli with a known factorisation into two
  * relatively prime factors p and q, and an input relatively prime to the
@@ -1886,58 +1908,58 @@ int mp_modexp_crt(unitptr expout, unitpt
  * temporary in the final step when it is no longer needed.  With that,
  * you should be able to understand the code below.
  */
+int mp_modexp_crt(unitptr expout, unitptr expin,
+		  unitptr p, unitptr q, unitptr ep, unitptr eq, unitptr u)
 {
-	unit q2[MAX_UNIT_PRECISION];
-	unit temp[MAX_UNIT_PRECISION];
-	int status;
-
-/*	First, compiute p2 (physically held in M) */
-
-/*	p2 = [ (expin mod p) ^ ep ] mod p		*/
-	mp_mod(temp,expin,p);		/* temp = expin mod p  */
-	status = mp_modexp(expout,temp,ep,p);
-	if (status < 0) {	/* mp_modexp returned an error. */
-		mp_init(expout,1);
-		return status;	/* error return */
-	}
-
-/*	And the same thing for q2 */
-
-/*	q2 = [ (expin mod q) ^ eq ] mod q		*/
-	mp_mod(temp,expin,q);		/* temp = expin mod q  */
-	status = mp_modexp(q2,temp,eq,q);
-	if (status < 0) {	/* mp_modexp returned an error. */
-		mp_init(expout,1);
-		return status;	/* error return */
-	}
-
-/*	Now use the multiplicative inverse u to glue together the
-	two halves.
-*/
+    unit q2[MAX_UNIT_PRECISION];
+    unit temp[MAX_UNIT_PRECISION];
+    int status;
+
+/*      First, compiute p2 (physically held in M) */
+
+/*      p2 = [ (expin mod p) ^ ep ] mod p               */
+    mp_mod(temp, expin, p);	/* temp = expin mod p  */
+    status = mp_modexp(expout, temp, ep, p);
+    if (status < 0) {		/* mp_modexp returned an error. */
+	mp_init(expout, 1);
+	return status;		/* error return */
+    }
+/*      And the same thing for q2 */
+
+/*      q2 = [ (expin mod q) ^ eq ] mod q               */
+    mp_mod(temp, expin, q);	/* temp = expin mod q  */
+    status = mp_modexp(q2, temp, eq, q);
+    if (status < 0) {		/* mp_modexp returned an error. */
+	mp_init(expout, 1);
+	return status;		/* error return */
+    }
+/*      Now use the multiplicative inverse u to glue together the
+   two halves.
+ */
 #if 0
 /* This optimisation is useful if you commonly get small results,
    but for random results and large q, the odds of (1/q) of it
    being useful do not warrant the test.
-*/
-	if (mp_compare(expout,q2) != 0) {
+ */
+    if (mp_compare(expout, q2) != 0) {
 #endif
-	/*	Find q2-p2 mod q */
-	if (mp_sub(q2,expout))	/* if the result went negative */
-		mp_add(q2,q);	/* add q to q2 */
-
-	/*	expout = p2 + ( p * [(q2*u) mod q] )	*/
-	mp_mult(temp,q2,u);		/* q2*u  */
-	mp_mod(q2,temp,q);		/* (q2*u) mod q */
-	mp_mult(temp,p,q2);		/* p * [(q2*u) mod q] */
-	mp_add(expout,temp);		/* expout = p2 + p * [...] */
+	/*      Find q2-p2 mod q */
+	if (mp_sub(q2, expout))	/* if the result went negative */
+	    mp_add(q2, q);	/* add q to q2 */
+
+	/*      expout = p2 + ( p * [(q2*u) mod q] )    */
+	mp_mult(temp, q2, u);	/* q2*u  */
+	mp_mod(q2, temp, q);	/* (q2*u) mod q */
+	mp_mult(temp, p, q2);	/* p * [(q2*u) mod q] */
+	mp_add(expout, temp);	/* expout = p2 + p * [...] */
 #if 0
-	}
+    }
 #endif
 
-	mp_burn(q2);	/* burn the evidence on the stack...*/
-	mp_burn(temp);
-	return 0;	/* normal return */
-} /* mp_modexp_crt */
+    mp_burn(q2);		/* burn the evidence on the stack... */
+    mp_burn(temp);
+    return 0;			/* normal return */
+}				/* mp_modexp_crt */
 
 
 /****************** end of MPI library ****************************/