Annotation of pgp/src/rsalib.c, revision 1.1.1.1

1.1       root        1: /*     C source code for RSA multiprecision arithmetic library routines.
                      2:        Implemented Nov 86 by Philip Zimmermann
                      3:        Last revised 10 May 91 by PRZ
                      4: 
                      5:        Boulder Software Engineering
                      6:        3021 Eleventh Street
                      7:        Boulder, CO 80304
                      8:        (303) 444-4541
                      9: 
                     10:        (c) Copyright 1986 by Philip Zimmermann.  All rights reserved.
                     11:        The author assumes no liability for damages resulting from the use 
                     12:        of this software, even if the damage results from defects in this 
                     13:        software.  No warranty is expressed or implied.  The use of this 
                     14:        cryptographic software for developing weapon systems is expressly 
                     15:        forbidden.
                     16: 
                     17:        The RSA public key cryptosystem is patented by the Massachusetts
                     18:        Institute of Technology (U.S. patent #4,405,829).  Public Key 
                     19:        Partners (PKP) holds the exclusive commercial license to sell and 
                     20:        sub-license the RSA public key cryptosystem.  The author of this 
                     21:        software implementation of the RSA algorithm is providing this 
                     22:        implementation for educational use only.  Licensing this algorithm 
                     23:        from PKP is the responsibility of you, the user, not Philip
                     24:        Zimmermann, the author of this implementation.  The author assumes
                     25:        no liability for any breach of patent law resulting from the
                     26:        unlicensed use of this software by the user.
                     27: 
                     28:        These routines implement all of the multiprecision arithmetic necessary
                     29:        for Rivest-Shamir-Adleman (RSA) public key cryptography.
                     30: 
                     31:        Although originally developed in Microsoft C for the IBM PC, this code 
                     32:        contains few machine dependancies.  It assumes 2's complement 
                     33:        arithmetic.  It can be adapted to 8-bit, 16-bit, or 32-bit machines, 
                     34:        lowbyte-highbyte order or highbyte-lowbyte order.  This version
                     35:        has been converted to ANSI C.
                     36: 
                     37: 
                     38:        The internal representation for these extended precision integer
                     39:        "registers" is an array of "units".  A unit is a machine word, which
                     40:        is either an 8-bit byte, a 16-bit unsigned integer, or a 32-bit
                     41:        unsigned integer, depending on the machine's word size.  For example,
                     42:        an IBM PC or AT uses a unit size of 16 bits.  To perform arithmetic
                     43:        on these huge precision integers, we pass pointers to these unit
                     44:        arrays to various subroutines.  A pointer to an array of units is of
                     45:        type unitptr.  This is a pointer to a huge integer "register".
                     46: 
                     47:        When calling a subroutine, we always pass a pointer to the BEGINNING
                     48:        of the array of units, regardless of the byte order of the machine.
                     49:        On a lowbyte-first machine, such as the Intel 80x86, this unitptr
                     50:        points to the LEAST significant unit, and the array of units increases
                     51:        significance to the right.  On a highbyte-first machine, such as the
                     52:        Motorola 680x0, this unitptr points to the MOST significant unit, and
                     53:        the array of units decreases significance to the right.
                     54: */
                     55: 
                     56: /* #define COUNTMULTS */ /* count modmults for performance studies */
                     57: 
                     58: #ifdef DEBUG
                     59: #ifndef EMBEDDED       /* not EMBEDDED - not compiling for embedded target */
                     60: #include <stdio.h>     /* for printf, etc. */
                     61: #include <conio.h>     /* DEBUG mode:  for kbhit() */
                     62: #define poll_for_break() {while (kbhit()) getch();}
                     63: #endif /* not EMBEDDED */
                     64: #else
                     65: #define poll_for_break() /* stub */
                     66: #endif /* not DEBUG */
                     67: 
                     68: #define RSALIB
                     69: #include "rsalib.h"
                     70: 
                     71: short global_precision = 0; /* units of precision for all routines */
                     72: /*     global_precision is the unit precision last set by set_precision.
                     73:        Initially, set_precision() should be called to define global_precision
                     74:        before using any of these other RSA library routines.
                     75:        i.e.:   set_precision(MAX_UNIT_PRECISION);
                     76: */
                     77: 
                     78: #ifdef PORTABLE
                     79: /*************** multiprecision library primitives ****************/
                     80: /*     The following portable C primitives should be recoded in assembly.
                     81:        The equivalent assembly primitives are externally defined under
                     82:        different names, unless PORTABLE is defined.  See the header file
                     83:        "rsalib.h" for further details.
                     84: 
                     85:        The carry bit mechanism we use for these portable primitives
                     86:        will only work if the size of unit is smaller than the size of
                     87:        a long integer.  In most cases, this means UNITSIZE must be
                     88:        less than 32 bits -- if you use the C portable primitives.
                     89: */
                     90: 
                     91: typedef unsigned long int ulint;
                     92: #define carrybit ((ulint) 1 << UNITSIZE)
                     93: /* ...assumes sizeof(unit) < sizeof(unsigned long) */
                     94: 
                     95: boolean mp_addc
                     96:        (register unitptr r1,register unitptr r2,register boolean carry)
                     97:        /* multiprecision add with carry r2 to r1, result in r1 */
                     98:        /* carry is incoming carry flag-- value should be 0 or 1 */
                     99: {      register ulint x;       /* won't work if sizeof(unit)==sizeof(long) */
                    100:        short precision;        /* number of units to add */
                    101:        precision = global_precision;
                    102:        make_lsbptr(r1,precision);
                    103:        make_lsbptr(r2,precision);
                    104:        while (precision--)     
                    105:        {       x = (ulint) *r1 + (ulint) *post_higherunit(r2) + (ulint) carry;
                    106:                *post_higherunit(r1) = x;
                    107:                carry = ((x & carrybit) != 0L);
                    108:        }
                    109:        return(carry);          /* return the final carry flag bit */
                    110: }      /* mp_addc */
                    111: 
                    112: 
                    113: boolean mp_subb
                    114:        (register unitptr r1,register unitptr r2,register boolean borrow)
                    115:        /* multiprecision subtract with borrow, r2 from r1, result in r1 */
                    116:        /* borrow is incoming borrow flag-- value should be 0 or 1 */
                    117: {      register ulint x;       /* won't work if sizeof(unit)==sizeof(long) */
                    118:        short precision;        /* number of units to subtract */
                    119:        precision = global_precision;
                    120:        make_lsbptr(r1,precision);
                    121:        make_lsbptr(r2,precision);
                    122:        while (precision--)     
                    123:        {       x = (ulint) *r1 - (ulint) *post_higherunit(r2) - (ulint) borrow;
                    124:                *post_higherunit(r1) = x;
                    125:                borrow = ((x & carrybit) != 0L);
                    126:        }
                    127:        return(borrow);         /* return the final carry/borrow flag bit */
                    128: }      /* mp_subb */
                    129: 
                    130: #undef carrybit
                    131: 
                    132: 
                    133: boolean mp_rotate_left(register unitptr r1,register boolean carry)
                    134:        /* multiprecision rotate left 1 bit with carry, result in r1. */
                    135:        /* carry is incoming carry flag-- value should be 0 or 1 */
                    136: {      register short precision;       /* number of units to rotate */
                    137:        register boolean nextcarry;
                    138:        precision = global_precision;
                    139:        make_lsbptr(r1,precision);
                    140:        while (precision--)     
                    141:        {       
                    142:                nextcarry = (((signedunit) *r1) < 0);
                    143:                /* nextcarry = ((*r1 & uppermostbit) != 0); */
                    144:                *r1 <<= 1 ;
                    145:                if (carry) *r1 |= 1;
                    146:                carry = nextcarry;
                    147:                pre_higherunit(r1);
                    148:        }
                    149:        return(nextcarry);      /* return the final carry flag bit */
                    150: }      /* mp_rotate_left */
                    151: 
                    152: /************** end of primitives that should be in assembly *************/
                    153: #endif /* PORTABLE */
                    154: 
                    155: 
                    156: /*     The mp_rotate_right function is not called in any time-critical
                    157:        situations in RSA functions, so it doesn't need to be coded 
                    158:        in assembly language.
                    159: */
                    160: boolean mp_rotate_right(register unitptr r1,register boolean carry)
                    161:        /* multiprecision rotate right 1 bit with carry, result in r1. */
                    162:        /* carry is incoming carry flag-- value should be 0 or 1 */
                    163: {      register short precision;       /* number of units to rotate */
                    164:        register boolean nextcarry;
                    165:        precision = global_precision;
                    166:        make_msbptr(r1,precision);
                    167:        while (precision--)     
                    168:        {       nextcarry = *r1 & 1;
                    169:                *r1 >>= 1 ;
                    170:                if (carry) *r1 |= uppermostbit;
                    171:                carry = nextcarry;
                    172:                pre_lowerunit(r1);
                    173:        }
                    174:        return(nextcarry);      /* return the final carry flag bit */
                    175: }      /* mp_rotate_right */
                    176: 
                    177: 
                    178: short mp_compare(register unitptr r1,register unitptr r2)
                    179: /*     Compares multiprecision integers *r1, *r2, and returns: 
                    180:        -1 iff *r1 < *r2
                    181:         0 iff *r1 == *r2
                    182:        +1 iff *r1 > *r2
                    183: */
                    184: {      register short precision;       /* number of units to compare */
                    185:        precision = global_precision;
                    186:        make_msbptr(r1,precision);
                    187:        make_msbptr(r2,precision);
                    188:        do      
                    189:        {       if (*r1 < *r2) 
                    190:                        return(-1);
                    191:                if (*post_lowerunit(r1) > *post_lowerunit(r2)) 
                    192:                        return(1);
                    193:        } while (--precision);  
                    194:        return(0);      /*  *r1 == *r2  */
                    195: }      /* mp_compare */
                    196: 
                    197: 
                    198: boolean mp_inc(register unitptr r)
                    199:        /* Increment multiprecision integer r. */
                    200: {      register short precision;
                    201:        precision = global_precision;
                    202:        make_lsbptr(r,precision);
                    203:        do      
                    204:        {       if ( ++(*r) ) return(0);        /* no carry */
                    205:                post_higherunit(r);     
                    206:        } while (--precision);
                    207:        return(1);              /* return carry set */
                    208: }      /* mp_inc */
                    209: 
                    210: 
                    211: boolean mp_dec(register unitptr r)
                    212:        /* Decrement multiprecision integer r. */
                    213: {      register short precision;
                    214:        precision = global_precision;
                    215:        make_lsbptr(r,precision);
                    216:        do      
                    217:        {       if ( (signedunit) (--(*r)) != (signedunit) -1 )
                    218:                        return(0);      /* no borrow */
                    219:                post_higherunit(r);     
                    220:        } while (--precision);
                    221:        return(1);              /* return borrow set */
                    222: }      /* mp_dec */
                    223: 
                    224: 
                    225: void mp_neg(register unitptr r)
                    226:        /* Compute 2's complement, the arithmetic negative, of r */
                    227: {      register short precision;       /* number of units to negate */
                    228:        precision = global_precision;
                    229:        mp_dec(r);      /* 2's complement is 1's complement plus 1 */
                    230:        do      /* now do 1's complement */
                    231:        {       *r = ~(*r);
                    232:                r++;    
                    233:        } while (--precision);
                    234: }      /* mp_neg */
                    235: 
                    236: 
                    237: void mp_move(register unitptr dst,register unitptr src)
                    238: {      register short precision;       /* number of units to move */
                    239:        precision = global_precision;
                    240:        do { *dst++ = *src++; } while (--precision);
                    241: }      /* mp_move */
                    242: 
                    243: 
                    244: void mp_init(register unitptr r, word16 value)
                    245:        /* Init multiprecision register r with short value. */
                    246: {      /* Note that mp_init doesn't extend sign bit for >32767 */
                    247:        register short precision;       /* number of units to init */
                    248:        precision = global_precision;
                    249:        make_lsbptr(r,precision);
                    250:        *post_higherunit(r) = value; precision--;
                    251: #ifdef UNIT8
                    252:        *post_higherunit(r) = value >> UNITSIZE; precision--;
                    253: #endif
                    254:        while (precision--)
                    255:                *post_higherunit(r) = 0;
                    256: }      /* mp_init */
                    257: 
                    258: 
                    259: short significance(register unitptr r)
                    260:        /* Returns number of significant units in r */
                    261: {      register short precision;
                    262:        precision = global_precision;
                    263:        make_msbptr(r,precision);
                    264:        do      
                    265:        {       if (*post_lowerunit(r)) 
                    266:                        return(precision);
                    267:        } while (--precision);
                    268:        return(precision);
                    269: }      /* significance */
                    270: 
                    271: 
                    272: void unitfill0(unitptr r,word16 unitcount)
                    273:        /* Zero-fill the unit buffer r. */
                    274: {      while (unitcount--) *r++ = 0;
                    275: }      /* unitfill0 */
                    276: 
                    277: 
                    278: /* The macro normalize(r,precision) "normalizes" a multiprecision integer 
                    279:    by adjusting r and precision to new values.  For Motorola-style processors 
                    280:    (MSB-first), r is a pointer to the MSB of the register, and must
                    281:    be adjusted to point to the first nonzero unit.  For Intel/VAX-style
                    282:    (LSB-first) processors, r is a  pointer to the LSB of the register,
                    283:    and must be left unchanged.  The precision counter is always adjusted,
                    284:    regardless of processor type.  In the case of precision = 0,
                    285:    r becomes undefined.
                    286: */
                    287: 
                    288: 
                    289: /* The macro rescale(r,current_precision,new_precision) rescales
                    290:    a multiprecision integer by adjusting r and its precision to new values.  
                    291:    It can be used to reverse the effects of the normalize 
                    292:    routine given above.  See the comments in normalize concerning 
                    293:    Intel vs. Motorola LSB/MSB conventions.
                    294:    WARNING:  You can only safely call rescale on registers that 
                    295:    you have previously normalized with the above normalize routine, 
                    296:    or are known to be big enough for the new precision.  You may
                    297:    specify a new precision that is smaller than the current precision.
                    298:    You must be careful not to specify a new_precision value that is 
                    299:    too big, or which adjusts the r pointer out of range.
                    300: */
                    301: 
                    302: 
                    303: /* These bit sniffer definitions begin sniffing at the MSB */
                    304: #define sniff_bit(bptr,bitmask)        (*(bptr) & bitmask)
                    305: 
                    306: #define init_bitsniffer(bptr,bitmask,prec,bits) \
                    307: { normalize(bptr,prec); \
                    308:   if (!prec) \
                    309:     return(0); \
                    310:   bits = units2bits(prec); \
                    311:   make_msbptr(bptr,prec); bitmask = uppermostbit; \
                    312:   while (!sniff_bit(bptr,bitmask)) \
                    313:   { bitmask >>= 1; bits--; \
                    314:   } \
                    315: }
                    316: 
                    317: #define bump_bitsniffer(bptr,bitmask) \
                    318: { if (!(bitmask >>= 1)) \
                    319:   { bitmask = uppermostbit; \
                    320:     post_lowerunit(bptr); \
                    321:   } \
                    322: }
                    323: 
                    324: /* the following two macros are used by mp_udiv */
                    325: #define bump_2bitsniffers(bptr,bptr2,bitmask) \
                    326: { if (!(bitmask >>= 1)) \
                    327:   { bitmask = uppermostbit; \
                    328:     post_lowerunit(bptr); \
                    329:     post_lowerunit(bptr2); \
                    330:   } \
                    331: }
                    332: 
                    333: #define stuff_bit(bptr,bitmask)        *(bptr) |= bitmask
                    334: 
                    335: 
                    336: int mp_udiv(register unitptr remainder,register unitptr quotient,
                    337:        register unitptr dividend,register unitptr divisor)
                    338:        /* Unsigned divide, treats both operands as positive. */
                    339: {      int bits;
                    340:        short dprec;
                    341:        register unit bitmask;
                    342:        if (testeq(divisor,0))
                    343:                return(-1);     /* zero divisor means divide error */
                    344:        mp_init(remainder,0);
                    345:        mp_init(quotient,0);
                    346:        /* normalize and compute number of bits in dividend first */
                    347:        init_bitsniffer(dividend,bitmask,dprec,bits);
                    348:        /* rescale quotient to same precision (dprec) as dividend */
                    349:        rescale(quotient,global_precision,dprec);
                    350:        make_msbptr(quotient,dprec); 
                    351: 
                    352:        while (bits--)
                    353:        {       mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
                    354:                if (mp_compare(remainder,divisor) >= 0)
                    355:                {       mp_sub(remainder,divisor);
                    356:                        stuff_bit(quotient,bitmask);
                    357:                }
                    358:                bump_2bitsniffers(dividend,quotient,bitmask); 
                    359:        }
                    360:        return(0);
                    361: } /* mp_udiv */
                    362: 
                    363: 
                    364: int mp_div(register unitptr remainder,register unitptr quotient,
                    365:        register unitptr dividend,register unitptr divisor)
                    366:        /* Signed divide, either or both operands may be negative. */
                    367: {      boolean dvdsign,dsign;
                    368:        int status;
                    369:        dvdsign = (mp_tstminus(dividend)!=0);
                    370:        dsign = (mp_tstminus(divisor)!=0);
                    371:        if (dvdsign) mp_neg(dividend);
                    372:        if (dsign) mp_neg(divisor);
                    373:        status = mp_udiv(remainder,quotient,dividend,divisor);
                    374:        if (dvdsign) mp_neg(dividend);          /* repair caller's dividend */
                    375:        if (dsign) mp_neg(divisor);             /* repair caller's divisor */
                    376:        if (status<0) return(status);           /* divide error? */
                    377:        if (dvdsign) mp_neg(remainder);
                    378:        if (dvdsign ^ dsign) mp_neg(quotient);
                    379:        return(status);
                    380: } /* mp_div */
                    381: 
                    382: 
                    383: word16 mp_shortdiv(register unitptr quotient,
                    384:        register unitptr dividend,register word16 divisor)
                    385: /*     This function does a fast divide and mod on a multiprecision dividend
                    386:        using a short integer divisor returning a short integer remainder.
                    387:        This is an unsigned divide.  It treats both operands as positive.
                    388:        It is used mainly for faster printing of large numbers in base 10. 
                    389: */
                    390: {      int bits;
                    391:        short dprec;
                    392:        register unit bitmask;
                    393:        register word16 remainder;
                    394:        if (!divisor)   /* if divisor == 0 */
                    395:                return(-1);     /* zero divisor means divide error */
                    396:        remainder=0;
                    397:        mp_init(quotient,0);
                    398:        /* normalize and compute number of bits in dividend first */
                    399:        init_bitsniffer(dividend,bitmask,dprec,bits);
                    400:        /* rescale quotient to same precision (dprec) as dividend */
                    401:        rescale(quotient,global_precision,dprec);
                    402:        make_msbptr(quotient,dprec); 
                    403: 
                    404:        while (bits--)
                    405:        {       remainder <<= 1;
                    406:                if (sniff_bit(dividend,bitmask))
                    407:                        remainder++;
                    408:                if (remainder >= divisor)
                    409:                {       remainder -= divisor;
                    410:                        stuff_bit(quotient,bitmask);
                    411:                }
                    412:                bump_2bitsniffers(dividend,quotient,bitmask); 
                    413:        }       
                    414:        return(remainder);
                    415: } /* mp_shortdiv */
                    416: 
                    417: 
                    418: int mp_mod(register unitptr remainder,
                    419:        register unitptr dividend,register unitptr divisor)
                    420:        /* Unsigned divide, treats both operands as positive. */
                    421: {      int bits;
                    422:        short dprec;
                    423:        register unit bitmask;
                    424:        if (testeq(divisor,0))
                    425:                return(-1);     /* zero divisor means divide error */
                    426:        mp_init(remainder,0);
                    427:        /* normalize and compute number of bits in dividend first */
                    428:        init_bitsniffer(dividend,bitmask,dprec,bits);
                    429: 
                    430:        while (bits--)
                    431:        {       mp_rotate_left(remainder,(boolean)(sniff_bit(dividend,bitmask)!=0));
                    432:                msub(remainder,divisor);
                    433:                bump_bitsniffer(dividend,bitmask);
                    434:        }       
                    435:        return(0);
                    436: } /* mp_mod */
                    437: 
                    438: 
                    439: word16 mp_shortmod(register unitptr dividend,register word16 divisor)
                    440: /*     This function does a fast mod operation on a multprecision dividend
                    441:        using a short integer modulus returning a short integer remainder.
                    442:        This is an unsigned divide.  It treats both operands as positive.
                    443:        It is used mainly for fast sieve searches for large primes. 
                    444: */
                    445: {      int bits;
                    446:        short dprec;
                    447:        register unit bitmask;
                    448:        register word16 remainder;
                    449:        if (!divisor)   /* if divisor == 0 */
                    450:                return(-1);     /* zero divisor means divide error */
                    451:        remainder=0;
                    452:        /* normalize and compute number of bits in dividend first */
                    453:        init_bitsniffer(dividend,bitmask,dprec,bits);
                    454: 
                    455:        while (bits--)
                    456:        {       remainder <<= 1;
                    457:                if (sniff_bit(dividend,bitmask))
                    458:                        remainder++;
                    459:                if (remainder >= divisor) remainder -= divisor;
                    460:                bump_bitsniffer(dividend,bitmask);
                    461:        }       
                    462:        return(remainder);
                    463: } /* mp_shortmod */
                    464: 
                    465: 
                    466: 
                    467: #ifndef COMB_MULT      /* if not "comb" multiply, use peasant multiply */
                    468: 
                    469: int mp_mult(register unitptr prod,
                    470:        register unitptr multiplicand,register unitptr multiplier)
                    471:        /* Computes multiprecision prod = multiplicand * multiplier */
                    472: {      /* Uses "Russian peasant" multiply algorithm. */
                    473:        int bits;
                    474:        register unit bitmask;
                    475:        short mprec;
                    476:        mp_init(prod,0);
                    477:        if (testeq(multiplicand,0))
                    478:                return(0);      /* zero multiplicand means zero product */
                    479:        /* normalize and compute number of bits in multiplier first */
                    480:        init_bitsniffer(multiplier,bitmask,mprec,bits);
                    481: 
                    482:        while (bits--)
                    483:        {       mp_shift_left(prod);
                    484:                if (sniff_bit(multiplier,bitmask))
                    485:                {       mp_add(prod,multiplicand);
                    486:                }
                    487:                bump_bitsniffer(multiplier,bitmask);
                    488:        }
                    489:        return(0);      
                    490: }      /* mp_mult */
                    491: 
                    492: #else  /* use "comb" multiply algorithm */
                    493: 
                    494: int mp_mult(register unitptr prod,
                    495:        register unitptr multiplicand, register unitptr multiplier)
                    496:        /*      Computes multiprecision prod = multiplicand * multiplier */
                    497: {      /*      Uses interleaved comb multiply algorithm.
                    498:                This improved multiply more than twice as fast as a Russian 
                    499:                peasant multiply, because it does a lot fewer shifts. 
                    500:                Must have global_precision set to the size of the multiplicand 
                    501:                plus UNITSIZE-1 SLOP_BITS.  Produces a product that is the sum 
                    502:                of the lengths of the multiplier and multiplicand.
                    503:        */
                    504:        int bits;
                    505:        register unit bitmask;
                    506:        unitptr product, mplier, temp;
                    507:        short mprec,mprec2;
                    508:        unit mplicand[MAX_UNIT_PRECISION];
                    509:        mp_init(prod,0);                /* better clear full width--double precision */
                    510:        if (testeq(multiplicand,0))
                    511:                return(0);      /* zero multiplicand means zero product */
                    512: 
                    513:        mp_move(mplicand,multiplicand); /* save it from damage */
                    514: 
                    515:        normalize(multiplier,mprec);
                    516:        if (!mprec)
                    517:                return(0);
                    518: 
                    519:        make_lsbptr(multiplier,mprec);
                    520:        bitmask = 1;    /* start scan at LSB of multiplier */
                    521: 
                    522:        do      /* UNITSIZE times */
                    523:        {       /* do for bits 0-15 */
                    524:                product = prod;
                    525:                mplier = multiplier;
                    526:                mprec2 = mprec;
                    527:                while (mprec2--)        /* do for each word in multiplier */
                    528:                {
                    529:                        if (sniff_bit(mplier,bitmask))
                    530:                        {       if (mp_addc(product,multiplicand,0))    /* ripple carry */
                    531:                                {       /* After 1st time thru, this is rarely encountered. */
                    532:                                        temp = msbptr(product,global_precision);
                    533:                                        pre_higherunit(temp);
                    534:                                        /* temp now points to LSU of carry region. */
                    535:                                        unmake_lsbptr(temp,global_precision);
                    536:                                        mp_inc(temp);
                    537:                                }       /* ripple carry */
                    538:                        }
                    539:                        pre_higherunit(mplier);
                    540:                        pre_higherunit(product);
                    541:                }
                    542:                if (!(bitmask <<= 1))
                    543:                        break;
                    544:                mp_shift_left(multiplicand);
                    545: 
                    546:        } while (TRUE);
                    547: 
                    548:        mp_move(multiplicand,mplicand); /* recover */
                    549: 
                    550:        return(0);      /* normal return */     
                    551: }      /* mp_mult */
                    552: 
                    553: #endif /* COMB_MULT */
                    554: 
                    555: 
                    556: /*     mp_modmult computes a multiply combined with a modulo operation. 
                    557:        Before calling mp_modmult, you must first call
                    558:        stage_modulus(the_modulus)
                    559: */
                    560: 
                    561: #ifdef COUNTMULTS
                    562: /* "number of modmults" counters, used for performance studies. */
                    563: static unsigned int tally_modmults = 0;
                    564: static unsigned int tally_modsquares = 0;
                    565: #endif /* COUNTMULTS */
                    566: 
                    567: #ifndef ASM_MODMULT    /* not assembly primitive modmult */
                    568: 
                    569: #ifdef PEASANT
                    570: /* Conventional Russian peasant multiply with modulo algorithm. */
                    571: 
                    572: static unitptr modulus = 0;    /* used only by mp_modmult */
                    573: 
                    574: int stage_peasant_modulus(unitptr n)
                    575: /*     Must pass modulus to stage_modulus before calling modmult.
                    576:        Assumes that global_precision has already been adjusted to the
                    577:        size of the modulus, plus SLOP_BITS.
                    578: */
                    579: {      /* For this simple version of modmult, just copy unit pointer. */
                    580:        modulus = n;
                    581:        return(0);      /* normal return */
                    582: }      /* stage_peasant_modulus */
                    583: 
                    584: 
                    585: int peasant_modmult(register unitptr prod,
                    586:        unitptr multiplicand,register unitptr multiplier)
                    587: {      /*      "Russian peasant" multiply algorithm, combined with a modulo 
                    588:                operation.  This is a simple naive replacement for Merritt's 
                    589:                faster modmult algorithm.  References global unitptr "modulus".
                    590:                Computes:  prod = (multiplicand*multiplier) mod modulus
                    591:                WARNING: All the arguments must be less than the modulus!
                    592:        */
                    593:        int bits;
                    594:        register unit bitmask;
                    595:        short mprec;
                    596:        mp_init(prod,0);
                    597: /*     if (testeq(multiplicand,0))
                    598:                return(0); */   /* zero multiplicand means zero product */
                    599:        /* normalize and compute number of bits in multiplier first */
                    600:        init_bitsniffer(multiplier,bitmask,mprec,bits);
                    601: 
                    602:        while (bits--)
                    603:        {       mp_shift_left(prod);
                    604:                msub(prod,modulus);     /* turns mult into modmult */
                    605:                if (sniff_bit(multiplier,bitmask))
                    606:                {       mp_add(prod,multiplicand);
                    607:                        msub(prod,modulus);     /* turns mult into modmult */
                    608:                }
                    609:                bump_bitsniffer(multiplier,bitmask);
                    610:        }
                    611:        return(0);
                    612: }      /* peasant_modmult */
                    613: 
                    614: 
                    615: /*     If we are using a version of mp_modmult that uses internal tables 
                    616:        in memory, we have to call modmult_burn() at the end of mp_modexp.
                    617:        This is so that no sensitive data is left in memory after the program 
                    618:        exits.  The Russian peasant method doesn't use any such tables.
                    619: */
                    620: static void peasant_burn(void)
                    621: /*     Alias for modmult_burn, called only from mp_modexp().  Destroys
                    622:        internal modmult tables.  This version does nothing because no 
                    623:        tables are used by the Russian peasant modmult. */
                    624: { }    /* peasant_burn */
                    625: 
                    626: #endif /* PEASANT */
                    627: 
                    628: 
                    629: #ifdef MERRITT
                    630: /*=========================================================================*/
                    631: /*
                    632:        This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ.
                    633:        Also refined by Zhahai Stewart to reduce the number of subtracts.
                    634:        It performs a multiply combined with a modulo operation, without 
                    635:        going into "double precision".  It is faster than the Russian peasant
                    636:        method, and still works well on machines that lack a fast hardware 
                    637:        multiply instruction.  
                    638: */
                    639: 
                    640: /*     The following support functions, macros, and data structures
                    641:        are used only by Merritt's modmult algorithm... */
                    642: 
                    643: static void mp_lshift_unit(register unitptr r1)
                    644: /*     Shift r1 1 whole unit to the left.  Used only by modmult function. */
                    645: {      register short precision;
                    646:        register unitptr r2;
                    647:        precision = global_precision;
                    648:        make_msbptr(r1,precision);
                    649:        r2 = r1;
                    650:        while (--precision)
                    651:                *post_lowerunit(r1) = *pre_lowerunit(r2);
                    652:        *r1 = 0;
                    653: }      /* mp_lshift_unit */
                    654: 
                    655: 
                    656: /* moduli_buf contains shifted images of the modulus, set by stage_modulus */
                    657: static unit moduli_buf[UNITSIZE][MAX_UNIT_PRECISION] = {0};
                    658: static unitptr moduli[UNITSIZE+1] = /* contains pointers into moduli_buf */
                    659: {      0
                    660:        ,&moduli_buf[ 0][0], &moduli_buf[ 1][0], &moduli_buf[ 2][0], &moduli_buf[ 3][0], 
                    661:         &moduli_buf[ 4][0], &moduli_buf[ 5][0], &moduli_buf[ 6][0], &moduli_buf[ 7][0]
                    662: #ifndef UNIT8
                    663:        ,&moduli_buf[ 8][0], &moduli_buf[ 9][0], &moduli_buf[10][0], &moduli_buf[11][0], 
                    664:         &moduli_buf[12][0], &moduli_buf[13][0], &moduli_buf[14][0], &moduli_buf[15][0] 
                    665: #ifndef UNIT16 /* and not UNIT8 */
                    666:        ,&moduli_buf[16][0], &moduli_buf[17][0], &moduli_buf[18][0], &moduli_buf[19][0], 
                    667:         &moduli_buf[20][0], &moduli_buf[21][0], &moduli_buf[22][0], &moduli_buf[23][0], 
                    668:         &moduli_buf[24][0], &moduli_buf[25][0], &moduli_buf[26][0], &moduli_buf[27][0], 
                    669:         &moduli_buf[28][0], &moduli_buf[29][0], &moduli_buf[30][0], &moduli_buf[31][0]
                    670: #endif /* UNIT16 and UNIT8 not defined */
                    671: #endif /* UNIT8 not defined */
                    672: };
                    673: 
                    674: /*     To optimize msubs, need following 2 unit arrays, each filled
                    675:        with the most significant unit and next-to-most significant unit
                    676:        of the preshifted images of the RSA modulus. */
                    677: static unit msu_moduli[UNITSIZE+1] = {0}; /* most signif. unit */
                    678: static unit nmsu_moduli[UNITSIZE+1] = {0}; /* next-most signif. unit */
                    679: 
                    680: /*     mpdbuf contains preshifted images of the multiplicand, mod n.
                    681:        It is used only by mp_modmult.  It could be staticly declared
                    682:        inside of mp_modmult, but we put it outside mp_modmult so that 
                    683:        it can be wiped clean by modmult_burn(), which is called at the
                    684:        end of mp_modexp.  This is so that no sensitive data is left in 
                    685:        memory after the program exits.
                    686: */
                    687: static unit mpdbuf[UNITSIZE-1][MAX_UNIT_PRECISION] = {0};
                    688: 
                    689: 
                    690: static void stage_mp_images(unitptr images[UNITSIZE],unitptr r)
                    691: /*     Computes UNITSIZE images of r, each shifted left 1 more bit.
                    692:        Used only by modmult function. 
                    693: */
                    694: {      short int i;
                    695:        images[0] = r;  /* no need to move the first image, just copy ptr */
                    696:        for (i=1; i<UNITSIZE; i++)
                    697:        {       mp_move(images[i],images[i-1]);
                    698:                mp_shift_left(images[i]);
                    699:        }
                    700: }      /* stage_mp_images */
                    701: 
                    702: 
                    703: int stage_merritt_modulus(unitptr n)
                    704: /*     Computes UNITSIZE+1 images of modulus, each shifted left 1 more bit.
                    705:        Before calling mp_modmult, you must first stage the moduli images by
                    706:        calling stage_modulus.  n is the pointer to the modulus.
                    707:        Assumes that global_precision has already been adjusted to the
                    708:        size of the modulus, plus SLOP_BITS.
                    709: */
                    710: {      short int i;
                    711:        unitptr msu;    /* ptr to most significant unit, for faster msubs */
                    712:        moduli[0] = n;  /* no need to move the first image, just copy ptr */
                    713: 
                    714:        /* used by optimized msubs macro... */
                    715:        msu = msbptr(n,global_precision);       /* needed by msubs */
                    716:        msu_moduli[0] = *post_lowerunit(msu);
                    717:        nmsu_moduli[0] = *msu;
                    718: 
                    719:        for (i=1; i<UNITSIZE+1; i++)
                    720:        {       mp_move(moduli[i],moduli[i-1]);
                    721:                mp_shift_left(moduli[i]);
                    722: 
                    723:                /* used by optimized msubs macro... */
                    724:                msu = msbptr(moduli[i],global_precision);       /* needed by msubs */
                    725:                msu_moduli[i] = *post_lowerunit(msu);   /* for faster msubs */
                    726:                nmsu_moduli[i] = *msu;
                    727:        }
                    728:        return(0);      /* normal return */
                    729: }      /* stage_merritt_modulus */
                    730: 
                    731: 
                    732: /* The following macros, sniffadd and msubs, are used by modmult... */
                    733: #define sniffadd(i) if (*multiplier & power_of_2(i))  mp_add(prod,mpd[i])
                    734: 
                    735: /* Unoptimized msubs macro (msubs0) follows... */
                    736: /* #define msubs0(i) msub(prod,moduli[i]) 
                    737: */
                    738: 
                    739: /*     To optimize msubs, compare the most significant units of the 
                    740:        product and the shifted modulus before deciding to call the full 
                    741:        mp_compare routine.  Better still, compare the upper two units
                    742:        before deciding to call mp_compare.
                    743:        Optimization of msubs relies heavily on standard C left-to-right 
                    744:        parsimonious evaluation of logical expressions. 
                    745: */
                    746: 
                    747: /* Partially-optimized msubs macro (msubs1) follows... */
                    748: /* #define msubs1(i) if ( \
                    749:   ((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \
                    750:   (p_m || (mp_compare(prod,moduli[i]) >= 0) ) \
                    751:   ) mp_sub(prod,moduli[i])
                    752: */
                    753: 
                    754: /* Fully-optimized msubs macro (msubs2) follows... */
                    755: #define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \
                    756:  (p_m==0) && ( (*nmsu_prod>nmsu_moduli[i]) || ( \
                    757:  (*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \
                    758:  mp_sub(prod,moduli[i])
                    759: 
                    760: 
                    761: int merritt_modmult(register unitptr prod,
                    762:        unitptr multiplicand,register unitptr multiplier)
                    763:        /*      Performs combined multiply/modulo operation.  
                    764:                Computes:  prod = (multiplicand*multiplier) mod modulus
                    765:                WARNING: All the arguments must be less than the modulus!
                    766:                Assumes the modulus has been predefined by first calling 
                    767:                stage_modulus.
                    768:        */
                    769: {
                    770:        /* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro...*/
                    771:        register signedunit p_m;
                    772:        register unitptr msu_prod;      /* ptr to most significant unit of product */
                    773:        register unitptr nmsu_prod;     /* next-most signif. unit of product */
                    774:        short mprec;            /* precision of multiplier, in units */
                    775:        /*      Array mpd contains a list of pointers to preshifted images of 
                    776:                the multiplicand: */
                    777:        static unitptr mpd[UNITSIZE] =
                    778:        {        0,              &mpdbuf[ 0][0], &mpdbuf[ 1][0], &mpdbuf[ 2][0],
                    779:                 &mpdbuf[ 3][0], &mpdbuf[ 4][0], &mpdbuf[ 5][0], &mpdbuf[ 6][0]
                    780: #ifndef UNIT8
                    781:                ,&mpdbuf[ 7][0], &mpdbuf[ 8][0], &mpdbuf[ 9][0], &mpdbuf[10][0],
                    782:                 &mpdbuf[11][0], &mpdbuf[12][0], &mpdbuf[13][0], &mpdbuf[14][0]
                    783: #ifndef UNIT16 /* and not UNIT8 */
                    784:                ,&mpdbuf[15][0], &mpdbuf[16][0], &mpdbuf[17][0], &mpdbuf[18][0],
                    785:                 &mpdbuf[19][0], &mpdbuf[20][0], &mpdbuf[21][0], &mpdbuf[22][0],
                    786:                 &mpdbuf[23][0], &mpdbuf[24][0], &mpdbuf[25][0], &mpdbuf[26][0],
                    787:                 &mpdbuf[27][0], &mpdbuf[28][0], &mpdbuf[29][0], &mpdbuf[30][0]
                    788: #endif /* UNIT16 and UNIT8 not defined */
                    789: #endif /* UNIT8 not defined */
                    790:        };
                    791: 
                    792:        /* Compute preshifted images of multiplicand, mod n: */
                    793:        stage_mp_images(mpd,multiplicand);
                    794: 
                    795:        /* To optimize msubs, set up msu_prod and nmsu_prod: */
                    796:        msu_prod = msbptr(prod,global_precision); /* Get ptr to MSU of prod */
                    797:        nmsu_prod = msu_prod;
                    798:        post_lowerunit(nmsu_prod); /* Get next-MSU of prod */
                    799: 
                    800:        /*      To understand this algorithm, it would be helpful to first 
                    801:                study the conventional Russian peasant modmult algorithm.
                    802:                This one does about the same thing as Russian peasant, but
                    803:                is organized differently to save some steps.  It loops 
                    804:                through the multiplier a word (unit) at a time, instead of 
                    805:                a bit at a time.  It word-shifts the product instead of 
                    806:                bit-shifting it, so it should be faster.  It also does about 
                    807:                half as many subtracts as Russian peasant.
                    808:        */
                    809: 
                    810:        mp_init(prod,0);        /* Initialize product to 0. */
                    811: 
                    812:        /*      The way mp_modmult is actually used in cryptographic 
                    813:                applications, there will NEVER be a zero multiplier or 
                    814:                multiplicand.  So there is no need to optimize for that 
                    815:                condition.
                    816:        */
                    817: /*     if (testeq(multiplicand,0))
                    818:                return(0); */   /* zero multiplicand means zero product */
                    819:        /* Normalize and compute number of units in multiplier first: */
                    820:        normalize(multiplier,mprec);
                    821:        if (mprec==0)   /* if precision of multiplier is 0 */
                    822:                return(0);      /* zero multiplier means zero product */
                    823:        make_msbptr(multiplier,mprec); /* start at MSU of multiplier */
                    824: 
                    825:        while (mprec--) /* Loop for the number of units in the multiplier */
                    826:        {
                    827:                /*      Shift the product one whole unit to the left.
                    828:                        This is part of the multiply phase of modmult. 
                    829:                */
                    830: 
                    831:                mp_lshift_unit(prod);
                    832: 
                    833:                /*      Sniff each bit in the current unit of the multiplier, 
                    834:                        and conditionally add the the corresponding preshifted 
                    835:                        image of the multiplicand to the product.
                    836:                        This is also part of the multiply phase of modmult.
                    837: 
                    838:                        The following loop is unrolled for speed, using macros...
                    839: 
                    840:                for (i=UNITSIZE-1; i>=0; i--)
                    841:                   if (*multiplier & power_of_2(i)) 
                    842:                                mp_add(prod,mpd[i]);
                    843:                */
                    844: #ifndef UNIT8
                    845: #ifndef UNIT16 /* and not UNIT8 */
                    846:                sniffadd(31); 
                    847:                sniffadd(30); 
                    848:                sniffadd(29); 
                    849:                sniffadd(28);
                    850:                sniffadd(27); 
                    851:                sniffadd(26); 
                    852:                sniffadd(25); 
                    853:                sniffadd(24);
                    854:                sniffadd(23); 
                    855:                sniffadd(22); 
                    856:                sniffadd(21); 
                    857:                sniffadd(20);
                    858:                sniffadd(19); 
                    859:                sniffadd(18); 
                    860:                sniffadd(17); 
                    861:                sniffadd(16);
                    862: #endif /* not UNIT16 and not UNIT8 */
                    863:                sniffadd(15); 
                    864:                sniffadd(14); 
                    865:                sniffadd(13); 
                    866:                sniffadd(12);
                    867:                sniffadd(11); 
                    868:                sniffadd(10); 
                    869:                sniffadd(9); 
                    870:                sniffadd(8);
                    871: #endif /* not UNIT8 */
                    872:                sniffadd(7); 
                    873:                sniffadd(6); 
                    874:                sniffadd(5); 
                    875:                sniffadd(4);
                    876:                sniffadd(3); 
                    877:                sniffadd(2); 
                    878:                sniffadd(1); 
                    879:                sniffadd(0);
                    880: 
                    881:                /*      The product may have grown by as many as UNITSIZE+1 
                    882:                        bits.  That's why we have global_precision set to the
                    883:                        size of the modulus plus UNITSIZE+1 slop bits.  
                    884:                        Now reduce the product back down by conditionally 
                    885:                        subtracting     the UNITSIZE+1 preshifted images of the 
                    886:                        modulus.  This is the modulo reduction phase of modmult.
                    887: 
                    888:                        The following loop is unrolled for speed, using macros...
                    889: 
                    890:                for (i=UNITSIZE; i>=0; i--)
                    891:                        if (mp_compare(prod,moduli[i]) >= 0) 
                    892:                                mp_sub(prod,moduli[i]); 
                    893:                */
                    894: #ifndef UNIT8
                    895: #ifndef UNIT16 /* and not UNIT8 */
                    896:                msubs(32); 
                    897:                msubs(31); 
                    898:                msubs(30); 
                    899:                msubs(29); 
                    900:                msubs(28);
                    901:                msubs(27); 
                    902:                msubs(26); 
                    903:                msubs(25); 
                    904:                msubs(24);
                    905:                msubs(23); 
                    906:                msubs(22); 
                    907:                msubs(21); 
                    908:                msubs(20);
                    909:                msubs(19); 
                    910:                msubs(18); 
                    911:                msubs(17); 
                    912: #endif /* not UNIT16 and not UNIT8 */
                    913:                msubs(16);
                    914:                msubs(15); 
                    915:                msubs(14); 
                    916:                msubs(13); 
                    917:                msubs(12);
                    918:                msubs(11); 
                    919:                msubs(10); 
                    920:                msubs(9); 
                    921: #endif /* not UNIT8 */
                    922:                msubs(8);
                    923:                msubs(7); 
                    924:                msubs(6); 
                    925:                msubs(5); 
                    926:                msubs(4);
                    927:                msubs(3); 
                    928:                msubs(2); 
                    929:                msubs(1); 
                    930:                msubs(0);
                    931: 
                    932:                /* Bump pointer to next lower unit of multiplier: */
                    933:                post_lowerunit(multiplier);
                    934: 
                    935:        }       /* Loop for the number of units in the multiplier */
                    936: 
                    937:        return(0);      /* normal return */
                    938: 
                    939: }      /* merritt_modmult */
                    940: 
                    941: 
                    942: #undef msubs
                    943: #undef sniffadd
                    944: 
                    945: 
                    946: /*     Merritt's mp_modmult function leaves some internal tables in memory,
                    947:        so we have to call modmult_burn() at the end of mp_modexp.  
                    948:        This is so that no cryptographically sensitive data is left in memory 
                    949:        after the program exits.
                    950: */
                    951: static void merritt_burn(void)
                    952: /*     Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */
                    953: {      unitfill0(&(mpdbuf[0][0]),(UNITSIZE-1)*MAX_UNIT_PRECISION);
                    954:        unitfill0(&(moduli_buf[0][0]),(UNITSIZE)*MAX_UNIT_PRECISION);
                    955:        unitfill0(msu_moduli,UNITSIZE+1);
                    956:        unitfill0(nmsu_moduli,UNITSIZE+1);
                    957: } /* merritt_burn() */
                    958: 
                    959: /******* end of Merritt's MODMULT stuff. *******/
                    960: /*=========================================================================*/
                    961: #endif /* MERRITT */
                    962: 
                    963: 
                    964: #ifdef STEWART /* using Zhahai Stewart's modmult algorithm */
                    965: #include "stewart.c"
                    966: #endif /* STEWART */
                    967: 
                    968: #endif /* not ASM_MODMULT */
                    969: 
                    970: 
                    971: int countbits(unitptr r)
                    972:        /* Returns number of significant bits in r */
                    973: {      int bits;
                    974:        short prec;
                    975:        register unit bitmask;
                    976:        init_bitsniffer(r,bitmask,prec,bits);
                    977:        return(bits);
                    978: } /* countbits */
                    979: 
                    980: 
                    981: int mp_modexp(register unitptr expout,register unitptr expin,
                    982:        register unitptr exponent,register unitptr modulus)
                    983: {      /*      Russian peasant combined exponentiation/modulo algorithm.
                    984:                Calls modmult instead of mult. 
                    985:                Computes:  expout = (expin**exponent) mod modulus
                    986:                WARNING: All the arguments must be less than the modulus!
                    987:        */
                    988:        int bits;
                    989:        short oldprecision;
                    990:        register unit bitmask;
                    991:        unit product[MAX_UNIT_PRECISION];
                    992:        short eprec;
                    993: 
                    994: #ifdef COUNTMULTS
                    995:        tally_modmults = 0;     /* clear "number of modmults" counter */
                    996:        tally_modsquares = 0;   /* clear "number of modsquares" counter */
                    997: #endif /* COUNTMULTS */
                    998:        mp_init(expout,1);
                    999:        if (testeq(exponent,0))
                   1000:        {       if (testeq(expin,0))
                   1001:                        return(-1); /* 0 to the 0th power means return error */
                   1002:                return(0);      /* otherwise, zero exponent means expout is 1 */
                   1003:        }
                   1004:        if (testeq(modulus,0))
                   1005:                return(-2);             /* zero modulus means error */
                   1006: #if SLOP_BITS > 0      /* if there's room for sign bits */
                   1007:        if (mp_tstminus(modulus))
                   1008:                return(-2);             /* negative modulus means error */
                   1009: #endif /* SLOP_BITS > 0 */
                   1010:        if (mp_compare(expin,modulus) >= 0)
                   1011:                return(-3); /* if expin >= modulus, return error */
                   1012:        if (mp_compare(exponent,modulus) >= 0)
                   1013:                return(-4); /* if exponent >= modulus, return error */
                   1014: 
                   1015:        oldprecision = global_precision;        /* save global_precision */
                   1016:        /* set smallest optimum precision for this modulus */
                   1017:        set_precision(bits2units(countbits(modulus)+SLOP_BITS));
                   1018:        /* rescale all these registers to global_precision we just defined */
                   1019:        rescale(modulus,oldprecision,global_precision);
                   1020:        rescale(expin,oldprecision,global_precision);
                   1021:        rescale(exponent,oldprecision,global_precision);
                   1022:        rescale(expout,oldprecision,global_precision);
                   1023: 
                   1024:        if (stage_modulus(modulus))
                   1025:        {       set_precision(oldprecision);    /* restore original precision */
                   1026:                return(-5);             /* unstageable modulus (STEWART algorithm) */
                   1027:        }
                   1028: 
                   1029:        /* normalize and compute number of bits in exponent first */
                   1030:        init_bitsniffer(exponent,bitmask,eprec,bits);
                   1031: 
                   1032:        /* We can "optimize out" the first modsquare and modmult: */
                   1033:        bits--;         /* We know for sure at this point that bits>0 */
                   1034:        mp_move(expout,expin);          /*  expout = (1*1)*expin; */
                   1035:        bump_bitsniffer(exponent,bitmask);
                   1036:        
                   1037:        while (bits--)
                   1038:        {
                   1039:                poll_for_break(); /* polls keyboard, allows ctrl-C to abort program */
                   1040: #ifdef COUNTMULTS
                   1041:                tally_modsquares++;     /* bump "number of modsquares" counter */
                   1042: #endif /* COUNTMULTS */
                   1043:                mp_modsquare(product,expout);
                   1044:                mp_move(expout,product);
                   1045:                if (sniff_bit(exponent,bitmask))
                   1046:                {       mp_modmult(product,expout,expin);
                   1047:                        mp_move(expout,product);
                   1048: #ifdef COUNTMULTS
                   1049:                        tally_modmults++;       /* bump "number of modmults" counter */
                   1050: #endif /* COUNTMULTS */
                   1051:                }
                   1052:                bump_bitsniffer(exponent,bitmask);
                   1053:        }       /* while bits-- */
                   1054:        mp_burn(product);       /* burn the evidence on the stack */
                   1055:        modmult_burn(); /* ask mp_modmult to also burn its own evidence */
                   1056: 
                   1057: #ifdef COUNTMULTS      /* diagnostic analysis */
                   1058:        {       long atomic_mults;
                   1059:                unsigned int unitcount,totalmults;
                   1060:                unitcount = bits2units(countbits(modulus));
                   1061:                /* calculation assumes modsquare takes as long as a modmult: */
                   1062:                atomic_mults = (long) tally_modmults * (unitcount * unitcount);
                   1063:                atomic_mults += (long) tally_modsquares * (unitcount * unitcount);
                   1064:                printf("%ld atomic mults for ",atomic_mults);
                   1065:                printf("%d+%d = %d modsqr+modmlt, at %d bits, %d words.\n",
                   1066:                        tally_modsquares,tally_modmults,
                   1067:                        tally_modsquares+tally_modmults,
                   1068:                        countbits(modulus), unitcount);
                   1069:        }
                   1070: #endif /* COUNTMULTS */
                   1071: 
                   1072:        set_precision(oldprecision);    /* restore original precision */
                   1073:        return(0);              /* normal return */
                   1074: }      /* mp_modexp */
                   1075: 
                   1076: 
                   1077: char *copyright_notice(void)
                   1078: /* force linker to include copyright notice in the executable object image. */
                   1079: { return ("(c)1986 Philip Zimmermann"); } /* copyright_notice */
                   1080: 
                   1081: 
                   1082: int rsa_decrypt(unitptr M, unitptr C,
                   1083:        unitptr d, unitptr p, unitptr q, unitptr u)
                   1084:        /*      Uses Chinese Remainder Theorem shortcut for RSA decryption.
                   1085:                M is the output plaintext message.
                   1086:                C is the input ciphertext message.
                   1087:                d is the secret decryption exponent.
                   1088:                p and q are the prime factors of n.
                   1089:                u is the multiplicative inverse of p, mod q.
                   1090:                Note that u is precomputed on the assumption that p<q.
                   1091:                n, the common modulus, is not used here because of the
                   1092:                Chinese Remainder Theorem shortcut.
                   1093:        */
                   1094: {
                   1095:        unit p2[MAX_UNIT_PRECISION];
                   1096:        unit q2[MAX_UNIT_PRECISION];
                   1097:        unit temp1[MAX_UNIT_PRECISION];
                   1098:        unit temp2[MAX_UNIT_PRECISION];
                   1099:        int status;
                   1100: 
                   1101:        mp_init(M,1);   /* initialize result in case of error */
                   1102: 
                   1103:        if (mp_compare(p,q) >= 0)       /* ensure that p<q */
                   1104:        {       /* swap the pointers p and q */
                   1105:                unitptr t;
                   1106:                t = p;  p = q; q = t;
                   1107:        }
                   1108: 
                   1109: /*     Rather than decrypting by computing modexp with full mod n
                   1110:        precision, compute a shorter modexp with mod p precision... */
                   1111: 
                   1112: /*     p2 = [ (C mod p)**( d mod (p-1) ) ] mod p               */
                   1113:        mp_move(temp1,p);
                   1114:        mp_dec(temp1);                  /* temp1 = p-1 */
                   1115:        mp_mod(temp2,d,temp1);  /* temp2 = d mod (p-1) ) */
                   1116:        mp_mod(temp1,C,p);              /* temp1 = C mod p  */
                   1117:        status = mp_modexp(p2,temp1,temp2,p);
                   1118:        if (status < 0) /* mp_modexp returned an error. */
                   1119:                return(status); /* error return */
                   1120: 
                   1121: /*     Now compute a short modexp with mod q precision... */
                   1122: 
                   1123: /*     q2 = [ (C mod q)**( d mod (q-1) ) ] mod q               */
                   1124:        mp_move(temp1,q);
                   1125:        mp_dec(temp1);                  /* temp1 = q-1 */
                   1126:        mp_mod(temp2,d,temp1);  /* temp2 = d mod (q-1) */
                   1127:        mp_mod(temp1,C,q);              /* temp1 = C mod q  */
                   1128:        status = mp_modexp(q2,temp1,temp2,q);
                   1129:        if (status < 0) /* mp_modexp returned an error. */
                   1130:                return(status); /* error return */
                   1131: 
                   1132: /*     Now use the multiplicative inverse u to glue together the
                   1133:        two halves, saving a lot of time by avoiding a full mod n
                   1134:        exponentiation.  At key generation time, u was computed
                   1135:        with the secret key as the multiplicative inverse of
                   1136:        p, mod q such that:  (p*u) mod q = 1, assuming p<q.
                   1137: */
                   1138:        if (mp_compare(p2,q2) == 0)     /* only happens if C<p */
                   1139:                mp_move(M,p2);
                   1140:        else
                   1141:        {       /*      q2 = q2 - p2;  if q2<0 then q2 = q2 + q  */
                   1142:                if (mp_sub(q2,p2))      /* if the result went negative... */
                   1143:                        mp_add(q2,q);           /* add q to q2 */
                   1144: 
                   1145:                /*      M = p2 + ( p * [(q2*u) mod q] )         */
                   1146:                mp_mult(temp1,q2,u);            /* temp1 = q2*u  */
                   1147:                mp_mod(temp2,temp1,q);          /* temp2 = temp1 mod q */
                   1148:                mp_mult(temp1,p,temp2); /* temp1 = p * temp2 */
                   1149:                mp_add(temp1,p2);               /* temp1 = temp1 + p2 */
                   1150:                mp_move(M,temp1);               /* M = temp1 */
                   1151:        }
                   1152: 
                   1153:        mp_burn(p2);    /* burn the evidence on the stack...*/
                   1154:        mp_burn(q2);
                   1155:        mp_burn(temp1);
                   1156:        mp_burn(temp2);
                   1157:        /* Do an explicit reference to the copyright notice so that the linker
                   1158:           will be forced to include it in the executable object image... */
                   1159:        copyright_notice();     /* has no real effect at run time */
                   1160:        return(0);      /* normal return */
                   1161: }      /* rsa_decrypt */
                   1162: 
                   1163: 
                   1164: /*
                   1165: **     mp_sqrt - returns square root of a number.
                   1166: **     returns -1 for error, 0 for perfect square, 1 for not perfect square.
                   1167: **     Not used by any RSA encrypt or decrypt functions.
                   1168: **     Used by some factoring algorithms.  This version needs optimization.
                   1169: **     by Charles W. Merritt  July 15, 1989, refined by PRZ.
                   1170: */
                   1171: int mp_sqrt(unitptr quotient,unitptr dividend)
                   1172:        /* Quotient is returned as the square root of dividend. */
                   1173: {
                   1174:        register char next2bits; /* "period", or group of 2 bits of dividend */
                   1175:        register unit dvdbitmask,qbitmask;
                   1176:        unit remainder[MAX_UNIT_PRECISION],rjq[MAX_UNIT_PRECISION],
                   1177:                divisor[MAX_UNIT_PRECISION];
                   1178:        unsigned int qbits,qprec,dvdbits,dprec,oldprecision;
                   1179:        int notperfect;
                   1180: 
                   1181:        mp_init(quotient,0);
                   1182:        if (mp_tstminus(dividend)) /* if dividend<0, return error */
                   1183:        {       mp_dec(quotient);       /* quotient = -1 */
                   1184:                return(-1);
                   1185:        }
                   1186: 
                   1187:        /* normalize and compute number of bits in dividend first */
                   1188:        init_bitsniffer(dividend,dvdbitmask,dprec,dvdbits);
                   1189:        /* init_bitsniffer returns a 0 if dvdbits is 0 */
                   1190:        if (dvdbits==1)
                   1191:        {       mp_init(quotient,1);    /* square root of 1 is 1 */
                   1192:                return(0);
                   1193:        }
                   1194: 
                   1195:        /* rescale quotient to half the precision of dividend */
                   1196:        qbits = (dvdbits+1) >> 1;
                   1197:        qprec = bits2units(qbits);
                   1198:        rescale(quotient,global_precision,qprec);
                   1199:        make_msbptr(quotient,qprec); 
                   1200:        qbitmask = power_of_2( (qbits-1) & (UNITSIZE-1)) ;
                   1201: 
                   1202:        /* Set smallest optimum precision for this square root.
                   1203:           The low-level primitives are affected by the call to set_precision.
                   1204:           Even though the dividend precision is bigger than the precision
                   1205:           we will use, no low-level primitives will be used on the dividend.
                   1206:           They will be used on the quotient, remainder, and rjq, which are
                   1207:           smaller precision.
                   1208:        */
                   1209:        oldprecision = global_precision;        /* save global_precision */
                   1210:        set_precision(bits2units(qbits+3));     /* 3 bits of precision slop */
                   1211: 
                   1212:        /* special case: sqrt of 1st 2 (binary) digits of dividend
                   1213:                is 1st (binary) digit of quotient.  This is always 1. */
                   1214:        stuff_bit(quotient,qbitmask);
                   1215:        bump_bitsniffer(quotient,qbitmask);
                   1216:        mp_init(rjq,1); /* rjq is Right Justified Quotient */
                   1217: 
                   1218:        if (!(dvdbits & 1))
                   1219:        {       /* even number of bits in dividend */
                   1220:                next2bits = 2;
                   1221:                bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
                   1222:                if (sniff_bit(dividend,dvdbitmask)) next2bits++;
                   1223:                bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
                   1224:        }
                   1225:        else
                   1226:        {       /* odd number of bits in dividend */
                   1227:                next2bits = 1;
                   1228:                bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
                   1229:        }
                   1230: 
                   1231:        mp_init(remainder,next2bits-1);
                   1232: 
                   1233:        /* dvdbits is guaranteed to be even at this point */
                   1234: 
                   1235:        while (dvdbits)
                   1236:        {       next2bits=0;
                   1237:                if (sniff_bit(dividend,dvdbitmask)) next2bits=2;
                   1238:                bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
                   1239:                if (sniff_bit(dividend,dvdbitmask)) next2bits++;
                   1240:                bump_bitsniffer(dividend,dvdbitmask); dvdbits--;
                   1241:                mp_rotate_left(remainder,(boolean)((next2bits&2)!=0));
                   1242:                mp_rotate_left(remainder,(boolean)((next2bits&1)!=0));
                   1243: 
                   1244:                /*      "divisor" is trial divisor, complete divisor is 4*rjq 
                   1245:                        or 4*rjq+1.
                   1246:                        Subtract divisor times its last digit from remainder.
                   1247:                        If divisor ends in 1, remainder -= divisor*1,
                   1248:                        or if divisor ends in 0, remainder -= divisor*0 (do nothing).
                   1249:                        Last digit of divisor inflates divisor as large as possible
                   1250:                        yet still subtractable from remainder.
                   1251:                */
                   1252:                mp_move(divisor,rjq);           /* divisor = 4*rjq+1 */
                   1253:                mp_rotate_left(divisor,0);
                   1254:                mp_rotate_left(divisor,1);
                   1255:                if (mp_compare(remainder,divisor) >= 0)
                   1256:                {       mp_sub(remainder,divisor);
                   1257:                        stuff_bit(quotient,qbitmask);
                   1258:                        mp_rotate_left(rjq,1);
                   1259:                }
                   1260:                else
                   1261:                        mp_rotate_left(rjq,0);
                   1262:                bump_bitsniffer(quotient,qbitmask);
                   1263:        }
                   1264:        notperfect = testne(remainder,0); /* not a perfect square? */
                   1265:        set_precision(oldprecision);    /* restore original precision */
                   1266:        return(notperfect);     /* normal return */
                   1267: 
                   1268: }      /* mp_sqrt */
                   1269: 
                   1270: 
                   1271: /*     These are notes on computing the square root the manual old-fashioned 
                   1272:        way.  This is the basis of the above fast sqrt algorthm:
                   1273: 
                   1274: 1)     Separate the number into groups (periods) of two digits each,
                   1275:        beginning with units or at the decimal point.
                   1276: 2)     Find the greatest perfect square in the left hand period & write 
                   1277:        its     square root as the first figure of the required root.  Subtract
                   1278:        the square of this number from the left hand period.  Annex to the
                   1279:        remainder the next group so as to form a dividend.
                   1280: 3)     Double the root already found and write it as a partial divisor at 
                   1281:        the left of the new dividend.  Annex one zero digit, making a trial 
                   1282:        divisor, and divide the new dividend by the trial divisor.
                   1283: 4)     Write the quotient in the root as the trial term and also substitute 
                   1284:        this quotient for the annexed zero digit in the partial divisor, 
                   1285:        making the latter complete.
                   1286: 5)     Multiply the complete divisor by the figure just obtained and, 
                   1287:        if possible, subtract the product from the last remainder.
                   1288:        If this product is too large, the trial term of the quotient
                   1289:        must be replaced by the next smaller number and the operations
                   1290:        preformed as before.
                   1291:        (IN BINARY, OUR TRIAL TERM IS ALWAYS 1 AND WE USE IT OR DO NOTHING.)
                   1292: 6)     Proceed in this manner until all periods are used.
                   1293:        If there is still a remainder, it's not a perfect square.
                   1294: */
                   1295: 
                   1296: 
                   1297: /****************** end of RSA library ****************************/
                   1298: 
                   1299: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.