Annotation of pgp/src/r3000.c, revision 1.1.1.1

1.1       root        1: /*     r3000.c - MIPS R3000 adaptation of selected routines from mpilib.c.
                      2: 
                      3:        C source code for multiprecision arithmetic library routines.
                      4:        R3000 optimizations by Castor Fu, in Sep 1992.
                      5:        See the comments in the file mpilib.c for details on the purpose of
                      6:        these functions.
                      7: 
                      8:        Original version of mpilib.c implemented in 1986 by 
                      9:        Philip R. Zimmermann, updated by PRZ 1990-1992.
                     10: 
                     11:        Boulder Software Engineering
                     12:        3021 Eleventh Street
                     13:        Boulder, CO 80304
                     14:        (303) 541-0140
                     15: 
                     16:        (c) Copyright 1986-92 by Philip Zimmermann.  All rights reserved.
                     17:        The author assumes no liability for damages resulting from the use
                     18:        of this software, even if the damage results from defects in this
                     19:        software.  No warranty is expressed or implied.
                     20: 
                     21:        -- Adapt so that the unit size can be a full int size. This
                     22:           was inspired by the lack of a carry bit on MIPSco processors.
                     23:           On such machines, the advantage of assembly language coding
                     24:           is less clear.  (Except for the multiply. . . )
                     25: 
                     26:           One reason for creating an R3000 module of C routines is that
                     27:           we have one routine (P_ROTL) which is compiled particularly
                     28:           poorly, and chose to explicitly unroll the loop.
                     29: 
                     30: */
                     31: 
                     32: 
                     33: #include "mpilib.h"
                     34: 
                     35: #define word_v(r,n)    (r)[tohigher(n)]
                     36: 
                     37: extern short global_precision; /* units of precision for all routines */
                     38: /*     global_precision is the unit precision last set by set_precision.
                     39:        Initially, set_precision() should be called to define global_precision
                     40:        before using any of these other multiprecision library routines.
                     41:        i.e.:   set_precision(MAX_UNIT_PRECISION);
                     42: */
                     43: 
                     44: /*************** multiprecision library primitives ****************/
                     45: /*     The following portable C primitives should be recoded in assembly.
                     46:        The equivalent assembly primitives are externally defined under
                     47:        different names, unless PORTABLE is defined.  See the header file
                     48:        "mpilib.h" for further details.
                     49: */
                     50: 
                     51: typedef unsigned long int ulint;
                     52: /* ...assumes sizeof(unit) <= sizeof(unsigned long) */
                     53: 
                     54: boolean mp_addc
                     55:        (register unitptr r1,register unitptr r2,register boolean carry)
                     56:        /* multiprecision add with carry r2 to r1, result in r1 */
                     57:        /* carry is incoming carry flag-- value should be 0 or 1 */
                     58: {      register unit x,x1;
                     59:        int i;
                     60:        short precision;        /* number of units to add */
                     61:        unsigned int mcarry = carry;
                     62:        precision = global_precision;
                     63:        make_lsbptr(r1,precision);
                     64:        make_lsbptr(r2,precision);
                     65: 
                     66:        i = precision&3;
                     67:        while (i--)
                     68:        {       
                     69:                  if (mcarry) {
                     70:                     x = *r1 + *r2 + 1;
                     71:                     x1 = ~ *r1;
                     72:                     mcarry = 1 ^ (*r2  < x1);
                     73:                  } else {
                     74:                     x = *r1 + *r2;
                     75:                     mcarry = (x < *r1) ;
                     76:                  }
                     77:                post_higherunit(r2);
                     78:                *post_higherunit(r1) = x;
                     79:        }
                     80: 
                     81:        i = precision>>2;
                     82:        while (i--)
                     83:        {       
                     84: #define ADDC(n) \
                     85:                  if (mcarry) { \
                     86:                     x = word_v(r1,n) + word_v(r2,n) + 1; \
                     87:                     x1 = ~word_v(r1,n); \
                     88:                     mcarry = 1 ^ (word_v(r2,n)  < x1); \
                     89:                  } else { \
                     90:                     x = word_v(r1,n) + word_v(r2,n); \
                     91:                     mcarry = (x < word_v(r1,n)) ; \
                     92:                  } \
                     93:                  word_v(r1,n) = x;
                     94: 
                     95:                  ADDC(0);
                     96:                  ADDC(1);
                     97:                  ADDC(2);
                     98:                  ADDC(3);
                     99: #undef ADDC
                    100:                r1 += tohigher(4);
                    101:                r2 += tohigher(4);
                    102:        }
                    103: 
                    104:        return(mcarry);         /* return the final carry flag bit */
                    105: }      /* mp_addc */
                    106: 
                    107: 
                    108: boolean mp_subb
                    109:        (register unitptr r1,register unitptr r2,register boolean borrow)
                    110:        /* multiprecision subtract with borrow, r2 from r1, result in r1 */
                    111:        /* borrow is incoming borrow flag-- value should be 0 or 1 */
                    112: {      register unit x;        
                    113:        unsigned int mborrow = borrow;
                    114:        int i; 
                    115:        short precision;        /* number of units to subtract */
                    116:        precision = global_precision;
                    117:        make_lsbptr(r1,precision);
                    118:        make_lsbptr(r2,precision);
                    119: 
                    120:        i = precision&3;
                    121:        while (i--)
                    122:        {       if (mborrow) {
                    123:                  x = *r1 - *r2 - mborrow;
                    124:                  mborrow = 1 ^ (*r2 < *r1);
                    125:                } else {
                    126:                  x = *r1 - *r2;
                    127:                  mborrow = (*r1 < *r2);
                    128:                }
                    129:                post_higherunit(r2);
                    130:                *post_higherunit(r1) = x;
                    131:        }
                    132: 
                    133:        i = precision>>2;
                    134:        while (i--)
                    135:        {       
                    136: 
                    137: #define        SUBB(n) \
                    138:                if (mborrow) { \
                    139:                  x = word_v(r1,n) - word_v(r2,n) - mborrow; \
                    140:                  mborrow = 1 ^ (word_v(r2,n) < word_v(r1,n)); \
                    141:                } else { \
                    142:                  x = word_v(r1,n) - word_v(r2,n); \
                    143:                  mborrow = (word_v(r1,n) < word_v(r2,n)); \
                    144:                } \
                    145:                word_v(r1,n) = x;
                    146: 
                    147:                SUBB(0);
                    148:                SUBB(1);
                    149:                SUBB(2);
                    150:                SUBB(3);
                    151: #undef SUBB
                    152: 
                    153: 
                    154:                r1 += tohigher(4);
                    155:                r2 += tohigher(4);
                    156:        }
                    157: 
                    158:        return(mborrow);        /* return the final carry/borrow flag bit */
                    159: }      /* mp_subb */
                    160: 
                    161: 
                    162: /* We've unrolled the loop here because the MIPS/DEC C compiler is too
                    163:  * stupid to do so.  Presumably on register poor machines this is not
                    164:  * a clearly good idea
                    165:  */
                    166: 
                    167: boolean mp_rotate_left(register unitptr r1,register boolean carry)
                    168:        /* multiprecision rotate left 1 bit with carry, result in r1. */
                    169:        /* carry is incoming carry flag-- value should be 0 or 1 */
                    170: {      register int precision; /* number of units to rotate */
                    171:        unsigned int mcarry = carry, carry2,carry3,carry4, nextcarry; 
                    172: 
                    173:        int i;
                    174:        precision = global_precision;
                    175:        make_lsbptr(r1,precision);
                    176:        i = precision &3;
                    177:        while (i--) {
                    178:                nextcarry = (((signedunit) *r1) < 0);
                    179:                *r1 = (*r1 << 1) | mcarry;
                    180:                mcarry = nextcarry;
                    181:                pre_higherunit(r1);
                    182:        }
                    183:        i = precision>>2;
                    184:        while (i--)
                    185:        {
                    186:                carry2 = (((signedunit) *r1) < 0);
                    187:                *r1 = (*r1 << 1) | mcarry;
                    188: 
                    189:                carry3 = (((signedunit) word_v(r1,1)) < 0);
                    190:                word_v(r1,1) = (word_v(r1,1) << 1) | carry2;
                    191: 
                    192:                carry4 = (((signedunit) word_v(r1,2)) < 0);
                    193:                word_v(r1,2) = (word_v(r1,2) << 1) | carry3;
                    194: 
                    195:                mcarry = (((signedunit) word_v(r1,3)) < 0);
                    196:                word_v(r1,3) = (word_v(r1,3) << 1) | carry4;
                    197: 
                    198:                r1 += tohigher(4);
                    199:        }
                    200:        return(mcarry); /* return the final carry flag bit */
                    201: }      /* mp_rotate_left */
                    202: 
                    203: void p_setp(short nbits){} ;
                    204: 
                    205: /************** end of primitives that should be in assembly *************/
                    206: 
                    207: #include "lmul.h"      /* used only by R3000.c */
                    208: 
                    209: #ifdef MUNIT16
                    210: typedef unsigned short MULTUNIT;
                    211: #endif
                    212: 
                    213: #ifdef MUNIT32
                    214: typedef unsigned int MULTUNIT;
                    215: #endif
                    216: #define        MULTUNITSIZE    (sizeof(MULTUNIT)*8)
                    217: #define MULTUNIT_hmask ((1UL << (MULTUNITSIZE/2))-1)
                    218: #define MULTUNIT_mask   ((MULTUNIT_hmask<<(MULTUNITSIZE/2)) | MULTUNIT_hmask)
                    219: 
                    220: p_smula (
                    221: MULTUNIT *prod,
                    222: MULTUNIT *multiplicand,
                    223: MULTUNIT multiplier)
                    224: {
                    225:        short i=global_precision;
                    226:        int count=i,j;
                    227:        MULTUNIT *pp=prod, *mp=multiplicand;  
                    228:        MULTUNIT pl, ph, npl, nph, cl, ch;
                    229:        MULTUNIT al, ah;
                    230: 
                    231:        cl = 0;
                    232:        ch = 0;
                    233:        
                    234:        if (i <= 0 ) return;
                    235:        lmul(multiplier, *multiplicand, pl, ph);
                    236:        post_higherunit(multiplicand);
                    237:        al = 0;
                    238:        ah = 0;
                    239:        ch = 0;
                    240:        while(--i) {
                    241:          lmul(multiplier, *multiplicand, npl, nph);
                    242:          post_higherunit(multiplicand);
                    243:          al += *prod;
                    244:          cl = (al < *prod);
                    245:          al += pl;
                    246:          cl += (al < pl);
                    247:          ah += ph;
                    248:          ch = (ah < ph);
                    249:          ah += cl;
                    250:          ch += (ah < cl);
                    251:          *prod = al;
                    252:          post_higherunit(prod);
                    253:          al = ah;
                    254:          ah = ch;
                    255:          pl = npl;
                    256:          ph = nph;
                    257:        }
                    258:        al += *prod;
                    259:        cl = (al < *prod);
                    260:        al += pl;
                    261:        cl += (al < pl);
                    262:        ah += ph;
                    263:        ch = (ah < ph);
                    264:        ah += cl;
                    265:        ch += (ah < cl);
                    266: 
                    267:        *prod = al;
                    268:        post_higherunit(prod);
                    269: 
                    270:        *prod += ah;
                    271:        ch += (*prod < ah);
                    272:        post_higherunit(prod);
                    273: 
                    274:        /*
                    275:        *prod += ch ;
                    276:        post_higherunit(prod);
                    277:        */
                    278: 
                    279: 
                    280: }      /* mp_smul */
                    281: 
                    282: 
                    283: 
                    284: static  int  mshift;                   /* number of bits for
                    285:                                        ** recip scaling          */
                    286: static  MULTUNIT reciph;               /* MSunit of scaled recip */
                    287: static  MULTUNIT recipl;               /* LSunit of scaled recip */
                    288: 
                    289: void p_setrecip(MULTUNIT rh, MULTUNIT rl, int m)
                    290: {
                    291:        reciph = rh;
                    292:        recipl = rl;
                    293:        mshift = m;
                    294: }
                    295: 
                    296: 
                    297: MULTUNIT p_quo_digit (MULTUNIT *dividend) 
                    298: {
                    299:        MULTUNIT ql, qh, q0l, q0h, q1l, q1h, q2l, q2h;
                    300:        MULTUNIT q3h, q3l, q4h, q4l;
                    301:        MULTUNIT mutemp;
                    302: 
                    303: 
                    304: /*     Compute the least significant product group.
                    305:        The last terms of q1 and q2 perform upward rounding, which is
                    306:        needed to guarantee that the result not be too small.
                    307: */
                    308:        lmul(dividend[tohigher(-2)] ^ MULTUNIT_mask, reciph, q1l, q1h);
                    309:        q1l += reciph;
                    310:        q1h += (q1l <  reciph);
                    311: 
                    312:        lmul(dividend[tohigher(-1)]^ MULTUNIT_mask, recipl, q2l, q2h);
                    313:        q2h += 1;
                    314: 
                    315:        q1l = (q1l >> 1) + (q1h << (MULTUNITSIZE-1));
                    316:        q1h >>= 1;
                    317:        q2l =  (q2l >> 1) + (q2h << (MULTUNITSIZE-1));
                    318:        q2h >>= 1;
                    319: 
                    320:        q0l = q1l + q2l;
                    321:        q0h = q1h + q2h + (q0l < q1l);
                    322: 
                    323:        q0l++;
                    324:        q0h+= (q0l < 1);
                    325: 
                    326: /*     Compute the middle significant product group.   */
                    327: 
                    328:        lmul(dividend[tohigher(-1)]^MULTUNIT_mask, reciph, q3l, q3h);
                    329:        lmul(dividend[0] ^ MULTUNIT_mask, recipl,  q4l, q4h);
                    330: 
                    331:        q3l = (q3l >> 1) + (q3h << (MULTUNITSIZE-1));
                    332:        q3h >>= 1;
                    333:        q4l =  (q4l >> 1) + (q4h << (MULTUNITSIZE-1));
                    334:        q4h >>= 1;
                    335: 
                    336:        ql = q0h + 1;
                    337:        qh = (ql < 1);
                    338: 
                    339:        ql += q3l;
                    340:        qh += q3h + (ql < q3l);
                    341:        ql += q4l;
                    342:        qh += q4h + (ql < q4l);
                    343: 
                    344: /*     Compute the most significant term and add in the others */
                    345: 
                    346:        lmul((dividend[0] ^ MULTUNIT_mask), reciph, q1l, q1h);
                    347:        q1h = (q1h << 1) + (q1l>>(MULTUNITSIZE-1));
                    348:        q1l = (q1l << 1) ;
                    349: 
                    350:        ql = (ql >> (MULTUNITSIZE-2)) + (qh <<  2);
                    351:        qh >>= (MULTUNITSIZE-2);
                    352: 
                    353:        ql += q1l;
                    354:        qh += (ql < q1l) + q1h;
                    355: 
                    356:        if (mshift != MULTUNITSIZE) {
                    357:                ql = (ql >> mshift) + (qh << (MULTUNITSIZE-mshift));
                    358:                qh >>= mshift;
                    359:        } else {
                    360:                ql = qh;
                    361:                qh  = 0;
                    362:        }
                    363: 
                    364: /*     Prevent overflow and then wipe out the intermediate results. */
                    365:        mutemp = (qh != 0) ?  MULTUNIT_mask : ql;
                    366: 
                    367:        return(mutemp);
                    368: }
                    369: 
                    370: /* end of r3000.c */
                    371: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.