Annotation of pgp/src/r3000.c, revision 1.1

1.1     ! root        1: /*     r3000.c - MIPS R3000 adaptation of selected routines from mpilib.c.
        !             2: 
        !             3:        C source code for multiprecision arithmetic library routines.
        !             4:        R3000 optimizations by Castor Fu, in Sep 1992.
        !             5:        See the comments in the file mpilib.c for details on the purpose of
        !             6:        these functions.
        !             7: 
        !             8:        Original version of mpilib.c implemented in 1986 by 
        !             9:        Philip R. Zimmermann, updated by PRZ 1990-1992.
        !            10: 
        !            11:        Boulder Software Engineering
        !            12:        3021 Eleventh Street
        !            13:        Boulder, CO 80304
        !            14:        (303) 541-0140
        !            15: 
        !            16:        (c) Copyright 1986-92 by Philip Zimmermann.  All rights reserved.
        !            17:        The author assumes no liability for damages resulting from the use
        !            18:        of this software, even if the damage results from defects in this
        !            19:        software.  No warranty is expressed or implied.
        !            20: 
        !            21:        -- Adapt so that the unit size can be a full int size. This
        !            22:           was inspired by the lack of a carry bit on MIPSco processors.
        !            23:           On such machines, the advantage of assembly language coding
        !            24:           is less clear.  (Except for the multiply. . . )
        !            25: 
        !            26:           One reason for creating an R3000 module of C routines is that
        !            27:           we have one routine (P_ROTL) which is compiled particularly
        !            28:           poorly, and chose to explicitly unroll the loop.
        !            29: 
        !            30: */
        !            31: 
        !            32: 
        !            33: #include "mpilib.h"
        !            34: 
        !            35: #define word_v(r,n)    (r)[tohigher(n)]
        !            36: 
        !            37: extern short global_precision; /* units of precision for all routines */
        !            38: /*     global_precision is the unit precision last set by set_precision.
        !            39:        Initially, set_precision() should be called to define global_precision
        !            40:        before using any of these other multiprecision library routines.
        !            41:        i.e.:   set_precision(MAX_UNIT_PRECISION);
        !            42: */
        !            43: 
        !            44: /*************** multiprecision library primitives ****************/
        !            45: /*     The following portable C primitives should be recoded in assembly.
        !            46:        The equivalent assembly primitives are externally defined under
        !            47:        different names, unless PORTABLE is defined.  See the header file
        !            48:        "mpilib.h" for further details.
        !            49: */
        !            50: 
        !            51: typedef unsigned long int ulint;
        !            52: /* ...assumes sizeof(unit) <= sizeof(unsigned long) */
        !            53: 
        !            54: boolean mp_addc
        !            55:        (register unitptr r1,register unitptr r2,register boolean carry)
        !            56:        /* multiprecision add with carry r2 to r1, result in r1 */
        !            57:        /* carry is incoming carry flag-- value should be 0 or 1 */
        !            58: {      register unit x,x1;
        !            59:        int i;
        !            60:        short precision;        /* number of units to add */
        !            61:        unsigned int mcarry = carry;
        !            62:        precision = global_precision;
        !            63:        make_lsbptr(r1,precision);
        !            64:        make_lsbptr(r2,precision);
        !            65: 
        !            66:        i = precision&3;
        !            67:        while (i--)
        !            68:        {       
        !            69:                  if (mcarry) {
        !            70:                     x = *r1 + *r2 + 1;
        !            71:                     x1 = ~ *r1;
        !            72:                     mcarry = 1 ^ (*r2  < x1);
        !            73:                  } else {
        !            74:                     x = *r1 + *r2;
        !            75:                     mcarry = (x < *r1) ;
        !            76:                  }
        !            77:                post_higherunit(r2);
        !            78:                *post_higherunit(r1) = x;
        !            79:        }
        !            80: 
        !            81:        i = precision>>2;
        !            82:        while (i--)
        !            83:        {       
        !            84: #define ADDC(n) \
        !            85:                  if (mcarry) { \
        !            86:                     x = word_v(r1,n) + word_v(r2,n) + 1; \
        !            87:                     x1 = ~word_v(r1,n); \
        !            88:                     mcarry = 1 ^ (word_v(r2,n)  < x1); \
        !            89:                  } else { \
        !            90:                     x = word_v(r1,n) + word_v(r2,n); \
        !            91:                     mcarry = (x < word_v(r1,n)) ; \
        !            92:                  } \
        !            93:                  word_v(r1,n) = x;
        !            94: 
        !            95:                  ADDC(0);
        !            96:                  ADDC(1);
        !            97:                  ADDC(2);
        !            98:                  ADDC(3);
        !            99: #undef ADDC
        !           100:                r1 += tohigher(4);
        !           101:                r2 += tohigher(4);
        !           102:        }
        !           103: 
        !           104:        return(mcarry);         /* return the final carry flag bit */
        !           105: }      /* mp_addc */
        !           106: 
        !           107: 
        !           108: boolean mp_subb
        !           109:        (register unitptr r1,register unitptr r2,register boolean borrow)
        !           110:        /* multiprecision subtract with borrow, r2 from r1, result in r1 */
        !           111:        /* borrow is incoming borrow flag-- value should be 0 or 1 */
        !           112: {      register unit x;        
        !           113:        unsigned int mborrow = borrow;
        !           114:        int i; 
        !           115:        short precision;        /* number of units to subtract */
        !           116:        precision = global_precision;
        !           117:        make_lsbptr(r1,precision);
        !           118:        make_lsbptr(r2,precision);
        !           119: 
        !           120:        i = precision&3;
        !           121:        while (i--)
        !           122:        {       if (mborrow) {
        !           123:                  x = *r1 - *r2 - mborrow;
        !           124:                  mborrow = 1 ^ (*r2 < *r1);
        !           125:                } else {
        !           126:                  x = *r1 - *r2;
        !           127:                  mborrow = (*r1 < *r2);
        !           128:                }
        !           129:                post_higherunit(r2);
        !           130:                *post_higherunit(r1) = x;
        !           131:        }
        !           132: 
        !           133:        i = precision>>2;
        !           134:        while (i--)
        !           135:        {       
        !           136: 
        !           137: #define        SUBB(n) \
        !           138:                if (mborrow) { \
        !           139:                  x = word_v(r1,n) - word_v(r2,n) - mborrow; \
        !           140:                  mborrow = 1 ^ (word_v(r2,n) < word_v(r1,n)); \
        !           141:                } else { \
        !           142:                  x = word_v(r1,n) - word_v(r2,n); \
        !           143:                  mborrow = (word_v(r1,n) < word_v(r2,n)); \
        !           144:                } \
        !           145:                word_v(r1,n) = x;
        !           146: 
        !           147:                SUBB(0);
        !           148:                SUBB(1);
        !           149:                SUBB(2);
        !           150:                SUBB(3);
        !           151: #undef SUBB
        !           152: 
        !           153: 
        !           154:                r1 += tohigher(4);
        !           155:                r2 += tohigher(4);
        !           156:        }
        !           157: 
        !           158:        return(mborrow);        /* return the final carry/borrow flag bit */
        !           159: }      /* mp_subb */
        !           160: 
        !           161: 
        !           162: /* We've unrolled the loop here because the MIPS/DEC C compiler is too
        !           163:  * stupid to do so.  Presumably on register poor machines this is not
        !           164:  * a clearly good idea
        !           165:  */
        !           166: 
        !           167: boolean mp_rotate_left(register unitptr r1,register boolean carry)
        !           168:        /* multiprecision rotate left 1 bit with carry, result in r1. */
        !           169:        /* carry is incoming carry flag-- value should be 0 or 1 */
        !           170: {      register int precision; /* number of units to rotate */
        !           171:        unsigned int mcarry = carry, carry2,carry3,carry4, nextcarry; 
        !           172: 
        !           173:        int i;
        !           174:        precision = global_precision;
        !           175:        make_lsbptr(r1,precision);
        !           176:        i = precision &3;
        !           177:        while (i--) {
        !           178:                nextcarry = (((signedunit) *r1) < 0);
        !           179:                *r1 = (*r1 << 1) | mcarry;
        !           180:                mcarry = nextcarry;
        !           181:                pre_higherunit(r1);
        !           182:        }
        !           183:        i = precision>>2;
        !           184:        while (i--)
        !           185:        {
        !           186:                carry2 = (((signedunit) *r1) < 0);
        !           187:                *r1 = (*r1 << 1) | mcarry;
        !           188: 
        !           189:                carry3 = (((signedunit) word_v(r1,1)) < 0);
        !           190:                word_v(r1,1) = (word_v(r1,1) << 1) | carry2;
        !           191: 
        !           192:                carry4 = (((signedunit) word_v(r1,2)) < 0);
        !           193:                word_v(r1,2) = (word_v(r1,2) << 1) | carry3;
        !           194: 
        !           195:                mcarry = (((signedunit) word_v(r1,3)) < 0);
        !           196:                word_v(r1,3) = (word_v(r1,3) << 1) | carry4;
        !           197: 
        !           198:                r1 += tohigher(4);
        !           199:        }
        !           200:        return(mcarry); /* return the final carry flag bit */
        !           201: }      /* mp_rotate_left */
        !           202: 
        !           203: void p_setp(short nbits){} ;
        !           204: 
        !           205: /************** end of primitives that should be in assembly *************/
        !           206: 
        !           207: #include "lmul.h"      /* used only by R3000.c */
        !           208: 
        !           209: #ifdef MUNIT16
        !           210: typedef unsigned short MULTUNIT;
        !           211: #endif
        !           212: 
        !           213: #ifdef MUNIT32
        !           214: typedef unsigned int MULTUNIT;
        !           215: #endif
        !           216: #define        MULTUNITSIZE    (sizeof(MULTUNIT)*8)
        !           217: #define MULTUNIT_hmask ((1UL << (MULTUNITSIZE/2))-1)
        !           218: #define MULTUNIT_mask   ((MULTUNIT_hmask<<(MULTUNITSIZE/2)) | MULTUNIT_hmask)
        !           219: 
        !           220: p_smula (
        !           221: MULTUNIT *prod,
        !           222: MULTUNIT *multiplicand,
        !           223: MULTUNIT multiplier)
        !           224: {
        !           225:        short i=global_precision;
        !           226:        int count=i,j;
        !           227:        MULTUNIT *pp=prod, *mp=multiplicand;  
        !           228:        MULTUNIT pl, ph, npl, nph, cl, ch;
        !           229:        MULTUNIT al, ah;
        !           230: 
        !           231:        cl = 0;
        !           232:        ch = 0;
        !           233:        
        !           234:        if (i <= 0 ) return;
        !           235:        lmul(multiplier, *multiplicand, pl, ph);
        !           236:        post_higherunit(multiplicand);
        !           237:        al = 0;
        !           238:        ah = 0;
        !           239:        ch = 0;
        !           240:        while(--i) {
        !           241:          lmul(multiplier, *multiplicand, npl, nph);
        !           242:          post_higherunit(multiplicand);
        !           243:          al += *prod;
        !           244:          cl = (al < *prod);
        !           245:          al += pl;
        !           246:          cl += (al < pl);
        !           247:          ah += ph;
        !           248:          ch = (ah < ph);
        !           249:          ah += cl;
        !           250:          ch += (ah < cl);
        !           251:          *prod = al;
        !           252:          post_higherunit(prod);
        !           253:          al = ah;
        !           254:          ah = ch;
        !           255:          pl = npl;
        !           256:          ph = nph;
        !           257:        }
        !           258:        al += *prod;
        !           259:        cl = (al < *prod);
        !           260:        al += pl;
        !           261:        cl += (al < pl);
        !           262:        ah += ph;
        !           263:        ch = (ah < ph);
        !           264:        ah += cl;
        !           265:        ch += (ah < cl);
        !           266: 
        !           267:        *prod = al;
        !           268:        post_higherunit(prod);
        !           269: 
        !           270:        *prod += ah;
        !           271:        ch += (*prod < ah);
        !           272:        post_higherunit(prod);
        !           273: 
        !           274:        /*
        !           275:        *prod += ch ;
        !           276:        post_higherunit(prod);
        !           277:        */
        !           278: 
        !           279: 
        !           280: }      /* mp_smul */
        !           281: 
        !           282: 
        !           283: 
        !           284: static  int  mshift;                   /* number of bits for
        !           285:                                        ** recip scaling          */
        !           286: static  MULTUNIT reciph;               /* MSunit of scaled recip */
        !           287: static  MULTUNIT recipl;               /* LSunit of scaled recip */
        !           288: 
        !           289: void p_setrecip(MULTUNIT rh, MULTUNIT rl, int m)
        !           290: {
        !           291:        reciph = rh;
        !           292:        recipl = rl;
        !           293:        mshift = m;
        !           294: }
        !           295: 
        !           296: 
        !           297: MULTUNIT p_quo_digit (MULTUNIT *dividend) 
        !           298: {
        !           299:        MULTUNIT ql, qh, q0l, q0h, q1l, q1h, q2l, q2h;
        !           300:        MULTUNIT q3h, q3l, q4h, q4l;
        !           301:        MULTUNIT mutemp;
        !           302: 
        !           303: 
        !           304: /*     Compute the least significant product group.
        !           305:        The last terms of q1 and q2 perform upward rounding, which is
        !           306:        needed to guarantee that the result not be too small.
        !           307: */
        !           308:        lmul(dividend[tohigher(-2)] ^ MULTUNIT_mask, reciph, q1l, q1h);
        !           309:        q1l += reciph;
        !           310:        q1h += (q1l <  reciph);
        !           311: 
        !           312:        lmul(dividend[tohigher(-1)]^ MULTUNIT_mask, recipl, q2l, q2h);
        !           313:        q2h += 1;
        !           314: 
        !           315:        q1l = (q1l >> 1) + (q1h << (MULTUNITSIZE-1));
        !           316:        q1h >>= 1;
        !           317:        q2l =  (q2l >> 1) + (q2h << (MULTUNITSIZE-1));
        !           318:        q2h >>= 1;
        !           319: 
        !           320:        q0l = q1l + q2l;
        !           321:        q0h = q1h + q2h + (q0l < q1l);
        !           322: 
        !           323:        q0l++;
        !           324:        q0h+= (q0l < 1);
        !           325: 
        !           326: /*     Compute the middle significant product group.   */
        !           327: 
        !           328:        lmul(dividend[tohigher(-1)]^MULTUNIT_mask, reciph, q3l, q3h);
        !           329:        lmul(dividend[0] ^ MULTUNIT_mask, recipl,  q4l, q4h);
        !           330: 
        !           331:        q3l = (q3l >> 1) + (q3h << (MULTUNITSIZE-1));
        !           332:        q3h >>= 1;
        !           333:        q4l =  (q4l >> 1) + (q4h << (MULTUNITSIZE-1));
        !           334:        q4h >>= 1;
        !           335: 
        !           336:        ql = q0h + 1;
        !           337:        qh = (ql < 1);
        !           338: 
        !           339:        ql += q3l;
        !           340:        qh += q3h + (ql < q3l);
        !           341:        ql += q4l;
        !           342:        qh += q4h + (ql < q4l);
        !           343: 
        !           344: /*     Compute the most significant term and add in the others */
        !           345: 
        !           346:        lmul((dividend[0] ^ MULTUNIT_mask), reciph, q1l, q1h);
        !           347:        q1h = (q1h << 1) + (q1l>>(MULTUNITSIZE-1));
        !           348:        q1l = (q1l << 1) ;
        !           349: 
        !           350:        ql = (ql >> (MULTUNITSIZE-2)) + (qh <<  2);
        !           351:        qh >>= (MULTUNITSIZE-2);
        !           352: 
        !           353:        ql += q1l;
        !           354:        qh += (ql < q1l) + q1h;
        !           355: 
        !           356:        if (mshift != MULTUNITSIZE) {
        !           357:                ql = (ql >> mshift) + (qh << (MULTUNITSIZE-mshift));
        !           358:                qh >>= mshift;
        !           359:        } else {
        !           360:                ql = qh;
        !           361:                qh  = 0;
        !           362:        }
        !           363: 
        !           364: /*     Prevent overflow and then wipe out the intermediate results. */
        !           365:        mutemp = (qh != 0) ?  MULTUNIT_mask : ql;
        !           366: 
        !           367:        return(mutemp);
        !           368: }
        !           369: 
        !           370: /* end of r3000.c */
        !           371: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.