|
|
1.1 ! root 1: /* r3000.c - MIPS R3000 adaptation of selected routines from mpilib.c. ! 2: ! 3: C source code for multiprecision arithmetic library routines. ! 4: R3000 optimizations by Castor Fu, in Sep 1992. ! 5: See the comments in the file mpilib.c for details on the purpose of ! 6: these functions. ! 7: ! 8: Original version of mpilib.c implemented in 1986 by ! 9: Philip R. Zimmermann, updated by PRZ 1990-1992. ! 10: ! 11: Boulder Software Engineering ! 12: 3021 Eleventh Street ! 13: Boulder, CO 80304 ! 14: (303) 541-0140 ! 15: ! 16: (c) Copyright 1986-92 by Philip Zimmermann. All rights reserved. ! 17: The author assumes no liability for damages resulting from the use ! 18: of this software, even if the damage results from defects in this ! 19: software. No warranty is expressed or implied. ! 20: ! 21: -- Adapt so that the unit size can be a full int size. This ! 22: was inspired by the lack of a carry bit on MIPSco processors. ! 23: On such machines, the advantage of assembly language coding ! 24: is less clear. (Except for the multiply. . . ) ! 25: ! 26: One reason for creating an R3000 module of C routines is that ! 27: we have one routine (P_ROTL) which is compiled particularly ! 28: poorly, and chose to explicitly unroll the loop. ! 29: ! 30: */ ! 31: ! 32: ! 33: #include "mpilib.h" ! 34: ! 35: #define word_v(r,n) (r)[tohigher(n)] ! 36: ! 37: extern short global_precision; /* units of precision for all routines */ ! 38: /* global_precision is the unit precision last set by set_precision. ! 39: Initially, set_precision() should be called to define global_precision ! 40: before using any of these other multiprecision library routines. ! 41: i.e.: set_precision(MAX_UNIT_PRECISION); ! 42: */ ! 43: ! 44: /*************** multiprecision library primitives ****************/ ! 45: /* The following portable C primitives should be recoded in assembly. ! 46: The equivalent assembly primitives are externally defined under ! 47: different names, unless PORTABLE is defined. See the header file ! 48: "mpilib.h" for further details. ! 49: */ ! 50: ! 51: typedef unsigned long int ulint; ! 52: /* ...assumes sizeof(unit) <= sizeof(unsigned long) */ ! 53: ! 54: boolean mp_addc ! 55: (register unitptr r1,register unitptr r2,register boolean carry) ! 56: /* multiprecision add with carry r2 to r1, result in r1 */ ! 57: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 58: { register unit x,x1; ! 59: int i; ! 60: short precision; /* number of units to add */ ! 61: unsigned int mcarry = carry; ! 62: precision = global_precision; ! 63: make_lsbptr(r1,precision); ! 64: make_lsbptr(r2,precision); ! 65: ! 66: i = precision&3; ! 67: while (i--) ! 68: { ! 69: if (mcarry) { ! 70: x = *r1 + *r2 + 1; ! 71: x1 = ~ *r1; ! 72: mcarry = 1 ^ (*r2 < x1); ! 73: } else { ! 74: x = *r1 + *r2; ! 75: mcarry = (x < *r1) ; ! 76: } ! 77: post_higherunit(r2); ! 78: *post_higherunit(r1) = x; ! 79: } ! 80: ! 81: i = precision>>2; ! 82: while (i--) ! 83: { ! 84: #define ADDC(n) \ ! 85: if (mcarry) { \ ! 86: x = word_v(r1,n) + word_v(r2,n) + 1; \ ! 87: x1 = ~word_v(r1,n); \ ! 88: mcarry = 1 ^ (word_v(r2,n) < x1); \ ! 89: } else { \ ! 90: x = word_v(r1,n) + word_v(r2,n); \ ! 91: mcarry = (x < word_v(r1,n)) ; \ ! 92: } \ ! 93: word_v(r1,n) = x; ! 94: ! 95: ADDC(0); ! 96: ADDC(1); ! 97: ADDC(2); ! 98: ADDC(3); ! 99: #undef ADDC ! 100: r1 += tohigher(4); ! 101: r2 += tohigher(4); ! 102: } ! 103: ! 104: return(mcarry); /* return the final carry flag bit */ ! 105: } /* mp_addc */ ! 106: ! 107: ! 108: boolean mp_subb ! 109: (register unitptr r1,register unitptr r2,register boolean borrow) ! 110: /* multiprecision subtract with borrow, r2 from r1, result in r1 */ ! 111: /* borrow is incoming borrow flag-- value should be 0 or 1 */ ! 112: { register unit x; ! 113: unsigned int mborrow = borrow; ! 114: int i; ! 115: short precision; /* number of units to subtract */ ! 116: precision = global_precision; ! 117: make_lsbptr(r1,precision); ! 118: make_lsbptr(r2,precision); ! 119: ! 120: i = precision&3; ! 121: while (i--) ! 122: { if (mborrow) { ! 123: x = *r1 - *r2 - mborrow; ! 124: mborrow = 1 ^ (*r2 < *r1); ! 125: } else { ! 126: x = *r1 - *r2; ! 127: mborrow = (*r1 < *r2); ! 128: } ! 129: post_higherunit(r2); ! 130: *post_higherunit(r1) = x; ! 131: } ! 132: ! 133: i = precision>>2; ! 134: while (i--) ! 135: { ! 136: ! 137: #define SUBB(n) \ ! 138: if (mborrow) { \ ! 139: x = word_v(r1,n) - word_v(r2,n) - mborrow; \ ! 140: mborrow = 1 ^ (word_v(r2,n) < word_v(r1,n)); \ ! 141: } else { \ ! 142: x = word_v(r1,n) - word_v(r2,n); \ ! 143: mborrow = (word_v(r1,n) < word_v(r2,n)); \ ! 144: } \ ! 145: word_v(r1,n) = x; ! 146: ! 147: SUBB(0); ! 148: SUBB(1); ! 149: SUBB(2); ! 150: SUBB(3); ! 151: #undef SUBB ! 152: ! 153: ! 154: r1 += tohigher(4); ! 155: r2 += tohigher(4); ! 156: } ! 157: ! 158: return(mborrow); /* return the final carry/borrow flag bit */ ! 159: } /* mp_subb */ ! 160: ! 161: ! 162: /* We've unrolled the loop here because the MIPS/DEC C compiler is too ! 163: * stupid to do so. Presumably on register poor machines this is not ! 164: * a clearly good idea ! 165: */ ! 166: ! 167: boolean mp_rotate_left(register unitptr r1,register boolean carry) ! 168: /* multiprecision rotate left 1 bit with carry, result in r1. */ ! 169: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 170: { register int precision; /* number of units to rotate */ ! 171: unsigned int mcarry = carry, carry2,carry3,carry4, nextcarry; ! 172: ! 173: int i; ! 174: precision = global_precision; ! 175: make_lsbptr(r1,precision); ! 176: i = precision &3; ! 177: while (i--) { ! 178: nextcarry = (((signedunit) *r1) < 0); ! 179: *r1 = (*r1 << 1) | mcarry; ! 180: mcarry = nextcarry; ! 181: pre_higherunit(r1); ! 182: } ! 183: i = precision>>2; ! 184: while (i--) ! 185: { ! 186: carry2 = (((signedunit) *r1) < 0); ! 187: *r1 = (*r1 << 1) | mcarry; ! 188: ! 189: carry3 = (((signedunit) word_v(r1,1)) < 0); ! 190: word_v(r1,1) = (word_v(r1,1) << 1) | carry2; ! 191: ! 192: carry4 = (((signedunit) word_v(r1,2)) < 0); ! 193: word_v(r1,2) = (word_v(r1,2) << 1) | carry3; ! 194: ! 195: mcarry = (((signedunit) word_v(r1,3)) < 0); ! 196: word_v(r1,3) = (word_v(r1,3) << 1) | carry4; ! 197: ! 198: r1 += tohigher(4); ! 199: } ! 200: return(mcarry); /* return the final carry flag bit */ ! 201: } /* mp_rotate_left */ ! 202: ! 203: void p_setp(short nbits){} ; ! 204: ! 205: /************** end of primitives that should be in assembly *************/ ! 206: ! 207: #include "lmul.h" /* used only by R3000.c */ ! 208: ! 209: #ifdef MUNIT16 ! 210: typedef unsigned short MULTUNIT; ! 211: #endif ! 212: ! 213: #ifdef MUNIT32 ! 214: typedef unsigned int MULTUNIT; ! 215: #endif ! 216: #define MULTUNITSIZE (sizeof(MULTUNIT)*8) ! 217: #define MULTUNIT_hmask ((1UL << (MULTUNITSIZE/2))-1) ! 218: #define MULTUNIT_mask ((MULTUNIT_hmask<<(MULTUNITSIZE/2)) | MULTUNIT_hmask) ! 219: ! 220: p_smula ( ! 221: MULTUNIT *prod, ! 222: MULTUNIT *multiplicand, ! 223: MULTUNIT multiplier) ! 224: { ! 225: short i=global_precision; ! 226: int count=i,j; ! 227: MULTUNIT *pp=prod, *mp=multiplicand; ! 228: MULTUNIT pl, ph, npl, nph, cl, ch; ! 229: MULTUNIT al, ah; ! 230: ! 231: cl = 0; ! 232: ch = 0; ! 233: ! 234: if (i <= 0 ) return; ! 235: lmul(multiplier, *multiplicand, pl, ph); ! 236: post_higherunit(multiplicand); ! 237: al = 0; ! 238: ah = 0; ! 239: ch = 0; ! 240: while(--i) { ! 241: lmul(multiplier, *multiplicand, npl, nph); ! 242: post_higherunit(multiplicand); ! 243: al += *prod; ! 244: cl = (al < *prod); ! 245: al += pl; ! 246: cl += (al < pl); ! 247: ah += ph; ! 248: ch = (ah < ph); ! 249: ah += cl; ! 250: ch += (ah < cl); ! 251: *prod = al; ! 252: post_higherunit(prod); ! 253: al = ah; ! 254: ah = ch; ! 255: pl = npl; ! 256: ph = nph; ! 257: } ! 258: al += *prod; ! 259: cl = (al < *prod); ! 260: al += pl; ! 261: cl += (al < pl); ! 262: ah += ph; ! 263: ch = (ah < ph); ! 264: ah += cl; ! 265: ch += (ah < cl); ! 266: ! 267: *prod = al; ! 268: post_higherunit(prod); ! 269: ! 270: *prod += ah; ! 271: ch += (*prod < ah); ! 272: post_higherunit(prod); ! 273: ! 274: /* ! 275: *prod += ch ; ! 276: post_higherunit(prod); ! 277: */ ! 278: ! 279: ! 280: } /* mp_smul */ ! 281: ! 282: ! 283: ! 284: static int mshift; /* number of bits for ! 285: ** recip scaling */ ! 286: static MULTUNIT reciph; /* MSunit of scaled recip */ ! 287: static MULTUNIT recipl; /* LSunit of scaled recip */ ! 288: ! 289: void p_setrecip(MULTUNIT rh, MULTUNIT rl, int m) ! 290: { ! 291: reciph = rh; ! 292: recipl = rl; ! 293: mshift = m; ! 294: } ! 295: ! 296: ! 297: MULTUNIT p_quo_digit (MULTUNIT *dividend) ! 298: { ! 299: MULTUNIT ql, qh, q0l, q0h, q1l, q1h, q2l, q2h; ! 300: MULTUNIT q3h, q3l, q4h, q4l; ! 301: MULTUNIT mutemp; ! 302: ! 303: ! 304: /* Compute the least significant product group. ! 305: The last terms of q1 and q2 perform upward rounding, which is ! 306: needed to guarantee that the result not be too small. ! 307: */ ! 308: lmul(dividend[tohigher(-2)] ^ MULTUNIT_mask, reciph, q1l, q1h); ! 309: q1l += reciph; ! 310: q1h += (q1l < reciph); ! 311: ! 312: lmul(dividend[tohigher(-1)]^ MULTUNIT_mask, recipl, q2l, q2h); ! 313: q2h += 1; ! 314: ! 315: q1l = (q1l >> 1) + (q1h << (MULTUNITSIZE-1)); ! 316: q1h >>= 1; ! 317: q2l = (q2l >> 1) + (q2h << (MULTUNITSIZE-1)); ! 318: q2h >>= 1; ! 319: ! 320: q0l = q1l + q2l; ! 321: q0h = q1h + q2h + (q0l < q1l); ! 322: ! 323: q0l++; ! 324: q0h+= (q0l < 1); ! 325: ! 326: /* Compute the middle significant product group. */ ! 327: ! 328: lmul(dividend[tohigher(-1)]^MULTUNIT_mask, reciph, q3l, q3h); ! 329: lmul(dividend[0] ^ MULTUNIT_mask, recipl, q4l, q4h); ! 330: ! 331: q3l = (q3l >> 1) + (q3h << (MULTUNITSIZE-1)); ! 332: q3h >>= 1; ! 333: q4l = (q4l >> 1) + (q4h << (MULTUNITSIZE-1)); ! 334: q4h >>= 1; ! 335: ! 336: ql = q0h + 1; ! 337: qh = (ql < 1); ! 338: ! 339: ql += q3l; ! 340: qh += q3h + (ql < q3l); ! 341: ql += q4l; ! 342: qh += q4h + (ql < q4l); ! 343: ! 344: /* Compute the most significant term and add in the others */ ! 345: ! 346: lmul((dividend[0] ^ MULTUNIT_mask), reciph, q1l, q1h); ! 347: q1h = (q1h << 1) + (q1l>>(MULTUNITSIZE-1)); ! 348: q1l = (q1l << 1) ; ! 349: ! 350: ql = (ql >> (MULTUNITSIZE-2)) + (qh << 2); ! 351: qh >>= (MULTUNITSIZE-2); ! 352: ! 353: ql += q1l; ! 354: qh += (ql < q1l) + q1h; ! 355: ! 356: if (mshift != MULTUNITSIZE) { ! 357: ql = (ql >> mshift) + (qh << (MULTUNITSIZE-mshift)); ! 358: qh >>= mshift; ! 359: } else { ! 360: ql = qh; ! 361: qh = 0; ! 362: } ! 363: ! 364: /* Prevent overflow and then wipe out the intermediate results. */ ! 365: mutemp = (qh != 0) ? MULTUNIT_mask : ql; ! 366: ! 367: return(mutemp); ! 368: } ! 369: ! 370: /* end of r3000.c */ ! 371:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.