|
|
1.1.1.5 ! root 1: /* r3000.c - MIPS R3000 adaptation of selected routines from mpilib.c. ! 2: ! 3: C source code for multiprecision arithmetic library routines. ! 4: R3000 optimizations by Castor Fu, in Sep 1992. ! 5: See the comments in the file mpilib.c for details on the purpose of ! 6: these functions. ! 7: ! 8: Original version of mpilib.c implemented in 1986 by ! 9: Philip R. Zimmermann, updated by PRZ 1990-1992. ! 10: ! 11: Boulder Software Engineering ! 12: 3021 Eleventh Street ! 13: Boulder, CO 80304 ! 14: (303) 541-0140 ! 15: ! 16: (c) Copyright 1990-1996 by Philip Zimmermann. All rights reserved. ! 17: The author assumes no liability for damages resulting from the use ! 18: of this software, even if the damage results from defects in this ! 19: software. No warranty is expressed or implied. ! 20: ! 21: Note that while most PGP source modules bear Philip Zimmermann's ! 22: copyright notice, many of them have been revised or entirely written ! 23: by contributors who frequently failed to put their names in their ! 24: code. Code that has been incorporated into PGP from other authors ! 25: was either originally published in the public domain or is used with ! 26: permission from the various authors. ! 27: ! 28: PGP is available for free to the public under certain restrictions. ! 29: See the PGP User's Guide (included in the release package) for ! 30: important information about licensing, patent restrictions on ! 31: certain algorithms, trademarks, copyrights, and export controls. ! 32: ! 33: -- Adapt so that the unit size can be a full int size. This ! 34: was inspired by the lack of a carry bit on MIPSco processors. ! 35: On such machines, the advantage of assembly language coding ! 36: is less clear. (Except for the multiply. . . ) ! 37: ! 38: One reason for creating an R3000 module of C routines is that ! 39: we have one routine (P_ROTL) which is compiled particularly ! 40: poorly, and chose to explicitly unroll the loop. ! 41: ! 42: */ ! 43: ! 44: ! 45: #include "mpilib.h" ! 46: ! 47: #define word_v(r,n) (r)[tohigher(n)] ! 48: ! 49: extern short global_precision; /* units of precision for all routines */ ! 50: /* global_precision is the unit precision last set by set_precision. ! 51: Initially, set_precision() should be called to define global_precision ! 52: before using any of these other multiprecision library routines. ! 53: i.e.: set_precision(MAX_UNIT_PRECISION); ! 54: */ ! 55: ! 56: /*************** multiprecision library primitives ****************/ ! 57: /* The following portable C primitives should be recoded in assembly. ! 58: The equivalent assembly primitives are externally defined under ! 59: different names, unless PORTABLE is defined. See the header file ! 60: "mpilib.h" for further details. ! 61: */ ! 62: ! 63: typedef unsigned long int ulint; ! 64: /* ...assumes sizeof(unit) <= sizeof(unsigned long) */ ! 65: ! 66: boolean mp_addc ! 67: (register unitptr r1,register unitptr r2,register boolean carry) ! 68: /* multiprecision add with carry r2 to r1, result in r1 */ ! 69: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 70: { ! 71: register unit x,x1; ! 72: int i; ! 73: short precision; /* number of units to add */ ! 74: unsigned int mcarry = carry; ! 75: precision = global_precision; ! 76: make_lsbptr(r1,precision); ! 77: make_lsbptr(r2,precision); ! 78: ! 79: i = precision&3; ! 80: while (i--) { ! 81: if (mcarry) { ! 82: x = *r1 + *r2 + 1; ! 83: x1 = ~ *r1; ! 84: mcarry = 1 ^ (*r2 < x1); ! 85: } else { ! 86: x = *r1 + *r2; ! 87: mcarry = (x < *r1) ; ! 88: } ! 89: post_higherunit(r2); ! 90: *post_higherunit(r1) = x; ! 91: } ! 92: ! 93: i = precision>>2; ! 94: while (i--) { ! 95: #define ADDC(n) \ ! 96: if (mcarry) { \ ! 97: x = word_v(r1,n) + word_v(r2,n) + 1; \ ! 98: x1 = ~word_v(r1,n); \ ! 99: mcarry = 1 ^ (word_v(r2,n) < x1); \ ! 100: } else { \ ! 101: x = word_v(r1,n) + word_v(r2,n); \ ! 102: mcarry = (x < word_v(r1,n)) ; \ ! 103: } \ ! 104: word_v(r1,n) = x; ! 105: ! 106: ADDC(0); ! 107: ADDC(1); ! 108: ADDC(2); ! 109: ADDC(3); ! 110: #undef ADDC ! 111: r1 += tohigher(4); ! 112: r2 += tohigher(4); ! 113: } ! 114: ! 115: return(mcarry); /* return the final carry flag bit */ ! 116: } /* mp_addc */ ! 117: ! 118: ! 119: boolean mp_subb ! 120: (register unitptr r1,register unitptr r2,register boolean borrow) ! 121: /* multiprecision subtract with borrow, r2 from r1, result in r1 */ ! 122: /* borrow is incoming borrow flag-- value should be 0 or 1 */ ! 123: { ! 124: register unit x; ! 125: unsigned int mborrow = borrow; ! 126: int i; ! 127: short precision; /* number of units to subtract */ ! 128: precision = global_precision; ! 129: make_lsbptr(r1,precision); ! 130: make_lsbptr(r2,precision); ! 131: ! 132: i = precision&3; ! 133: while (i--) { ! 134: if (mborrow) { ! 135: x = *r1 - *r2 - mborrow; ! 136: mborrow = 1 ^ (*r2 < *r1); ! 137: } else { ! 138: x = *r1 - *r2; ! 139: mborrow = (*r1 < *r2); ! 140: } ! 141: post_higherunit(r2); ! 142: *post_higherunit(r1) = x; ! 143: } ! 144: ! 145: i = precision>>2; ! 146: while (i--) { ! 147: ! 148: #define SUBB(n) \ ! 149: if (mborrow) { \ ! 150: x = word_v(r1,n) - word_v(r2,n) - mborrow; \ ! 151: mborrow = 1 ^ (word_v(r2,n) < word_v(r1,n)); \ ! 152: } else { \ ! 153: x = word_v(r1,n) - word_v(r2,n); \ ! 154: mborrow = (word_v(r1,n) < word_v(r2,n)); \ ! 155: } \ ! 156: word_v(r1,n) = x; ! 157: ! 158: SUBB(0); ! 159: SUBB(1); ! 160: SUBB(2); ! 161: SUBB(3); ! 162: #undef SUBB ! 163: ! 164: ! 165: r1 += tohigher(4); ! 166: r2 += tohigher(4); ! 167: } ! 168: ! 169: return(mborrow); /* return the final carry/borrow flag bit */ ! 170: } /* mp_subb */ ! 171: ! 172: ! 173: /* We've unrolled the loop here because the MIPS/DEC C compiler is too ! 174: * stupid to do so. Presumably on register poor machines this is not ! 175: * a clearly good idea ! 176: */ ! 177: ! 178: boolean mp_rotate_left(register unitptr r1,register boolean carry) ! 179: /* multiprecision rotate left 1 bit with carry, result in r1. */ ! 180: /* carry is incoming carry flag-- value should be 0 or 1 */ ! 181: { ! 182: register int precision; /* number of units to rotate */ ! 183: unsigned int mcarry = carry, carry2,carry3,carry4, nextcarry; ! 184: ! 185: int i; ! 186: precision = global_precision; ! 187: make_lsbptr(r1,precision); ! 188: i = precision &3; ! 189: while (i--) { ! 190: nextcarry = (((signedunit) *r1) < 0); ! 191: *r1 = (*r1 << 1) | mcarry; ! 192: mcarry = nextcarry; ! 193: pre_higherunit(r1); ! 194: } ! 195: i = precision>>2; ! 196: while (i--) { ! 197: carry2 = (((signedunit) *r1) < 0); ! 198: *r1 = (*r1 << 1) | mcarry; ! 199: ! 200: carry3 = (((signedunit) word_v(r1,1)) < 0); ! 201: word_v(r1,1) = (word_v(r1,1) << 1) | carry2; ! 202: ! 203: carry4 = (((signedunit) word_v(r1,2)) < 0); ! 204: word_v(r1,2) = (word_v(r1,2) << 1) | carry3; ! 205: ! 206: mcarry = (((signedunit) word_v(r1,3)) < 0); ! 207: word_v(r1,3) = (word_v(r1,3) << 1) | carry4; ! 208: ! 209: r1 += tohigher(4); ! 210: } ! 211: return(mcarry); /* return the final carry flag bit */ ! 212: } /* mp_rotate_left */ ! 213: ! 214: void p_setp(short nbits){} ! 215: ! 216: /************** end of primitives that should be in assembly *************/ ! 217: ! 218: #include "lmul.h" /* used only by R3000.c */ ! 219: ! 220: #ifdef MUNIT16 ! 221: typedef unsigned short MULTUNIT; ! 222: #endif ! 223: ! 224: #ifdef MUNIT32 ! 225: typedef unsigned int MULTUNIT; ! 226: #endif ! 227: #define MULTUNITSIZE (sizeof(MULTUNIT)*8) ! 228: #define MULTUNIT_hmask ((1UL << (MULTUNITSIZE/2))-1) ! 229: #define MULTUNIT_mask ((MULTUNIT_hmask<<(MULTUNITSIZE/2)) | MULTUNIT_hmask) ! 230: ! 231: void p_smula ( ! 232: MULTUNIT *prod, ! 233: MULTUNIT *multiplicand, ! 234: MULTUNIT multiplier) ! 235: { ! 236: short i=global_precision; ! 237: int count=i,j; ! 238: MULTUNIT *pp=prod, *mp=multiplicand; ! 239: MULTUNIT pl, ph, npl, nph, cl, ch; ! 240: MULTUNIT al, ah; ! 241: ! 242: cl = 0; ! 243: ch = 0; ! 244: ! 245: if (i <= 0 ) return; ! 246: lmul(multiplier, *multiplicand, pl, ph); ! 247: post_higherunit(multiplicand); ! 248: al = 0; ! 249: ah = 0; ! 250: ch = 0; ! 251: while(--i) { ! 252: lmul(multiplier, *multiplicand, npl, nph); ! 253: post_higherunit(multiplicand); ! 254: al += *prod; ! 255: cl = (al < *prod); ! 256: al += pl; ! 257: cl += (al < pl); ! 258: ah += ph; ! 259: ch = (ah < ph); ! 260: ah += cl; ! 261: ch += (ah < cl); ! 262: *prod = al; ! 263: post_higherunit(prod); ! 264: al = ah; ! 265: ah = ch; ! 266: pl = npl; ! 267: ph = nph; ! 268: } ! 269: al += *prod; ! 270: cl = (al < *prod); ! 271: al += pl; ! 272: cl += (al < pl); ! 273: ah += ph; ! 274: ch = (ah < ph); ! 275: ah += cl; ! 276: ch += (ah < cl); ! 277: ! 278: *prod = al; ! 279: post_higherunit(prod); ! 280: ! 281: *prod += ah; ! 282: ch += (*prod < ah); ! 283: post_higherunit(prod); ! 284: ! 285: /* ! 286: *prod += ch ; ! 287: post_higherunit(prod); ! 288: */ ! 289: ! 290: ! 291: } /* mp_smul */ ! 292: ! 293: ! 294: ! 295: static int mshift; /* number of bits for ! 296: ** recip scaling */ ! 297: static MULTUNIT reciph; /* MSunit of scaled recip */ ! 298: static MULTUNIT recipl; /* LSunit of scaled recip */ ! 299: ! 300: void p_setrecip(MULTUNIT rh, MULTUNIT rl, int m) ! 301: { ! 302: reciph = rh; ! 303: recipl = rl; ! 304: mshift = m; ! 305: } ! 306: ! 307: ! 308: MULTUNIT p_quo_digit (MULTUNIT *dividend) ! 309: { ! 310: MULTUNIT ql, qh, q0l, q0h, q1l, q1h, q2l, q2h; ! 311: MULTUNIT q3h, q3l, q4h, q4l; ! 312: MULTUNIT mutemp; ! 313: ! 314: ! 315: /* Compute the least significant product group. ! 316: The last terms of q1 and q2 perform upward rounding, which is ! 317: needed to guarantee that the result not be too small. ! 318: */ ! 319: lmul(dividend[tohigher(-2)] ^ MULTUNIT_mask, reciph, q1l, q1h); ! 320: q1l += reciph; ! 321: q1h += (q1l < reciph); ! 322: ! 323: lmul(dividend[tohigher(-1)]^ MULTUNIT_mask, recipl, q2l, q2h); ! 324: q2h += 1; ! 325: ! 326: q1l = (q1l >> 1) + (q1h << (MULTUNITSIZE-1)); ! 327: q1h >>= 1; ! 328: q2l = (q2l >> 1) + (q2h << (MULTUNITSIZE-1)); ! 329: q2h >>= 1; ! 330: ! 331: q0l = q1l + q2l; ! 332: q0h = q1h + q2h + (q0l < q1l); ! 333: ! 334: q0l++; ! 335: q0h+= (q0l < 1); ! 336: ! 337: /* Compute the middle significant product group. */ ! 338: ! 339: lmul(dividend[tohigher(-1)]^MULTUNIT_mask, reciph, q3l, q3h); ! 340: lmul(dividend[0] ^ MULTUNIT_mask, recipl, q4l, q4h); ! 341: ! 342: q3l = (q3l >> 1) + (q3h << (MULTUNITSIZE-1)); ! 343: q3h >>= 1; ! 344: q4l = (q4l >> 1) + (q4h << (MULTUNITSIZE-1)); ! 345: q4h >>= 1; ! 346: ! 347: ql = q0h + 1; ! 348: qh = (ql < 1); ! 349: ! 350: ql += q3l; ! 351: qh += q3h + (ql < q3l); ! 352: ql += q4l; ! 353: qh += q4h + (ql < q4l); ! 354: ! 355: /* Compute the most significant term and add in the others */ ! 356: ! 357: lmul((dividend[0] ^ MULTUNIT_mask), reciph, q1l, q1h); ! 358: q1h = (q1h << 1) + (q1l>>(MULTUNITSIZE-1)); ! 359: q1l = (q1l << 1) ; ! 360: ! 361: ql = (ql >> (MULTUNITSIZE-2)) + (qh << 2); ! 362: qh >>= (MULTUNITSIZE-2); ! 363: ! 364: ql += q1l; ! 365: qh += (ql < q1l) + q1h; ! 366: ! 367: if (mshift != MULTUNITSIZE) { ! 368: ql = (ql >> mshift) + (qh << (MULTUNITSIZE-mshift)); ! 369: qh >>= mshift; ! 370: } else { ! 371: ql = qh; ! 372: qh = 0; ! 373: } ! 374: ! 375: /* Prevent overflow and then wipe out the intermediate results. */ ! 376: mutemp = (qh != 0) ? MULTUNIT_mask : ql; ! 377: ! 378: return(mutemp); ! 379: } ! 380: ! 381: /* end of r3000.c */ ! 382:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.