|
|
1.1 root 1: /* r3000.c - MIPS R3000 adaptation of selected routines from mpilib.c.
2:
3: C source code for multiprecision arithmetic library routines.
4: R3000 optimizations by Castor Fu, in Sep 1992.
5: See the comments in the file mpilib.c for details on the purpose of
6: these functions.
7:
8: Original version of mpilib.c implemented in 1986 by
9: Philip R. Zimmermann, updated by PRZ 1990-1992.
10:
11: Boulder Software Engineering
12: 3021 Eleventh Street
13: Boulder, CO 80304
14: (303) 541-0140
15:
16: (c) Copyright 1986-92 by Philip Zimmermann. All rights reserved.
17: The author assumes no liability for damages resulting from the use
18: of this software, even if the damage results from defects in this
19: software. No warranty is expressed or implied.
20:
21: -- Adapt so that the unit size can be a full int size. This
22: was inspired by the lack of a carry bit on MIPSco processors.
23: On such machines, the advantage of assembly language coding
24: is less clear. (Except for the multiply. . . )
25:
26: One reason for creating an R3000 module of C routines is that
27: we have one routine (P_ROTL) which is compiled particularly
28: poorly, and chose to explicitly unroll the loop.
29:
30: */
31:
32:
33: #include "mpilib.h"
34:
35: #define word_v(r,n) (r)[tohigher(n)]
36:
37: extern short global_precision; /* units of precision for all routines */
38: /* global_precision is the unit precision last set by set_precision.
39: Initially, set_precision() should be called to define global_precision
40: before using any of these other multiprecision library routines.
41: i.e.: set_precision(MAX_UNIT_PRECISION);
42: */
43:
44: /*************** multiprecision library primitives ****************/
45: /* The following portable C primitives should be recoded in assembly.
46: The equivalent assembly primitives are externally defined under
47: different names, unless PORTABLE is defined. See the header file
48: "mpilib.h" for further details.
49: */
50:
51: typedef unsigned long int ulint;
52: /* ...assumes sizeof(unit) <= sizeof(unsigned long) */
53:
54: boolean mp_addc
55: (register unitptr r1,register unitptr r2,register boolean carry)
56: /* multiprecision add with carry r2 to r1, result in r1 */
57: /* carry is incoming carry flag-- value should be 0 or 1 */
58: { register unit x,x1;
59: int i;
60: short precision; /* number of units to add */
61: unsigned int mcarry = carry;
62: precision = global_precision;
63: make_lsbptr(r1,precision);
64: make_lsbptr(r2,precision);
65:
66: i = precision&3;
67: while (i--)
68: {
69: if (mcarry) {
70: x = *r1 + *r2 + 1;
71: x1 = ~ *r1;
72: mcarry = 1 ^ (*r2 < x1);
73: } else {
74: x = *r1 + *r2;
75: mcarry = (x < *r1) ;
76: }
77: post_higherunit(r2);
78: *post_higherunit(r1) = x;
79: }
80:
81: i = precision>>2;
82: while (i--)
83: {
84: #define ADDC(n) \
85: if (mcarry) { \
86: x = word_v(r1,n) + word_v(r2,n) + 1; \
87: x1 = ~word_v(r1,n); \
88: mcarry = 1 ^ (word_v(r2,n) < x1); \
89: } else { \
90: x = word_v(r1,n) + word_v(r2,n); \
91: mcarry = (x < word_v(r1,n)) ; \
92: } \
93: word_v(r1,n) = x;
94:
95: ADDC(0);
96: ADDC(1);
97: ADDC(2);
98: ADDC(3);
99: #undef ADDC
100: r1 += tohigher(4);
101: r2 += tohigher(4);
102: }
103:
104: return(mcarry); /* return the final carry flag bit */
105: } /* mp_addc */
106:
107:
108: boolean mp_subb
109: (register unitptr r1,register unitptr r2,register boolean borrow)
110: /* multiprecision subtract with borrow, r2 from r1, result in r1 */
111: /* borrow is incoming borrow flag-- value should be 0 or 1 */
112: { register unit x;
113: unsigned int mborrow = borrow;
114: int i;
115: short precision; /* number of units to subtract */
116: precision = global_precision;
117: make_lsbptr(r1,precision);
118: make_lsbptr(r2,precision);
119:
120: i = precision&3;
121: while (i--)
122: { if (mborrow) {
123: x = *r1 - *r2 - mborrow;
124: mborrow = 1 ^ (*r2 < *r1);
125: } else {
126: x = *r1 - *r2;
127: mborrow = (*r1 < *r2);
128: }
129: post_higherunit(r2);
130: *post_higherunit(r1) = x;
131: }
132:
133: i = precision>>2;
134: while (i--)
135: {
136:
137: #define SUBB(n) \
138: if (mborrow) { \
139: x = word_v(r1,n) - word_v(r2,n) - mborrow; \
140: mborrow = 1 ^ (word_v(r2,n) < word_v(r1,n)); \
141: } else { \
142: x = word_v(r1,n) - word_v(r2,n); \
143: mborrow = (word_v(r1,n) < word_v(r2,n)); \
144: } \
145: word_v(r1,n) = x;
146:
147: SUBB(0);
148: SUBB(1);
149: SUBB(2);
150: SUBB(3);
151: #undef SUBB
152:
153:
154: r1 += tohigher(4);
155: r2 += tohigher(4);
156: }
157:
158: return(mborrow); /* return the final carry/borrow flag bit */
159: } /* mp_subb */
160:
161:
162: /* We've unrolled the loop here because the MIPS/DEC C compiler is too
163: * stupid to do so. Presumably on register poor machines this is not
164: * a clearly good idea
165: */
166:
167: boolean mp_rotate_left(register unitptr r1,register boolean carry)
168: /* multiprecision rotate left 1 bit with carry, result in r1. */
169: /* carry is incoming carry flag-- value should be 0 or 1 */
170: { register int precision; /* number of units to rotate */
171: unsigned int mcarry = carry, carry2,carry3,carry4, nextcarry;
172:
173: int i;
174: precision = global_precision;
175: make_lsbptr(r1,precision);
176: i = precision &3;
177: while (i--) {
178: nextcarry = (((signedunit) *r1) < 0);
179: *r1 = (*r1 << 1) | mcarry;
180: mcarry = nextcarry;
181: pre_higherunit(r1);
182: }
183: i = precision>>2;
184: while (i--)
185: {
186: carry2 = (((signedunit) *r1) < 0);
187: *r1 = (*r1 << 1) | mcarry;
188:
189: carry3 = (((signedunit) word_v(r1,1)) < 0);
190: word_v(r1,1) = (word_v(r1,1) << 1) | carry2;
191:
192: carry4 = (((signedunit) word_v(r1,2)) < 0);
193: word_v(r1,2) = (word_v(r1,2) << 1) | carry3;
194:
195: mcarry = (((signedunit) word_v(r1,3)) < 0);
196: word_v(r1,3) = (word_v(r1,3) << 1) | carry4;
197:
198: r1 += tohigher(4);
199: }
200: return(mcarry); /* return the final carry flag bit */
201: } /* mp_rotate_left */
202:
203: void p_setp(short nbits){} ;
204:
205: /************** end of primitives that should be in assembly *************/
206:
207: #include "lmul.h" /* used only by R3000.c */
208:
209: #ifdef MUNIT16
210: typedef unsigned short MULTUNIT;
211: #endif
212:
213: #ifdef MUNIT32
214: typedef unsigned int MULTUNIT;
215: #endif
216: #define MULTUNITSIZE (sizeof(MULTUNIT)*8)
217: #define MULTUNIT_hmask ((1UL << (MULTUNITSIZE/2))-1)
218: #define MULTUNIT_mask ((MULTUNIT_hmask<<(MULTUNITSIZE/2)) | MULTUNIT_hmask)
219:
1.1.1.2 ! root 220: void p_smula (
1.1 root 221: MULTUNIT *prod,
222: MULTUNIT *multiplicand,
223: MULTUNIT multiplier)
224: {
225: short i=global_precision;
226: int count=i,j;
227: MULTUNIT *pp=prod, *mp=multiplicand;
228: MULTUNIT pl, ph, npl, nph, cl, ch;
229: MULTUNIT al, ah;
230:
231: cl = 0;
232: ch = 0;
233:
234: if (i <= 0 ) return;
235: lmul(multiplier, *multiplicand, pl, ph);
236: post_higherunit(multiplicand);
237: al = 0;
238: ah = 0;
239: ch = 0;
240: while(--i) {
241: lmul(multiplier, *multiplicand, npl, nph);
242: post_higherunit(multiplicand);
243: al += *prod;
244: cl = (al < *prod);
245: al += pl;
246: cl += (al < pl);
247: ah += ph;
248: ch = (ah < ph);
249: ah += cl;
250: ch += (ah < cl);
251: *prod = al;
252: post_higherunit(prod);
253: al = ah;
254: ah = ch;
255: pl = npl;
256: ph = nph;
257: }
258: al += *prod;
259: cl = (al < *prod);
260: al += pl;
261: cl += (al < pl);
262: ah += ph;
263: ch = (ah < ph);
264: ah += cl;
265: ch += (ah < cl);
266:
267: *prod = al;
268: post_higherunit(prod);
269:
270: *prod += ah;
271: ch += (*prod < ah);
272: post_higherunit(prod);
273:
274: /*
275: *prod += ch ;
276: post_higherunit(prod);
277: */
278:
279:
280: } /* mp_smul */
281:
282:
283:
284: static int mshift; /* number of bits for
285: ** recip scaling */
286: static MULTUNIT reciph; /* MSunit of scaled recip */
287: static MULTUNIT recipl; /* LSunit of scaled recip */
288:
289: void p_setrecip(MULTUNIT rh, MULTUNIT rl, int m)
290: {
291: reciph = rh;
292: recipl = rl;
293: mshift = m;
294: }
295:
296:
297: MULTUNIT p_quo_digit (MULTUNIT *dividend)
298: {
299: MULTUNIT ql, qh, q0l, q0h, q1l, q1h, q2l, q2h;
300: MULTUNIT q3h, q3l, q4h, q4l;
301: MULTUNIT mutemp;
302:
303:
304: /* Compute the least significant product group.
305: The last terms of q1 and q2 perform upward rounding, which is
306: needed to guarantee that the result not be too small.
307: */
308: lmul(dividend[tohigher(-2)] ^ MULTUNIT_mask, reciph, q1l, q1h);
309: q1l += reciph;
310: q1h += (q1l < reciph);
311:
312: lmul(dividend[tohigher(-1)]^ MULTUNIT_mask, recipl, q2l, q2h);
313: q2h += 1;
314:
315: q1l = (q1l >> 1) + (q1h << (MULTUNITSIZE-1));
316: q1h >>= 1;
317: q2l = (q2l >> 1) + (q2h << (MULTUNITSIZE-1));
318: q2h >>= 1;
319:
320: q0l = q1l + q2l;
321: q0h = q1h + q2h + (q0l < q1l);
322:
323: q0l++;
324: q0h+= (q0l < 1);
325:
326: /* Compute the middle significant product group. */
327:
328: lmul(dividend[tohigher(-1)]^MULTUNIT_mask, reciph, q3l, q3h);
329: lmul(dividend[0] ^ MULTUNIT_mask, recipl, q4l, q4h);
330:
331: q3l = (q3l >> 1) + (q3h << (MULTUNITSIZE-1));
332: q3h >>= 1;
333: q4l = (q4l >> 1) + (q4h << (MULTUNITSIZE-1));
334: q4h >>= 1;
335:
336: ql = q0h + 1;
337: qh = (ql < 1);
338:
339: ql += q3l;
340: qh += q3h + (ql < q3l);
341: ql += q4l;
342: qh += q4h + (ql < q4l);
343:
344: /* Compute the most significant term and add in the others */
345:
346: lmul((dividend[0] ^ MULTUNIT_mask), reciph, q1l, q1h);
347: q1h = (q1h << 1) + (q1l>>(MULTUNITSIZE-1));
348: q1l = (q1l << 1) ;
349:
350: ql = (ql >> (MULTUNITSIZE-2)) + (qh << 2);
351: qh >>= (MULTUNITSIZE-2);
352:
353: ql += q1l;
354: qh += (ql < q1l) + q1h;
355:
356: if (mshift != MULTUNITSIZE) {
357: ql = (ql >> mshift) + (qh << (MULTUNITSIZE-mshift));
358: qh >>= mshift;
359: } else {
360: ql = qh;
361: qh = 0;
362: }
363:
364: /* Prevent overflow and then wipe out the intermediate results. */
365: mutemp = (qh != 0) ? MULTUNIT_mask : ql;
366:
367: return(mutemp);
368: }
369:
370: /* end of r3000.c */
371:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.