|
|
1.1 root 1: /*
2: * Copyright (c) 1985 Regents of the University of California.
3: *
4: * Use and reproduction of this software are granted in accordance with
5: * the terms and conditions specified in the Berkeley Software License
6: * Agreement (in particular, this entails acknowledgement of the programs'
7: * source, and inclusion of this notice) with the additional understanding
8: * that all recipients should regard themselves as participants in an
9: * ongoing research project and hence should feel obligated to report
10: * their experiences (good or bad) with these elementary function codes,
11: * using "sendbug 4bsd-bugs@BERKELEY", to the authors.
12: */
13:
14: #ifndef lint
15: static char sccsid[] = "@(#)support.c 1.1 (Berkeley) 5/23/85";
16: #endif not lint
17:
18: /*
19: * Some IEEE standard p754 recommended functions and remainder and sqrt for
20: * supporting the C elementary functions.
21: ******************************************************************************
22: * WARNING:
23: * These codes are developed (in double) to support the C elementary
24: * functions temporarily. They are not universal, and some of them are very
25: * slow (in particular, drem and sqrt is extremely inefficient). Each
26: * computer system should have its implementation of these functions using
27: * its own assembler.
28: ******************************************************************************
29: *
30: * IEEE p754 required operations:
31: * drem(x,p)
32: * returns x REM y = x - [x/y]*y , where [x/y] is the integer
33: * nearest x/y; in half way case, choose the even one.
34: * sqrt(x)
35: * returns the square root of x correctly rounded according to
36: * the rounding mod.
37: *
38: * IEEE p754 recommended functions:
39: * (a) copysign(x,y)
40: * returns x with the sign of y.
41: * (b) scalb(x,N)
42: * returns x * (2**N), for integer values N.
43: * (c) logb(x)
44: * returns the unbiased exponent of x, a signed integer in
45: * double precision, except that logb(0) is -INF, logb(INF)
46: * is +INF, and logb(NAN) is that NAN.
47: * (d) finite(x)
48: * returns the value TRUE if -INF < x < +INF and returns
49: * FALSE otherwise.
50: *
51: *
52: * CODED IN C BY K.C. NG, 11/25/84;
53: * REVISED BY K.C. NG on 1/22/85, 2/13/85, 3/24/85.
54: */
55:
56:
57: #ifdef VAX /* VAX D format */
58: static unsigned short msign=0x7fff , mexp =0x7f80 ;
59: static short prep1=57, gap=7, bias=129 ;
60: static double novf=1.7E38, nunf=3.0E-39, zero=0.0 ;
61: #else /*IEEE double format */
62: static unsigned short msign=0x7fff, mexp =0x7ff0 ;
63: static short prep1=54, gap=4, bias=1023 ;
64: static double novf=1.7E308, nunf=3.0E-308,zero=0.0;
65: #endif
66:
67: double scalb(x,N)
68: double x; int N;
69: {
70: int k;
71: double scalb();
72:
73: #ifdef NATIONAL
74: unsigned short *px=(unsigned short *) &x + 3;
75: #else /* VAX, SUN, ZILOG */
76: unsigned short *px=(unsigned short *) &x;
77: #endif
78:
79: if( x == zero ) return(x);
80:
81: #ifdef VAX
82: if( (k= *px & mexp ) != ~msign ) {
83: if( N<-260) return(nunf*nunf); else if(N>260) return(novf+novf);
84: #else /* IEEE */
85: if( (k= *px & mexp ) != mexp ) {
86: if( N<-2100) return(nunf*nunf); else if(N>2100) return(novf+novf);
87: if( k == 0 ) {
88: x *= scalb(1.0,(int)prep1); N -= prep1; return(scalb(x,N));}
89: #endif
90:
91: if((k = (k>>gap)+ N) > 0 )
92: if( k < (mexp>>gap) ) *px = (*px&~mexp) | (k<<gap);
93: else x=novf+novf; /* overflow */
94: else
95: if( k > -prep1 )
96: /* gradual underflow */
97: {*px=(*px&~mexp)|(short)(1<<gap); x *= scalb(1.0,k-1);}
98: else
99: return(nunf*nunf);
100: }
101: return(x);
102: }
103:
104:
105: double copysign(x,y)
106: double x,y;
107: {
108: #ifdef NATIONAL
109: unsigned short *px=(unsigned short *) &x+3,
110: *py=(unsigned short *) &y+3;
111: #else /* VAX, SUN, ZILOG */
112: unsigned short *px=(unsigned short *) &x,
113: *py=(unsigned short *) &y;
114: #endif
115:
116: #ifdef VAX
117: if ( (*px & mexp) == 0 ) return(x);
118: #endif
119:
120: *px = ( *px & msign ) | ( *py & ~msign );
121: return(x);
122: }
123:
124: double logb(x)
125: double x;
126: {
127:
128: #ifdef NATIONAL
129: short *px=(short *) &x+3, k;
130: #else /* VAX, SUN, ZILOG */
131: short *px=(short *) &x, k;
132: #endif
133:
134: #ifdef VAX
135: return( ((*px & mexp)>>gap) - bias);
136: #else /* IEEE */
137: if( (k= *px & mexp ) != mexp )
138: if ( k != 0 )
139: return ( (k>>gap) - bias );
140: else if( x != zero)
141: return ( -1022.0 );
142: else
143: return(-(1.0/zero));
144: else if(x != x)
145: return(x);
146: else
147: {*px &= msign; return(x);}
148: #endif
149: }
150:
151: finite(x)
152: double x;
153: {
154: #ifdef VAX
155: return(1.0);
156: #else /* IEEE */
157: #ifdef NATIONAL
158: return( (*((short *) &x+3 ) & mexp ) != mexp );
159: #else /* SUN, ZILOG */
160: return( (*((short *) &x ) & mexp ) != mexp );
161: #endif
162: #endif
163: }
164:
165: double drem(x,p)
166: double x,p;
167: {
168: short sign;
169: double hp,dp,tmp,drem(),scalb();
170: unsigned short k;
171: #ifdef NATIONAL
172: unsigned short
173: *px=(unsigned short *) &x +3,
174: *pp=(unsigned short *) &p +3,
175: *pd=(unsigned short *) &dp +3,
176: *pt=(unsigned short *) &tmp+3;
177: #else /* VAX, SUN, ZILOG */
178: unsigned short
179: *px=(unsigned short *) &x ,
180: *pp=(unsigned short *) &p ,
181: *pd=(unsigned short *) &dp ,
182: *pt=(unsigned short *) &tmp;
183: #endif
184:
185: *pp &= msign ;
186:
187: #ifdef VAX
188: if( ( *px & mexp ) == ~msign || p == zero )
189: #else /* IEEE */
190: if( ( *px & mexp ) == mexp || p == zero )
191: #endif
192:
193: return( (x != x)? x:zero/zero );
194:
195: else if ( ((*pp & mexp)>>gap) <= 1 )
196: /* subnormal p, or almost subnormal p */
197: { double b; b=scalb(1.0,(int)prep1);
198: p *= b; x = drem(x,p); x *= b; return(drem(x,p)/b);}
199: else if ( p >= novf/2)
200: { p /= 2 ; x /= 2; return(drem(x,p)*2);}
201: else
202: {
203: dp=p+p; hp=p/2;
204: sign= *px & ~msign ;
205: *px &= msign ;
206: while ( x > dp )
207: {
208: k=(*px & mexp) - (*pd & mexp) ;
209: tmp = dp ;
210: *pt += k ;
211:
212: #ifdef VAX
213: if( x < tmp ) *pt -= 128 ;
214: #else /* IEEE */
215: if( x < tmp ) *pt -= 16 ;
216: #endif
217:
218: x -= tmp ;
219: }
220: if ( x > hp )
221: { x -= p ; if ( x >= hp ) x -= p ; }
222:
223: *px = *px ^ sign;
224: return( x);
225:
226: }
227: }
228: double sqrt(x)
229: double x;
230: {
231: double q,s,b,r;
232: double logb(),scalb();
233: double t,zero=0.0;
234: int m,n,i,finite();
235: #ifdef VAX
236: int k=54;
237: #else /* IEEE */
238: int k=51;
239: #endif
240:
241: /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
242: if(x!=x||x==zero) return(x);
243:
244: /* sqrt(negative) is invalid */
245: if(x<zero) return(zero/zero);
246:
247: /* sqrt(INF) is INF */
248: if(!finite(x)) return(x);
249:
250: /* scale x to [1,4) */
251: n=logb(x);
252: x=scalb(x,-n);
253: if((m=logb(x))!=0) x=scalb(x,-m); /* subnormal number */
254: m += n;
255: n = m/2;
256: if((n+n)!=m) {x *= 2; m -=1; n=m/2;}
257:
258: /* generate sqrt(x) bit by bit (accumulating in q) */
259: q=1.0; s=4.0; x -= 1.0; r=1;
260: for(i=1;i<=k;i++) {
261: t=s+1; x *= 4; r /= 2;
262: if(t<=x) {
263: s=t+t+2, x -= t; q += r;}
264: else
265: s *= 2;
266: }
267:
268: /* generate the last bit and determine the final rounding */
269: r/=2; x *= 4;
270: if(x==zero) goto end; 100+r; /* trigger inexact flag */
271: if(s<x) {
272: q+=r; x -=s; s += 2; s *= 2; x *= 4;
273: t = (x-s)-5;
274: b=1.0+3*r/4; if(b==1.0) goto end; /* b==1 : Round-to-zero */
275: b=1.0+r/4; if(b>1.0) t=1; /* b>1 : Round-to-(+INF) */
276: if(t>=0) q+=r; } /* else: Round-to-nearest */
277: else {
278: s *= 2; x *= 4;
279: t = (x-s)-1;
280: b=1.0+3*r/4; if(b==1.0) goto end;
281: b=1.0+r/4; if(b>1.0) t=1;
282: if(t>=0) q+=r; }
283:
284: end: return(scalb(q,n));
285: }
286:
287: #if 0
288: /* DREM(X,Y)
289: * RETURN X REM Y =X-N*Y, N=[X/Y] ROUNDED (ROUNDED TO EVEN IN THE HALF WAY CASE)
290: * DOUBLE PRECISION (VAX D format 56 bits, IEEE DOUBLE 53 BITS)
291: * INTENDED FOR ASSEMBLY LANGUAGE
292: * CODED IN C BY K.C. NG, 3/23/85, 4/8/85.
293: *
294: * Warning: this code should not get compiled in unless ALL of
295: * the following machine-dependent routines are supplied.
296: *
297: * Required machine dependent functions (not on a VAX):
298: * swapINX(i): save inexact flag and reset it to "i"
299: * swapENI(e): save inexact enable and reset it to "e"
300: */
301:
302: double drem(x,y)
303: double x,y;
304: {
305:
306: #ifdef NATIONAL /* order of words in floating point number */
307: static n0=3,n1=2,n2=1,n3=0;
308: #else /* VAX, SUN, ZILOG */
309: static n0=0,n1=1,n2=2,n3=3;
310: #endif
311:
312: static unsigned short mexp =0x7ff0, m25 =0x0190, m57 =0x0390;
313: static double zero=0.0;
314: double hy,y1,t,t1;
315: short k;
316: long n;
317: int i,e;
318: unsigned short xexp,yexp, *px =(unsigned short *) &x ,
319: nx,nf, *py =(unsigned short *) &y ,
320: sign, *pt =(unsigned short *) &t ,
321: *pt1 =(unsigned short *) &t1 ;
322:
323: xexp = px[n0] & mexp ; /* exponent of x */
324: yexp = py[n0] & mexp ; /* exponent of y */
325: sign = px[n0] &0x8000; /* sign of x */
326:
327: /* return NaN if x is NaN, or y is NaN, or x is INF, or y is zero */
328: if(x!=x) return(x); if(y!=y) return(y); /* x or y is NaN */
329: if( xexp == mexp ) return(zero/zero); /* x is INF */
330: if(y==zero) return(y/y);
331:
332: /* save the inexact flag and inexact enable in i and e respectively
333: * and reset them to zero
334: */
335: i=swapINX(0); e=swapENI(0);
336:
337: /* subnormal number */
338: nx=0;
339: if(yexp==0) {t=1.0,pt[n0]+=m57; y*=t; nx=m57;}
340:
341: /* if y is tiny (biased exponent <= 57), scale up y to y*2**57 */
342: if( yexp <= m57 ) {py[n0]+=m57; nx+=m57; yexp+=m57;}
343:
344: nf=nx;
345: py[n0] &= 0x7fff;
346: px[n0] &= 0x7fff;
347:
348: /* mask off the least significant 27 bits of y */
349: t=y; pt[n3]=0; pt[n2]&=0xf800; y1=t;
350:
351: /* LOOP: argument reduction on x whenever x > y */
352: loop:
353: while ( x > y )
354: {
355: t=y;
356: t1=y1;
357: xexp=px[n0]&mexp; /* exponent of x */
358: k=xexp-yexp-m25;
359: if(k>0) /* if x/y >= 2**26, scale up y so that x/y < 2**26 */
360: {pt[n0]+=k;pt1[n0]+=k;}
361: n=x/t; x=(x-n*t1)-n*(t-t1);
362: }
363: /* end while (x > y) */
364:
365: if(nx!=0) {t=1.0; pt[n0]+=nx; x*=t; nx=0; goto loop;}
366:
367: /* final adjustment */
368:
369: hy=y/2.0;
370: if(x>hy||((x==hy)&&n%2==1)) x-=y;
371: px[n0] ^= sign;
372: if(nf!=0) { t=1.0; pt[n0]-=nf; x*=t;}
373:
374: /* restore inexact flag and inexact enable */
375: swapINX(i); swapENI(e);
376:
377: return(x);
378: }
379: #endif
380:
381: #if 0
382: /* SQRT
383: * RETURN CORRECTLY ROUNDED (ACCORDING TO THE ROUNDING MODE) SQRT
384: * FOR IEEE DOUBLE PRECISION ONLY, INTENDED FOR ASSEMBLY LANGUAGE
385: * CODED IN C BY K.C. NG, 3/22/85.
386: *
387: * Warning: this code should not get compiled in unless ALL of
388: * the following machine-dependent routines are supplied.
389: *
390: * Required machine dependent functions:
391: * swapINX(i) ...return the status of INEXACT flag and reset it to "i"
392: * swapRM(r) ...return the current Rounding Mode and reset it to "r"
393: * swapENI(e) ...return the status of inexact enable and reset it to "e"
394: * addc(t) ...perform t=t+1 regarding t as a 64 bit unsigned integer
395: * subc(t) ...perform t=t-1 regarding t as a 64 bit unsigned integer
396: */
397:
398: static unsigned long table[] = {
399: 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740,
400: 58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, 27478,
401: 21581, 16499, 12183, 8588, 5674, 3403, 1742, 661, 130, };
402:
403: double newsqrt(x)
404: double x;
405: {
406: double y,z,t,addc(),subc(),b54=134217728.*134217728.; /* b54=2**54 */
407: long mx,scalx,mexp=0x7ff00000;
408: int i,j,r,e,swapINX(),swapRM(),swapENI();
409: unsigned long *py=(unsigned long *) &y ,
410: *pt=(unsigned long *) &t ,
411: *px=(unsigned long *) &x ;
412: #ifdef NATIONAL /* ordering of word in a floating point number */
413: int n0=1, n1=0;
414: #else
415: int n0=0, n1=1;
416: #endif
417: /* Rounding Mode: RN ...round-to-nearest
418: * RZ ...round-towards 0
419: * RP ...round-towards +INF
420: * RM ...round-towards -INF
421: */
422: int RN=0,RZ=1,RP=2,RM=3;/* machine dependent: work on a Zilog Z8070
423: * and a National 32081 & 16081
424: */
425:
426: /* exceptions */
427: if(x!=x||x==0.0) return(x); /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
428: if(x<0) return((x-x)/(x-x)); /* sqrt(negative) is invalid */
429: if((mx=px[n0]&mexp)==mexp) return(x); /* sqrt(+INF) is +INF */
430:
431: /* save, reset, initialize */
432: e=swapENI(0); /* ...save and reset the inexact enable */
433: i=swapINX(0); /* ...save INEXACT flag */
434: r=swapRM(RN); /* ...save and reset the Rounding Mode to RN */
435: scalx=0;
436:
437: /* subnormal number, scale up x to x*2**54 */
438: if(mx==0) {x *= b54 ; scalx-=0x01b00000;}
439:
440: /* scale x to avoid intermediate over/underflow:
441: * if (x > 2**512) x=x/2**512; if (x < 2**-512) x=x*2**512 */
442: if(mx>0x5ff00000) {px[n0] -= 0x20000000; scalx+= 0x10000000;}
443: if(mx<0x1ff00000) {px[n0] += 0x20000000; scalx-= 0x10000000;}
444:
445: /* magic initial approximation to almost 8 sig. bits */
446: py[n0]=(px[n0]>>1)+0x1ff80000;
447: py[n0]=py[n0]-table[(py[n0]>>15)&31];
448:
449: /* Heron's rule once with correction to improve y to almost 18 sig. bits */
450: t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0;
451:
452: /* triple to almost 56 sig. bits; now y approx. sqrt(x) to within 1 ulp */
453: t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y;
454: t=z/(t+x) ; pt[n0]+=0x00100000; y+=t;
455:
456: /* twiddle last bit to force y correctly rounded */
457: swapRM(RZ); /* ...set Rounding Mode to round-toward-zero */
458: swapINX(0); /* ...clear INEXACT flag */
459: swapENI(e); /* ...restore inexact enable status */
460: t=x/y; /* ...chopped quotient, possibly inexact */
461: j=swapINX(i); /* ...read and restore inexact flag */
462: if(j==0) { if(t==y) goto end; else t=subc(t); } /* ...t=t-ulp */
463: b54+0.1; /* ..trigger inexact flag, sqrt(x) is inexact */
464: if(r==RN) t=addc(t); /* ...t=t+ulp */
465: else if(r==RP) { t=addc(t);y=addc(y);}/* ...t=t+ulp;y=y+ulp; */
466: y=y+t; /* ...chopped sum */
467: py[n0]=py[n0]-0x00100000; /* ...correctly rounded sqrt(x) */
468: end: py[n0]=py[n0]+scalx; /* ...scale back y */
469: swapRM(r); /* ...restore Rounding Mode */
470: return(y);
471: }
472: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.