|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1985 Regents of the University of California. ! 3: * ! 4: * Use and reproduction of this software are granted in accordance with ! 5: * the terms and conditions specified in the Berkeley Software License ! 6: * Agreement (in particular, this entails acknowledgement of the programs' ! 7: * source, and inclusion of this notice) with the additional understanding ! 8: * that all recipients should regard themselves as participants in an ! 9: * ongoing research project and hence should feel obligated to report ! 10: * their experiences (good or bad) with these elementary function codes, ! 11: * using "sendbug 4bsd-bugs@BERKELEY", to the authors. ! 12: */ ! 13: ! 14: #ifndef lint ! 15: static char sccsid[] = "@(#)support.c 1.1 (Berkeley) 5/23/85"; ! 16: #endif not lint ! 17: ! 18: /* ! 19: * Some IEEE standard p754 recommended functions and remainder and sqrt for ! 20: * supporting the C elementary functions. ! 21: ****************************************************************************** ! 22: * WARNING: ! 23: * These codes are developed (in double) to support the C elementary ! 24: * functions temporarily. They are not universal, and some of them are very ! 25: * slow (in particular, drem and sqrt is extremely inefficient). Each ! 26: * computer system should have its implementation of these functions using ! 27: * its own assembler. ! 28: ****************************************************************************** ! 29: * ! 30: * IEEE p754 required operations: ! 31: * drem(x,p) ! 32: * returns x REM y = x - [x/y]*y , where [x/y] is the integer ! 33: * nearest x/y; in half way case, choose the even one. ! 34: * sqrt(x) ! 35: * returns the square root of x correctly rounded according to ! 36: * the rounding mod. ! 37: * ! 38: * IEEE p754 recommended functions: ! 39: * (a) copysign(x,y) ! 40: * returns x with the sign of y. ! 41: * (b) scalb(x,N) ! 42: * returns x * (2**N), for integer values N. ! 43: * (c) logb(x) ! 44: * returns the unbiased exponent of x, a signed integer in ! 45: * double precision, except that logb(0) is -INF, logb(INF) ! 46: * is +INF, and logb(NAN) is that NAN. ! 47: * (d) finite(x) ! 48: * returns the value TRUE if -INF < x < +INF and returns ! 49: * FALSE otherwise. ! 50: * ! 51: * ! 52: * CODED IN C BY K.C. NG, 11/25/84; ! 53: * REVISED BY K.C. NG on 1/22/85, 2/13/85, 3/24/85. ! 54: */ ! 55: ! 56: ! 57: #ifdef VAX /* VAX D format */ ! 58: static unsigned short msign=0x7fff , mexp =0x7f80 ; ! 59: static short prep1=57, gap=7, bias=129 ; ! 60: static double novf=1.7E38, nunf=3.0E-39, zero=0.0 ; ! 61: #else /*IEEE double format */ ! 62: static unsigned short msign=0x7fff, mexp =0x7ff0 ; ! 63: static short prep1=54, gap=4, bias=1023 ; ! 64: static double novf=1.7E308, nunf=3.0E-308,zero=0.0; ! 65: #endif ! 66: ! 67: double scalb(x,N) ! 68: double x; int N; ! 69: { ! 70: int k; ! 71: double scalb(); ! 72: ! 73: #ifdef NATIONAL ! 74: unsigned short *px=(unsigned short *) &x + 3; ! 75: #else /* VAX, SUN, ZILOG */ ! 76: unsigned short *px=(unsigned short *) &x; ! 77: #endif ! 78: ! 79: if( x == zero ) return(x); ! 80: ! 81: #ifdef VAX ! 82: if( (k= *px & mexp ) != ~msign ) { ! 83: if( N<-260) return(nunf*nunf); else if(N>260) return(novf+novf); ! 84: #else /* IEEE */ ! 85: if( (k= *px & mexp ) != mexp ) { ! 86: if( N<-2100) return(nunf*nunf); else if(N>2100) return(novf+novf); ! 87: if( k == 0 ) { ! 88: x *= scalb(1.0,(int)prep1); N -= prep1; return(scalb(x,N));} ! 89: #endif ! 90: ! 91: if((k = (k>>gap)+ N) > 0 ) ! 92: if( k < (mexp>>gap) ) *px = (*px&~mexp) | (k<<gap); ! 93: else x=novf+novf; /* overflow */ ! 94: else ! 95: if( k > -prep1 ) ! 96: /* gradual underflow */ ! 97: {*px=(*px&~mexp)|(short)(1<<gap); x *= scalb(1.0,k-1);} ! 98: else ! 99: return(nunf*nunf); ! 100: } ! 101: return(x); ! 102: } ! 103: ! 104: ! 105: double copysign(x,y) ! 106: double x,y; ! 107: { ! 108: #ifdef NATIONAL ! 109: unsigned short *px=(unsigned short *) &x+3, ! 110: *py=(unsigned short *) &y+3; ! 111: #else /* VAX, SUN, ZILOG */ ! 112: unsigned short *px=(unsigned short *) &x, ! 113: *py=(unsigned short *) &y; ! 114: #endif ! 115: ! 116: #ifdef VAX ! 117: if ( (*px & mexp) == 0 ) return(x); ! 118: #endif ! 119: ! 120: *px = ( *px & msign ) | ( *py & ~msign ); ! 121: return(x); ! 122: } ! 123: ! 124: double logb(x) ! 125: double x; ! 126: { ! 127: ! 128: #ifdef NATIONAL ! 129: short *px=(short *) &x+3, k; ! 130: #else /* VAX, SUN, ZILOG */ ! 131: short *px=(short *) &x, k; ! 132: #endif ! 133: ! 134: #ifdef VAX ! 135: return( ((*px & mexp)>>gap) - bias); ! 136: #else /* IEEE */ ! 137: if( (k= *px & mexp ) != mexp ) ! 138: if ( k != 0 ) ! 139: return ( (k>>gap) - bias ); ! 140: else if( x != zero) ! 141: return ( -1022.0 ); ! 142: else ! 143: return(-(1.0/zero)); ! 144: else if(x != x) ! 145: return(x); ! 146: else ! 147: {*px &= msign; return(x);} ! 148: #endif ! 149: } ! 150: ! 151: finite(x) ! 152: double x; ! 153: { ! 154: #ifdef VAX ! 155: return(1.0); ! 156: #else /* IEEE */ ! 157: #ifdef NATIONAL ! 158: return( (*((short *) &x+3 ) & mexp ) != mexp ); ! 159: #else /* SUN, ZILOG */ ! 160: return( (*((short *) &x ) & mexp ) != mexp ); ! 161: #endif ! 162: #endif ! 163: } ! 164: ! 165: double drem(x,p) ! 166: double x,p; ! 167: { ! 168: short sign; ! 169: double hp,dp,tmp,drem(),scalb(); ! 170: unsigned short k; ! 171: #ifdef NATIONAL ! 172: unsigned short ! 173: *px=(unsigned short *) &x +3, ! 174: *pp=(unsigned short *) &p +3, ! 175: *pd=(unsigned short *) &dp +3, ! 176: *pt=(unsigned short *) &tmp+3; ! 177: #else /* VAX, SUN, ZILOG */ ! 178: unsigned short ! 179: *px=(unsigned short *) &x , ! 180: *pp=(unsigned short *) &p , ! 181: *pd=(unsigned short *) &dp , ! 182: *pt=(unsigned short *) &tmp; ! 183: #endif ! 184: ! 185: *pp &= msign ; ! 186: ! 187: #ifdef VAX ! 188: if( ( *px & mexp ) == ~msign || p == zero ) ! 189: #else /* IEEE */ ! 190: if( ( *px & mexp ) == mexp || p == zero ) ! 191: #endif ! 192: ! 193: return( (x != x)? x:zero/zero ); ! 194: ! 195: else if ( ((*pp & mexp)>>gap) <= 1 ) ! 196: /* subnormal p, or almost subnormal p */ ! 197: { double b; b=scalb(1.0,(int)prep1); ! 198: p *= b; x = drem(x,p); x *= b; return(drem(x,p)/b);} ! 199: else if ( p >= novf/2) ! 200: { p /= 2 ; x /= 2; return(drem(x,p)*2);} ! 201: else ! 202: { ! 203: dp=p+p; hp=p/2; ! 204: sign= *px & ~msign ; ! 205: *px &= msign ; ! 206: while ( x > dp ) ! 207: { ! 208: k=(*px & mexp) - (*pd & mexp) ; ! 209: tmp = dp ; ! 210: *pt += k ; ! 211: ! 212: #ifdef VAX ! 213: if( x < tmp ) *pt -= 128 ; ! 214: #else /* IEEE */ ! 215: if( x < tmp ) *pt -= 16 ; ! 216: #endif ! 217: ! 218: x -= tmp ; ! 219: } ! 220: if ( x > hp ) ! 221: { x -= p ; if ( x >= hp ) x -= p ; } ! 222: ! 223: *px = *px ^ sign; ! 224: return( x); ! 225: ! 226: } ! 227: } ! 228: double sqrt(x) ! 229: double x; ! 230: { ! 231: double q,s,b,r; ! 232: double logb(),scalb(); ! 233: double t,zero=0.0; ! 234: int m,n,i,finite(); ! 235: #ifdef VAX ! 236: int k=54; ! 237: #else /* IEEE */ ! 238: int k=51; ! 239: #endif ! 240: ! 241: /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */ ! 242: if(x!=x||x==zero) return(x); ! 243: ! 244: /* sqrt(negative) is invalid */ ! 245: if(x<zero) return(zero/zero); ! 246: ! 247: /* sqrt(INF) is INF */ ! 248: if(!finite(x)) return(x); ! 249: ! 250: /* scale x to [1,4) */ ! 251: n=logb(x); ! 252: x=scalb(x,-n); ! 253: if((m=logb(x))!=0) x=scalb(x,-m); /* subnormal number */ ! 254: m += n; ! 255: n = m/2; ! 256: if((n+n)!=m) {x *= 2; m -=1; n=m/2;} ! 257: ! 258: /* generate sqrt(x) bit by bit (accumulating in q) */ ! 259: q=1.0; s=4.0; x -= 1.0; r=1; ! 260: for(i=1;i<=k;i++) { ! 261: t=s+1; x *= 4; r /= 2; ! 262: if(t<=x) { ! 263: s=t+t+2, x -= t; q += r;} ! 264: else ! 265: s *= 2; ! 266: } ! 267: ! 268: /* generate the last bit and determine the final rounding */ ! 269: r/=2; x *= 4; ! 270: if(x==zero) goto end; 100+r; /* trigger inexact flag */ ! 271: if(s<x) { ! 272: q+=r; x -=s; s += 2; s *= 2; x *= 4; ! 273: t = (x-s)-5; ! 274: b=1.0+3*r/4; if(b==1.0) goto end; /* b==1 : Round-to-zero */ ! 275: b=1.0+r/4; if(b>1.0) t=1; /* b>1 : Round-to-(+INF) */ ! 276: if(t>=0) q+=r; } /* else: Round-to-nearest */ ! 277: else { ! 278: s *= 2; x *= 4; ! 279: t = (x-s)-1; ! 280: b=1.0+3*r/4; if(b==1.0) goto end; ! 281: b=1.0+r/4; if(b>1.0) t=1; ! 282: if(t>=0) q+=r; } ! 283: ! 284: end: return(scalb(q,n)); ! 285: } ! 286: ! 287: #if 0 ! 288: /* DREM(X,Y) ! 289: * RETURN X REM Y =X-N*Y, N=[X/Y] ROUNDED (ROUNDED TO EVEN IN THE HALF WAY CASE) ! 290: * DOUBLE PRECISION (VAX D format 56 bits, IEEE DOUBLE 53 BITS) ! 291: * INTENDED FOR ASSEMBLY LANGUAGE ! 292: * CODED IN C BY K.C. NG, 3/23/85, 4/8/85. ! 293: * ! 294: * Warning: this code should not get compiled in unless ALL of ! 295: * the following machine-dependent routines are supplied. ! 296: * ! 297: * Required machine dependent functions (not on a VAX): ! 298: * swapINX(i): save inexact flag and reset it to "i" ! 299: * swapENI(e): save inexact enable and reset it to "e" ! 300: */ ! 301: ! 302: double drem(x,y) ! 303: double x,y; ! 304: { ! 305: ! 306: #ifdef NATIONAL /* order of words in floating point number */ ! 307: static n0=3,n1=2,n2=1,n3=0; ! 308: #else /* VAX, SUN, ZILOG */ ! 309: static n0=0,n1=1,n2=2,n3=3; ! 310: #endif ! 311: ! 312: static unsigned short mexp =0x7ff0, m25 =0x0190, m57 =0x0390; ! 313: static double zero=0.0; ! 314: double hy,y1,t,t1; ! 315: short k; ! 316: long n; ! 317: int i,e; ! 318: unsigned short xexp,yexp, *px =(unsigned short *) &x , ! 319: nx,nf, *py =(unsigned short *) &y , ! 320: sign, *pt =(unsigned short *) &t , ! 321: *pt1 =(unsigned short *) &t1 ; ! 322: ! 323: xexp = px[n0] & mexp ; /* exponent of x */ ! 324: yexp = py[n0] & mexp ; /* exponent of y */ ! 325: sign = px[n0] &0x8000; /* sign of x */ ! 326: ! 327: /* return NaN if x is NaN, or y is NaN, or x is INF, or y is zero */ ! 328: if(x!=x) return(x); if(y!=y) return(y); /* x or y is NaN */ ! 329: if( xexp == mexp ) return(zero/zero); /* x is INF */ ! 330: if(y==zero) return(y/y); ! 331: ! 332: /* save the inexact flag and inexact enable in i and e respectively ! 333: * and reset them to zero ! 334: */ ! 335: i=swapINX(0); e=swapENI(0); ! 336: ! 337: /* subnormal number */ ! 338: nx=0; ! 339: if(yexp==0) {t=1.0,pt[n0]+=m57; y*=t; nx=m57;} ! 340: ! 341: /* if y is tiny (biased exponent <= 57), scale up y to y*2**57 */ ! 342: if( yexp <= m57 ) {py[n0]+=m57; nx+=m57; yexp+=m57;} ! 343: ! 344: nf=nx; ! 345: py[n0] &= 0x7fff; ! 346: px[n0] &= 0x7fff; ! 347: ! 348: /* mask off the least significant 27 bits of y */ ! 349: t=y; pt[n3]=0; pt[n2]&=0xf800; y1=t; ! 350: ! 351: /* LOOP: argument reduction on x whenever x > y */ ! 352: loop: ! 353: while ( x > y ) ! 354: { ! 355: t=y; ! 356: t1=y1; ! 357: xexp=px[n0]&mexp; /* exponent of x */ ! 358: k=xexp-yexp-m25; ! 359: if(k>0) /* if x/y >= 2**26, scale up y so that x/y < 2**26 */ ! 360: {pt[n0]+=k;pt1[n0]+=k;} ! 361: n=x/t; x=(x-n*t1)-n*(t-t1); ! 362: } ! 363: /* end while (x > y) */ ! 364: ! 365: if(nx!=0) {t=1.0; pt[n0]+=nx; x*=t; nx=0; goto loop;} ! 366: ! 367: /* final adjustment */ ! 368: ! 369: hy=y/2.0; ! 370: if(x>hy||((x==hy)&&n%2==1)) x-=y; ! 371: px[n0] ^= sign; ! 372: if(nf!=0) { t=1.0; pt[n0]-=nf; x*=t;} ! 373: ! 374: /* restore inexact flag and inexact enable */ ! 375: swapINX(i); swapENI(e); ! 376: ! 377: return(x); ! 378: } ! 379: #endif ! 380: ! 381: #if 0 ! 382: /* SQRT ! 383: * RETURN CORRECTLY ROUNDED (ACCORDING TO THE ROUNDING MODE) SQRT ! 384: * FOR IEEE DOUBLE PRECISION ONLY, INTENDED FOR ASSEMBLY LANGUAGE ! 385: * CODED IN C BY K.C. NG, 3/22/85. ! 386: * ! 387: * Warning: this code should not get compiled in unless ALL of ! 388: * the following machine-dependent routines are supplied. ! 389: * ! 390: * Required machine dependent functions: ! 391: * swapINX(i) ...return the status of INEXACT flag and reset it to "i" ! 392: * swapRM(r) ...return the current Rounding Mode and reset it to "r" ! 393: * swapENI(e) ...return the status of inexact enable and reset it to "e" ! 394: * addc(t) ...perform t=t+1 regarding t as a 64 bit unsigned integer ! 395: * subc(t) ...perform t=t-1 regarding t as a 64 bit unsigned integer ! 396: */ ! 397: ! 398: static unsigned long table[] = { ! 399: 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740, ! 400: 58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, 27478, ! 401: 21581, 16499, 12183, 8588, 5674, 3403, 1742, 661, 130, }; ! 402: ! 403: double newsqrt(x) ! 404: double x; ! 405: { ! 406: double y,z,t,addc(),subc(),b54=134217728.*134217728.; /* b54=2**54 */ ! 407: long mx,scalx,mexp=0x7ff00000; ! 408: int i,j,r,e,swapINX(),swapRM(),swapENI(); ! 409: unsigned long *py=(unsigned long *) &y , ! 410: *pt=(unsigned long *) &t , ! 411: *px=(unsigned long *) &x ; ! 412: #ifdef NATIONAL /* ordering of word in a floating point number */ ! 413: int n0=1, n1=0; ! 414: #else ! 415: int n0=0, n1=1; ! 416: #endif ! 417: /* Rounding Mode: RN ...round-to-nearest ! 418: * RZ ...round-towards 0 ! 419: * RP ...round-towards +INF ! 420: * RM ...round-towards -INF ! 421: */ ! 422: int RN=0,RZ=1,RP=2,RM=3;/* machine dependent: work on a Zilog Z8070 ! 423: * and a National 32081 & 16081 ! 424: */ ! 425: ! 426: /* exceptions */ ! 427: if(x!=x||x==0.0) return(x); /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */ ! 428: if(x<0) return((x-x)/(x-x)); /* sqrt(negative) is invalid */ ! 429: if((mx=px[n0]&mexp)==mexp) return(x); /* sqrt(+INF) is +INF */ ! 430: ! 431: /* save, reset, initialize */ ! 432: e=swapENI(0); /* ...save and reset the inexact enable */ ! 433: i=swapINX(0); /* ...save INEXACT flag */ ! 434: r=swapRM(RN); /* ...save and reset the Rounding Mode to RN */ ! 435: scalx=0; ! 436: ! 437: /* subnormal number, scale up x to x*2**54 */ ! 438: if(mx==0) {x *= b54 ; scalx-=0x01b00000;} ! 439: ! 440: /* scale x to avoid intermediate over/underflow: ! 441: * if (x > 2**512) x=x/2**512; if (x < 2**-512) x=x*2**512 */ ! 442: if(mx>0x5ff00000) {px[n0] -= 0x20000000; scalx+= 0x10000000;} ! 443: if(mx<0x1ff00000) {px[n0] += 0x20000000; scalx-= 0x10000000;} ! 444: ! 445: /* magic initial approximation to almost 8 sig. bits */ ! 446: py[n0]=(px[n0]>>1)+0x1ff80000; ! 447: py[n0]=py[n0]-table[(py[n0]>>15)&31]; ! 448: ! 449: /* Heron's rule once with correction to improve y to almost 18 sig. bits */ ! 450: t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; ! 451: ! 452: /* triple to almost 56 sig. bits; now y approx. sqrt(x) to within 1 ulp */ ! 453: t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; ! 454: t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; ! 455: ! 456: /* twiddle last bit to force y correctly rounded */ ! 457: swapRM(RZ); /* ...set Rounding Mode to round-toward-zero */ ! 458: swapINX(0); /* ...clear INEXACT flag */ ! 459: swapENI(e); /* ...restore inexact enable status */ ! 460: t=x/y; /* ...chopped quotient, possibly inexact */ ! 461: j=swapINX(i); /* ...read and restore inexact flag */ ! 462: if(j==0) { if(t==y) goto end; else t=subc(t); } /* ...t=t-ulp */ ! 463: b54+0.1; /* ..trigger inexact flag, sqrt(x) is inexact */ ! 464: if(r==RN) t=addc(t); /* ...t=t+ulp */ ! 465: else if(r==RP) { t=addc(t);y=addc(y);}/* ...t=t+ulp;y=y+ulp; */ ! 466: y=y+t; /* ...chopped sum */ ! 467: py[n0]=py[n0]-0x00100000; /* ...correctly rounded sqrt(x) */ ! 468: end: py[n0]=py[n0]+scalx; /* ...scale back y */ ! 469: swapRM(r); /* ...restore Rounding Mode */ ! 470: return(y); ! 471: } ! 472: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.