|
|
1.1 ! root 1: ; Copyright (c) 1985 Regents of the University of California. ! 2: ; All rights reserved. ! 3: ; ! 4: ; Redistribution and use in source and binary forms are permitted ! 5: ; provided that the above copyright notice and this paragraph are ! 6: ; duplicated in all such forms and that any documentation, ! 7: ; advertising materials, and other materials related to such ! 8: ; distribution and use acknowledge that the software was developed ! 9: ; by the University of California, Berkeley. The name of the ! 10: ; University may not be used to endorse or promote products derived ! 11: ; from this software without specific prior written permission. ! 12: ; THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR ! 13: ; IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED ! 14: ; WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. ! 15: ; ! 16: ; All recipients should regard themselves as participants in an ongoing ! 17: ; research project and hence should feel obligated to report their ! 18: ; experiences (good or bad) with these elementary function codes, using ! 19: ; the sendbug(8) program, to the authors. ! 20: ; ! 21: ; @(#)sqrt.s 5.3 (Berkeley) 6/30/88 ! 22: ; ! 23: ! 24: ; double sqrt(x) ! 25: ; double x; ! 26: ; IEEE double precision sqrt ! 27: ; code in NSC assembly by K.C. Ng ! 28: ; 12/13/85 ! 29: ; ! 30: ; Method: ! 31: ; Use Kahan's trick to get 8 bits initial approximation ! 32: ; by integer shift and add/subtract. Then three Newton ! 33: ; iterations to get down error to within one ulp. Finally ! 34: ; twiddle the last bit to make to correctly rounded ! 35: ; according to the rounding mode. ! 36: ; ! 37: .vers 2 ! 38: .text ! 39: .align 2 ! 40: .globl _sqrt ! 41: _sqrt: ! 42: enter [r3,r4,r5,r6,r7],44 ! 43: movl f4,tos ! 44: movl f6,tos ! 45: movd 2146435072,r2 ; r2 = 0x7ff00000 ! 46: movl 8(fp),f0 ; f2 = x ! 47: movd 12(fp),r3 ; r3 = high part of x ! 48: movd r3,r4 ; make a copy of high part of x in r4 ! 49: andd r2,r3 ; r3 become the bias exponent of x ! 50: cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN ! 51: bne L22 ! 52: ; to see if x is INF ! 53: movd 8(fp),r0 ; r0 = low part of x ! 54: movd r4,r1 ; r1 is high part of x again ! 55: andd 0xfff00000,r1 ; mask off the sign and exponent of x ! 56: ord r0,r1 ; or with low part, if 0 then x is INF ! 57: cmpqd 0,r1 ; ! 58: bne L1 ; not 0; therefore x is NaN; return x. ! 59: cmpqd 0,r4 ; now x is Inf, is it +inf? ! 60: blt L1 ; +INF, return x ! 61: ; -INF, return NaN by doing 0/0 ! 62: nan: movl 0f0.0,f0 ; ! 63: divl f0,f0 ! 64: br L1 ! 65: L22: ; now x is finite ! 66: cmpl 0f0.0,f0 ; x = 0 ? ! 67: beq L1 ; return x if x is +0 or -0 ! 68: cmpqd 0,r4 ; Is x < 0 ? ! 69: bgt nan ; if x < 0 return NaN ! 70: movqd 0,r5 ; r5 == scalx initialize to zero ! 71: cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent) ! 72: bne L21 ; if x is normal, goto L21 ! 73: movl L30,f2 ; f2 = 2**54 ! 74: mull f2,f0 ; scale up x by 2**54 ! 75: subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent ! 76: L21: ! 77: ; now x is normal ! 78: ; notations: ! 79: ; r1 == copy of fsr ! 80: ; r2 == mask of e inexact enable flag ! 81: ; r3 == mask of i inexact flag ! 82: ; r4 == mask of r rounding mode ! 83: ; r5 == x's scale factor (already defined) ! 84: ! 85: movd 0x20,r2 ! 86: movd 0x40,r3 ! 87: movd 0x180,r4 ! 88: sfsr r0 ; store fsr to r0 ! 89: movd r0,r1 ; make a copy of fsr to r1 ! 90: bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest ! 91: lfsr r0 ! 92: ! 93: ; begin to compute sqrt(x) ! 94: movl f0,-8(fp) ! 95: movd -4(fp),r0 ; r0 the high part of modified x ! 96: lshd -1,r0 ; r0 >> 1 ! 97: addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx. ! 98: movd r0,r6 ! 99: lshd -13,r6 ; r6 = r0>>-15 ! 100: andd 0x7c,r6 ; obtain 4*leading 5 bits of r0 ! 101: addrd L29,r7 ; r7 = address of L29 = table[0] ! 102: addd r6,r7 ; r6 = address of L29[r6] = table[r6] ! 103: subd 0(r7),r0 ; r0 = r0 - table[r6] ! 104: movd r0,-4(fp) ! 105: movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits ! 106: ! 107: movl 0f0.5,f6 ; f6 = 0.5 ! 108: movl f0,f4 ! 109: divl f2,f4 ; t = x/y ! 110: addl f4,f2 ; y = y + x/y ! 111: mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx. ! 112: movl f0,f4 ! 113: divl f2,f4 ; t = x/y ! 114: addl f4,f2 ; y = y + x/y ! 115: mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx. ! 116: movl f0,f4 ! 117: divl f2,f4 ; t = x/y ! 118: subl f2,f4 ; t = x/y - y ! 119: mull f6,f4 ; t = 0.5(x/y-y) ! 120: addl f4,f2 ; y = y + 0.5(x/y -y) ! 121: ; now y approx. sqrt(x) to within 1 ulp ! 122: ! 123: ; twiddle last bit to force y correctly rounded ! 124: movd r1,r0 ; restore the old fsr ! 125: bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable ! 126: ord 0x80,r0 ; set rounding mode to round to zero ! 127: lfsr r0 ! 128: ! 129: movl f0,f4 ! 130: divl f2,f4 ; f4 = x/y ! 131: sfsr r0 ! 132: andd r3,r0 ; get the inexact flag ! 133: cmpqd 0,r0 ! 134: bne L18 ! 135: ; if x/y exact, then ... ! 136: cmpl f2,f4 ; if y == x/y ! 137: beq L2 ! 138: movl f4,-8(fp) ! 139: subd 1,-8(fp) ! 140: subcd 0,-4(fp) ! 141: movl -8(fp),f4 ; f4 = f4 - ulp ! 142: L18: ! 143: bicd [6],r1 ! 144: ord r3,r1 ; set inexact flag in r1 ! 145: ! 146: andd r1,r4 ; r4 = the old rounding mode ! 147: cmpqd 0,r4 ; round to nearest? ! 148: bne L17 ! 149: movl f4,-8(fp) ! 150: addd 1,-8(fp) ! 151: addcd 0,-4(fp) ! 152: movl -8(fp),f4 ; f4 = f4 + ulp ! 153: br L16 ! 154: L17: ! 155: cmpd 0x100,r4 ; round to positive inf ? ! 156: bne L16 ! 157: movl f4,-8(fp) ! 158: addd 1,-8(fp) ! 159: addcd 0,-4(fp) ! 160: movl -8(fp),f4 ; f4 = f4 + ulp ! 161: ! 162: movl f2,-8(fp) ! 163: addd 1,-8(fp) ! 164: addcd 0,-4(fp) ! 165: movl -8(fp),f2 ; f2 = f2 + ulp ! 166: L16: ! 167: addl f4,f2 ; y = y + t ! 168: subd 0x100000,r5 ; scalx = scalx - 1 ! 169: L2: ! 170: movl f2,-8(fp) ! 171: addd r5,-4(fp) ! 172: movl -8(fp),f0 ! 173: lfsr r1 ! 174: L1: ! 175: movl tos,f6 ! 176: movl tos,f4 ! 177: exit [r3,r4,r5,r6,r7] ! 178: ret 0 ! 179: .data ! 180: L28: .byte 64,40,35,41,115,113,114,116,46,99 ! 181: .byte 9,49,46,49,32,40,117,99,98,46 ! 182: .byte 101,108,101,102,117,110,116,41,32,57 ! 183: .byte 47,49,57,47,56,53,0 ! 184: L29: .blkb 4 ! 185: .double 1204 ! 186: .double 3062 ! 187: .double 5746 ! 188: .double 9193 ! 189: .double 13348 ! 190: .double 18162 ! 191: .double 23592 ! 192: .double 29598 ! 193: .double 36145 ! 194: .double 43202 ! 195: .double 50740 ! 196: .double 58733 ! 197: .double 67158 ! 198: .double 75992 ! 199: .double 85215 ! 200: .double 83599 ! 201: .double 71378 ! 202: .double 60428 ! 203: .double 50647 ! 204: .double 41945 ! 205: .double 34246 ! 206: .double 27478 ! 207: .double 21581 ! 208: .double 16499 ! 209: .double 12183 ! 210: .double 8588 ! 211: .double 5674 ! 212: .double 3403 ! 213: .double 1742 ! 214: .double 661 ! 215: .double 130 ! 216: L30: .blkb 4 ! 217: .double 1129316352 ;L30: .double 0,0x43500000 ! 218: L31: .blkb 4 ! 219: .double 0x1ff00000 ! 220: L32: .blkb 4 ! 221: .double 0x5ff00000
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.