|
|
1.1 root 1: ; Copyright (c) 1985 Regents of the University of California.
2: ; All rights reserved.
3: ;
4: ; Redistribution and use in source and binary forms are permitted
5: ; provided that the above copyright notice and this paragraph are
6: ; duplicated in all such forms and that any documentation,
7: ; advertising materials, and other materials related to such
8: ; distribution and use acknowledge that the software was developed
9: ; by the University of California, Berkeley. The name of the
10: ; University may not be used to endorse or promote products derived
11: ; from this software without specific prior written permission.
12: ; THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
13: ; IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
14: ; WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15: ;
16: ; All recipients should regard themselves as participants in an ongoing
17: ; research project and hence should feel obligated to report their
18: ; experiences (good or bad) with these elementary function codes, using
19: ; the sendbug(8) program, to the authors.
20: ;
21: ; @(#)sqrt.s 5.3 (Berkeley) 6/30/88
22: ;
23:
24: ; double sqrt(x)
25: ; double x;
26: ; IEEE double precision sqrt
27: ; code in NSC assembly by K.C. Ng
28: ; 12/13/85
29: ;
30: ; Method:
31: ; Use Kahan's trick to get 8 bits initial approximation
32: ; by integer shift and add/subtract. Then three Newton
33: ; iterations to get down error to within one ulp. Finally
34: ; twiddle the last bit to make to correctly rounded
35: ; according to the rounding mode.
36: ;
37: .vers 2
38: .text
39: .align 2
40: .globl _sqrt
41: _sqrt:
42: enter [r3,r4,r5,r6,r7],44
43: movl f4,tos
44: movl f6,tos
45: movd 2146435072,r2 ; r2 = 0x7ff00000
46: movl 8(fp),f0 ; f2 = x
47: movd 12(fp),r3 ; r3 = high part of x
48: movd r3,r4 ; make a copy of high part of x in r4
49: andd r2,r3 ; r3 become the bias exponent of x
50: cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN
51: bne L22
52: ; to see if x is INF
53: movd 8(fp),r0 ; r0 = low part of x
54: movd r4,r1 ; r1 is high part of x again
55: andd 0xfff00000,r1 ; mask off the sign and exponent of x
56: ord r0,r1 ; or with low part, if 0 then x is INF
57: cmpqd 0,r1 ;
58: bne L1 ; not 0; therefore x is NaN; return x.
59: cmpqd 0,r4 ; now x is Inf, is it +inf?
60: blt L1 ; +INF, return x
61: ; -INF, return NaN by doing 0/0
62: nan: movl 0f0.0,f0 ;
63: divl f0,f0
64: br L1
65: L22: ; now x is finite
66: cmpl 0f0.0,f0 ; x = 0 ?
67: beq L1 ; return x if x is +0 or -0
68: cmpqd 0,r4 ; Is x < 0 ?
69: bgt nan ; if x < 0 return NaN
70: movqd 0,r5 ; r5 == scalx initialize to zero
71: cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent)
72: bne L21 ; if x is normal, goto L21
73: movl L30,f2 ; f2 = 2**54
74: mull f2,f0 ; scale up x by 2**54
75: subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent
76: L21:
77: ; now x is normal
78: ; notations:
79: ; r1 == copy of fsr
80: ; r2 == mask of e inexact enable flag
81: ; r3 == mask of i inexact flag
82: ; r4 == mask of r rounding mode
83: ; r5 == x's scale factor (already defined)
84:
85: movd 0x20,r2
86: movd 0x40,r3
87: movd 0x180,r4
88: sfsr r0 ; store fsr to r0
89: movd r0,r1 ; make a copy of fsr to r1
90: bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest
91: lfsr r0
92:
93: ; begin to compute sqrt(x)
94: movl f0,-8(fp)
95: movd -4(fp),r0 ; r0 the high part of modified x
96: lshd -1,r0 ; r0 >> 1
97: addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx.
98: movd r0,r6
99: lshd -13,r6 ; r6 = r0>>-15
100: andd 0x7c,r6 ; obtain 4*leading 5 bits of r0
101: addrd L29,r7 ; r7 = address of L29 = table[0]
102: addd r6,r7 ; r6 = address of L29[r6] = table[r6]
103: subd 0(r7),r0 ; r0 = r0 - table[r6]
104: movd r0,-4(fp)
105: movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits
106:
107: movl 0f0.5,f6 ; f6 = 0.5
108: movl f0,f4
109: divl f2,f4 ; t = x/y
110: addl f4,f2 ; y = y + x/y
111: mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx.
112: movl f0,f4
113: divl f2,f4 ; t = x/y
114: addl f4,f2 ; y = y + x/y
115: mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx.
116: movl f0,f4
117: divl f2,f4 ; t = x/y
118: subl f2,f4 ; t = x/y - y
119: mull f6,f4 ; t = 0.5(x/y-y)
120: addl f4,f2 ; y = y + 0.5(x/y -y)
121: ; now y approx. sqrt(x) to within 1 ulp
122:
123: ; twiddle last bit to force y correctly rounded
124: movd r1,r0 ; restore the old fsr
125: bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable
126: ord 0x80,r0 ; set rounding mode to round to zero
127: lfsr r0
128:
129: movl f0,f4
130: divl f2,f4 ; f4 = x/y
131: sfsr r0
132: andd r3,r0 ; get the inexact flag
133: cmpqd 0,r0
134: bne L18
135: ; if x/y exact, then ...
136: cmpl f2,f4 ; if y == x/y
137: beq L2
138: movl f4,-8(fp)
139: subd 1,-8(fp)
140: subcd 0,-4(fp)
141: movl -8(fp),f4 ; f4 = f4 - ulp
142: L18:
143: bicd [6],r1
144: ord r3,r1 ; set inexact flag in r1
145:
146: andd r1,r4 ; r4 = the old rounding mode
147: cmpqd 0,r4 ; round to nearest?
148: bne L17
149: movl f4,-8(fp)
150: addd 1,-8(fp)
151: addcd 0,-4(fp)
152: movl -8(fp),f4 ; f4 = f4 + ulp
153: br L16
154: L17:
155: cmpd 0x100,r4 ; round to positive inf ?
156: bne L16
157: movl f4,-8(fp)
158: addd 1,-8(fp)
159: addcd 0,-4(fp)
160: movl -8(fp),f4 ; f4 = f4 + ulp
161:
162: movl f2,-8(fp)
163: addd 1,-8(fp)
164: addcd 0,-4(fp)
165: movl -8(fp),f2 ; f2 = f2 + ulp
166: L16:
167: addl f4,f2 ; y = y + t
168: subd 0x100000,r5 ; scalx = scalx - 1
169: L2:
170: movl f2,-8(fp)
171: addd r5,-4(fp)
172: movl -8(fp),f0
173: lfsr r1
174: L1:
175: movl tos,f6
176: movl tos,f4
177: exit [r3,r4,r5,r6,r7]
178: ret 0
179: .data
180: L28: .byte 64,40,35,41,115,113,114,116,46,99
181: .byte 9,49,46,49,32,40,117,99,98,46
182: .byte 101,108,101,102,117,110,116,41,32,57
183: .byte 47,49,57,47,56,53,0
184: L29: .blkb 4
185: .double 1204
186: .double 3062
187: .double 5746
188: .double 9193
189: .double 13348
190: .double 18162
191: .double 23592
192: .double 29598
193: .double 36145
194: .double 43202
195: .double 50740
196: .double 58733
197: .double 67158
198: .double 75992
199: .double 85215
200: .double 83599
201: .double 71378
202: .double 60428
203: .double 50647
204: .double 41945
205: .double 34246
206: .double 27478
207: .double 21581
208: .double 16499
209: .double 12183
210: .double 8588
211: .double 5674
212: .double 3403
213: .double 1742
214: .double 661
215: .double 130
216: L30: .blkb 4
217: .double 1129316352 ;L30: .double 0,0x43500000
218: L31: .blkb 4
219: .double 0x1ff00000
220: L32: .blkb 4
221: .double 0x5ff00000
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.