|
|
1.1 root 1: .data
2: .asciz "@(#)Faddd.s 1.1 86/02/03 Copyr 1985 Sun Micro"
3: .even
4: .text
5:
6: | Copyright (c) 1985 by Sun Microsystems, Inc.
7:
8: #include "fpcrtdefs.h"
9:
10: /*
11: * double-precision floating math run-time support
12: *
13: * copyright 1981, 1982 Richard E. James III
14: * translated to SUN idiom 10/11 March 1983 rt
15: * parameter passing re-done 22 July 1983 rt
16: */
17:
18: ARG2PTR = a0
19:
20: /*
21: * extract exponents from two double-precision numbers.
22: *
23: * input:
24: * d0/d1 one operand
25: * d2/d3 other operand
26: *
27: * output:
28: * d0/d1 mantissa, waiting for hidden bit to be turned on
29: * d2/d3 other mantissa, likewise
30: * d6 exponent from d2/d3
31: * d7 exponent for d0/d1
32: *
33: * destroys d4
34: */
35:
| d_exte: split two doubles into exponent and mantissa in one pass.
| The exponent is taken from the top 11 bits of each upper word, so this
| assumes the sign bit was already shifted out (Faddd does asll #1 first).
36: ENTER(d_exte)
37: moveq #11,d4 | size of exponent
38: roll d4,d0 | rotate left 11: the top 11 bits of each word land in bits 10..0
39: roll d4,d2
40: roll d4,d1
41: roll d4,d3
42: movl #0x7ff,d6 | 11-bit mask
43: movl d6,d7 | second copy of the mask
44: andl d2,d6 | d6 = exponent of d2/d3
45: eorl d6,d2 | clear the exponent bits out of the upper word
46: movl d7,d4 | d7 still holds the mask here
47: andl d3,d4 | d4 = the 11 bits rotated out of the top of the low word
48: eorl d4,d3 | remove them from the low word
49: lsrl #1,d2 | open bit 31 for the hidden bit
50: orl d4,d2 | splice the low word's former top 11 bits into the upper word
51: | end transformation of larger
52: movl d7,d4 | mask again, before d7 is overwritten
53: andl d0,d7 | d7 = exponent of d0/d1
54: eorl d7,d0 | clear the exponent bits out of the upper word
55: andl d1,d4 | d4 = the 11 bits rotated out of the top of the low word
56: eorl d4,d1 | remove them from the low word
57: lsrl #1,d0 | open bit 31 for the hidden bit
58: orl d4,d0 | splice them into the upper word
59: | end transformation of smaller
60: rts | d4 is left holding the bits spliced into d0
61:
62: /*
63: * ieee double floating compare
64: * copyright 1981, Richard E. James III
65: * translated to SUN idiom 30 March 1983 rt
66: */
67:
68: /*
69: * entry conditions:
70: * first argument in d0/d1
71: * second argument on stack
72: * exit conditions:
73: * result in cc -- carry flag set if either a NAN
74: * problems:
75: * unordered cases (i.e.: projective infinities and NANs)
76: * produce random results.
77: * A NAN, however, does compare not equal to anything.
78: *
79: * register conventions:
80: * d0/d1 first operand
81: * d2/d3 second operand
82: * d4 scratch
83: */
84: SAVEMASK = 0x3800 | registers d2-d4
85: RESTMASK = 0x1c | d2-d4 again, in the bit order the restoring moveml uses
86: NSAVED = 3*4 | 3 registers * sizeof(register)
87: CODE = NSAVED | sp offset of the result word once d2-d4 are pushed
88:
| Fcmpd: compare the double in d0/d1 with the double that ARG2PTR points to.
| A condition-code word (FEQ, FGT, FLT or FUN) is built in a stack slot and
| popped into cc on exit. d0/d1 are modified; d2-d4 are saved and restored.
| An operand is treated as a NaN when its exponent field is all ones and its
| fraction is nonzero in either word; any NaN gives FUN.
89: RTENTRY(Fcmpd)
90: subqw #2,sp | save room for result
91: | save registers and load operands into registers
92: moveml #SAVEMASK,sp@-
93: movl ARG2PTR@+,d2
94: movl ARG2PTR@ ,d3
95: | we are now set up.
96: movl d2,d4
97: andl d0,d4 | compare signs
98: bpls nbothmi | at least one operand is non-negative
99: exg d0,d2 | both minus
100: exg d1,d3 | swapping the operands reverses the sense of the compare
101: nbothmi:cmpl d2,d0 | main compare
102: bnes gotcmp | got the answer
103: movl d1,d4
104: subl d3,d4 | compare lowers
105: beqs gotcmp | entirely equal
106: roxrl #1,d4 | move the borrow (x) into the sign bit: n = lower word was below
107: andb #0xa,cc | clear z, in case differ by 1 ulp
108: gotcmp: andb #0xe,cc | clear carry
109: bgts 1f
110: blts 2f
111: movw #FEQ,sp@(CODE)
112: bras 3f
113: 1:
114: movw #FGT,sp@(CODE)
115: bras 3f
116: 2:
117: movw #FLT,sp@(CODE)
118: 3:
| NaN and signed-zero fixups. With the signs shifted out, an infinity's
| upper word reads 0xffe00000.
119: lsll #1,d0
120: lsll #1,d2
121: cmpl d2,d0
122: bccs 4$
123: exg d0,d2 | find larger in magnitude
    exg d1,d3 | keep each low word with its own upper word
124: 4$: cmpl #0xffe00000,d0
125: bhis 5$ | above the infinity pattern: nan
    bnes 6$ | below it: neither operand is inf or nan
    tstl d1 | max exponent, upper fraction zero: look at the low word
    bnes 5$ | nonzero low fraction: nan
    cmpl d2,d0 | does the other operand also have the max exponent?
    bnes 6$ | no, it is finite
    tstl d3
    beqs 6$ | it is an infinity too: no nan
126: 5$: movw #FUN,sp@(CODE) | c, nz
127: bras 8$ | one was a nan
128: 6$: orl d1,d0 | is every bit of both operands, signs aside, zero?
129: orl d2,d0
130: orl d3,d0
131: bnes 8$
132: movw #FEQ,sp@(CODE) | -0 == 0
133: | done, now go
134: 8$: moveml sp@+,#RESTMASK | put back saved registers
135: movw sp@+,cc | install condition code
136: RET
137:
138:
139: /*
140: * ieee double floating add
141: * copyright 1981, Richard E. James III
142: * translated to SUN idiom 10 March 1983 rt
143: */
144:
145: /*
146: * entry conditions:
147: * first argument in d0/d1
148: * second argument on stack
149: * exit conditions:
150: * result (8 bytes) in d0/d1
151: *
152: * register conventions:
153: * d0/d1 smaller operand (d0=most significant)
154: * d2/d3 larger operand
155: * d4 11 or mask of 11 bits
156: * d5 signs: sign of .w = sign of answer
157: * sign of .b = comparison of signs
158: * d6 exponent of larger
159: * d7 exponent of smaller
160: */
161: SAVEMASK = 0x3f00 | registers d2-d7
162: RESTMASK = 0xfc | d2-d7 again, in the bit order the restoring moveml uses
163: NSAVED = 6*4 | 6 registers * sizeof(register)
164:
| Fsubd: subtract the double at ARG2PTR from d0/d1 by flipping the sign of
| the second operand and joining the add path.
165: RTENTRY(Fsubd)
166: | save registers and load operands into registers
167: moveml #SAVEMASK,sp@- | registers d2-d7
168: movl ARG2PTR@+,d2
169: movl ARG2PTR@ ,d3
170: bchg #31,d2 | negate the second operand
171: jra adding
| Faddd: add the double at ARG2PTR to d0/d1; result is returned in d0/d1.
172: RTENTRY(Faddd)
173: | save registers and load operands into registers
174: moveml #SAVEMASK,sp@- | registers d2-d7
175: movl ARG2PTR@+,d2
176: movl ARG2PTR@ ,d3
177: adding:
178: | extract signs
179: asll #1,d0 | sign ->c
180: scs d4 | c -> d4
181: asll #1,d2 | sign of second operand -> c
182: scs d5 | d5.b = 0xff if the second operand is negative
183: | compare and exchange to put larger in d2/d3
184: cmpl d2,d0 | upper words only, unsigned, signs already removed
185: blss 1$ | d0 <= d2: already in order
186: exg d0,d2
187: exg d1,d3
188: exg d4,d5 | the sign flags travel with their operands
189: 1$: extw d5 | sign of larger
190: eorb d4,d5 | comparison of signs
191: | extract exponents
192: jbsr d_exte | larger ->d2/d3,d6; smaller ->d0/d1,d7
193: tstw d7
194: bnes 2$ | not zero or denormalized
195: | here, smaller is zero or is denormalized
196: movl d0,d4
197: orl d1,d4
198: jeq signofzero | if smaller == 0 use larger
199: | (sign of 0-0 unpredictable)
200: lsll #1,d1 | denormalized: shift the mantissa left one place, no hidden bit
201: roxll #1,d0
202: tstw d6 | larger exp
203: bnes 3$ | not gradual underflow
204: lsll #1,d3 | larger is denormalized too: same one-place shift
205: roxll #1,d2
206: bras addorsub | both gradual-underflow, no hidden or align needed
207: 2$: bset #31,d0 | add hidden bit
208: 3$: cmpw #0x7ff,d6
209: jeq a_ovfl | inf/nan
210: bset #31,d2 | hidden bit of larger
211: | align smaller
212: | shift-by-eight loop
213: subw d6,d7
214: negw d7 | d7 = difference of exponents
215: cmpw #16,d7
216: jge rsge16 | Branch if shift of 16 or more.
217: rs015: | Right shift 0..15.
218: subqw #8,d7
219: blts 5$ | exit loop when difference <8
220:
221: tstb d1
222: beqs 99$ | Branch if no bits to lose in shift.
223: bset #8,d1 | Turn on the sticky bit if any bits will be lost.
224: 99$:
225: movb d0,d1 | shift eight bits down
226: rorl #8,d1 | low byte of d0 becomes the top byte of d1
227: lsrl #8,d0
228: bras rs015
229: 5$: addqw #7,d7 | d7 = remaining shift - 1, the dbra count
230: bmis addorsub | nothing left to shift
231: tstb d1
232: beqs 98$
233: bset #8,d1 | Turn on sticky bit.
234: 98$:
235: 6$: lsrl #1,d0
236: roxrl #1,d1
237: dbra d7,6$ | final part of alignment
238: addorsub:
239: | decide whether to add or subtract
240: tstb d5 | compare signs
241: bmis diff | signs differ
242: | add them
243: addl d1,d3 | sum
244: addxl d0,d2
245: bccs endas | no c, ok
246: roxrl #1,d2 | carry out: rotate it back in at the top
247: roxrl #1,d3
248: addqw #1,d6 | and bump the exponent
249: cmpw #0x7ff,d6
250: blts endas | no overflow
251: jra a_geninf
252:
253: rsge16: | Right shift 16 or more.
254: cmpw #32,d7
255: blts rs1631 | Branch if shift is 16..31.
256: cmpw #64,d7
257: blts rs3263 | Branch if shift is 32..63.
258: clrl d0 | Top will be zero.
259: moveq #1,d1 | Bottom will be sticky.
260: bras addorsub
261: rs3263: | Shift 32.
262: tstl d1
263: beqs 1$ | low word is zero: nothing lost
264: bset #0,d0 | Sticky bit on.
265: 1$:
266: movl d0,d1
267: clrl d0
268: subw #32,d7
269: cmpw #16,d7
270: blts rs015 | Branch if shift < 16.
271: rs1631: | Shift 16.
272: tstw d1
273: beq 2$ | Branch if no bits in D.
274: bset #16,d1 | Turn on sticky bit in C.
275: 2$:
276: clrw d1 | d1 gets Cs,0.
277: movw d0,d1 | d1 gets Cs,B.
278: swap d1 | d1 gets B,Cs.
279: clrw d0 | d0 gets A,0.
280: swap d0 | d0 gets 0,A.
281: subw #16,d7
282: jra rs015
283:
284:
285: | subtract then
286: diff: subl d1,d3 | subtract lowers
287: subxl d0,d2 | subtract uppers
288: bccs 9$ | no borrow: larger really was larger
289: | cancelled down into 2nd word, but got wrong sign
290: notw d5 | flip result sign
291: negl d3
292: negxl d2 | negate value
293: 9$: bnes subrenorm | Branch if result nonzero.
294: tstl d3
295: bnes subrenorm | Branch if result nonzero.
296: clrw d5 | Exact zero result has positive sign.
297: subrenorm: | Renormalize result after cancellation.
298: jbsr d_norm
299: | rejoin, round
300: endas: jbsr d_rcp | round, check, and pack
301: assgn: lslw #1,d5 | get sign
302: roxrl #1,d2 | put in sign
303:
304: | answer is now in d2/d3: put in d0/d1
305: movl d2,d0
306: movl d3,d1
307: asexit: | restore registers and split
308: moveml sp@+,#RESTMASK
309: RET
310:
311: | EXCEPTION CASES
312: signofzero: | Set up proper sign for exact zero.
313: tstb d5
314: beqs useln | Branch if signs equal: either will do.
315: tstw d6
316: bnes useln | Branch if not zero or subnormal.
317: tstl d2
318: bnes useln | Branch if subnormal.
319: tstl d3
320: bnes useln | Branch if subnormal.
321: clrw d5 | Signs unequal so set positive.
322:
323: useln: tstw d6
324: beqs usel | Branch if subnormal: don't set i bit.
325: bset #31,d2 | Set i bit of normal number.
326: usel: jbsr d_usel | use the larger
327: bras assgn
328:
329: | larger exponent = 0x7ff: larger is inf or nan
330: a_ovfl: movl d2,d4 | larger mantissa
331: orl d3,d4
332: bnes usel | larger = nan, use it
333: cmpw d6,d7 | exps
334: bnes usel | larger=inf and smaller=number
335: | (need nan...)
336: tstb d5 | comparison of signs
337: bpls usel | inf+inf=inf; inf-inf=nan
338: movl #0x7ff00001,d0 | NAN
339: clrl d1
340: bras asexit
341: | result overflows
342: a_geninf:
343: movl #0xffe00000,d2 | infinity's pattern before the sign is rotated in at assgn
344: clrl d3
345: bras assgn
346:
347: /*
348: * subroutine for unpacking one operand, and normalizing a denormalized number
349: * input:
350: * d0/d1 number
351: * output:
352: * d0/d1 mantissa
353: * d7.w exponent
354: * z on iff the operand is zero
355: *
356: * unchanged:
357: * d4 bottom = 0x7ff
358: */
359:
| unp: unpack one double. The caller supplies the exponent mask in d4.
| A normal number is returned with no hidden bit set; a denormalized one is
| shifted left until bit 20 of d0 is set, with the exponent stepped down to match.
360: unp: movl d0,d7 | start getting exp
361: andl #0xfffff,d0 | clear out sign and exp
362: swap d7 | sign, exponent and top fraction bits into the low word
363: lsrw #(16-1-11),d7 | drop the 4 fraction bits below the exponent
364: andw d4,d7 | exponent (mask in d4 removes the sign)
365: bnes 3$ | normal number
366: | denormalized number or zero:
367: tstl d0 | upper
368: bnes 1$
369: tstl d1 | lower
370: beqs 3$ |zero
371: 1$: addqw #1,d7 | start at 1 so the first pass leaves exponent 0
372: 2$: subql #1,d7 | one less per left shift (long op; only d7.w is the documented output)
373: lsll #1,d1
374: roxll #1,d0 | normalize
375: btst #20,d0 | has a one reached bit 20, just above the 20-bit fraction?
376: beqs 2$ | loop until normalized
377: 3$: rts | z is set here only on the zero path
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.