|
|
1.1 root 1: ////////
2: /
3: / Intel 8086 floating point.
4: / Double divide.
5: / SMALL model.
6: /
7: ////////
8:
9: .globl drdiv
10: .globl dldiv
11: .globl _fpac_
12: .globl dzero
13:
// oerror_ - file-local stub that does nothing but return to its caller.
// It is the target of the "call oerror_" in the divide loop of drdiv/dldiv.
14: oerror_: ret
15:
16: ////////
17: /
18: / drdiv - double divide (rvalue)
19: / dldiv - double divide (lvalue)
20: /
21: / compiler calling sequences:
22: / push <right double argument>
23: / push <left double argument>
24: / call drdiv
25: / add sp,16
26: /
27: / mov ax,offset <right double argument>
28: / push ax
29: / push <left double argument>
30: / call dldiv
31: / add sp,10
32: /
33: / outputs:
34: / _fpac_ = result.
35: /
36: / see Knuth "Art of Computer Programming" vol 2 for explanation of
37: / multiprecision divide.
38: /
39: ////////
40:
// Offsets used with bp once drdiv/dldiv have run their prologue
// (push si, push di, push bp, mov bp, sp).
// Positive offsets address the caller's arguments. r and rp share the
// same slot: drdiv finds the right operand itself there, dldiv finds a
// pointer to it.
41: l = 8 / left argument
42: r = 16 / right argument (rvalue)
43: rp = 16 / right argument (lvalue)
44:
// Negative offsets address locals in the area reserved by "sub sp, $claim":
// sign - byte whose bit 0 holds the result sign
// exp - word holding the result exponent
// q0..q3 - four quotient words (q0 is loaded into bx, the high-order word)
45: sign = -2
46: exp = -4
47: q0 = -6
48: q1 = -8
49: q2 = -10
50: q3 = -12
51:
// q3 is the lowest local, so -q3 (12 bytes) is the size of the local area.
52: claim = -q3 / stack claim size
53:
// drdiv / dldiv - divide the left double by the right double and leave
// the result in _fpac_.
//
// The two entry points differ only in how si is set up:
// drdiv: right operand is on the stack, si = address of r(bp)
// dldiv: a pointer to the right operand is on the stack, si = rp(bp)
// Both save si, di and bp, use bp as the frame pointer and then share the
// code from local label 0 onward. The routine returns with a plain ret;
// the calling sequences in the header show the caller removing arguments.
//
// Register roles inside the divide loop (taken from the moves at label 0):
// bx = u0, cx = u1, di = u2, ax = u3 on entry to each pass
// si = pointer to the right operand (v)
// bp = address of the quotient word being produced
// _fpac_ (word) and _fpac_+2 (byte) are used as scratch during the loop
// and only receive the real result after the loop has finished.
54: drdiv:
55: push si
56: push di
57: push bp
58: mov bp, sp
59:
60: lea si, r(bp) / si = pointer to right op.
61: jmp 0f
62:
63: dldiv:
64: push si
65: push di
66: push bp
67: mov bp, sp
68:
69: mov si, rp(bp) / si = pointer to right op.
70: 0:
// Reserve the locals (sign, exp, q0..q3) below bp.
71: sub sp, $claim
72:
// Top word of the left operand. The shift moves its top bit (the sign)
// into carry and leaves the exponent in ah. If the shifted word is zero
// the result is zero.
73: mov ax, l+6(bp)
74: shl ax, $1 / l exp in ah
75: jne 0f
// retz - return a zero result. Also reached on exponent underflow.
// dzero is external; presumably it stores zero in _fpac_ - verify.
76: retz:
77: call dzero
78: jmp done
79: 0:
// The carry from the shl above (jne does not change flags) is rotated into
// bit 0 of dl. The right operand's sign is then shifted into carry and
// added, so bit 0 of dl ends up as the exclusive-or of the two sign bits.
// The other bits of dl are not initialised; only bit 0 is used later.
80: rclb dl, $1 / form sign
81: mov bx, 6(si)
82: shl bx, $1 / r exp in bh
83: adcb dl, $0
84: movb sign(bp), dl
// Right exponent of zero: take the retinf exit.
85: orb bh, bh
86: je retinf
87:
// Zero-extend both exponents to words and form
// ax = (left exp) - (right exp) + 129.
88: movb al, ah
89: movb bl, bh
90: subb ah, ah
91: movb bh, ah
92: sub ax, bx
93: add ax, $129
94: js retz / exp will only get smaller
95:
96: cmp ax, $257 / by at most 2
97: ja retinf
98:
99: mov exp(bp), ax
// Load the left fraction as a 64-bit value in bx:cx:di:ax. Bit 7 of bh
// (which held the low exponent bit) is forced to 1, and al is cleared.
100: mov bx, l+5(bp) / set up l fraction (u) for divide loop
101: orb bh, $128 / implicit bit of hi order word
102: mov cx, l+3(bp)
103: mov di, l+1(bp)
104: movb ah, l(bp)
105: subb al, al
// The 64-bit value in bx:cx:di:ax is shifted right by one bit.
// NOTE(review): the comment on the next line says "r", but these registers
// were just loaded from the left operand (u) - confirm the wording.
106: shr bx, $1 / shift r to guarantee valid divide
107: rcr cx, $1
108: rcr di, $1
109: rcr ax, $1
// bp stops being the frame pointer here. It is set to frame + q0 + 2 and
// each loop pass subtracts 2 before storing a quotient word at (bp), so
// the words land in q0, q1, q2, q3. Frame-relative names (sign, exp, q*)
// are not used again until bp is restored after the loop.
110: add bp, $q0 + 2 / quotient pushed from here to top
111: jmp 0f
// overf - entered from the loop when u0 == v1 (cmp dx, cx / je overf).
// The trial quotient word is stored as $-1 and the multiply-and-subtract
// is carried out with adds and subtracts instead of mul. It rejoins the
// main path at label 2 (adjust) or label 3 (final borrow propagation).
112: overf:
113: mov (bp), $-1 / q^ = word size - 1
114: sub dx, cx / do multiply and subtract using adds
115: mov bx, ax / u1
116: sub ax, ax
117: add bx, cx
118: adcb dl, al / must keep track of u0 for a while
119:
120: sub bx, 3(si) / u1 u2 -= q^ * v2
121: sbbb dl, al
122: add di, 3(si)
123: adc bx, ax
124: adcb dl, al
125: js 2f / adjust q^ and fall in
126:
127: mov cx, di / u2
128: mov di, 1(si) / v3
129: sub cx, di / u2 u3 -= q^ * v3
130: sbb bx, ax
131: sbbb dl, al
132: add di, _fpac_ / pick up u3
133: adc cx, ax
134: adc bx, ax
135: adcb dl, al
136:
137: movb ah, (si) / u3 u4 -= q^ * v4
138: sub di, ax
139: jmp 3f / fall in
140:
// Top of the divide loop: one pass produces one 16-bit quotient word.
141: 0:
142: sub bp, $2
143: mov _fpac_, ax / u3
144:
145: mov dx, bx / u0
146: mov ax, cx / u1
// v1 = top word of the right fraction with bit 7 of its high byte forced on.
147: mov cx, 5(si)
148: orb ch, $128 / v1
149: cmp dx, cx
150: je overf
151: jb 9f
// u0 > v1 reaches this call.
// NOTE(review): oerror_ in this file is a bare ret, so control continues
// into the div below with dx > cx - confirm this case cannot occur.
152: call oerror_
153: 9:
154:
// dx:ax / cx: quotient in ax (the trial quotient word), remainder in dx.
155: div cx
156: mov (bp), ax / q+2 >= a = q^ >= q
157: mov bx, dx / bx = dx = remainder = new u1 <= v1
158:
159: mul 3(si) / dx ax = q^ * v2
160: sub di, ax / subtract from new u1 new u2
161: sbb bx, dx / = new u1 old u3
162: jnc 1f / ok - no prelim q^ adjust
// Trial quotient too large: decrement it and add v1:v2 back until the
// addition carries out.
163: 2:
164: dec (bp) / q^ too big
165: add di, 3(si) / adjust u1 u2
166: adc bx, cx
167: jnc 2b / until positive ( at most twice)
168: 1: / now q+1 >= q^ >= q
169: mov cx, di / shift new u2 = old u3
170: mov di, _fpac_ / shift new u3 = old u4
171: / ( new u4 = 0)
172: mov ax, 1(si)
173: mul (bp) / dx ax = v3 * q^
174: sub di, ax / subtract from u2 u3
175: sbb cx, dx
// mov (rather than a sub/xor) is used to clear ax so the borrow from the
// preceding sbb is still in carry for the next sbb.
176: mov ax, $0
177: sbb bx, ax
// ah = 0 or 0xFF according to the borrow; parked in the scratch byte
// _fpac_+2 because the following mul overwrites ax and dx.
178: sbbb ah, ah
179: movb _fpac_+2, ah / save carry into u0
180:
// al is still 0 here, so ax = (low byte of v) << 8.
181: movb ah, (si) / last word is short 8 bits
182: mul (bp) / dx ax = v4 * q^
183: neg ax / ax := u4 - ax = 0 - ax
184: sbb di, dx
185: movb dl, _fpac_+2
// Propagate the last borrow up through cx, bx and the saved borrow byte.
// A zero result in dl means the remainder did not go negative.
186: 3:
187: sbb cx, $0
188: sbb bx, $0
189: sbbb dl, $0
190: je 1f / q = q^ ok ( this is usually the case)
191:
// Remainder went negative: decrement the quotient word and add v back once.
// The carry from "adc cx, 3(si)" is absorbed into bx before orb clears it.
192: dec (bp) / else q = q^ - 1
193: addb ah, (si) / add v1 v2 v3 v4 to u1 u2 u3 u4
194: adc di, 1(si)
195: adc cx, 3(si)
196: adc bx, $0
197: mov dx, 5(si)
198: orb dh, $128
199: add bx, dx
200: 1:
// Loop until the quotient pointer has reached the bottom of the local area
// (sp), i.e. after q3 has been stored.
201: cmp bp, sp
202: jne 0b
203:
// bp now equals frame - claim; adding claim restores the frame pointer.
204: add bp, $claim
205: mov ax, exp(bp)
// Quotient as a 64-bit value in bx:cx:di:dx, with bx the high word.
206: mov dx, q3(bp)
207: mov di, q2(bp)
208: mov cx, q1(bp)
209: mov bx, q0(bp)
210: orb bh, bh
211: js 0f / normalized
212:
213: dec ax / only need to shift once
214: shl dx, $1
215: rcl di, $1
216: rcl cx, $1
217: rcl bx, $1
218: 0:
// Bit 7 of dl goes to carry and is added in at dh, then propagated upward.
// dl itself is not stored in the result.
219: shlb dl, $1 / round
220: adcb dh, $0
221: adc di, $0
222: adc cx, $0
223: adc bx, $0
224: jnc 0f
225:
// Rounding carried out of bx: rotate the carry back in as the top bit.
// NOTE(review): a carry out of the fraction would normally raise the
// exponent, but this path decrements it - confirm whether this path is
// reachable and whether dec is intended.
226: rcr bx, $1 / exactly 1/2
227: dec ax
228: 0:
// The exponent must fit in al: negative ah -> zero result, any other
// non-zero ah -> retinf.
229: orb ah, ah / check exp
230: js retz / too small
231: jne retinf / too big
232:
// Store the result into _fpac_: dh at byte 0, di at bytes 1-2, cx at bytes
// 3-4. The top bit of bh is shifted out. With the sign in ah, "shr ax"
// moves the sign bit into bit 7 of al and the low exponent bit into carry,
// which rcrb then places in bit 7 of bh. bx goes to bytes 5-6, al to byte 7.
233: movb _fpac_, dh / ok store it
234: mov _fpac_+1, di
235: mov _fpac_+3, cx
236: shlb bh, $1
237: movb ah, sign(bp)
238: shr ax, $1
239: rcrb bh, $1
240: mov _fpac_+5, bx
241: movb _fpac_+7, al
// done - common exit. Every jump to done, retz or retinf happens while bp
// holds the frame pointer, so "mov sp, bp" releases the locals.
242: done:
243: mov sp, bp
244: pop bp
245: pop di
246: pop si
247: ret
// retinf - overflow / zero right-exponent exit. Fills _fpac_ with all one
// bits except that the top bit of the last byte is set from bit 0 of sign.
248: retinf:
249: mov ax, $-1
250: mov _fpac_, ax
251: mov _fpac_+2, ax
252: mov _fpac_+4, ax
253: movb dl, sign(bp)
254: shrb dl, $1
255: rcrb ah, $1
256: mov _fpac_+6, ax
257: jmp done
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.