|
|
1.1 root 1: # Copyright (c) 1985 Regents of the University of California.
2: # All rights reserved.
3: #
4: # Redistribution and use in source and binary forms are permitted
5: # provided that the above copyright notice and this paragraph are
6: # duplicated in all such forms and that any documentation,
7: # advertising materials, and other materials related to such
8: # distribution and use acknowledge that the software was developed
9: # by the University of California, Berkeley. The name of the
10: # University may not be used to endorse or promote products derived
11: # from this software without specific prior written permission.
12: # THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
13: # IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
14: # WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15: #
16: # All recipients should regard themselves as participants in an ongoing
17: # research project and hence should feel obligated to report their
18: # experiences (good or bad) with these elementary function codes, using
19: # the sendbug(8) program, to the authors.
20: #
21: # @(#)argred.s 5.3 (Berkeley) 6/30/88
22: #
23: .data
24: .align 2
25: _sccsid:
26: .asciz "@(#)argred.s 1.1 (Berkeley) 8/21/85; 5.3 (ucb.elefunt) 6/30/88"
27:
28: # libm$argred implements Bob Corbett's argument reduction and
29: # libm$sincos implements Peter Tang's double precision sin/cos.
30: #
31: # Note: The two entry points libm$argred and libm$sincos are meant
32: # to be used only by _sin, _cos and _tan.
33: #
34: # method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett
35: # S. McDonald, April 4, 1985
36: #
37: .globl libm$argred
38: .globl libm$sincos
39: .text
40: .align 1
41:
42: libm$argred:
43: #
44: # Compare the argument with the largest possible that can
45: # be reduced by table lookup. r3 := |x| will be used in table_lookup .
46: #
47: movd r0,r3
48: bgeq abs1
49: mnegd r3,r3
50: abs1:
51: cmpd r3,$0d+4.55530934770520019583e+01
52: blss small_arg
53: jsb trigred
54: rsb
55: small_arg:
56: jsb table_lookup
57: rsb
58: #
59: # At this point,
60: # r0 contains the quadrant number, 0, 1, 2, or 3;
61: # r2/r1 contains the reduced argument as a D-format number;
62: # r3 contains a F-format extension to the reduced argument;
63: # r4 contains a 0 or 1 corresponding to a sin or cos entry.
64: #
65: libm$sincos:
66: #
67: # Compensate for a cosine entry by adding one to the quadrant number.
68: #
69: addl2 r4,r0
70: #
71: # Polyd clobbers r5-r0 ; save X in r7/r6 .
72: # This can be avoided by rewriting trigred .
73: #
74: movd r1,r6
75: #
76: # Likewise, save alpha in r8 .
77: # This can be avoided by rewriting trigred .
78: #
79: movf r3,r8
80: #
81: # Odd or even quadrant? cosine if odd, sine otherwise.
82: # Save floor(quadrant/2) in r9 ; it determines the final sign.
83: #
84: rotl $-1,r0,r9
85: blss cosine
86: sine:
87: muld2 r1,r1 # Xsq = X * X
88: cmpw $0x2480,r1 # [zl] Xsq > 2^-56?
89: blss 1f # [zl] yes, go ahead and do polyd
90: clrq r1 # [zl] work around 11/780 FPA polyd bug
91: 1:
92: polyd r1,$7,sin_coef # Q = P(Xsq) , of deg 7
93: mulf3 $0f3.0,r8,r4 # beta = 3 * alpha
94: mulf2 r0,r4 # beta = Q * beta
95: addf2 r8,r4 # beta = alpha + beta
96: muld2 r6,r0 # S(X) = X * Q
97: # cvtfd r4,r4 ... r5 = 0 after a polyd.
98: addd2 r4,r0 # S(X) = beta + S(X)
99: addd2 r6,r0 # S(X) = X + S(X)
100: brb done
101: cosine:
102: muld2 r6,r6 # Xsq = X * X
103: beql zero_arg
104: mulf2 r1,r8 # beta = X * alpha
105: polyd r6,$7,cos_coef # Q = P'(Xsq) , of deg 7
106: subd3 r0,r8,r0 # beta = beta - Q
107: subw2 $0x80,r6 # Xsq = Xsq / 2
108: addd2 r0,r6 # Xsq = Xsq + beta
109: zero_arg:
110: subd3 r6,$0d1.0,r0 # C(X) = 1 - Xsq
111: done:
112: blbc r9,even
113: mnegd r0,r0
114: even:
115: rsb
116:
117: .data
118: .align 2
119:
120: sin_coef:
121: .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8..
122: .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8..
123: .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382..
124: .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278..
125: .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d..
126: .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50
127: .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554
128: .double 0d+0.00000000000000000000e+00 # s0 = 0
129:
130: cos_coef:
131: .double 0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE..
132: .double 0d+2.08746646574796004700e-09 # s6 = 2^-1D 1.1EE632650350BA..
133: .double 0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E..
134: .double 0d+2.48015872682668025200e-05 # s4 = 2^-10 1.A01A0196B902E8..
135: .double 0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE..
136: .double 0d+4.16666666666664761400e-02 # s2 = 2^-05 1.5555555555539E
137: .double 0d+0.00000000000000000000e+00 # s1 = 0
138: .double 0d+0.00000000000000000000e+00 # s0 = 0
139:
140: #
141: # Multiples of pi/2 expressed as the sum of three doubles,
142: #
143: # trailing: n * pi/2 , n = 0, 1, 2, ..., 29
144: # trailing[n] ,
145: #
146: # middle: n * pi/2 , n = 0, 1, 2, ..., 29
147: # middle[n] ,
148: #
149: # leading: n * pi/2 , n = 0, 1, 2, ..., 29
150: # leading[n] ,
151: #
152: # where
153: # leading[n] := (n * pi/2) rounded,
154: # middle[n] := (n * pi/2 - leading[n]) rounded,
155: # trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded .
156:
157: trailing:
158: .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing
159: .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing
160: .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing
161: .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing
162: .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing
163: .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing
164: .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing
165: .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing
166: .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing
167: .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing
168: .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing
169: .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing
170: .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing
171: .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing
172: .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing
173: .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing
174: .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing
175: .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing
176: .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing
177: .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing
178: .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing
179: .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing
180: .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing
181: .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing
182: .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing
183: .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing
184: .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing
185: .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing
186: .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing
187: .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing
188:
189: middle:
190: .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle
191: .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle
192: .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle
193: .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle
194: .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle
195: .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle
196: .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle
197: .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle
198: .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle
199: .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle
200: .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle
201: .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle
202: .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle
203: .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle
204: .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle
205: .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle
206: .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle
207: .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle
208: .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle
209: .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle
210: .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle
211: .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle
212: .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle
213: .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle
214: .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle
215: .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle
216: .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle
217: .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle
218: .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle
219: .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle
220:
221: leading:
222: .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading
223: .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading
224: .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading
225: .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading
226: .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading
227: .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading
228: .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading
229: .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading
230: .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading
231: .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading
232: .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading
233: .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading
234: .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading
235: .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading
236: .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading
237: .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading
238: .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading
239: .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading
240: .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading
241: .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading
242: .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading
243: .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading
244: .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading
245: .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading
246: .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading
247: .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading
248: .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading
249: .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading
250: .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading
251: .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading
252:
253: twoOverPi:
254: .double 0d+6.36619772367581343076e-01
255: .text
256: .align 1
257:
258: table_lookup:
259: muld3 r3,twoOverPi,r0
260: cvtrdl r0,r0 # n = nearest int to ((2/pi)*|x|) rnded
261: mull3 $8,r0,r5
262: subd2 leading(r5),r3 # p = (|x| - leading n*pi/2) exactly
263: subd3 middle(r5),r3,r1 # q = (p - middle n*pi/2) rounded
264: subd2 r1,r3 # r = (p - q)
265: subd2 middle(r5),r3 # r = r - middle n*pi/2
266: subd2 trailing(r5),r3 # r = r - trailing n*pi/2 rounded
267: #
268: # If the original argument was negative,
269: # negate the reduce argument and
270: # adjust the octant/quadrant number.
271: #
272: tstw 4(ap)
273: bgeq abs2
274: mnegf r1,r1
275: mnegf r3,r3
276: # subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD
277: subb3 r0,$4,r0
278: abs2:
279: #
280: # Clear all unneeded octant/quadrant bits.
281: #
282: # bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD
283: bicb2 $0xfc,r0
284: rsb
285: #
286: # p.0
287: .text
288: .align 2
289: #
290: # Only 256 (actually 225) bits of 2/pi are needed for VAX double
291: # precision; this was determined by enumerating all the nearest
292: # machine integer multiples of pi/2 using continued fractions.
293: # (8a8d3673775b7ff7 required the most bits.) -S.McD
294: #
295: .long 0
296: .long 0
297: .long 0xaef1586d
298: .long 0x9458eaf7
299: .long 0x10e4107f
300: .long 0xd8a5664f
301: .long 0x4d377036
302: .long 0x09d5f47d
303: .long 0x91054a7f
304: .long 0xbe60db93
305: bits2opi:
306: .long 0x00000028
307: .long 0
308: #
309: # Note: wherever you see the word `octant', read `quadrant'.
310: # Currently this code is set up for pi/2 argument reduction.
311: # By uncommenting/commenting the appropriate lines, it will
312: # also serve as a pi/4 argument reduction code.
313: #
314:
315: # p.1
316: # Trigred preforms argument reduction
317: # for the trigonometric functions. It
318: # takes one input argument, a D-format
319: # number in r1/r0 . The magnitude of
320: # the input argument must be greater
321: # than or equal to 1/2 . Trigred produces
322: # three results: the number of the octant
323: # occupied by the argument, the reduced
324: # argument, and an extension of the
325: # reduced argument. The octant number is
326: # returned in r0 . The reduced argument
327: # is returned as a D-format number in
328: # r2/r1 . An 8 bit extension of the
329: # reduced argument is returned as an
330: # F-format number in r3.
331: # p.2
332: trigred:
333: #
334: # Save the sign of the input argument.
335: #
336: movw r0,-(sp)
337: #
338: # Extract the exponent field.
339: #
340: extzv $7,$7,r0,r2
341: #
342: # Convert the fraction part of the input
343: # argument into a quadword integer.
344: #
345: bicw2 $0xff80,r0
346: bisb2 $0x80,r0 # -S.McD
347: rotl $16,r0,r0
348: rotl $16,r1,r1
349: #
350: # If r1 is negative, add 1 to r0 . This
351: # adjustment is made so that the two's
352: # complement multiplications done later
353: # will produce unsigned results.
354: #
355: bgeq posmid
356: incl r0
357: posmid:
358: # p.3
359: #
360: # Set r3 to the address of the first quadword
361: # used to obtain the needed portion of 2/pi .
362: # The address is longword aligned to ensure
363: # efficient access.
364: #
365: ashl $-3,r2,r3
366: bicb2 $3,r3
367: subl3 r3,$bits2opi,r3
368: #
369: # Set r2 to the size of the shift needed to
370: # obtain the correct portion of 2/pi .
371: #
372: bicb2 $0xe0,r2
373: # p.4
374: #
375: # Move the needed 128 bits of 2/pi into
376: # r11 - r8 . Adjust the numbers to allow
377: # for unsigned multiplication.
378: #
379: ashq r2,(r3),r10
380:
381: subl2 $4,r3
382: ashq r2,(r3),r9
383: bgeq signoff1
384: incl r11
385: signoff1:
386: subl2 $4,r3
387: ashq r2,(r3),r8
388: bgeq signoff2
389: incl r10
390: signoff2:
391: subl2 $4,r3
392: ashq r2,(r3),r7
393: bgeq signoff3
394: incl r9
395: signoff3:
396: # p.5
397: #
398: # Multiply the contents of r0/r1 by the
399: # slice of 2/pi in r11 - r8 .
400: #
401: emul r0,r8,$0,r4
402: emul r0,r9,r5,r5
403: emul r0,r10,r6,r6
404:
405: emul r1,r8,$0,r7
406: emul r1,r9,r8,r8
407: emul r1,r10,r9,r9
408: emul r1,r11,r10,r10
409:
410: addl2 r4,r8
411: adwc r5,r9
412: adwc r6,r10
413: # p.6
414: #
415: # If there are more than five leading zeros
416: # after the first two quotient bits or if there
417: # are more than five leading ones after the first
418: # two quotient bits, generate more fraction bits.
419: # Otherwise, branch to code to produce the result.
420: #
421: bicl3 $0xc1ffffff,r10,r4
422: beql more1
423: cmpl $0x3e000000,r4
424: bneq result
425: more1:
426: # p.7
427: #
428: # generate another 32 result bits.
429: #
430: subl2 $4,r3
431: ashq r2,(r3),r5
432: bgeq signoff4
433:
434: emul r1,r6,$0,r4
435: addl2 r1,r5
436: emul r0,r6,r5,r5
437: addl2 r0,r6
438: brb addbits1
439:
440: signoff4:
441: emul r1,r6,$0,r4
442: emul r0,r6,r5,r5
443:
444: addbits1:
445: addl2 r5,r7
446: adwc r6,r8
447: adwc $0,r9
448: adwc $0,r10
449: # p.8
450: #
451: # Check for massive cancellation.
452: #
453: bicl3 $0xc0000000,r10,r6
454: # bneq more2 -S.McD Test was backwards
455: beql more2
456: cmpl $0x3fffffff,r6
457: bneq result
458: more2:
459: # p.9
460: #
461: # If massive cancellation has occurred,
462: # generate another 24 result bits.
463: # Testing has shown there will always be
464: # enough bits after this point.
465: #
466: subl2 $4,r3
467: ashq r2,(r3),r5
468: bgeq signoff5
469:
470: emul r0,r6,r4,r5
471: addl2 r0,r6
472: brb addbits2
473:
474: signoff5:
475: emul r0,r6,r4,r5
476:
477: addbits2:
478: addl2 r6,r7
479: adwc $0,r8
480: adwc $0,r9
481: adwc $0,r10
482: # p.10
483: #
484: # The following code produces the reduced
485: # argument from the product bits contained
486: # in r10 - r7 .
487: #
488: result:
489: #
490: # Extract the octant number from r10 .
491: #
492: # extzv $29,$3,r10,r0 ...used for pi/4 reduction -S.McD
493: extzv $30,$2,r10,r0
494: #
495: # Clear the octant bits in r10 .
496: #
497: # bicl2 $0xe0000000,r10 ...used for pi/4 reduction -S.McD
498: bicl2 $0xc0000000,r10
499: #
500: # Zero the sign flag.
501: #
502: clrl r5
503: # p.11
504: #
505: # Check to see if the fraction is greater than
506: # or equal to one-half. If it is, add one
507: # to the octant number, set the sign flag
508: # on, and replace the fraction with 1 minus
509: # the fraction.
510: #
511: # bitl $0x10000000,r10 ...used for pi/4 reduction -S.McD
512: bitl $0x20000000,r10
513: beql small
514: incl r0
515: incl r5
516: # subl3 r10,$0x1fffffff,r10 ...used for pi/4 reduction -S.McD
517: subl3 r10,$0x3fffffff,r10
518: mcoml r9,r9
519: mcoml r8,r8
520: mcoml r7,r7
521: small:
522: # p.12
523: #
524: ## Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD
525: # Test whether the first 30 bits of the
526: # fraction are zero.
527: #
528: tstl r10
529: beql tiny
530: #
531: # Find the position of the first one bit in r10 .
532: #
533: cvtld r10,r1
534: extzv $7,$7,r1,r1
535: #
536: # Compute the size of the shift needed.
537: #
538: subl3 r1,$32,r6
539: #
540: # Shift up the high order 64 bits of the
541: # product.
542: #
543: ashq r6,r9,r10
544: ashq r6,r8,r9
545: brb mult
546: # p.13
547: #
548: # Test to see if the sign bit of r9 is on.
549: #
550: tiny:
551: tstl r9
552: bgeq tinier
553: #
554: # If it is, shift the product bits up 32 bits.
555: #
556: movl $32,r6
557: movq r8,r10
558: tstl r10
559: brb mult
560: # p.14
561: #
562: # Test whether r9 is zero. It is probably
563: # impossible for both r10 and r9 to be
564: # zero, but until proven to be so, the test
565: # must be made.
566: #
567: tinier:
568: beql zero
569: #
570: # Find the position of the first one bit in r9 .
571: #
572: cvtld r9,r1
573: extzv $7,$7,r1,r1
574: #
575: # Compute the size of the shift needed.
576: #
577: subl3 r1,$32,r1
578: addl3 $32,r1,r6
579: #
580: # Shift up the high order 64 bits of the
581: # product.
582: #
583: ashq r1,r8,r10
584: ashq r1,r7,r9
585: brb mult
586: # p.15
587: #
588: # The following code sets the reduced
589: # argument to zero.
590: #
591: zero:
592: clrl r1
593: clrl r2
594: clrl r3
595: brw return
596: # p.16
597: #
598: # At this point, r0 contains the octant number,
599: # r6 indicates the number of bits the fraction
600: # has been shifted, r5 indicates the sign of
601: # the fraction, r11/r10 contain the high order
602: # 64 bits of the fraction, and the condition
603: # codes indicate where the sign bit of r10
604: # is on. The following code multiplies the
605: # fraction by pi/2 .
606: #
607: mult:
608: #
609: # Save r11/r10 in r4/r1 . -S.McD
610: movl r11,r4
611: movl r10,r1
612: #
613: # If the sign bit of r10 is on, add 1 to r11 .
614: #
615: bgeq signoff6
616: incl r11
617: signoff6:
618: # p.17
619: #
620: # Move pi/2 into r3/r2 .
621: #
622: movq $0xc90fdaa22168c235,r2
623: #
624: # Multiply the fraction by the portion of pi/2
625: # in r2 .
626: #
627: emul r2,r10,$0,r7
628: emul r2,r11,r8,r7
629: #
630: # Multiply the fraction by the portion of pi/2
631: # in r3 .
632: emul r3,r10,$0,r9
633: emul r3,r11,r10,r10
634: #
635: # Add the product bits together.
636: #
637: addl2 r7,r9
638: adwc r8,r10
639: adwc $0,r11
640: #
641: # Compensate for not sign extending r8 above.-S.McD
642: #
643: tstl r8
644: bgeq signoff6a
645: decl r11
646: signoff6a:
647: #
648: # Compensate for r11/r10 being unsigned. -S.McD
649: #
650: addl2 r2,r10
651: adwc r3,r11
652: #
653: # Compensate for r3/r2 being unsigned. -S.McD
654: #
655: addl2 r1,r10
656: adwc r4,r11
657: # p.18
658: #
659: # If the sign bit of r11 is zero, shift the
660: # product bits up one bit and increment r6 .
661: #
662: blss signon
663: incl r6
664: ashq $1,r10,r10
665: tstl r9
666: bgeq signoff7
667: incl r10
668: signoff7:
669: signon:
670: # p.19
671: #
672: # Shift the 56 most significant product
673: # bits into r9/r8 . The sign extension
674: # will be handled later.
675: #
676: ashq $-8,r10,r8
677: #
678: # Convert the low order 8 bits of r10
679: # into an F-format number.
680: #
681: cvtbf r10,r3
682: #
683: # If the result of the conversion was
684: # negative, add 1 to r9/r8 .
685: #
686: bgeq chop
687: incl r8
688: adwc $0,r9
689: #
690: # If r9 is now zero, branch to special
691: # code to handle that possibility.
692: #
693: beql carryout
694: chop:
695: # p.20
696: #
697: # Convert the number in r9/r8 into
698: # D-format number in r2/r1 .
699: #
700: rotl $16,r8,r2
701: rotl $16,r9,r1
702: #
703: # Set the exponent field to the appropriate
704: # value. Note that the extra bits created by
705: # sign extension are now eliminated.
706: #
707: subw3 r6,$131,r6
708: insv r6,$7,$9,r1
709: #
710: # Set the exponent field of the F-format
711: # number in r3 to the appropriate value.
712: #
713: tstf r3
714: beql return
715: # extzv $7,$8,r3,r4 -S.McD
716: extzv $7,$7,r3,r4
717: addw2 r4,r6
718: # subw2 $217,r6 -S.McD
719: subw2 $64,r6
720: insv r6,$7,$8,r3
721: brb return
722: # p.21
723: #
724: # The following code generates the appropriate
725: # result for the unlikely possibility that
726: # rounding the number in r9/r8 resulted in
727: # a carry out.
728: #
729: carryout:
730: clrl r1
731: clrl r2
732: subw3 r6,$132,r6
733: insv r6,$7,$9,r1
734: tstf r3
735: beql return
736: extzv $7,$8,r3,r4
737: addw2 r4,r6
738: subw2 $218,r6
739: insv r6,$7,$8,r3
740: # p.22
741: #
742: # The following code makes an needed
743: # adjustments to the signs of the
744: # results or to the octant number, and
745: # then returns.
746: #
747: return:
748: #
749: # Test if the fraction was greater than or
750: # equal to 1/2 . If so, negate the reduced
751: # argument.
752: #
753: blbc r5,signoff8
754: mnegf r1,r1
755: mnegf r3,r3
756: signoff8:
757: # p.23
758: #
759: # If the original argument was negative,
760: # negate the reduce argument and
761: # adjust the octant number.
762: #
763: tstw (sp)+
764: bgeq signoff9
765: mnegf r1,r1
766: mnegf r3,r3
767: # subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD
768: subb3 r0,$4,r0
769: signoff9:
770: #
771: # Clear all unneeded octant bits.
772: #
773: # bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD
774: bicb2 $0xfc,r0
775: #
776: # Return.
777: #
778: rsb
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.