Annotation of 43BSD/usr.lib/libm/VAX/argred.s, revision 1.1

1.1     ! root        1: # 
        !             2: # Copyright (c) 1985 Regents of the University of California.
        !             3: # 
        !             4: # Use and reproduction of this software are granted  in  accordance  with
        !             5: # the terms and conditions specified in  the  Berkeley  Software  License
        !             6: # Agreement (in particular, this entails acknowledgement of the programs'
        !             7: # source, and inclusion of this notice) with the additional understanding
        !             8: # that  all  recipients  should regard themselves as participants  in  an
        !             9: # ongoing  research  project and hence should  feel  obligated  to report
        !            10: # their  experiences (good or bad) with these elementary function  codes,
        !            11: # using "sendbug 4bsd-bugs@BERKELEY", to the authors.
        !            12: #
        !            13: 
        !            14: # @(#)argred.s 1.1 (Berkeley) 8/21/85
        !            15: 
        !            16: #  libm$argred implements Bob Corbett's argument reduction and
        !            17: #  libm$sincos implements Peter Tang's double precision sin/cos.
        !            18: #  
        !            19: #  Note: The two entry points libm$argred and libm$sincos are meant
        !            20: #        to be used only by _sin, _cos and _tan.
        !            21: #
        !            22: # method: true range reduction to [-pi/4,pi/4], P. Tang  &  B. Corbett
        !            23: # S. McDonald, April 4,  1985
        !            24: #
        !            25:        .globl  libm$argred
        !            26:        .globl  libm$sincos
        !            27:        .text
        !            28:        .align  1
        !            29: 
        !            30: libm$argred:
        !            31: #
        !            32: #  Compare the argument with the largest possible that can
        !            33: #  be reduced by table lookup.  r3 := |x|  will be used in  table_lookup .
        !            34: #
        !            35:        movd    r0,r3
        !            36:        bgeq    abs1
        !            37:        mnegd   r3,r3
        !            38: abs1:
        !            39:        cmpd    r3,$0d+4.55530934770520019583e+01
        !            40:        blss    small_arg
        !            41:        jsb     trigred
        !            42:        rsb
        !            43: small_arg:
        !            44:        jsb     table_lookup
        !            45:        rsb
        !            46: #
        !            47: #  At this point,
        !            48: #         r0  contains the quadrant number, 0, 1, 2, or 3;
        !            49: #      r2/r1  contains the reduced argument as a D-format number;
        !            50: #         r3  contains a F-format extension to the reduced argument;
        !            51: #          r4  contains a  0 or 1  corresponding to a  sin or cos  entry.
        !            52: #
        !            53: libm$sincos:
        !            54: #
        !            55: #  Compensate for a cosine entry by adding one to the quadrant number.
        !            56: #
        !            57:        addl2   r4,r0
        !            58: #
        !            59: #  Polyd clobbers  r5-r0 ;  save  X  in  r7/r6 .
        !            60: #  This can be avoided by rewriting  trigred .
        !            61: #
        !            62:        movd    r1,r6
        !            63: #
        !            64: #  Likewise, save  alpha  in  r8 .
        !            65: #  This can be avoided by rewriting  trigred .
        !            66: #
        !            67:        movf    r3,r8
        !            68: #
        !            69: #  Odd or even quadrant?  cosine if odd, sine otherwise.
        !            70: #  Save  floor(quadrant/2) in  r9  ; it determines the final sign.
        !            71: #
        !            72:        rotl    $-1,r0,r9
        !            73:        blss    cosine
        !            74: sine:
        !            75:        muld2   r1,r1           # Xsq = X * X
        !            76:        polyd   r1,$7,sin_coef  # Q = P(Xsq) , of deg 7
        !            77:        mulf3   $0f3.0,r8,r4    # beta = 3 * alpha
        !            78:        mulf2   r0,r4           # beta = Q * beta
        !            79:        addf2   r8,r4           # beta = alpha + beta
        !            80:        muld2   r6,r0           # S(X) = X * Q
        !            81: #      cvtfd   r4,r4           ... r5 = 0 after a polyd.
        !            82:        addd2   r4,r0           # S(X) = beta + S(X)
        !            83:        addd2   r6,r0           # S(X) = X + S(X)
        !            84:        brb     done
        !            85: cosine:
        !            86:        muld2   r6,r6           # Xsq = X * X
        !            87:        beql    zero_arg
        !            88:        mulf2   r1,r8           # beta = X * alpha
        !            89:        polyd   r6,$7,cos_coef  # Q = P'(Xsq) , of deg 7
        !            90:        subd3   r0,r8,r0        # beta = beta - Q
        !            91:        subw2   $0x80,r6        # Xsq = Xsq / 2
        !            92:        addd2   r0,r6           # Xsq = Xsq + beta
        !            93: zero_arg:
        !            94:        subd3   r6,$0d1.0,r0    # C(X) = 1 - Xsq
        !            95: done:
        !            96:        blbc    r9,even
        !            97:        mnegd   r0,r0
        !            98: even:
        !            99:        rsb
        !           100: 
        !           101: .data
        !           102: .align 2
        !           103: 
        !           104: sin_coef:
        !           105:        .double 0d-7.53080332264191085773e-13   # s7 = 2^-29 -1.a7f2504ffc49f8..
        !           106:        .double 0d+1.60573519267703489121e-10   # s6 = 2^-21  1.611adaede473c8..
        !           107:        .double 0d-2.50520965150706067211e-08   # s5 = 2^-1a -1.ae644921ed8382..
        !           108:        .double 0d+2.75573191800593885716e-06   # s4 = 2^-13  1.71de3a4b884278..
        !           109:        .double 0d-1.98412698411850507950e-04   # s3 = 2^-0d -1.a01a01a0125e7d..
        !           110:        .double 0d+8.33333333333325688985e-03   # s2 = 2^-07  1.11111111110e50
        !           111:        .double 0d-1.66666666666666664354e-01   # s1 = 2^-03 -1.55555555555554
        !           112:        .double 0d+0.00000000000000000000e+00   # s0 = 0
        !           113: 
        !           114: cos_coef:
        !           115:        .double 0d-1.13006966202629430300e-11   # s7 = 2^-25 -1.8D9BA04D1374BE..
        !           116:        .double 0d+2.08746646574796004700e-09   # s6 = 2^-1D  1.1EE632650350BA..
        !           117:        .double 0d-2.75573073031284417300e-07   # s5 = 2^-16 -1.27E4F31411719E..
        !           118:        .double 0d+2.48015872682668025200e-05   # s4 = 2^-10  1.A01A0196B902E8..
        !           119:        .double 0d-1.38888888888464709200e-03   # s3 = 2^-0A -1.6C16C16C11FACE..
        !           120:        .double 0d+4.16666666666664761400e-02   # s2 = 2^-05  1.5555555555539E
        !           121:        .double 0d+0.00000000000000000000e+00   # s1 = 0
        !           122:        .double 0d+0.00000000000000000000e+00   # s0 = 0
        !           123: 
        !           124: #
        !           125: #  Multiples of  pi/2  expressed as the sum of three doubles,
        !           126: #
        !           127: #  trailing:   n * pi/2 ,  n = 0, 1, 2, ..., 29
        !           128: #                      trailing[n] ,
        !           129: #
        !           130: #  middle:     n * pi/2 ,  n = 0, 1, 2, ..., 29
        !           131: #                      middle[n]   ,
        !           132: #
        !           133: #  leading:    n * pi/2 ,  n = 0, 1, 2, ..., 29
        !           134: #                      leading[n]  ,
        !           135: #
        !           136: #      where
        !           137: #              leading[n]  := (n * pi/2)  rounded,
        !           138: #              middle[n]   := (n * pi/2  -  leading[n])  rounded,
        !           139: #              trailing[n] := (( n * pi/2 - leading[n]) - middle[n])  rounded .
        !           140: 
        !           141: trailing:
        !           142:        .double 0d+0.00000000000000000000e+00   #  0 * pi/2  trailing
        !           143:        .double 0d+4.33590506506189049611e-35   #  1 * pi/2  trailing
        !           144:        .double 0d+8.67181013012378099223e-35   #  2 * pi/2  trailing
        !           145:        .double 0d+1.30077151951856714215e-34   #  3 * pi/2  trailing
        !           146:        .double 0d+1.73436202602475619845e-34   #  4 * pi/2  trailing
        !           147:        .double 0d-1.68390735624352669192e-34   #  5 * pi/2  trailing
        !           148:        .double 0d+2.60154303903713428430e-34   #  6 * pi/2  trailing
        !           149:        .double 0d-8.16726343231148352150e-35   #  7 * pi/2  trailing
        !           150:        .double 0d+3.46872405204951239689e-34   #  8 * pi/2  trailing
        !           151:        .double 0d+3.90231455855570147991e-34   #  9 * pi/2  trailing
        !           152:        .double 0d-3.36781471248705338384e-34   # 10 * pi/2  trailing
        !           153:        .double 0d-1.06379439835298071785e-33   # 11 * pi/2  trailing
        !           154:        .double 0d+5.20308607807426856861e-34   # 12 * pi/2  trailing
        !           155:        .double 0d+5.63667658458045770509e-34   # 13 * pi/2  trailing
        !           156:        .double 0d-1.63345268646229670430e-34   # 14 * pi/2  trailing
        !           157:        .double 0d-1.19986217995610764801e-34   # 15 * pi/2  trailing
        !           158:        .double 0d+6.93744810409902479378e-34   # 16 * pi/2  trailing
        !           159:        .double 0d-8.03640094449267300110e-34   # 17 * pi/2  trailing
        !           160:        .double 0d+7.80462911711140295982e-34   # 18 * pi/2  trailing
        !           161:        .double 0d-7.16921993148029483506e-34   # 19 * pi/2  trailing
        !           162:        .double 0d-6.73562942497410676769e-34   # 20 * pi/2  trailing
        !           163:        .double 0d-6.30203891846791677593e-34   # 21 * pi/2  trailing
        !           164:        .double 0d-2.12758879670596143570e-33   # 22 * pi/2  trailing
        !           165:        .double 0d+2.53800212047402350390e-33   # 23 * pi/2  trailing
        !           166:        .double 0d+1.04061721561485371372e-33   # 24 * pi/2  trailing
        !           167:        .double 0d+6.11729905311472319056e-32   # 25 * pi/2  trailing
        !           168:        .double 0d+1.12733531691609154102e-33   # 26 * pi/2  trailing
        !           169:        .double 0d-3.70049587943078297272e-34   # 27 * pi/2  trailing
        !           170:        .double 0d-3.26690537292459340860e-34   # 28 * pi/2  trailing
        !           171:        .double 0d-1.14812616507957271361e-34   # 29 * pi/2  trailing
        !           172: 
        !           173: middle:
        !           174:        .double 0d+0.00000000000000000000e+00   #  0 * pi/2  middle
        !           175:        .double 0d+5.72118872610983179676e-18   #  1 * pi/2  middle
        !           176:        .double 0d+1.14423774522196635935e-17   #  2 * pi/2  middle
        !           177:        .double 0d-3.83475850529283316309e-17   #  3 * pi/2  middle
        !           178:        .double 0d+2.28847549044393271871e-17   #  4 * pi/2  middle
        !           179:        .double 0d-2.69052076007086676522e-17   #  5 * pi/2  middle
        !           180:        .double 0d-7.66951701058566632618e-17   #  6 * pi/2  middle
        !           181:        .double 0d-1.54628301484890040587e-17   #  7 * pi/2  middle
        !           182:        .double 0d+4.57695098088786543741e-17   #  8 * pi/2  middle
        !           183:        .double 0d+1.07001849766246313192e-16   #  9 * pi/2  middle
        !           184:        .double 0d-5.38104152014173353044e-17   # 10 * pi/2  middle
        !           185:        .double 0d-2.14622680169080983801e-16   # 11 * pi/2  middle
        !           186:        .double 0d-1.53390340211713326524e-16   # 12 * pi/2  middle
        !           187:        .double 0d-9.21580002543456677056e-17   # 13 * pi/2  middle
        !           188:        .double 0d-3.09256602969780081173e-17   # 14 * pi/2  middle
        !           189:        .double 0d+3.03066796603896507006e-17   # 15 * pi/2  middle
        !           190:        .double 0d+9.15390196177573087482e-17   # 16 * pi/2  middle
        !           191:        .double 0d+1.52771359575124969107e-16   # 17 * pi/2  middle
        !           192:        .double 0d+2.14003699532492626384e-16   # 18 * pi/2  middle
        !           193:        .double 0d-1.68853170360202329427e-16   # 19 * pi/2  middle
        !           194:        .double 0d-1.07620830402834670609e-16   # 20 * pi/2  middle
        !           195:        .double 0d+3.97700719404595604379e-16   # 21 * pi/2  middle
        !           196:        .double 0d-4.29245360338161967602e-16   # 22 * pi/2  middle
        !           197:        .double 0d-3.68013020380794313406e-16   # 23 * pi/2  middle
        !           198:        .double 0d-3.06780680423426653047e-16   # 24 * pi/2  middle
        !           199:        .double 0d-2.45548340466059054318e-16   # 25 * pi/2  middle
        !           200:        .double 0d-1.84316000508691335411e-16   # 26 * pi/2  middle
        !           201:        .double 0d-1.23083660551323675053e-16   # 27 * pi/2  middle
        !           202:        .double 0d-6.18513205939560162346e-17   # 28 * pi/2  middle
        !           203:        .double 0d-6.18980636588357585202e-19   # 29 * pi/2  middle
        !           204: 
        !           205: leading:
        !           206:        .double 0d+0.00000000000000000000e+00   #  0 * pi/2  leading
        !           207:        .double 0d+1.57079632679489661351e+00   #  1 * pi/2  leading
        !           208:        .double 0d+3.14159265358979322702e+00   #  2 * pi/2  leading
        !           209:        .double 0d+4.71238898038468989604e+00   #  3 * pi/2  leading
        !           210:        .double 0d+6.28318530717958645404e+00   #  4 * pi/2  leading
        !           211:        .double 0d+7.85398163397448312306e+00   #  5 * pi/2  leading
        !           212:        .double 0d+9.42477796076937979208e+00   #  6 * pi/2  leading
        !           213:        .double 0d+1.09955742875642763501e+01   #  7 * pi/2  leading
        !           214:        .double 0d+1.25663706143591729081e+01   #  8 * pi/2  leading
        !           215:        .double 0d+1.41371669411540694661e+01   #  9 * pi/2  leading
        !           216:        .double 0d+1.57079632679489662461e+01   # 10 * pi/2  leading
        !           217:        .double 0d+1.72787595947438630262e+01   # 11 * pi/2  leading
        !           218:        .double 0d+1.88495559215387595842e+01   # 12 * pi/2  leading
        !           219:        .double 0d+2.04203522483336561422e+01   # 13 * pi/2  leading
        !           220:        .double 0d+2.19911485751285527002e+01   # 14 * pi/2  leading
        !           221:        .double 0d+2.35619449019234492582e+01   # 15 * pi/2  leading
        !           222:        .double 0d+2.51327412287183458162e+01   # 16 * pi/2  leading
        !           223:        .double 0d+2.67035375555132423742e+01   # 17 * pi/2  leading
        !           224:        .double 0d+2.82743338823081389322e+01   # 18 * pi/2  leading
        !           225:        .double 0d+2.98451302091030359342e+01   # 19 * pi/2  leading
        !           226:        .double 0d+3.14159265358979324922e+01   # 20 * pi/2  leading
        !           227:        .double 0d+3.29867228626928286062e+01   # 21 * pi/2  leading
        !           228:        .double 0d+3.45575191894877260523e+01   # 22 * pi/2  leading
        !           229:        .double 0d+3.61283155162826226103e+01   # 23 * pi/2  leading
        !           230:        .double 0d+3.76991118430775191683e+01   # 24 * pi/2  leading
        !           231:        .double 0d+3.92699081698724157263e+01   # 25 * pi/2  leading
        !           232:        .double 0d+4.08407044966673122843e+01   # 26 * pi/2  leading
        !           233:        .double 0d+4.24115008234622088423e+01   # 27 * pi/2  leading
        !           234:        .double 0d+4.39822971502571054003e+01   # 28 * pi/2  leading
        !           235:        .double 0d+4.55530934770520019583e+01   # 29 * pi/2  leading
        !           236: 
        !           237: twoOverPi:
        !           238:        .double 0d+6.36619772367581343076e-01
        !           239:        .text
        !           240:        .align  1
        !           241: 
        !           242: table_lookup:
        !           243:        muld3   r3,twoOverPi,r0
        !           244:        cvtrdl  r0,r0                   # n = nearest int to ((2/pi)*|x|) rnded
        !           245:        mull3   $8,r0,r5
        !           246:        subd2   leading(r5),r3          # p = (|x| - leading n*pi/2) exactly
        !           247:        subd3   middle(r5),r3,r1        # q = (p - middle  n*pi/2) rounded
        !           248:        subd2   r1,r3                   # r = (p - q)
        !           249:        subd2   middle(r5),r3           # r =  r - middle  n*pi/2
        !           250:        subd2   trailing(r5),r3         # r =  r - trailing n*pi/2  rounded
        !           251: #
        !           252: #  If the original argument was negative,
        !           253: #  negate the reduce argument and
        !           254: #  adjust the octant/quadrant number.
        !           255: #
        !           256:        tstw    4(ap)
        !           257:        bgeq    abs2
        !           258:        mnegf   r1,r1
        !           259:        mnegf   r3,r3
        !           260: #      subb3   r0,$8,r0        ...used for  pi/4  reduction -S.McD
        !           261:        subb3   r0,$4,r0
        !           262: abs2:
        !           263: #
        !           264: #  Clear all unneeded octant/quadrant bits.
        !           265: #
        !           266: #      bicb2   $0xf8,r0        ...used for  pi/4  reduction -S.McD
        !           267:        bicb2   $0xfc,r0
        !           268:        rsb
        !           269: #
        !           270: #                                              p.0
        !           271:        .text
        !           272:        .align  2
        !           273: #
        !           274: # Only 256 (actually 225) bits of 2/pi are needed for VAX double
        !           275: # precision; this was determined by enumerating all the nearest
        !           276: # machine integer multiples of pi/2 using continued fractions.
        !           277: # (8a8d3673775b7ff7 required the most bits.)           -S.McD
        !           278: #
        !           279:        .long   0
        !           280:        .long   0
        !           281:        .long   0xaef1586d
        !           282:        .long   0x9458eaf7
        !           283:        .long   0x10e4107f
        !           284:        .long   0xd8a5664f
        !           285:        .long   0x4d377036
        !           286:        .long   0x09d5f47d
        !           287:        .long   0x91054a7f
        !           288:        .long   0xbe60db93
        !           289: bits2opi:
        !           290:        .long   0x00000028
        !           291:        .long   0
        !           292: #
        !           293: #  Note: wherever you see the word `octant', read `quadrant'.
        !           294: #  Currently this code is set up for  pi/2  argument reduction.
        !           295: #  By uncommenting/commenting the appropriate lines, it will
        !           296: #  also serve as a  pi/4  argument reduction code.
        !           297: #  
        !           298: 
        !           299: #                                              p.1
        !           300: #  Trigred  preforms argument reduction
        !           301: #  for the trigonometric functions.  It
        !           302: #  takes one input argument, a D-format
        !           303: #  number in  r1/r0 .  The magnitude of
        !           304: #  the input argument must be greater
        !           305: #  than or equal to  1/2 .  Trigred produces
        !           306: #  three results:  the number of the octant
        !           307: #  occupied by the argument, the reduced 
        !           308: #  argument, and an extension of the
        !           309: #  reduced argument.  The octant number is 
        !           310: #  returned in  r0 .  The reduced argument 
        !           311: #  is returned as a D-format number in 
        !           312: #  r2/r1 .  An 8 bit extension of the 
        !           313: #  reduced argument is returned as an 
        !           314: #  F-format number in r3.
        !           315: #                                              p.2
        !           316: trigred:
        !           317: #
        !           318: #  Save the sign of the input argument.
        !           319: #
        !           320:        movw    r0,-(sp)
        !           321: #
        !           322: #  Extract the exponent field.
        !           323: #
        !           324:        extzv   $7,$7,r0,r2
        !           325: #
        !           326: #  Convert the fraction part of the input
        !           327: #  argument into a quadword integer.
        !           328: #
        !           329:        bicw2   $0xff80,r0
        !           330:        bisb2   $0x80,r0        # -S.McD
        !           331:        rotl    $16,r0,r0
        !           332:        rotl    $16,r1,r1
        !           333: #
        !           334: #  If  r1  is negative, add  1  to  r0 .  This
        !           335: #  adjustment is made so that the two's
        !           336: #  complement multiplications done later
        !           337: #  will produce unsigned results.
        !           338: #
        !           339:        bgeq    posmid
        !           340:        incl    r0
        !           341: posmid:
        !           342: #                                              p.3
        !           343: #
        !           344: #  Set  r3  to the address of the first quadword
        !           345: #  used to obtain the needed portion of  2/pi .
        !           346: #  The address is longword aligned to ensure
        !           347: #  efficient access.
        !           348: #
        !           349:        ashl    $-3,r2,r3
        !           350:        bicb2   $3,r3
        !           351:        subl3   r3,$bits2opi,r3
        !           352: #
        !           353: #  Set  r2  to the size of the shift needed to 
        !           354: #  obtain the correct portion of  2/pi .
        !           355: #
        !           356:        bicb2   $0xe0,r2
        !           357: #                                              p.4
        !           358: #
        !           359: #  Move the needed  128  bits of  2/pi  into
        !           360: #  r11 - r8 .  Adjust the numbers to allow
        !           361: #  for unsigned multiplication.
        !           362: #
        !           363:        ashq    r2,(r3),r10
        !           364: 
        !           365:        subl2   $4,r3
        !           366:        ashq    r2,(r3),r9
        !           367:        bgeq    signoff1
        !           368:        incl    r11
        !           369: signoff1:
        !           370:        subl2   $4,r3
        !           371:        ashq    r2,(r3),r8
        !           372:        bgeq    signoff2
        !           373:        incl    r10
        !           374: signoff2:
        !           375:        subl2   $4,r3
        !           376:        ashq    r2,(r3),r7
        !           377:        bgeq    signoff3
        !           378:        incl    r9
        !           379: signoff3:
        !           380: #                                              p.5
        !           381: #
        !           382: #  Multiply the contents of  r0/r1  by the 
        !           383: #  slice of  2/pi  in  r11 - r8 .
        !           384: #
        !           385:        emul    r0,r8,$0,r4
        !           386:        emul    r0,r9,r5,r5
        !           387:        emul    r0,r10,r6,r6
        !           388: 
        !           389:        emul    r1,r8,$0,r7
        !           390:        emul    r1,r9,r8,r8
        !           391:        emul    r1,r10,r9,r9
        !           392:        emul    r1,r11,r10,r10
        !           393: 
        !           394:        addl2   r4,r8
        !           395:        adwc    r5,r9
        !           396:        adwc    r6,r10
        !           397: #                                              p.6
        !           398: #
        !           399: #  If there are more than five leading zeros
        !           400: #  after the first two quotient bits or if there
        !           401: #  are more than five leading ones after the first
        !           402: #  two quotient bits, generate more fraction bits.
        !           403: #  Otherwise, branch to code to produce the result.
        !           404: #
        !           405:        bicl3   $0xc1ffffff,r10,r4
        !           406:        beql    more1
        !           407:        cmpl    $0x3e000000,r4
        !           408:        bneq    result
        !           409: more1:
        !           410: #                                              p.7
        !           411: #
        !           412: #  generate another  32  result bits.
        !           413: #
        !           414:        subl2   $4,r3
        !           415:        ashq    r2,(r3),r5
        !           416:        bgeq    signoff4
        !           417: 
        !           418:        emul    r1,r6,$0,r4
        !           419:        addl2   r1,r5
        !           420:        emul    r0,r6,r5,r5
        !           421:        addl2   r0,r6
        !           422:        brb     addbits1
        !           423: 
        !           424: signoff4:
        !           425:        emul    r1,r6,$0,r4
        !           426:        emul    r0,r6,r5,r5
        !           427: 
        !           428: addbits1:
        !           429:        addl2   r5,r7
        !           430:        adwc    r6,r8
        !           431:        adwc    $0,r9
        !           432:        adwc    $0,r10
        !           433: #                                              p.8
        !           434: #
        !           435: #  Check for massive cancellation.
        !           436: #
        !           437:        bicl3   $0xc0000000,r10,r6
        !           438: #      bneq    more2                   -S.McD  Test was backwards
        !           439:        beql    more2
        !           440:        cmpl    $0x3fffffff,r6
        !           441:        bneq    result
        !           442: more2:
        !           443: #                                              p.9
        !           444: #
        !           445: #  If massive cancellation has occurred,
        !           446: #  generate another  24  result bits.
        !           447: #  Testing has shown there will always be 
        !           448: #  enough bits after this point.
        !           449: #
        !           450:        subl2   $4,r3
        !           451:        ashq    r2,(r3),r5
        !           452:        bgeq    signoff5
        !           453: 
        !           454:        emul    r0,r6,r4,r5
        !           455:        addl2   r0,r6
        !           456:        brb     addbits2
        !           457: 
        !           458: signoff5:
        !           459:        emul    r0,r6,r4,r5
        !           460: 
        !           461: addbits2:
        !           462:        addl2   r6,r7
        !           463:        adwc    $0,r8
        !           464:        adwc    $0,r9
        !           465:        adwc    $0,r10
        !           466: #                                              p.10
        !           467: #
        !           468: #  The following code produces the reduced
        !           469: #  argument from the product bits contained
        !           470: #  in  r10 - r7 .
        !           471: #
        !           472: result:
        !           473: #
        !           474: #  Extract the octant number from  r10 .
        !           475: #
        !           476: #      extzv   $29,$3,r10,r0   ...used for  pi/4  reduction -S.McD
        !           477:        extzv   $30,$2,r10,r0
        !           478: #
        !           479: #  Clear the octant bits in  r10 .
        !           480: #
        !           481: #      bicl2   $0xe0000000,r10 ...used for  pi/4  reduction -S.McD
        !           482:        bicl2   $0xc0000000,r10
        !           483: #
        !           484: #  Zero the sign flag.
        !           485: #
        !           486:        clrl    r5
        !           487: #                                              p.11
        !           488: #
        !           489: #  Check to see if the fraction is greater than
        !           490: #  or equal to one-half.  If it is, add one 
        !           491: #  to the octant number, set the sign flag
        !           492: #  on, and replace the fraction with  1 minus
        !           493: #  the fraction.
        !           494: #
        !           495: #      bitl    $0x10000000,r10         ...used for  pi/4  reduction -S.McD
        !           496:        bitl    $0x20000000,r10
        !           497:        beql    small
        !           498:        incl    r0
        !           499:        incl    r5
        !           500: #      subl3   r10,$0x1fffffff,r10     ...used for  pi/4  reduction -S.McD
        !           501:        subl3   r10,$0x3fffffff,r10
        !           502:        mcoml   r9,r9
        !           503:        mcoml   r8,r8
        !           504:        mcoml   r7,r7
        !           505: small:
        !           506: #                                              p.12
        !           507: #
        !           508: ##  Test whether the first  29  bits of the ...used for  pi/4  reduction -S.McD
        !           509: #  Test whether the first  30  bits of the 
        !           510: #  fraction are zero.
        !           511: #
        !           512:        tstl    r10
        !           513:        beql    tiny
        !           514: #
        !           515: #  Find the position of the first one bit in  r10 .
        !           516: #
        !           517:        cvtld   r10,r1
        !           518:        extzv   $7,$7,r1,r1
        !           519: #
        !           520: #  Compute the size of the shift needed.
        !           521: #
        !           522:        subl3   r1,$32,r6
        !           523: #
        !           524: #  Shift up the high order  64  bits of the
        !           525: #  product.
        !           526: #
        !           527:        ashq    r6,r9,r10
        !           528:        ashq    r6,r8,r9
        !           529:        brb     mult
        !           530: #                                              p.13
        !           531: #
        !           532: #  Test to see if the sign bit of  r9  is on.
        !           533: #
        !           534: tiny:
        !           535:        tstl    r9
        !           536:        bgeq    tinier
        !           537: #
        !           538: #  If it is, shift the product bits up  32  bits.
        !           539: #
        !           540:        movl    $32,r6
        !           541:        movq    r8,r10
        !           542:        tstl    r10
        !           543:        brb     mult
        !           544: #                                              p.14
        !           545: #
        !           546: #  Test whether  r9  is zero.  It is probably
        !           547: #  impossible for both  r10  and  r9  to be
        !           548: #  zero, but until proven to be so, the test
        !           549: #  must be made.
        !           550: #
        !           551: tinier:
        !           552:        beql    zero
        !           553: #
        !           554: #  Find the position of the first one bit in  r9 .
        !           555: #
        !           556:        cvtld   r9,r1
        !           557:        extzv   $7,$7,r1,r1
        !           558: #
        !           559: #  Compute the size of the shift needed.
        !           560: #
        !           561:        subl3   r1,$32,r1
        !           562:        addl3   $32,r1,r6
        !           563: #
        !           564: #  Shift up the high order  64  bits of the
        !           565: #  product.
        !           566: #
        !           567:        ashq    r1,r8,r10
        !           568:        ashq    r1,r7,r9
        !           569:        brb     mult
        !           570: #                                              p.15
        !           571: #
        !           572: #  The following code sets the reduced
        !           573: #  argument to zero.
        !           574: #
        !           575: zero:
        !           576:        clrl    r1
        !           577:        clrl    r2
        !           578:        clrl    r3
        !           579:        brw     return
        !           580: #                                              p.16
        !           581: #
        !           582: #  At this point,  r0  contains the octant number,
        !           583: #  r6  indicates the number of bits the fraction
        !           584: #  has been shifted,  r5  indicates the sign of
        !           585: #  the fraction,  r11/r10  contain the high order
        !           586: #  64  bits of the fraction, and the condition
        !           587: #  codes indicate where the sign bit of  r10
        !           588: #  is on.  The following code multiplies the
        !           589: #  fraction by  pi/2 .
        !           590: #
        !           591: mult:
        !           592: #
        !           593: #  Save  r11/r10  in  r4/r1 .          -S.McD
        !           594:        movl    r11,r4
        !           595:        movl    r10,r1
        !           596: #
        !           597: #  If the sign bit of  r10  is on, add  1  to  r11 .
        !           598: #
        !           599:        bgeq    signoff6
        !           600:        incl    r11
        !           601: signoff6:
        !           602: #                                              p.17
        !           603: #
        !           604: #  Move  pi/2  into  r3/r2 .
        !           605: #
        !           606:        movq    $0xc90fdaa22168c235,r2
        !           607: #
        !           608: #  Multiply the fraction by the portion of  pi/2
        !           609: #  in  r2 .
        !           610: #
        !           611:        emul    r2,r10,$0,r7
        !           612:        emul    r2,r11,r8,r7
        !           613: #
        !           614: #  Multiply the fraction by the portion of  pi/2 
        !           615: #  in  r3 .
        !           616:        emul    r3,r10,$0,r9
        !           617:        emul    r3,r11,r10,r10
        !           618: #
        !           619: #  Add the product bits together.
        !           620: #
        !           621:        addl2   r7,r9
        !           622:        adwc    r8,r10
        !           623:        adwc    $0,r11
        !           624: #
        !           625: #  Compensate for not sign extending  r8  above.-S.McD
        !           626: #
        !           627:        tstl    r8
        !           628:        bgeq    signoff6a
        !           629:        decl    r11
        !           630: signoff6a:
        !           631: #
        !           632: #  Compensate for  r11/r10  being unsigned.    -S.McD
        !           633: #
        !           634:        addl2   r2,r10
        !           635:        adwc    r3,r11
        !           636: #
        !           637: #  Compensate for  r3/r2  being unsigned.      -S.McD
        !           638: #
        !           639:        addl2   r1,r10
        !           640:        adwc    r4,r11
        !           641: #                                              p.18
        !           642: #
        !           643: #  If the sign bit of  r11  is zero, shift the
        !           644: #  product bits up one bit and increment  r6 .
        !           645: #
        !           646:        blss    signon
        !           647:        incl    r6
        !           648:        ashq    $1,r10,r10
        !           649:        tstl    r9
        !           650:        bgeq    signoff7
        !           651:        incl    r10
        !           652: signoff7:
        !           653: signon:
        !           654: #                                              p.19
        !           655: #
        !           656: #  Shift the  56  most significant product
        !           657: #  bits into  r9/r8 .  The sign extension
        !           658: #  will be handled later.
        !           659: #
        !           660:        ashq    $-8,r10,r8
        !           661: #
        !           662: #  Convert the low order  8  bits of  r10
        !           663: #  into an F-format number.
        !           664: #
        !           665:        cvtbf   r10,r3
        !           666: #
        !           667: #  If the result of the conversion was
        !           668: #  negative, add  1  to  r9/r8 .
        !           669: #
        !           670:        bgeq    chop
        !           671:        incl    r8
        !           672:        adwc    $0,r9
        !           673: #
        !           674: #  If  r9  is now zero, branch to special
        !           675: #  code to handle that possibility.
        !           676: #
        !           677:        beql    carryout
        !           678: chop:
        !           679: #                                              p.20
        !           680: #
        !           681: #  Convert the number in  r9/r8  into
        !           682: #  D-format number in  r2/r1 .
        !           683: #
        !           684:        rotl    $16,r8,r2
        !           685:        rotl    $16,r9,r1
        !           686: #
        !           687: #  Set the exponent field to the appropriate
        !           688: #  value.  Note that the extra bits created by
        !           689: #  sign extension are now eliminated.
        !           690: #
        !           691:        subw3   r6,$131,r6
        !           692:        insv    r6,$7,$9,r1
        !           693: #
        !           694: #  Set the exponent field of the F-format
        !           695: #  number in  r3  to the appropriate value.
        !           696: #
        !           697:        tstf    r3
        !           698:        beql    return
        !           699: #      extzv   $7,$8,r3,r4     -S.McD
        !           700:        extzv   $7,$7,r3,r4
        !           701:        addw2   r4,r6
        !           702: #      subw2   $217,r6         -S.McD
        !           703:        subw2   $64,r6
        !           704:        insv    r6,$7,$8,r3
        !           705:        brb     return
        !           706: #                                              p.21
        !           707: #
        !           708: #  The following code generates the appropriate 
        !           709: #  result for the unlikely possibility that
        !           710: #  rounding the number in  r9/r8  resulted in 
        !           711: #  a carry out.
        !           712: #
        !           713: carryout:
        !           714:        clrl    r1
        !           715:        clrl    r2
        !           716:        subw3   r6,$132,r6
        !           717:        insv    r6,$7,$9,r1
        !           718:        tstf    r3
        !           719:        beql    return
        !           720:        extzv   $7,$8,r3,r4
        !           721:        addw2   r4,r6
        !           722:        subw2   $218,r6
        !           723:        insv    r6,$7,$8,r3
        !           724: #                                              p.22
        !           725: #
        !           726: #  The following code makes an needed
        !           727: #  adjustments to the signs of the 
        !           728: #  results or to the octant number, and
        !           729: #  then returns.
        !           730: #
        !           731: return:
        !           732: #
        !           733: #  Test if the fraction was greater than or 
        !           734: #  equal to  1/2 .  If so, negate the reduced
        !           735: #  argument.
        !           736: #
        !           737:        blbc    r5,signoff8
        !           738:        mnegf   r1,r1
        !           739:        mnegf   r3,r3
        !           740: signoff8:
        !           741: #                                              p.23
        !           742: #
        !           743: #  If the original argument was negative,
        !           744: #  negate the reduce argument and
        !           745: #  adjust the octant number.
        !           746: #
        !           747:        tstw    (sp)+
        !           748:        bgeq    signoff9
        !           749:        mnegf   r1,r1
        !           750:        mnegf   r3,r3
        !           751: #      subb3   r0,$8,r0        ...used for  pi/4  reduction -S.McD
        !           752:        subb3   r0,$4,r0
        !           753: signoff9:
        !           754: #
        !           755: #  Clear all unneeded octant bits.
        !           756: #
        !           757: #      bicb2   $0xf8,r0        ...used for  pi/4  reduction -S.McD
        !           758:        bicb2   $0xfc,r0
        !           759: #
        !           760: #  Return.
        !           761: #
        !           762:        rsb

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.