|
|
1.1 ! root 1: .data ! 2: .asciz "@(#)Fmuld.s 1.1 86/02/03 Copyr 1985 Sun Micro" ! 3: .even ! 4: .text ! 5: ! 6: | Copyright (c) 1985 by Sun Microsystems, Inc. ! 7: ! 8: #include "fpcrtdefs.h" ! 9: ! 10: /* ! 11: * double-precision floating math run-time support ! 12: * ! 13: * copyright 1981, 1982 Richard E. James III ! 14: * translated to SUN idiom 10/11 March 1983 rt ! 15: * parameter passing re-done 22 July 1983 rt ! 16: */ ! 17: ! 18: ARG2PTR = a0 ! 19: ! 20: /* ! 21: * ieee double floating divide ! 22: * copyright 1981, Richard E. James III ! 23: * translated to SUN idiom 10 March 1983 rt ! 24: */ ! 25: ! 26: /* ! 27: * entry conditions: ! 28: * first argument in d0/d1 ! 29: * second argument on stack ! 30: * exit conditions: ! 31: * result (8 bytes) in d0/d1 ! 32: * ! 33: * register conventions: ! 34: * d0/d1 divisor ! 35: * d2/d3 dividend ! 36: * d4 count ! 37: * d5 sign an exponent ! 38: * d6 result exponent ! 39: * d6/d7 quotient ! 40: */ ! 41: SAVEMASK = 0x3f00 | registers d2-d7 ! 42: RESTMASK = 0xfc ! 43: NSAVED = 6*4 | 6 registers * sizeof(register) ! 44: ! 45: RTENTRY(Fdivd) ! 46: | save registers and load operands into registers ! 47: moveml #0x3f00,sp@- | registers d2-d7 ! 48: movl d0,d2 ! 49: movl d1,d3 ! 50: movl ARG2PTR@+,d0 ! 51: movl ARG2PTR@ ,d1 ! 52: | save sign of result ! 53: movl d0,d5 ! 54: eorl d2,d5 | sign of result ! 55: clrl d4 | flag for divide ! 56: bsrs extrem ! 57: | compute resulting exponent ! 58: movw d6,d5 ! 59: subw d7,d5 ! 60: | do top 30-31 bits of divide (d_rcp will post normalize) ! 61: movw #30,d4 | count 30..0 ! 62: bsr shsub | shift and subtract loop ! 63: movl d7,d6 | top of answer ! 64: | do next 22 bits of divide ! 65: movw #23,d4 | count 23..0 (total = 54-55 bits) ! 66: bsr shsub ! 67: | put together answer ! 68: lsll #8,d7 | line up bottom ! 69: orl d3,d2 ! 70: beqs 1$ ! 71: bset #1,d7 | sticky bit on if remainder <> 0 ! 72: 1$: lsll #1,d7 ! 73: roxll #1,d6 ! 74: | normalize (once), round, check result exp, pack ! 75: movl d6,d2 | upper ! 76: movl d7,d3 | lower ! 77: movw d5,d6 | exponent ! 78: addw #1023,d6 | re-bias ! 79: jbsr d_nrcp | norm once, rnd, ck, pack ! 80: bra dmsign ! 81: ! 82: ! 83: /* ! 84: * round, check for over/underflow, and pack in the exponent. ! 85: * d_nrcp does one normalize and then calls d_rcp. ! 86: * d_rcp rounds the double value and packs the exponent in, ! 87: * catching infinity, zero, and denormalized numbers. ! 88: * d_usel puts together the larger argument. ! 89: * ! 90: * input: ! 91: * d2/d3 mantissa (- if norm) ! 92: * d6 biased exponent ! 93: * (need sign, sticky) ! 94: * output: ! 95: * d2/d3 most of number, no sign or hidden bit, ! 96: * waiting to shift sign in. ! 97: * other: ! 98: * d4 lost ! 99: * d5 unchanged ! 100: */ ! 101: ! 102: d_nrcp: ! 103: tstl d2 ! 104: jmi d_rcp | already normalized ! 105: subqw #1,d6 ! 106: lsll #1,d3 | do extra normalize (for mul/div) ! 107: roxll #1,d2 ! 108: jra d_rcp ! 109: ! 110: /* ! 111: * subroutine for unpacking one operand, and normalizing a denormalized number ! 112: * input: ! 113: * d0/d1 number ! 114: * output: ! 115: * d0/d1 mantissa ! 116: * d7.w exponent ! 117: * z on iff mantissa is zero_ ! 118: * ! 119: * unchanged: ! 120: * d4 bottom = 0xf77 ! 121: */ ! 122: ! 123: unp: movl d0,d7 | start getting exp ! 124: andl #0xfffff,d0 | clear out sign and exp ! 125: swap d7 ! 126: lsrw #(16-1-11),d7 ! 127: andw d4,d7 | expondnt ! 128: bnes 3$ | normal number ! 129: | denormalized number or zero: ! 130: tstl d0 | upper ! 131: bnes 1$ ! 132: tstl d1 | lower ! 133: beqs 3$ |zero ! 134: 1$: addqw #1,d7 ! 135: 2$: subql #1,d7 ! 136: lsll #1,d1 ! 137: roxll #1,d0 | normalize ! 138: btst #20,d0 ! 139: beqs 2$ | loop until normalized ! 140: 3$: rts ! 141: ! 142: ! 143: /* ! 144: * subroutine for extracting and taking care of 0/GU/INF/NAN. ! 145: * input: ! 146: * d0/d1, d2/d3 ! 147: * d4 + for div; - for mod ! 148: * ! 149: * output: ! 150: * d0/d1 (bottom) and d2/d3 (top) converted to ! 151: * 000XXXXX/XXXXXXXX ! 152: * d6 exponent of top ! 153: * d7 exponent of bottom ! 154: * ! 155: * unchanged: ! 156: * d5 sign ! 157: */ ! 158: ! 159: extrem: ! 160: movw #0x7ff,d4 | mask, sign.l untouched ! 161: exg d2,d0 ! 162: exg d3,d1 ! 163: bsrs unp | unpack top ! 164: exg d0,d2 ! 165: exg d1,d3 ! 166: exg d7,d6 ! 167: beqs topzero | top is zero ! 168: cmpw d4,d6 ! 169: beqs topbig | top is inf or nan ! 170: bset #20,d2 | set hidden bit ! 171: topzero:bsrs unp | unpack bottom ! 172: beqs botzero | bottom is 0 ! 173: cmpw d4,d7 ! 174: beqs botbig | bottom is inf or nan ! 175: bset #20,d0 ! 176: lsll #1,d1 | left shift bottom ! 177: roxll #1,d0 ! 178: rts ! 179: ! 180: /* ! 181: * EXCEPTION HANDLING ! 182: */ ! 183: ! 184: topbig: | top is inf or nan ! 185: bsrs unp | unpack bottom ! 186: tstl d4 ! 187: bmis isnan | inf or nan / ... ! 188: cmpw d6,d7 ! 189: beqs isnan | both inf/nan ! 190: tstl d2 | top ! 191: beqs geninf | inf / ... = inf ! 192: bras isnan | nan / ... = nan ! 193: ! 194: botzero:| bottom is 0 ! 195: tstl d4 ! 196: bmis gennan | dmod(... 0) = nan ! 197: orl d2,d3 | top ! 198: beqs gennan | 0/0 = nan ! 199: | nonzero/0 = inf ! 200: | generate infinity for answer ! 201: geninf: movl #0x7ff00000*2,d2 | infinity ! 202: bras clrbot ! 203: ! 204: botbig: tstl d0 | bottom = nan, result = nan ! 205: bnes isnan ! 206: tstl d4 ! 207: bpls genzero | ... / inf = 0 ! 208: addqw #4,sp ! 209: addw #11,d6 | append sign ! 210: jbsr d_norm | normalize ! 211: jbsr d_rcp | adjust exp, ck extremes, pack ! 212: dmsign: roxll #1,d5 | sign -> x ! 213: roxrl #1, d2 | rotate in sign ! 214: | answer is now in: ! 215: | d2 most significant 32 bits ! 216: | d3 least significant 32 bits ! 217: dmexit: movl d2,d0 ! 218: movl d3,d1 ! 219: | restore registers and split ! 220: moveml sp@+,#RESTMASK ! 221: RET ! 222: ! 223: ! 224: | invalid operand/operation ! 225: isnan: cmpw d7,d6 ! 226: bnes 1$ | exponents equal ! 227: cmpl d0, d2 ! 228: 1$: bges 2$ ! 229: movw d7,d6 ! 230: movl d0,d2 ! 231: 2$: swap d2 ! 232: lslw #(16-1-11),d6 ! 233: orw d6,d2 | put back together ! 234: swap d2 ! 235: lsll #1,d2 ! 236: cmpl #0x7ff00000*2, d2 ! 237: bhis gotnan | use larger nan ! 238: ! 239: gennan: movl #0x7ff00004*2,d2 ! 240: tstl d4 ! 241: bpls gotnan | nan 4 for div ! 242: addql #2,d2 | nan 5 for mod ! 243: gotnan: clrl d5 ! 244: bras clrbot ! 245: ! 246: genzero:clrl d2 ! 247: clrbot: clrl d3 ! 248: sign: addqw #4,sp ! 249: bra dmsign ! 250: ! 251: /* ! 252: * the shsub subroutine does a shift-subtract loop ! 253: * that is the heart of divide and mod. ! 254: * the algorithm is a simple shift and subtract loop, ! 255: * but it adds when it overshoots. ! 256: * why not use the divs/divu instructions? That approach is slower! ! 257: * ! 258: * registers: ! 259: * d2/d3 current dividend (updated) ! 260: * d0/d1 divisor (unchanged) ! 261: * d4.w (inpout) number of interations -1, and bit number ! 262: * d5/d6 -untouched- ! 263: * d7 quotient being devloped (ignored by mod) ! 264: */ ! 265: ! 266: ! 267: shsub: ! 268: clrl d7 | quotient ! 269: | shift once, see if subtract needed ! 270: 1$: addl d3,d3 ! 271: addxl d2,d2 |(64-bit left shift) ! 272: cmpl d0,d2 ! 273: dbge d4,1$ | loop while divident is small ! 274: | tally quotient and subtract ! 275: bset d4,d7 | quotient bit ! 276: subl d1,d3 ! 277: subxl d0,d2 | 64-bit subtract ! 278: dbmi d4,1$ | loop (d4) times ! 279: | now one of three things has happeded: ! 280: | 1. count exhausted and extra subtract done (first DB hit count) ! 281: | 2. count exhausted in second DB ! 282: | 3. overshot because compare didn't check lower parts ! 283: bpls 2$ | case 2 ! 284: addl d1,d3 | take care of overshoot ! 285: addxl d0,d2 ! 286: bclr d4,d7 ! 287: tstw d4 ! 288: dblt d4,1$ | case 3 ! 289: | case 1 ! 290: 2$: rts ! 291: ! 292: /* ! 293: * ieee double floating multiply ! 294: * copyright 1981, Richard E. James III ! 295: * translated to SUN idiom 10 March 1983 rt ! 296: */ ! 297: ! 298: /* ! 299: Revised to do correct IEEE rounding by dgh 24 Aug 84 ! 300: * Conventions in the main multiplication section are as follows: ! 301: * r is the argument passed in the registers d0/d1 ! 302: * s is the argument passed on the stack, saved in a0/a1 ! 303: * while multiplying ! 304: * r1,r2,r3,r4 are the 16 bit components of r in descending order ! 305: * s1,s2,s3,s4 are the 16 bit components of s ! 306: * r is kept in d0 and d1, sometimes with words swapped ! 307: * s is kept in a0 and a1 unchanged ! 308: * the product is kept in d2/d3/d4/d5 ! 309: * d6 contains a current partial product ! 310: * d7 contains a current partial product ! 311: * d7 contains 0 when needed for addxl ! 312: * a2 saves the sign and exponent of the result ! 313: * ! 314: * At the end, d4 and d5 if nonzero are jammed into lsb of d3. ! 315: */ ! 316: /* ! 317: * entry conditions: ! 318: * first argument in d0/d1 ! 319: * second argument on stack ! 320: * exit conditions: ! 321: * result (8 bytes) in d0/d1 ! 322: * ! 323: * register conventions: ! 324: * d0-d3 operands or pieces of result ! 325: * d5 exponent of larger ! 326: * d5 temp for multiplying ! 327: * d6 sign and exponent ! 328: * d7 exponent of smaller ! 329: * d7 zero ! 330: */ ! 331: SAVEMASK = 0x3fe0 | registers d2-d7,a0-a2 ! 332: RESTMASK = 0x7fc ! 333: NSAVED = 6*4 | 6 registers * sizeof(register) ! 334: ! 335: SAVE03 = 0xf000 | registers d0-d3 ! 336: FETCH03 = 0x000f ! 337: ! 338: RTENTRY(Fsqrd) ! 339: moveml #SAVEMASK,sp@- | registers d2-d7 ! 340: movel d0,d2 ! 341: movel d1,d3 ! 342: bras Fmuld2 ! 343: RTENTRY(Fmuld) ! 344: | save registers and load operands into registers ! 345: moveml #SAVEMASK,sp@- | registers d2-d7 ! 346: movl ARG2PTR@+,d2 ! 347: movl ARG2PTR@ ,d3 ! 348: Fmuld2: ! 349: | save sign of result ! 350: movl d0,d5 ! 351: eorl d2,d5 | sign of result ! 352: asll #1,d0 | toss sign ! 353: asll #1,d2 | EEmmmm0 ! 354: cmpl d2,d0 ! 355: | order operands (exponents at least) ! 356: blss eswap ! 357: exg d0,d2 | d2/d3 = larger ! 358: exg d1,d3 ! 359: | extract and check exponents ! 360: eswap: jbsr d_exte ! 361: movw d6,d5 | larger exp ! 362: movl d5,d6 ! 363: addw d7,d6 | result exp (and sign) ! 364: cmpw #0x7ff,d5 | check larger ! 365: jeq ovfl | inf or nan ! 366: tstw d7 ! 367: jeq ufl | 0 or gradual underflow ! 368: | set hidden bits ! 369: bset #31,d0 ! 370: back: bset #31,d2 ! 371: ! 372: | Main multiply sequence. ! 373: ! 374: movl d2,a0 | a0 gets s1s2. ! 375: movl d3,a1 | a1 gets s3s4. ! 376: movl d6,a2 | a2 saves sign and exponent of result. ! 377: ! 378: movl a0,d3 | d3 gets s1s2. ! 379: swap d3 | d3 gets s2s1. ! 380: movw d3,d4 | d4 gets s1. ! 381: mulu d1,d4 | d4 gets r4*s1. ! 382: mulu d0,d3 | d3 gets r2*s1. ! 383: clrw d2 | d2 gets 0; only gets carries in this phase. ! 384: ! 385: movl a1,d6 | d6 gets s3s4. ! 386: swap d6 | d6 gets s4s3. ! 387: movw d6,d5 | d5 gets s3. ! 388: jeq s3zero | Branch if s3=0 to avoid following. ! 389: mulu d1,d5 | d5 gets r4*s3. ! 390: movw d6,d7 | d7 gets s3. ! 391: mulu d0,d7 | d7 gets r2*s3. ! 392: addl d7,d4 ! 393: clrl d7 | Make a zero for addx. ! 394: addxl d7,d3 ! 395: addxw d7,d2 ! 396: phase3: ! 397: swap d0 | d0 gets r2r1. ! 398: swap d1 | d1 gets r4r3. ! 399: movw a0,d6 | d6 gets s2. ! 400: beqs 4$ | Skip following if s2=0. ! 401: movw d6,d7 | d7 gets s2. ! 402: mulu d0,d6 | d6 gets r1*s2. ! 403: mulu d1,d7 | d7 gets r3*s2. ! 404: addl d7,d4 ! 405: addxl d6,d3 ! 406: clrw d7 ! 407: addxw d7,d2 ! 408: 4$: ! 409: movw a1,d6 | d6 gets s4. ! 410: beqs 5$ | Skip if s4=0. ! 411: movw d6,d7 | d7 gets s4. ! 412: mulu d0,d6 | d6 gets r1*s4. ! 413: mulu d1,d7 | d7 gets r3*s4. ! 414: addl d7,d5 ! 415: addxl d6,d4 ! 416: clrl d7 ! 417: addxl d7,d3 ! 418: addxw d7,d2 ! 419: 5$: ! 420: swap d2 | Exchange order of registers which contain ! 421: swap d3 | all the "odd" partial products. ! 422: swap d4 ! 423: swap d5 ! 424: movw d3,d2 | It's really a 16 bit shift! ! 425: movw d4,d3 ! 426: movw d5,d4 ! 427: clrw d5 ! 428: ! 429: movl a1,d6 | d6 gets s3s4. ! 430: swap d6 | d6 gets s4s3. ! 431: movw d6,d7 | d7 gets s3. ! 432: beqs 6$ ! 433: mulu d0,d6 | d6 gets r1*s3. ! 434: mulu d1,d7 | d7 gets r3*s3. ! 435: addl d7,d4 ! 436: addxl d6,d3 ! 437: clrl d7 ! 438: addxl d7,d2 ! 439: ! 440: 6$: ! 441: movl a0,d6 | d6 gets s1s2. ! 442: swap d6 | d6 gets s2s1. ! 443: movw d6,d7 | d7 gets s1. ! 444: mulu d0,d6 | d6 gets r1*s1. ! 445: mulu d1,d7 | d7 gets r3*s1. ! 446: addl d7,d3 ! 447: addxl d6,d2 ! 448: ! 449: swap d0 | d0 gets r1r2. ! 450: swap d1 | d1 gets r3r4. ! 451: movw a0,d6 | d6 gets s2. ! 452: beqs 8$ ! 453: movw d6,d7 | d7 gets s2. ! 454: mulu d0,d6 | d6 gets r2*s2. ! 455: mulu d1,d7 | d7 gets r4*s2. ! 456: addl d7,d4 ! 457: addxl d6,d3 ! 458: clrl d7 ! 459: addxl d7,d2 ! 460: 8$: ! 461: movw a1,d6 | d6 gets s4. ! 462: beqs 9$ ! 463: movw d6,d7 | d7 gets s4. ! 464: mulu d0,d6 | d6 gets r2*s4. ! 465: mulu d1,d7 | d7 gets r4*s4. ! 466: addl d7,d5 ! 467: addxl d6,d4 ! 468: clrl d7 ! 469: addxl d7,d3 ! 470: addxl d7,d2 ! 471: 9$: ! 472: orl d5,d4 ! 473: beqs 10$ | Branch if no sticky bits. ! 474: bset #0,d3 | Set that sticky! ! 475: 10$: ! 476: movl a2,d6 | Restore sign/exponent of result. ! 477: ! 478: subw #1022,d6 | toss xtra bias ! 479: jbsr d_nrcp | norm, rnd, ck, pack ! 480: msign: roxll #1,d6 ! 481: roxrl #1,d2 | append sign ! 482: mexit: | answer is now in d2/d3: put in d0/d1 ! 483: movl d2,d0 ! 484: movl d3,d1 ! 485: | restore registers and split ! 486: moveml sp@+,#RESTMASK ! 487: | slide down return address to pop off junk ! 488: RET ! 489: ! 490: s3zero: ! 491: clrl d5 | We do need to clear d5 sometime. ! 492: jra phase3 ! 493: ! 494: | EXCEPTION CASES ! 495: ! 496: ovfl: movl d2,d5 | larger mantissa, if it is nan, use it ! 497: orw d7,d5 | smaller exponent ! 498: orl d0,d5 ! 499: orl d1,d5 | smaller value ! 500: beqs m_gennan| inf * 0 ! 501: | else nan*x or inf*nonzero: ! 502: movw #0x7ff,d6 ! 503: jbsr d_usel ! 504: bras msign ! 505: ! 506: ufl: movl d0,d7 | mantissa of smaller ! 507: orl d1,d7 ! 508: beqs signed0 | 0*number ! 509: normu: subqw #1,d6 ! 510: lsll #1,d1 | adjust denormalized number ! 511: roxll #1,d0 ! 512: bpls normu ! 513: addqw #1,d6 ! 514: jra back ! 515: | (if both are denormalized, answer will be zero anyway) ! 516: m_gennan:movl #0x7ff00002,d2 ! 517: bras mexit ! 518: signed0:clrl d2 ! 519: clrl d3 ! 520: bras msign ! 521: ! 522: SAVEMASK = 0x3f00 ! 523: RESTMASK = 0x00fc ! 524: ! 525: EXP = d2 ! 526: TYPE = d3 ! 527: /* type values: */ ! 528: ZERO = 1 | wonderful ! 529: GU = 2 ! 530: PLAIN = 3 ! 531: INF = 4 ! 532: NAN = 5 ! 533: ! 534: RTENTRY(Fscaleid) ! 535: moveml #SAVEMASK,sp@- | state save ! 536: jbsr d_unpk ! 537: cmpb #PLAIN,TYPE | is it a funny number? ! 538: bgts gohome | yes -- return argument ! 539: cmpb #ZERO,TYPE ! 540: beqs gohome | is zero -- return arg ! 541: | normal path through here ! 542: movw EXP,d7 ! 543: extl d7 | d7 gets long exponent. ! 544: addl a0@,d7 | d7 gets modified exponent. ! 545: bvcs nooflo | Branch if no overflow. ! 546: bmis posov | Branch if positive overflow to -. ! 547: | Branch if negative overflow to +. ! 548: negov: ! 549: movw #-2000,EXP ! 550: bras gohome ! 551: posov: ! 552: movw #2000,EXP ! 553: bras gohome ! 554: nooflo: ! 555: cmpl #2000,d7 ! 556: bges posov ! 557: cmpl #-2000,d7 ! 558: bles negov ! 559: movw d7,EXP | Final OK exponent. ! 560: gohome: ! 561: jbsr d_pack ! 562: gone: moveml sp@+,#RESTMASK ! 563: RET
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.