researchv9/libc/sun/Fmuld.s - annotate

Return to Fmuld.s CVS log
Up to [Research Unix] / researchv9 / libc / sun
Annotation of researchv9/libc/sun/Fmuld.s, revision 1.1.1.1

1.1       root        1:        .data
                      2:        .asciz  "@(#)Fmuld.s 1.1 86/02/03 Copyr 1985 Sun Micro"
                      3:        .even
                      4:        .text
                      5: 
                      6: |      Copyright (c) 1985 by Sun Microsystems, Inc.
                      7: 
                      8: #include "fpcrtdefs.h"
                      9: 
                     10: /*
                     11:  *     double-precision floating math run-time support
                     12:  *
                     13:  *     copyright 1981, 1982 Richard E. James III
                     14:  *     translated to SUN idiom 10/11 March 1983 rt
                     15:  *     parameter passing re-done 22 July 1983 rt
                     16:  */
                     17: 
                     18: ARG2PTR = a0
                     19: 
                     20: /*
                     21:  *     ieee double floating divide
                     22:  *     copyright 1981, Richard E. James III
                     23:  *     translated to SUN idiom 10 March 1983 rt
                     24:  */
                     25: 
                     26: /*
                     27:  *     entry conditions:
                     28:  *         first argument in d0/d1
                     29:  *         second argument on stack
                     30:  *     exit conditions:
                     31:  *         result (8 bytes) in d0/d1
                     32:  *
                     33:  *     register conventions:
                     34:  *         d0/d1       divisor
                     35:  *         d2/d3       dividend
                     36:  *         d4          count
                     37:  *         d5          sign an exponent
                     38:  *         d6          result exponent
                     39:  *         d6/d7       quotient
                     40:  */
                     41:        SAVEMASK = 0x3f00       | registers d2-d7
                     42:        RESTMASK = 0xfc
                     43:        NSAVED   = 6*4          | 6 registers * sizeof(register)
                     44: 
                     45: RTENTRY(Fdivd)
                     46: |      save registers and load operands into registers
                     47:        moveml  #0x3f00,sp@-    | registers d2-d7
                     48:        movl    d0,d2
                     49:        movl    d1,d3
                     50:        movl    ARG2PTR@+,d0
                     51:        movl    ARG2PTR@ ,d1
                     52:        | save sign of result
                     53:        movl    d0,d5
                     54:        eorl    d2,d5           | sign of result
                     55:        clrl    d4              | flag for divide
                     56:        bsrs    extrem
                     57:        | compute resulting exponent
                     58:        movw    d6,d5
                     59:        subw    d7,d5
                     60:        | do top 30-31 bits of divide (d_rcp will post normalize)
                     61:        movw    #30,d4  | count 30..0
                     62:        bsr     shsub   | shift and subtract loop
                     63:        movl    d7,d6   | top of answer
                     64:        | do next 22 bits of divide
                     65:        movw    #23,d4  | count 23..0 (total = 54-55 bits)
                     66:        bsr     shsub
                     67:        | put together answer
                     68:        lsll    #8,d7   | line up bottom
                     69:        orl     d3,d2
                     70:        beqs    1$
                     71:        bset    #1,d7   | sticky bit on if remainder <> 0
                     72: 1$:    lsll    #1,d7
                     73:        roxll   #1,d6
                     74:        | normalize (once), round, check result exp, pack
                     75:        movl    d6,d2   | upper
                     76:        movl    d7,d3   | lower
                     77:        movw    d5,d6   | exponent
                     78:        addw    #1023,d6        | re-bias
                     79:        jbsr    d_nrcp  | norm once, rnd, ck, pack
                     80:        bra     dmsign
                     81: 
                     82: 
                     83: /*
                     84:  * round, check for over/underflow, and pack in the exponent.
                     85:  * d_nrcp does one normalize and then calls d_rcp.
                     86:  * d_rcp rounds the double value and packs the exponent in,
                     87:  * catching infinity, zero, and denormalized numbers.
                     88:  * d_usel puts together the larger argument.
                     89:  * 
                     90:  * input:
                     91:  *      d2/d3   mantissa (- if norm)
                     92:  *      d6      biased exponent
                     93:  *      (need sign, sticky)
                     94:  * output:
                     95:  *      d2/d3   most of number, no sign or hidden bit,
                     96:  *              waiting to shift sign in.
                     97:  * other:
                     98:  *      d4      lost
                     99:  *      d5      unchanged
                    100:  */
                    101:  
                    102: d_nrcp:
                    103:         tstl    d2
                    104:         jmi     d_rcp   | already normalized
                    105:         subqw   #1,d6
                    106:         lsll    #1,d3   | do extra normalize (for mul/div)
                    107:         roxll   #1,d2
                    108:        jra     d_rcp
                    109: 
                    110: /*
                    111:  * subroutine for unpacking one operand, and normalizing a denormalized number
                    112:  * input:
                    113:  *     d0/d1   number
                    114:  * output:
                    115:  *     d0/d1   mantissa
                    116:  *     d7.w    exponent
                    117:  *     z       on iff mantissa is zero_
                    118:  *
                    119:  * unchanged:
                    120:  *     d4      bottom = 0xf77
                    121:  */
                    122: 
                    123: unp:   movl    d0,d7   | start getting exp
                    124:        andl    #0xfffff,d0     | clear out sign and exp
                    125:        swap    d7
                    126:        lsrw    #(16-1-11),d7
                    127:        andw    d4,d7   | expondnt
                    128:        bnes    3$      | normal number
                    129:        | denormalized number or zero:
                    130:        tstl    d0      | upper
                    131:        bnes    1$
                    132:        tstl    d1      | lower
                    133:        beqs    3$      |zero
                    134: 1$:    addqw   #1,d7
                    135: 2$:    subql   #1,d7
                    136:        lsll    #1,d1
                    137:        roxll   #1,d0   | normalize
                    138:        btst    #20,d0
                    139:        beqs    2$      | loop until normalized
                    140: 3$:    rts
                    141: 
                    142: 
                    143: /*
                    144:  * subroutine for extracting and taking care of 0/GU/INF/NAN.
                    145:  * input:
                    146:  *     d0/d1, d2/d3
                    147:  *     d4      + for div; - for mod
                    148:  *
                    149:  * output:
                    150:  *     d0/d1 (bottom) and d2/d3 (top) converted to
                    151:  *             000XXXXX/XXXXXXXX
                    152:  *     d6      exponent of top
                    153:  *     d7      exponent of bottom
                    154:  *
                    155:  * unchanged:
                    156:  *     d5      sign
                    157:  */
                    158: 
                    159: extrem:
                    160:        movw    #0x7ff,d4       | mask, sign.l untouched
                    161:        exg     d2,d0
                    162:        exg     d3,d1
                    163:        bsrs    unp     | unpack top
                    164:        exg     d0,d2
                    165:        exg     d1,d3
                    166:        exg     d7,d6
                    167:        beqs    topzero | top is zero
                    168:        cmpw    d4,d6
                    169:        beqs    topbig  | top is inf or nan
                    170:        bset    #20,d2  | set hidden bit
                    171: topzero:bsrs   unp     | unpack bottom
                    172:        beqs    botzero | bottom is 0
                    173:        cmpw    d4,d7
                    174:        beqs    botbig  | bottom is inf or nan
                    175:        bset    #20,d0
                    176:        lsll    #1,d1   | left shift bottom
                    177:        roxll   #1,d0
                    178:        rts
                    179: 
                    180: /*
                    181:  * EXCEPTION HANDLING
                    182:  */
                    183: 
                    184: topbig:        | top is inf or nan
                    185:        bsrs    unp     | unpack bottom
                    186:        tstl    d4      
                    187:        bmis    isnan   | inf or nan / ...
                    188:        cmpw    d6,d7
                    189:        beqs    isnan   | both inf/nan
                    190:        tstl    d2      | top
                    191:        beqs    geninf  | inf / ... = inf
                    192:        bras    isnan   | nan / ... = nan
                    193: 
                    194: botzero:| bottom is 0
                    195:        tstl    d4
                    196:        bmis    gennan  | dmod(... 0) = nan
                    197:        orl     d2,d3   | top
                    198:        beqs    gennan  | 0/0 = nan
                    199:                        | nonzero/0 = inf
                    200:        | generate infinity for answer
                    201: geninf:        movl    #0x7ff00000*2,d2        | infinity
                    202:        bras    clrbot
                    203: 
                    204: botbig:        tstl    d0      | bottom = nan, result = nan
                    205:        bnes    isnan
                    206:        tstl    d4
                    207:        bpls    genzero | ... / inf = 0
                    208:        addqw   #4,sp
                    209:        addw    #11,d6          | append sign
                    210:         jbsr    d_norm          | normalize
                    211:         jbsr    d_rcp           | adjust exp, ck extremes, pack
                    212: dmsign:        roxll   #1,d5           | sign -> x
                    213:        roxrl   #1, d2          | rotate in sign
                    214:        | answer is now in:
                    215:        |       d2      most significant 32 bits
                    216:        |       d3      least significant 32 bits
                    217: dmexit:        movl    d2,d0
                    218:        movl    d3,d1
                    219:        | restore registers and split
                    220:        moveml  sp@+,#RESTMASK
                    221:        RET
                    222: 
                    223: 
                    224:        | invalid operand/operation
                    225: isnan: cmpw    d7,d6
                    226:        bnes    1$      | exponents equal
                    227:        cmpl    d0, d2
                    228: 1$:    bges    2$
                    229:        movw    d7,d6
                    230:        movl    d0,d2
                    231: 2$:    swap    d2
                    232:        lslw    #(16-1-11),d6
                    233:        orw     d6,d2   | put back together
                    234:        swap    d2
                    235:        lsll    #1,d2
                    236:        cmpl    #0x7ff00000*2, d2
                    237:        bhis    gotnan  | use larger nan
                    238: 
                    239: gennan:        movl    #0x7ff00004*2,d2
                    240:        tstl    d4
                    241:        bpls    gotnan  | nan 4 for div
                    242:        addql   #2,d2   | nan 5 for mod
                    243: gotnan:        clrl    d5
                    244:        bras    clrbot
                    245: 
                    246: genzero:clrl   d2
                    247: clrbot:        clrl    d3
                    248: sign:  addqw   #4,sp
                    249:        bra     dmsign
                    250: 
                    251: /*
                    252:  *  the shsub subroutine does a shift-subtract loop
                    253:  * that is the heart of divide and mod.
                    254:  * the algorithm is a simple shift and subtract loop,
                    255:  * but it adds when it overshoots.
                    256:  * why not use the divs/divu instructions? That approach is slower!
                    257:  *
                    258:  * registers:
                    259:  *     d2/d3   current dividend (updated)
                    260:  *     d0/d1   divisor (unchanged)
                    261:  *     d4.w    (inpout) number of interations -1, and bit number
                    262:  *     d5/d6   -untouched-
                    263:  *     d7      quotient being devloped (ignored by mod)
                    264:  */
                    265: 
                    266: 
                    267: shsub:
                    268:        clrl    d7      | quotient
                    269:        | shift once, see if subtract needed
                    270: 1$:    addl    d3,d3
                    271:        addxl   d2,d2   |(64-bit left shift)
                    272:        cmpl    d0,d2
                    273:        dbge    d4,1$   | loop while divident is small
                    274:        | tally quotient and subtract
                    275:        bset    d4,d7   | quotient bit
                    276:        subl    d1,d3
                    277:        subxl   d0,d2   | 64-bit subtract
                    278:        dbmi    d4,1$   | loop (d4) times
                    279:        | now one of three things has happeded:
                    280:        | 1. count exhausted and extra subtract done (first DB hit count)
                    281:        | 2. count exhausted in second DB
                    282:        | 3. overshot because compare didn't check lower parts
                    283:        bpls    2$      | case 2
                    284:        addl    d1,d3   | take care of overshoot
                    285:        addxl   d0,d2
                    286:        bclr    d4,d7
                    287:        tstw    d4
                    288:        dblt    d4,1$   | case 3
                    289:                        | case 1
                    290: 2$:    rts
                    291: 
                    292: /*
                    293:  *     ieee double floating multiply
                    294:  *     copyright 1981, Richard E. James III
                    295:  *     translated to SUN idiom 10 March 1983 rt
                    296:  */
                    297: 
                    298: /*
                    299:        Revised to do correct IEEE rounding by dgh 24 Aug 84
                    300:  *     Conventions in the main multiplication section are as follows:
                    301:  *     r is the argument passed in the registers d0/d1
                    302:  *     s is the argument passed on the stack, saved in a0/a1
                    303:  *             while multiplying
                    304:  *     r1,r2,r3,r4 are the 16 bit components of r in descending order
                    305:  *      s1,s2,s3,s4 are the 16 bit components of s
                    306:  *     r is kept in d0 and d1, sometimes with words swapped
                    307:  *     s is kept in a0 and a1 unchanged
                    308:  *      the product is kept in d2/d3/d4/d5
                    309:  *     d6 contains a current partial product
                    310:  *     d7 contains a current partial product
                    311:  *     d7 contains 0 when needed for addxl
                    312:  *     a2 saves the sign and exponent of the result
                    313:  *
                    314:  *     At the end, d4 and d5 if nonzero are jammed into lsb of d3.
                    315:  */
                    316: /*
                    317:  *     entry conditions:
                    318:  *         first argument in d0/d1
                    319:  *         second argument on stack
                    320:  *     exit conditions:
                    321:  *         result (8 bytes) in d0/d1
                    322:  *
                    323:  *     register conventions:
                    324:  *         d0-d3       operands or pieces of result
                    325:  *         d5          exponent of larger
                    326:  *         d5          temp for multiplying
                    327:  *         d6          sign and exponent
                    328:  *         d7          exponent of smaller
                    329:  *         d7          zero
                    330:  */
                    331:        SAVEMASK = 0x3fe0       | registers d2-d7,a0-a2
                    332:        RESTMASK = 0x7fc
                    333:        NSAVED   = 6*4          | 6 registers * sizeof(register)
                    334: 
                    335:        SAVE03   = 0xf000       | registers d0-d3
                    336:        FETCH03  = 0x000f
                    337: 
                    338: RTENTRY(Fsqrd)
                    339:        moveml  #SAVEMASK,sp@-  | registers d2-d7
                    340:        movel   d0,d2
                    341:        movel   d1,d3
                    342:        bras    Fmuld2
                    343: RTENTRY(Fmuld)
                    344: |      save registers and load operands into registers
                    345:        moveml  #SAVEMASK,sp@-  | registers d2-d7
                    346:        movl    ARG2PTR@+,d2
                    347:        movl    ARG2PTR@ ,d3
                    348: Fmuld2:
                    349:                                | save sign of result
                    350:        movl    d0,d5
                    351:        eorl    d2,d5           | sign of result
                    352:        asll    #1,d0           | toss sign
                    353:        asll    #1,d2           | EEmmmm0
                    354:        cmpl    d2,d0
                    355:        | order operands (exponents at least)
                    356:        blss    eswap
                    357:        exg     d0,d2           | d2/d3 = larger
                    358:        exg     d1,d3
                    359:        | extract and check exponents
                    360: eswap: jbsr    d_exte
                    361:        movw    d6,d5           | larger exp
                    362:        movl    d5,d6
                    363:        addw    d7,d6           | result exp (and sign)
                    364:        cmpw    #0x7ff,d5       | check larger
                    365:        jeq     ovfl            | inf or nan
                    366:        tstw    d7
                    367:        jeq     ufl             | 0 or  gradual underflow
                    368:        | set hidden bits
                    369:        bset    #31,d0
                    370: back:  bset    #31,d2
                    371: 
                    372: |              Main multiply sequence.
                    373: 
                    374:        movl    d2,a0           | a0 gets s1s2.
                    375:        movl    d3,a1           | a1 gets s3s4.
                    376:        movl    d6,a2           | a2 saves sign and exponent of result.
                    377: 
                    378:        movl    a0,d3           | d3 gets s1s2.
                    379:        swap    d3              | d3 gets s2s1.
                    380:        movw    d3,d4           | d4 gets s1.
                    381:        mulu    d1,d4           | d4 gets r4*s1.
                    382:        mulu    d0,d3           | d3 gets r2*s1.
                    383:        clrw    d2              | d2 gets 0; only gets carries in this phase.
                    384: 
                    385:        movl    a1,d6           | d6 gets s3s4.
                    386:        swap    d6              | d6 gets s4s3.
                    387:        movw    d6,d5           | d5 gets s3.
                    388:        jeq     s3zero          | Branch if s3=0 to avoid following.
                    389:        mulu    d1,d5           | d5 gets r4*s3.
                    390:        movw    d6,d7           | d7 gets s3.
                    391:        mulu    d0,d7           | d7 gets r2*s3.
                    392:        addl    d7,d4
                    393:        clrl    d7              | Make a zero for addx.
                    394:        addxl   d7,d3
                    395:        addxw   d7,d2
                    396: phase3:
                    397:        swap    d0              | d0 gets r2r1.
                    398:        swap    d1              | d1 gets r4r3.
                    399:        movw    a0,d6           | d6 gets s2.
                    400:        beqs    4$              | Skip following if s2=0.
                    401:        movw    d6,d7           | d7 gets s2.
                    402:        mulu    d0,d6           | d6 gets r1*s2.
                    403:        mulu    d1,d7           | d7 gets r3*s2.
                    404:        addl    d7,d4
                    405:        addxl   d6,d3
                    406:        clrw    d7
                    407:        addxw   d7,d2
                    408: 4$:
                    409:        movw    a1,d6           | d6 gets s4.
                    410:        beqs    5$              | Skip if s4=0.
                    411:        movw    d6,d7           | d7 gets s4.
                    412:        mulu    d0,d6           | d6 gets r1*s4.
                    413:        mulu    d1,d7           | d7 gets r3*s4.
                    414:        addl    d7,d5
                    415:        addxl   d6,d4
                    416:        clrl    d7
                    417:        addxl   d7,d3
                    418:        addxw   d7,d2
                    419: 5$:
                    420:        swap    d2              | Exchange order of registers which contain
                    421:        swap    d3              | all the "odd" partial products.
                    422:        swap    d4
                    423:        swap    d5
                    424:        movw    d3,d2           | It's really a 16 bit shift!
                    425:        movw    d4,d3
                    426:        movw    d5,d4
                    427:        clrw    d5
                    428: 
                    429:        movl    a1,d6           | d6 gets s3s4.
                    430:        swap    d6              | d6 gets s4s3.
                    431:        movw    d6,d7           | d7 gets s3.
                    432:        beqs    6$
                    433:        mulu    d0,d6           | d6 gets r1*s3.
                    434:        mulu    d1,d7           | d7 gets r3*s3.
                    435:        addl    d7,d4
                    436:        addxl   d6,d3
                    437:        clrl    d7
                    438:        addxl   d7,d2
                    439: 
                    440: 6$:
                    441:        movl    a0,d6           | d6 gets s1s2.
                    442:        swap    d6              | d6 gets s2s1.
                    443:        movw    d6,d7           | d7 gets s1.
                    444:        mulu    d0,d6           | d6 gets r1*s1.
                    445:        mulu    d1,d7           | d7 gets r3*s1.
                    446:        addl    d7,d3
                    447:        addxl   d6,d2
                    448: 
                    449:        swap    d0              | d0 gets r1r2.
                    450:        swap    d1              | d1 gets r3r4.
                    451:        movw    a0,d6           | d6 gets s2.
                    452:        beqs    8$
                    453:        movw    d6,d7           | d7 gets s2.
                    454:        mulu    d0,d6           | d6 gets r2*s2.
                    455:        mulu    d1,d7           | d7 gets r4*s2.
                    456:        addl    d7,d4
                    457:        addxl   d6,d3
                    458:        clrl    d7
                    459:        addxl   d7,d2
                    460: 8$:
                    461:        movw    a1,d6           | d6 gets s4.
                    462:        beqs    9$
                    463:        movw    d6,d7           | d7 gets s4.
                    464:        mulu    d0,d6           | d6 gets r2*s4.
                    465:        mulu    d1,d7           | d7 gets r4*s4.
                    466:        addl    d7,d5
                    467:        addxl   d6,d4
                    468:        clrl    d7
                    469:        addxl   d7,d3
                    470:        addxl   d7,d2
                    471: 9$:
                    472:        orl     d5,d4
                    473:        beqs    10$             | Branch if no sticky bits.
                    474:        bset    #0,d3           | Set that sticky!
                    475: 10$:
                    476:        movl    a2,d6           | Restore sign/exponent of result.
                    477: 
                    478:        subw    #1022,d6        | toss xtra bias
                    479:        jbsr    d_nrcp          | norm, rnd, ck, pack
                    480: msign: roxll   #1,d6
                    481:        roxrl   #1,d2           | append sign
                    482: mexit: | answer is now in d2/d3: put in d0/d1
                    483:        movl    d2,d0
                    484:        movl    d3,d1
                    485:        | restore registers and split
                    486:        moveml  sp@+,#RESTMASK
                    487:        | slide down return address to pop off junk
                    488:        RET
                    489: 
                    490: s3zero:
                    491:        clrl    d5              | We do need to clear d5 sometime.
                    492:        jra     phase3
                    493: 
                    494: | EXCEPTION CASES
                    495: 
                    496: ovfl:  movl    d2,d5   | larger mantissa, if it is nan, use it
                    497:        orw     d7,d5   | smaller exponent
                    498:        orl     d0,d5
                    499:        orl     d1,d5   | smaller value
                    500:        beqs    m_gennan| inf * 0
                    501:        | else nan*x or inf*nonzero:
                    502:        movw    #0x7ff,d6
                    503:        jbsr    d_usel
                    504:        bras    msign
                    505: 
                    506: ufl:   movl    d0,d7   | mantissa of smaller
                    507:        orl     d1,d7
                    508:        beqs    signed0 | 0*number
                    509: normu: subqw   #1,d6
                    510:        lsll    #1,d1   | adjust denormalized number
                    511:        roxll   #1,d0
                    512:        bpls    normu
                    513:        addqw   #1,d6
                    514:        jra     back
                    515:        | (if both are denormalized, answer will be zero anyway)
                    516: m_gennan:movl  #0x7ff00002,d2
                    517:        bras    mexit
                    518: signed0:clrl   d2
                    519:        clrl    d3
                    520:        bras    msign
                    521: 
                    522:        SAVEMASK = 0x3f00
                    523:        RESTMASK = 0x00fc
                    524: 
                    525:        EXP     = d2
                    526:        TYPE    = d3
                    527:        /* type values: */
                    528:            ZERO  = 1 | wonderful
                    529:            GU    = 2
                    530:            PLAIN = 3
                    531:            INF   = 4
                    532:            NAN   = 5
                    533: 
                    534: RTENTRY(Fscaleid)
                    535:        moveml  #SAVEMASK,sp@-  | state save
                    536:        jbsr    d_unpk
                    537:        cmpb    #PLAIN,TYPE     | is it a funny number?
                    538:        bgts    gohome          | yes -- return argument
                    539:        cmpb    #ZERO,TYPE
                    540:        beqs    gohome          | is zero -- return arg
                    541:        | normal path through here
                    542:        movw    EXP,d7          
                    543:        extl    d7              | d7 gets long exponent.
                    544:        addl    a0@,d7          | d7 gets modified exponent.
                    545:        bvcs    nooflo          | Branch if no overflow.
                    546:        bmis    posov           | Branch if positive overflow to -.
                    547:                                | Branch if negative overflow to +.
                    548: negov:
                    549:        movw    #-2000,EXP
                    550:        bras    gohome
                    551: posov: 
                    552:        movw    #2000,EXP
                    553:        bras    gohome
                    554: nooflo:
                    555:        cmpl    #2000,d7
                    556:        bges    posov
                    557:        cmpl    #-2000,d7
                    558:        bles    negov
                    559:        movw    d7,EXP          | Final OK exponent.
                    560: gohome:
                    561:        jbsr    d_pack
                    562: gone:  moveml  sp@+,#RESTMASK
                    563:        RET
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.