pgp/src/mc68020.s - annotate

Return to mc68020.s CVS log
Up to [PGP] / pgp / src
Annotation of pgp/src/mc68020.s, revision 1.1

1.1     ! root        1: |      Fast assembly routines for MC68020 (Sun-3)
        !             2: |      Assumptions:
        !             3: |              Arguments start at sp@(0x4)
        !             4: |              Return value is in d0
        !             5: |              d0/d1/a0/a1 are scratch
        !             6: |              P_SMUL needs MULTUNIT set to "unsigned long" in mpilib.c
        !             7: |              P_DMUL replaces mp_smul and mp_dmul in mpilib.c
        !             8: |
        !             9: |      92.9.21 - Tsutomu Shimomura, [email protected]
        !            10: 
        !            11:        .text
        !            12: 
        !            13: |      P_SETP(p) nominally sets the current precision to p longwords.  No-op: p is never read.
        !            14:        .proc
        !            15:        .globl  _P_SETP
        !            16: _P_SETP:                       | the commented-out block below is a disabled call to mcount (presumably a profiling hook - confirm)
        !            17: |      movl    #L2000, a0      |%
        !            18: |      jsr     mcount          |%
        !            19: |      .bss                    |%
        !            20: |      .even                   |%
        !            21: |L2000:        .skip   4               |%
        !            22: |      .text                   |%
        !            23:        rts                     | return at once; the routines below read _global_precision themselves
        !            24: 
        !            25: |      P_ADDC(*a, *b, c) performs a += b + c (carry in, bit 0) over _global_precision longwords.  Carry out is returned in d0.
        !            26:        .proc
        !            27:        .globl  _P_ADDC
        !            28: _P_ADDC:
        !            29: |      movl    #L2001, a0      |%
        !            30: |      jsr     mcount          |%
        !            31: |      .bss                    |%
        !            32: |      .even                   |%
        !            33: |L2001:        .skip   4               |%
        !            34: |      .text                   |%
        !            35:        movl    sp@(0x4), a0    | a0 = a (array that is updated in place)
        !            36:        movl    sp@(0x8), a1    | a1 = b (array that is only read)
        !            37:        movl    sp@(0xc), d0    | d0 = c (carry in)
        !            38:        movl    d2, sp@-        | preserve d2 (not one of the scratch registers)
        !            39: 
        !            40:        movw    _global_precision, d1   | longword count
        !            41:        movw    d1, d2          | save a copy for the pass count
        !            42: 
        !            43:        lslw    #2, d1          | d1 = count * 4 = length in bytes
        !            44:        addw    d1, a0          | advance both pointers by that length;
        !            45:        addw    d1, a1          | the loop walks back down with predecrement
        !            46: 
        !            47:        lsrw    #1, d1          | d1 = count * 2
        !            48:        andw    #0xe, d1        | d1 = 2 * (count mod 8): byte size of the leftover addxl's
        !            49:        negw    d1              | branch offset in d1 (backs up from the dbf)
        !            50: 
        !            51:        lsrw    #3, d2          | 8 longwords/loop; d2 = count / 8 for the dbf
        !            52: 
        !            53:        asrl    #1, d0          | move bit 0 of c into X; d0 is left 0 when c is 0 or 1
        !            54: 
        !            55:        jmp     pc@(0x12,d1:w)  | enter the unrolled loop part-way; 0x12 assumes eight 2-byte addxl's, so keep them unchanged
        !            56: 1:
        !            57:        addxl   a1@-, a0@-      | *--a0 += *--a1 + X, carry out to X (eight per pass)
        !            58:        addxl   a1@-, a0@-
        !            59:        addxl   a1@-, a0@-
        !            60:        addxl   a1@-, a0@-
        !            61:        addxl   a1@-, a0@-
        !            62:        addxl   a1@-, a0@-
        !            63:        addxl   a1@-, a0@-
        !            64:        addxl   a1@-, a0@-
        !            65:        dbf     d2, 1b          | another full pass of 8 until d2 counts down past 0
        !            66: 
        !            67:        roxll   #1, d0          | rotate the final X (carry out) into bit 0 of d0
        !            68: 
        !            69:        movl    sp@+, d2        | restore d2
        !            70:        rts                     | return value is in d0
        !            71: 
        !            72: |      P_SUBB(*a, *b, c) performs a -= b + c (borrow in, bit 0) over _global_precision longwords.  Borrow out is returned in d0.
        !            73:        .proc
        !            74:        .globl  _P_SUBB
        !            75: _P_SUBB:
        !            76: |      movl    #L2002, a0      |%
        !            77: |      jsr     mcount          |%
        !            78: |      .bss                    |%
        !            79: |      .even                   |%
        !            80: |L2002:        .skip   4               |%
        !            81: |      .text                   |%
        !            82:        movl    sp@(0x4), a0    | a0 = a (array that is updated in place)
        !            83:        movl    sp@(0x8), a1    | a1 = b (array that is only read)
        !            84:        movl    sp@(0xc), d0    | d0 = c (borrow in)
        !            85:        movl    d2, sp@-        | preserve d2 (not one of the scratch registers)
        !            86: 
        !            87:        movw    _global_precision, d1   | longword count
        !            88:        movw    d1, d2          | save a copy for the pass count
        !            89: 
        !            90:        lslw    #2, d1          | d1 = count * 4 = length in bytes
        !            91:        addw    d1, a0          | advance both pointers by that length;
        !            92:        addw    d1, a1          | the loop walks back down with predecrement
        !            93: 
        !            94:        lsrw    #1, d1          | d1 = count * 2
        !            95:        andw    #0xe, d1        | d1 = 2 * (count mod 8): byte size of the leftover subxl's
        !            96:        negw    d1              | branch offset in d1 (backs up from the dbf)
        !            97: 
        !            98:        lsrw    #3, d2          | 8 longwords/loop; d2 = count / 8 for the dbf
        !            99: 
        !           100:        asrl    #1, d0          | move bit 0 of c into X; d0 is left 0 when c is 0 or 1
        !           101: 
        !           102:        jmp     pc@(0x12,d1:w)  | enter the unrolled loop part-way; 0x12 assumes eight 2-byte subxl's, so keep them unchanged
        !           103: 1:
        !           104:        subxl   a1@-, a0@-      | *--a0 -= *--a1 + X, borrow out to X (eight per pass)
        !           105:        subxl   a1@-, a0@-
        !           106:        subxl   a1@-, a0@-
        !           107:        subxl   a1@-, a0@-
        !           108:        subxl   a1@-, a0@-
        !           109:        subxl   a1@-, a0@-
        !           110:        subxl   a1@-, a0@-
        !           111:        subxl   a1@-, a0@-
        !           112:        dbf     d2, 1b          | another full pass of 8 until d2 counts down past 0
        !           113: 
        !           114:        roxll   #1, d0          | rotate the final X (borrow out) into bit 0 of d0
        !           115: 
        !           116:        movl    sp@+, d2        | restore d2
        !           117:        rts                     | return value is in d0
        !           118: 
        !           119: |      P_ROTL(*a, c) performs a = (a<<1) | c (lo-bit) over _global_precision longwords.  Hi-bit shifted out is returned in d0.
        !           120:        .proc
        !           121:        .globl  _P_ROTL
        !           122: _P_ROTL:
        !           123: |      movl    #L2003, a0      |%
        !           124: |      jsr     mcount          |%
        !           125: |      .bss                    |%
        !           126: |      .even                   |%
        !           127: |L2003:        .skip   4               |%
        !           128: |      .text                   |%
        !           129:        movl    sp@(0x4), a0    | a0 = a (array that is shifted in place)
        !           130:        movl    sp@(0x8), d0    | d0 = c (bit to shift in)
        !           131:        movl    d2, a1          | preserve d2 in scratch a1 rather than on the stack
        !           132: 
        !           133:        movw    _global_precision, d1   | longword count
        !           134:        movw    d1, d2          | save a copy for the pass count
        !           135: 
        !           136:        lslw    #2, d1          | d1 = count * 4 = length in bytes
        !           137:        addw    d1, a0          | advance the pointer by that length; the loop predecrements
        !           138: 
        !           139:        andw    #0x1c, d1       | d1 = 4 * (count mod 8): byte size of the leftover roxlw pairs
        !           140:        negw    d1              | branch offset in d1 (backs up from the dbf)
        !           141: 
        !           142:        lsrw    #3, d2          | 8 longwords/loop; d2 = count / 8 for the dbf
        !           143: 
        !           144:        asrl    #1, d0          | move bit 0 of c into X; d0 is left 0 when c is 0 or 1
        !           145: 
        !           146:        jmp     pc@(0x22,d1:w)  | enter the unrolled loop part-way; 0x22 assumes sixteen 2-byte roxlw's, so keep them unchanged
        !           147: 1:
        !           148:        roxlw   a0@-            | rotate one 16-bit word left by one bit through X;
        !           149:        roxlw   a0@-            | two roxlw's handle one longword (sixteen per pass)
        !           150:        roxlw   a0@-
        !           151:        roxlw   a0@-
        !           152:        roxlw   a0@-
        !           153:        roxlw   a0@-
        !           154:        roxlw   a0@-
        !           155:        roxlw   a0@-
        !           156:        roxlw   a0@-
        !           157:        roxlw   a0@-
        !           158:        roxlw   a0@-
        !           159:        roxlw   a0@-
        !           160:        roxlw   a0@-
        !           161:        roxlw   a0@-
        !           162:        roxlw   a0@-
        !           163:        roxlw   a0@-
        !           164:        dbf     d2, 1b          | another full pass of 8 longwords until d2 counts down past 0
        !           165: 
        !           166:        roxll   #1, d0          | rotate the final X (bit shifted out) into bit 0 of d0
        !           167: 
        !           168:        movl    a1, d2          | restore d2 from a1
        !           169:        rts                     | return value is in d0
        !           170: 
        !           171: |      P_SMUL(*a, *b, x) performs a += b * x.  Pointers are to the LSB.  Needs _global_precision >= 2 (the check for < 2 below is commented out).
        !           172:        .proc
        !           173:        .globl  _P_SMUL
        !           174: _P_SMUL:
        !           175: |      movl    #L2004, a0      |%
        !           176: |      jsr     mcount          |%
        !           177: |      .bss                    |%
        !           178: |      .even                   |%
        !           179: |L2004:        .skip   4               |%
        !           180: |      .text                   |%
        !           181:        movl    sp@(0x4), a0    | a0 = a (at its least significant longword)
        !           182:        movl    sp@(0x8), a1    | a1 = b (at its least significant longword)
        !           183:        movl    sp@(0xc), d1    | d1 = x (single-longword multiplier)
        !           184:        tstl    d1              | horrible kludge to speed multiply by 0
        !           185:        beq     3f              | x == 0: return before saving anything, a is left untouched
        !           186:        moveml  #0x3c00, sp@-   | d2/d3/d4/d5; Sun's optimizer is really *DUMB*
        !           187:        movw    _global_precision, d5   | longword count; 0 will fail
        !           188: 
        !           189:        subqw   #2, d5          | first longword not handled in loop; dbf then runs count - 1 passes
        !           190:        clrl    d4              | d4 = 0 so that addxl d4 adds only the X bit
        !           191: 
        !           192:        movl    a1@, d2         | least significant longword of b
        !           193:        mulul   d1, d3:d2       | d3:d2 = 64-bit product; d3 (high half) is carry
        !           194:        addl    d2, a0@         | accumulate; carry out goes to X
        !           195: 
        !           196: |      tstw    d5              | This code needed if global_precision < 2
        !           197: |      blt     2f              | only one longword?
        !           198: 1:
        !           199:        movl    a0@-, d0        | next more significant longword of a
        !           200:        addxl   d3, d0          | accumulate carry and X-bit
        !           201:        movl    a1@-, d2        | next more significant longword of b
        !           202:        mulul   d1, d3:d2       | d3 is carry
        !           203:        addxl   d4, d3          | add X-bit to carry
        !           204:        addl    d2, d0          | accumulate
        !           205:        movl    d0, a0@         | write the sum back to a
        !           206:        dbf     d5, 1b          | loop until d5 counts down past 0
        !           207: 2:
        !           208:        addxl   d4, d3          | add X-bit to carry
        !           209:        movl    d3, a0@-        | store (not add) the final carry one longword further up in a
        !           210: 
        !           211:        moveml  sp@+, #0x3c     | d2/d3/d4/d5; Sun's optimizer is really *DUMB*
        !           212: 3:
        !           213:        rts
        !           214: 
        !           215: |      P_DMUL(*a, *b, *c) performs a = b * c.  b and c are _global_precision longwords; a is written as 2 * _global_precision longwords.
        !           216:        .proc
        !           217:        .globl  _P_DMUL
        !           218: _P_DMUL:
        !           219: |      movl    #L2005, a0      |%
        !           220: |      jsr     mcount          |%
        !           221: |      .bss                    |%
        !           222: |      .even                   |%
        !           223: |L2005:        .skip   4               |%
        !           224: |      .text                   |%
        !           225:        moveml  #0x3f38, sp@-   | d2-d7/a2-a4; Sun's optimizer is really *DUMB*
        !           226:        movl    sp@(0x28), a0   | a0 = a (product); arguments sit 0x24 higher after pushing 9 registers
        !           227:        movl    sp@(0x2c), a1   | a1 = b (multiplicand)
        !           228:        movl    sp@(0x30), a2   | a2 = c (multiplier)
        !           229:        movw    _global_precision, d0   | longword count
        !           230:        subqw   #2, d0  | global_precision - 2
        !           231: 
        !           232:        movl    a0, a4  | product, walked upward from its lowest address while clearing
        !           233: 
        !           234:        movw    d0, d7  | count for multiplicand
        !           235:        movl    a1, a3  | multiplicand
        !           236:        bra     2f      | test before the first clear
        !           237: 1:
        !           238:        clrl    a4@+    | clear one product longword for each zero longword skipped
        !           239: 2:
        !           240:        tstl    a3@+    | next multiplicand longword, lowest address first
        !           241:        dbne    d7, 1b  | d7 contains effective size of the multiplicand-2
        !           242: |      NOTE(review): if the count runs out before a nonzero longword is seen, d7 is left at -1 and the dbf loops on d5 below look like they would wrap - confirm callers never pass such operands
        !           243:        movl    d0, d6  | count for multiplier (only the low word is used by dbne/dbf)
        !           244:        movl    a2, a3  | multiplier
        !           245:        bra     2f      | test before the first clear
        !           246: 1:
        !           247:        clrl    a4@+    | continue clearing product longwords where the first scan stopped
        !           248: 2:
        !           249:        tstl    a3@+    | next multiplier longword, lowest address first
        !           250:        dbne    d6, 1b  | d6 contains effective size of the multiplier-2
        !           251: |      NOTE(review): same concern as for d7 - d6 left at -1 would make the dbf d6 at the end wrap; confirm
        !           252:        addqw   #1, d0  | global_precision - 1
        !           253:        lslw    #2, d0  | byte offset of the last longword
        !           254:        addw    d0, a1  | pointer to LSB of the multiplicand
        !           255:        addw    d0, a2  | pointer to LSB of the multiplier
        !           256:        addw    d0, a0  | the offset is added twice, plus 4 below:
        !           257:        addw    d0, a0  | a0 = a + 8*(count-1) + 4, the last of 2*count longwords
        !           258:        addql   #4, a0  | pointer to LSB of product - KLUDGE!
        !           259: 
        !           260: |      First partial product not handled in loop
        !           261: |      Assumes that the X-bit is clear from the above contortions.
        !           262:        clrl    d4      | d4 = 0 so that addxl d4 adds only the X bit
        !           263: 
        !           264:        movl    a0, a3  | product
        !           265:        movl    a1, a4  | multiplicand
        !           266:        movl    a2@, d1 | one longword of the multiplier
        !           267:        movw    d7, d5  | loop count
        !           268: 
        !           269:        movl    a4@, d2 | least significant multiplicand longword
        !           270:        mulul   d1, d3:d2       | d3 is carry
        !           271:        movl    d2, a3@         | store product
        !           272: 1:
        !           273:        movl    a4@-, d2        | next more significant multiplicand longword
        !           274:        mulul   d1, d0:d2       | d0:d2 = 64-bit product (d0 is the high half)
        !           275:        addxl   d3, d2          | add the previous high half and X
        !           276:        movl    d0, d3          | new high half becomes the carry
        !           277:        movl    d2, a3@-        | store (nothing to accumulate on the first partial product)
        !           278:        dbf     d5, 1b          | loop until d5 counts down past 0
        !           279: 
        !           280:        addxl   d4, d3          | add X-bit to carry
        !           281:        movl    d3, a3@-        | store the carry as the top longword of this partial product
        !           282: 
        !           283: | The other partial products
        !           284: 
        !           285: 2:
        !           286:        movl    a1, a4          | multiplicand
        !           287:        movl    a2@-, d1        | another longword of the multiplier
        !           288:        movw    d7, d5          | loop count
        !           289: 
        !           290:        movl    a4@, d2         | least significant multiplicand longword
        !           291:        mulul   d1, d3:d2       | d3 is carry
        !           292:        addl    d2, a0@-        | accumulate, one longword further up than the previous partial product
        !           293: 
        !           294:        movl    a0, a3          | product
        !           295: 
        !           296: 1:
        !           297:        movl    a3@-, d0        | next more significant product longword
        !           298:        addxl   d3, d0          | accumulate carry and X-bit
        !           299:        movl    a4@-, d2        | next more significant multiplicand longword
        !           300:        mulul   d1, d3:d2       | d3 is carry
        !           301:        addxl   d4, d3          | add X-bit to carry
        !           302:        addl    d2, d0          | accumulate
        !           303:        movl    d0, a3@         | write the sum back to the product
        !           304:        dbf     d5, 1b          | loop until d5 counts down past 0
        !           305: 
        !           306:        addxl   d4, d3          | add X-bit to carry
        !           307:        movl    d3, a3@-        | store (not add) the carry one longword above this partial product
        !           308: 
        !           309:        dbf     d6, 2b          | next multiplier longword until d6 counts down past 0
        !           310: 
        !           311:        moveml  sp@+, #0x1cfc   | d2-d7/a2-a4; Sun's optimizer is really *DUMB*
        !           312:        rts
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.