pgp/src/mc68020.s - annotate

Return to mc68020.s CVS log
Up to [PGP] / pgp / src
Annotation of pgp/src/mc68020.s, revision 1.1.1.3

1.1.1.3 ! root        1: |      Fast assembly routines for MC68020 (Sun-3)
        !             2: |      Assumptions:
        !             3: |              Arguments start at sp@(0x4)
        !             4: |              Return value is in d0
        !             5: |              d0/d1/a0/a1 are scratch
        !             6: |              P_SMUL needs MULTUNIT set to "unsigned long" in mpilib.c
        !             7: |              P_DMUL replaces mp_smul and mp_dmul in mpilib.c
        !             8: |
        !             9: |      92.9.21 - Tsutomu Shimomura, [email protected]
        !            10: |       93.5.14 - Bug in P_DMUL fixed -- now works with small bignums
        !            11: 
        !            12:        .text
        !            13: 
        !            14: |      P_SETP(p) sets the current precision to be p longwords.  No-op: p is never read and the routine returns immediately.
        !            15:        .proc
        !            16:        .globl  _P_SETP
        !            17: _P_SETP:
        !            18: |      movl    #L2000, a0      |% (these six |% lines are a disabled call to mcount)
        !            19: |      jsr     mcount          |%
        !            20: |      .bss                    |%
        !            21: |      .even                   |%
        !            22: |L2000:        .skip   4               |%
        !            23: |      .text                   |%
        !            24:        rts                     | nothing to do: no register or memory is modified
        !            25: 
        !            26: |      P_ADDC(*a, *b, c) performs a += b + c (carry) over _global_precision longwords, working from the highest address down.  Carry is returned in d0.
        !            27:        .proc
        !            28:        .globl  _P_ADDC
        !            29: _P_ADDC:
        !            30: |      movl    #L2001, a0      |% (these six |% lines are a disabled call to mcount)
        !            31: |      jsr     mcount          |%
        !            32: |      .bss                    |%
        !            33: |      .even                   |%
        !            34: |L2001:        .skip   4               |%
        !            35: |      .text                   |%
        !            36:        movl    sp@(0x4), a0    | claim arguments: a0 = a
        !            37:        movl    sp@(0x8), a1    | a1 = b
        !            38:        movl    sp@(0xc), d0    | d0 = c (carry in)
        !            39:        movl    d2, sp@-        | preserve d2 (arguments were fetched before this push)
        !            40: 
        !            41:        movw    _global_precision, d1   | longword count
        !            42:        movw    d1, d2          | save a copy
        !            43: 
        !            44:        lslw    #2, d1          | d1 = count * 4 = length in bytes
        !            45:        addw    d1, a0          | adjust array pointers: just past the last longword,
        !            46:        addw    d1, a1          | because the loop below uses predecrement addressing
        !            47: 
        !            48:        lsrw    #1, d1          | compute initial branch offset: count * 2
        !            49:        andw    #0xe, d1        | 2 * (count mod 8)
        !            50:        negw    d1              | branch offset in d1
        !            51: 
        !            52:        lsrw    #3, d2          | 8 longwords/loop; count in d2
        !            53: 
        !            54:        asrl    #1, d0          | set X if necessary (bit 0 of c is shifted out into X)
        !            55: 
        !            56:        jmp     pc@(0x12,d1:w)  | enter the unrolled run part-way; the offsets assume 2 bytes per addxl, so 0x12 presumably lands on the dbf when d1 = 0 -- verify against a listing
        !            57: 1:
        !            58:        addxl   a1@-, a0@-      | next lower longword of a += matching longword of b + X
        !            59:        addxl   a1@-, a0@-
        !            60:        addxl   a1@-, a0@-
        !            61:        addxl   a1@-, a0@-
        !            62:        addxl   a1@-, a0@-
        !            63:        addxl   a1@-, a0@-
        !            64:        addxl   a1@-, a0@-
        !            65:        addxl   a1@-, a0@-
        !            66:        dbf     d2, 1b          | repeat the full run of 8 while d2 counts down to -1
        !            67: 
        !            68:        roxll   #1, d0          | rotate X (carry out) into bit 0 of the return value
        !            69: 
        !            70:        movl    sp@+, d2        | restore d2
        !            71:        rts
        !            72: 
        !            73: |      P_SUBB(*a, *b, c) performs a -= b + c (borrow) over _global_precision longwords, working from the highest address down.  Borrow is returned in d0.
        !            74:        .proc
        !            75:        .globl  _P_SUBB
        !            76: _P_SUBB:
        !            77: |      movl    #L2002, a0      |% (these six |% lines are a disabled call to mcount)
        !            78: |      jsr     mcount          |%
        !            79: |      .bss                    |%
        !            80: |      .even                   |%
        !            81: |L2002:        .skip   4               |%
        !            82: |      .text                   |%
        !            83:        movl    sp@(0x4), a0    | claim arguments: a0 = a
        !            84:        movl    sp@(0x8), a1    | a1 = b
        !            85:        movl    sp@(0xc), d0    | d0 = c (borrow in)
        !            86:        movl    d2, sp@-        | preserve d2 (arguments were fetched before this push)
        !            87: 
        !            88:        movw    _global_precision, d1   | longword count
        !            89:        movw    d1, d2          | save a copy
        !            90: 
        !            91:        lslw    #2, d1          | d1 = count * 4 = length in bytes
        !            92:        addw    d1, a0          | adjust array pointers: just past the last longword,
        !            93:        addw    d1, a1          | because the loop below uses predecrement addressing
        !            94: 
        !            95:        lsrw    #1, d1          | compute initial branch offset: count * 2
        !            96:        andw    #0xe, d1        | 2 * (count mod 8)
        !            97:        negw    d1              | branch offset in d1
        !            98: 
        !            99:        lsrw    #3, d2          | 8 longwords/loop; count in d2
        !           100: 
        !           101:        asrl    #1, d0          | set X if necessary (bit 0 of c is shifted out into X)
        !           102: 
        !           103:        jmp     pc@(0x12,d1:w)  | enter the unrolled run part-way; the offsets assume 2 bytes per subxl, so 0x12 presumably lands on the dbf when d1 = 0 -- verify against a listing
        !           104: 1:
        !           105:        subxl   a1@-, a0@-      | next lower longword of a -= matching longword of b + X
        !           106:        subxl   a1@-, a0@-
        !           107:        subxl   a1@-, a0@-
        !           108:        subxl   a1@-, a0@-
        !           109:        subxl   a1@-, a0@-
        !           110:        subxl   a1@-, a0@-
        !           111:        subxl   a1@-, a0@-
        !           112:        subxl   a1@-, a0@-
        !           113:        dbf     d2, 1b          | repeat the full run of 8 while d2 counts down to -1
        !           114: 
        !           115:        roxll   #1, d0          | rotate X (borrow out) into bit 0 of the return value
        !           116: 
        !           117:        movl    sp@+, d2        | restore d2
        !           118:        rts
        !           119: 
        !           120: |      P_ROTL(*a, c) performs a = (a<<1) | c (lo-bit) over _global_precision longwords, one 16-bit word at a time from the highest address down.  Hi-bit is returned in d0.
        !           121:        .proc
        !           122:        .globl  _P_ROTL
        !           123: _P_ROTL:
        !           124: |      movl    #L2003, a0      |% (these six |% lines are a disabled call to mcount)
        !           125: |      jsr     mcount          |%
        !           126: |      .bss                    |%
        !           127: |      .even                   |%
        !           128: |L2003:        .skip   4               |%
        !           129: |      .text                   |%
        !           130:        movl    sp@(0x4), a0    | claim arguments: a0 = a
        !           131:        movl    sp@(0x8), d0    | d0 = c (bit to shift in)
        !           132:        movl    d2, a1          | preserve d2 in the scratch register a1 instead of on the stack
        !           133: 
        !           134:        movw    _global_precision, d1   | longword count
        !           135:        movw    d1, d2          | save a copy
        !           136: 
        !           137:        lslw    #2, d1          | d1 = count * 4 = length in bytes
        !           138:        addw    d1, a0          | adjust array pointer: just past the last longword
        !           139: 
        !           140:        andw    #0x1c, d1       | 4 * (count mod 8): two roxlw per longword
        !           141:        negw    d1              | branch offset in d1
        !           142: 
        !           143:        lsrw    #3, d2          | 8 longwords/loop; count in d2
        !           144: 
        !           145:        asrl    #1, d0          | set X if necessary (bit 0 of c is shifted out into X)
        !           146: 
        !           147:        jmp     pc@(0x22,d1:w)  | enter the unrolled run part-way; the offsets assume 2 bytes per roxlw, so 0x22 presumably lands on the dbf when d1 = 0 -- verify against a listing
        !           148: 1:
        !           149:        roxlw   a0@-            | rotate the next lower 16-bit word left one bit through X
        !           150:        roxlw   a0@-
        !           151:        roxlw   a0@-
        !           152:        roxlw   a0@-
        !           153:        roxlw   a0@-
        !           154:        roxlw   a0@-
        !           155:        roxlw   a0@-
        !           156:        roxlw   a0@-
        !           157:        roxlw   a0@-
        !           158:        roxlw   a0@-
        !           159:        roxlw   a0@-
        !           160:        roxlw   a0@-
        !           161:        roxlw   a0@-
        !           162:        roxlw   a0@-
        !           163:        roxlw   a0@-
        !           164:        roxlw   a0@-
        !           165:        dbf     d2, 1b          | repeat the full run of 16 words while d2 counts down to -1
        !           166: 
        !           167:        roxll   #1, d0          | rotate X (bit shifted out of the top) into bit 0 of the return value
        !           168: 
        !           169:        movl    a1, d2          | restore d2 from a1
        !           170:        rts
        !           171: 
        !           172: |      P_SMUL(*a, *b, x) performs a += b * x.  Pointers are to the LSB.  Returns at once when x is 0; otherwise the final carry is stored (not added) in the longword below the last one accumulated.
        !           173:        .proc
        !           174:        .globl  _P_SMUL
        !           175: _P_SMUL:
        !           176: |      movl    #L2004, a0      |% (these six |% lines are a disabled call to mcount)
        !           177: |      jsr     mcount          |%
        !           178: |      .bss                    |%
        !           179: |      .even                   |%
        !           180: |L2004:        .skip   4               |%
        !           181: |      .text                   |%
        !           182:        movl    sp@(0x4), a0    | claim arguments: a0 = a
        !           183:        movl    sp@(0x8), a1    | a1 = b
        !           184:        movl    sp@(0xc), d1    | d1 = x
        !           185:        tstl    d1              | horrible kludge to speed multiply by 0
        !           186:        beq     3f              | x == 0: leave before any register has been saved
        !           187:        moveml  #0x3c00, sp@-   | d2/d3/d4/d5; Sun's optimizer is really *DUMB*
        !           188:        movw    _global_precision, d5   | longword count; 0 will fail
        !           189: 
        !           190:        subqw   #2, d5          | first longword not handled in loop
        !           191:        clrl    d4              | d4 = 0, so "addxl d4, d3" adds only the X bit
        !           192: 
        !           193:        movl    a1@, d2         | lowest longword of b
        !           194:        mulul   d1, d3:d2       | d3 is carry
        !           195:        addl    d2, a0@         | accumulate
        !           196: 
        !           197: |      tstw    d5              | This code needed if global_precision < 2
        !           198: |      blt     2f              | only one longword?
        !           199: 1:                             | entered unconditionally: with the guard above disabled, global_precision must be >= 2
        !           200:        movl    a0@-, d0        | next longword of a
        !           201:        addxl   d3, d0          | accumulate carry and X-bit
        !           202:        movl    a1@-, d2        | next longword of b
        !           203:        mulul   d1, d3:d2       | d3 is carry
        !           204:        addxl   d4, d3          | add X-bit to carry
        !           205:        addl    d2, d0          | accumulate
        !           206:        movl    d0, a0@         | write the updated longword back to a
        !           207:        dbf     d5, 1b          | global_precision - 1 passes in total
        !           208: 2:
        !           209:        addxl   d4, d3          | add X-bit to carry
        !           210:        movl    d3, a0@-        | store (overwrite) the final carry one longword further down
        !           211: 
        !           212:        moveml  sp@+, #0x3c     | d2/d3/d4/d5; Sun's optimizer is really *DUMB*
        !           213: 3:
        !           214:        rts
        !           215: 
        !           216: |      P_DMUL(*a, *b, *c) performs a = b * c.  a, b and c point at the lowest-addressed longword; b and c are _global_precision longwords and a is written out to offset 8*_global_precision - 4.
        !           217:        .proc
        !           218:        .globl  _P_DMUL
        !           219: _P_DMUL:
        !           220: |      movl    #L2005, a0      |% (these six |% lines are a disabled call to mcount)
        !           221: |      jsr     mcount          |%
        !           222: |      .bss                    |%
        !           223: |      .even                   |%
        !           224: |L2005:        .skip   4               |%
        !           225: |      .text                   |%
        !           226:        moveml  #0x3f38, sp@-   | d2-d7/a2-a4; Sun's optimizer is really *DUMB*
        !           227:        movl    sp@(0x28), a0   | claim arguments (offsets include the 0x24 bytes just pushed): a0 = a
        !           228:        movl    sp@(0x2c), a1   | a1 = b
        !           229:        movl    sp@(0x30), a2   | a2 = c
        !           230:        movw    _global_precision, d0
        !           231:        subqw   #2, d0  | global_precision - 2
        !           232: 
        !           233:        movl    a0, a4  | product
        !           234: 
        !           235:        movw    d0, d7  | count for multiplicand
        !           236:        movl    a1, a3  | multiplicand
        !           237:        tstl    a3@+    | is the first (lowest-addressed) longword zero?
        !           238:        bne     2f      | no: keep the full count
        !           239:        subqw   #1, d7
        !           240: 1:
        !           241:        clrl    a4@+    | zero one longword at the low-address end of the product
        !           242:        tstl    a3@+    | look at the next multiplicand longword
        !           243:        dbne    d7, 1b  | stop at a nonzero longword or when the count runs out
        !           244:        addqw   #1, d7  | d7 contains effective size of the multiplicand-2
        !           245: 2:
        !           246:        movl    d0, d6  | count for multiplier (copies all 32 bits, but only word operations touch d6 below)
        !           247:        movl    a2, a3  | multiplier
        !           248:        tstl    a3@+    | is the first (lowest-addressed) longword zero?
        !           249:        bne     2f      | no: keep the full count
        !           250:        subqw   #1, d6
        !           251: 1:
        !           252:        clrl    a4@+    | keep zeroing product longwords where the previous scan stopped
        !           253:        tstl    a3@+    | look at the next multiplier longword
        !           254:        dbne    d6, 1b  | stop at a nonzero longword or when the count runs out
        !           255:        addqw   #1, d6  | d6 contains effective size of the multiplier-2
        !           256: 2:
        !           257: 
        !           258:        addqw   #1, d0  | global_precision - 1
        !           259:        lslw    #2, d0  | as a byte offset
        !           260:        addw    d0, a1  | pointer to LSB of the multiplicand
        !           261:        addw    d0, a2  | pointer to LSB of the multiplier
        !           262:        addw    d0, a0  | product pointer moves twice as far ...
        !           263:        addw    d0, a0
        !           264:        addql   #4, a0  | pointer to LSB of product - KLUDGE!
        !           265: 
        !           266: |      First partial product not handled in loop
        !           267: |      Assumes that the X-bit is clear from the above contortions.
        !           268:        clrl    d4      | d4 = 0, so "addxl d4, d3" adds only the X bit
        !           269: 
        !           270:        movl    a0, a3  | product
        !           271:        movl    a1, a4  | multiplicand
        !           272:        movl    a2@, d1 | one longword of the multiplier
        !           273:        movw    d7, d5  | loop count
        !           274: 
        !           275:        movl    a4@, d2 | lowest longword of the multiplicand
        !           276:        mulul   d1, d3:d2       | d3 is carry
        !           277:        movl    d2, a3@         | store product
        !           278: 1:
        !           279:        movl    a4@-, d2        | next multiplicand longword
        !           280:        mulul   d1, d0:d2       | high half in d0, low half in d2
        !           281:        addxl   d3, d2          | add the previous high half and the X-bit
        !           282:        movl    d0, d3          | this high half is the next carry
        !           283:        movl    d2, a3@-        | store (not accumulate): this is the first partial product
        !           284:        dbf     d5, 1b
        !           285: 
        !           286:        addxl   d4, d3          | add X-bit to carry
        !           287:        movl    d3, a3@-        | store the top longword of this partial product
        !           288: 
        !           289: | The other partial products
        !           290: 
        !           291: 2:                             | one pass per remaining multiplier longword (d6 + 1 passes)
        !           292:        movl    a1, a4          | multiplicand
        !           293:        movl    a2@-, d1        | another longword of the multiplier
        !           294:        movw    d7, d5          | loop count
        !           295: 
        !           296:        movl    a4@, d2         | lowest longword of the multiplicand
        !           297:        mulul   d1, d3:d2       | d3 is carry
        !           298:        addl    d2, a0@-        | accumulate; a0 steps down one longword per pass
        !           299: 
        !           300:        movl    a0, a3          | product
        !           301: 
        !           302: 1:
        !           303:        movl    a3@-, d0        | next product longword
        !           304:        addxl   d3, d0          | accumulate carry and X-bit
        !           305:        movl    a4@-, d2        | next multiplicand longword
        !           306:        mulul   d1, d3:d2       | d3 is carry
        !           307:        addxl   d4, d3          | add X-bit to carry
        !           308:        addl    d2, d0          | accumulate
        !           309:        movl    d0, a3@         | write the updated product longword back
        !           310:        dbf     d5, 1b
        !           311: 
        !           312:        addxl   d4, d3          | add X-bit to carry
        !           313:        movl    d3, a3@-        | store (overwrite) the final carry one longword further down
        !           314: 
        !           315:        dbf     d6, 2b          | next multiplier longword
        !           316: 
        !           317:        moveml  sp@+, #0x1cfc   | d2-d7/a2-a4; Sun's optimizer is really *DUMB*
        !           318:        rts
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.