File:  [PGP] / pgp / src / mc68020.s
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 16:45:42 2018 UTC (8 years, 1 month ago) by root
Branches: phill, MAIN
CVS tags: pgp263i, HEAD
PGP 2.6.3i

|	Fast assembly routines for MC68020 (Sun-3)
|	Assumptions:
|		Arguments start at sp@(0x4)
|		Return value is in d0
|		d0/d1/a0/a1 are scratch
|		P_SMUL needs MULTUNIT set to "unsigned long" in mpilib.c
|		P_DMUL replaces mp_smul and mp_dmul in mpilib.c
|
|	92.9.21 - Tsutomu Shimomura, [email protected]
|       93.5.14 - Bug in P_DMUL fixed -- now works with small bignums

	.text

|	P_SETP(p) sets the current precision to be p longwords.  No-op.
|
|	The routine returns immediately: the argument on the stack is not
|	read and no register or memory location is written.
|	The lines tagged |% are a disabled call to mcount together with a
|	4-byte .bss cell for it (presumably a profiling hook -- confirm).
	.proc
	.globl	_P_SETP
_P_SETP:
|	movl	#L2000, a0	|%
|	jsr	mcount		|%
|	.bss			|%
|	.even			|%
|L2000:	.skip	4		|%
|	.text			|%
	rts

|	P_ADDC(*a, *b, c) performs a += b + c (carry).  Carry is returned.
|
|	In:	sp@(0x4) = a, sp@(0x8) = b, sp@(0xc) = c (carry in)
|	Out:	d0 = carry out of the last addxl; a is updated in place
|	Uses:	d0/d1/a0/a1 as scratch; d2 is pushed on entry and popped on exit
|
|	_global_precision longwords are added.  Both pointers are first
|	advanced by 4*precision bytes and the operands are then walked
|	downwards with predecrement addressing, so a and b are the lowest
|	addresses of the operands and the longword at the highest address
|	is added first.
|	The count and the byte offset are handled as 16-bit words (lslw,
|	addw to an address register), so 4*precision has to fit in a
|	positive word.
|	NOTE(review): only bit 0 of c is moved into X; the other bits stay
|	in d0 and are shifted back up by the final roxll, so c is presumably
|	always 0 or 1 -- confirm against the callers.
	.proc
	.globl	_P_ADDC
_P_ADDC:
|	movl	#L2001, a0	|%
|	jsr	mcount		|%
|	.bss			|%
|	.even			|%
|L2001:	.skip	4		|%
|	.text			|%
	movl	sp@(0x4), a0	| claim arguments
	movl	sp@(0x8), a1
	movl	sp@(0xc), d0
	movl	d2, sp@-	| preserve d2

	movw	_global_precision, d1	| longword count
	movw	d1, d2		| save a copy

	lslw	#2, d1
	addw	d1, a0		| adjust array pointers
	addw	d1, a1

|	d1 = 4*precision here.  Halving it and masking with 0xe leaves
|	2*(precision mod 8): the size in bytes of the addxl instructions
|	(2 bytes each) that must run on the first, partial pass.
	lsrw	#1, d1		| compute initial branch offset
	andw	#0xe, d1
	negw	d1		| branch offset in d1

|	Number of further full passes of 8; dbf runs the body d2 more
|	times after the first arrival at it.
	lsrw	#3, d2		| 8 longwords/loop; count in d2

|	This must be the last instruction before the jump that touches X;
|	jmp and dbf leave the condition codes alone.
	asrl	#1, d0		| set X if necessary

|	0x12 is the distance from the extension word of this jmp to the
|	dbf below: 2 bytes of extension word plus eight 2-byte addxl.
|	With d1 = 0 the jump lands on the dbf; each -2 in d1 backs up over
|	one more addxl.  Inserting or resizing anything between the jmp
|	and the dbf breaks this.
	jmp	pc@(0x12,d1:w)
1:
	addxl	a1@-, a0@-
	addxl	a1@-, a0@-
	addxl	a1@-, a0@-
	addxl	a1@-, a0@-
	addxl	a1@-, a0@-
	addxl	a1@-, a0@-
	addxl	a1@-, a0@-
	addxl	a1@-, a0@-
	dbf	d2, 1b

|	Rotate the final X (carry out) into bit 0 of the return value.
	roxll	#1, d0

	movl	sp@+, d2
	rts

|	P_SUBB(*a, *b, c) performs a -= b + c (borrow).  Borrow is returned.
|
|	In:	sp@(0x4) = a, sp@(0x8) = b, sp@(0xc) = c (borrow in)
|	Out:	d0 = borrow out of the last subxl; a is updated in place
|	Uses:	d0/d1/a0/a1 as scratch; d2 is pushed on entry and popped on exit
|
|	_global_precision longwords are subtracted.  Both pointers are
|	first advanced by 4*precision bytes and the operands are then
|	walked downwards with predecrement addressing, so a and b are the
|	lowest addresses of the operands and the longword at the highest
|	address is subtracted first.
|	The count and the byte offset are handled as 16-bit words (lslw,
|	addw to an address register), so 4*precision has to fit in a
|	positive word.
|	NOTE(review): only bit 0 of c is moved into X; the other bits stay
|	in d0 and are shifted back up by the final roxll, so c is presumably
|	always 0 or 1 -- confirm against the callers.
	.proc
	.globl	_P_SUBB
_P_SUBB:
|	movl	#L2002, a0	|%
|	jsr	mcount		|%
|	.bss			|%
|	.even			|%
|L2002:	.skip	4		|%
|	.text			|%
	movl	sp@(0x4), a0	| claim arguments
	movl	sp@(0x8), a1
	movl	sp@(0xc), d0
	movl	d2, sp@-	| preserve d2

	movw	_global_precision, d1	| longword count
	movw	d1, d2		| save a copy

	lslw	#2, d1
	addw	d1, a0		| adjust array pointers
	addw	d1, a1

|	d1 = 4*precision here.  Halving it and masking with 0xe leaves
|	2*(precision mod 8): the size in bytes of the subxl instructions
|	(2 bytes each) that must run on the first, partial pass.
	lsrw	#1, d1		| compute initial branch offset
	andw	#0xe, d1
	negw	d1		| branch offset in d1

|	Number of further full passes of 8; dbf runs the body d2 more
|	times after the first arrival at it.
	lsrw	#3, d2		| 8 longwords/loop; count in d2

|	This must be the last instruction before the jump that touches X;
|	jmp and dbf leave the condition codes alone.
	asrl	#1, d0		| set X if necessary

|	0x12 is the distance from the extension word of this jmp to the
|	dbf below: 2 bytes of extension word plus eight 2-byte subxl.
|	With d1 = 0 the jump lands on the dbf; each -2 in d1 backs up over
|	one more subxl.  Inserting or resizing anything between the jmp
|	and the dbf breaks this.
	jmp	pc@(0x12,d1:w)
1:
	subxl	a1@-, a0@-
	subxl	a1@-, a0@-
	subxl	a1@-, a0@-
	subxl	a1@-, a0@-
	subxl	a1@-, a0@-
	subxl	a1@-, a0@-
	subxl	a1@-, a0@-
	subxl	a1@-, a0@-
	dbf	d2, 1b

|	Rotate the final X (borrow out) into bit 0 of the return value.
	roxll	#1, d0

	movl	sp@+, d2
	rts

|	P_ROTL(*a, c) performs a = (a<<1) | c (lo-bit).  Hi-bit is returned.
|
|	In:	sp@(0x4) = a, sp@(0x8) = c (bit to shift in at the low end)
|	Out:	d0 = bit shifted out of the top; a is updated in place
|	Uses:	d0/d1/a0/a1 as scratch; d2 is parked in a1 instead of being
|		pushed, and copied back before returning
|
|	_global_precision longwords are shifted.  The pointer is advanced
|	by 4*precision bytes and the operand is walked downwards, so the
|	word at the highest address is rotated first.  The memory form of
|	roxl works on one 16-bit word and shifts by one bit, hence two
|	roxlw per longword and 16 per pass of 8 longwords.
|	NOTE(review): only bit 0 of c is moved into X; the other bits stay
|	in d0 and are shifted back up by the final roxll, so c is presumably
|	always 0 or 1 -- confirm against the callers.
	.proc
	.globl	_P_ROTL
_P_ROTL:
|	movl	#L2003, a0	|%
|	jsr	mcount		|%
|	.bss			|%
|	.even			|%
|L2003:	.skip	4		|%
|	.text			|%
	movl	sp@(0x4), a0	| claim arguments
	movl	sp@(0x8), d0
	movl	d2, a1		| preserve d2

	movw	_global_precision, d1	| longword count
	movw	d1, d2		| save a copy

	lslw	#2, d1
	addw	d1, a0		| adjust array pointer

|	d1 = 4*precision here.  Masking with 0x1c leaves 4*(precision mod 8):
|	the size in bytes of the roxlw instructions (two per longword,
|	2 bytes each) that must run on the first, partial pass.
	andw	#0x1c, d1
	negw	d1		| branch offset in d1

|	Number of further full passes of 8 longwords; dbf runs the body
|	d2 more times after the first arrival at it.
	lsrw	#3, d2		| 8 longwords/loop; count in d2

|	This must be the last instruction before the jump that touches X;
|	jmp and dbf leave the condition codes alone.
	asrl	#1, d0		| set X if necessary

|	0x22 is the distance from the extension word of this jmp to the
|	dbf below: 2 bytes of extension word plus sixteen 2-byte roxlw.
|	With d1 = 0 the jump lands on the dbf; each -4 in d1 backs up over
|	one more pair of roxlw.  Inserting or resizing anything between
|	the jmp and the dbf breaks this.
	jmp	pc@(0x22,d1:w)
1:
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	roxlw	a0@-
	dbf	d2, 1b

|	Rotate the final X (bit shifted out of the top) into bit 0 of the
|	return value.
	roxll	#1, d0

	movl	a1, d2
	rts

|	P_SMUL(*a, *b, x) performs a += b * x.  Pointers are to the LSB.
|
|	In:	sp@(0x4) = a, sp@(0x8) = b, sp@(0xc) = x (one longword)
|	Out:	a is updated in place; no return value is set up in d0
|	Uses:	d0/d1/a0/a1 as scratch; d2-d5 are saved and restored
|
|	a and b point at the least significant longword of each operand
|	and are walked downwards with predecrement addressing.
|	_global_precision longwords of b are multiplied by x and added
|	into a.  The carry out of the last longword is STORED (movl, not
|	added) in the next longword of a, so a must have room for
|	precision+1 longwords.
|
|	If x is 0 or _global_precision is 0 the routine returns at once
|	and a is left untouched.  A precision of 1 is handled by skipping
|	the loop; without that test dbf would start from a count of -1
|	and run 65536 times.
|
|	Register roles:
|		d1 = x			d4 = constant 0 (for addxl)
|		d2 = low half of product	d5 = loop counter
|		d3 = high half (carry into the next longword)
|		d0 = longword of a being accumulated
	.proc
	.globl	_P_SMUL
_P_SMUL:
|	movl	#L2004, a0	|%
|	jsr	mcount		|%
|	.bss			|%
|	.even			|%
|L2004:	.skip	4		|%
|	.text			|%
	movl	sp@(0x4), a0	| claim arguments
	movl	sp@(0x8), a1
	movl	sp@(0xc), d1
	tstl	d1		| horrible kludge to speed multiply by 0
	beq	3f
	tstw	_global_precision	| no longwords at all: nothing to do
	beq	3f
	moveml	#0x3c00, sp@-	| d2/d3/d4/d5; Sun's optimizer is really *DUMB*
	movw	_global_precision, d5	| longword count; nonzero here

	subqw	#2, d5		| first longword not handled in loop
	clrl	d4

	movl	a1@, d2
	mulul	d1, d3:d2	| d3 is carry
	addl	d2, a0@		| accumulate

|	tst does not change X, so the carry from the addl above survives.
	tstw	d5		| global_precision < 2 ?
	blt	2f		| only one longword: skip the loop
1:
	movl	a0@-, d0
	addxl	d3, d0		| accumulate carry and X-bit
	movl	a1@-, d2
	mulul	d1, d3:d2	| d3 is carry
	addxl	d4, d3		| add X-bit to carry
	addl	d2, d0		| accumulate
	movl	d0, a0@
	dbf	d5, 1b
2:
	addxl	d4, d3		| add X-bit to carry
	movl	d3, a0@-	| final carry is stored, not accumulated

	moveml	sp@+, #0x3c	| d2/d3/d4/d5; Sun's optimizer is really *DUMB*
3:
	rts

|	P_DMUL(*a, *b, *c) performs a = b * c.
|
|	In:	sp@(0x4) = a (product), sp@(0x8) = b (multiplicand),
|		sp@(0xc) = c (multiplier)
|	Out:	a receives the product; no return value is set up in d0
|	Uses:	d0/d1/a0/a1 as scratch; d2-d7/a2-a4 are saved and restored
|
|	All three pointers are the lowest address of their operand; the
|	least significant longword is the one at the highest address.
|	b and c are _global_precision longwords long and a is twice that.
|	_global_precision must be at least 2: the code always multiplies
|	at least two longwords of each operand.
|
|	Zero longwords at the low-address end of b and c are scanned for
|	first, to shorten the multiply loops; one longword of a is cleared
|	for every longword scanned past.  The scan stops early enough that
|	two longwords of each operand are always left to multiply.
|
|	Register roles in the multiply loops:
|		a0 = least significant longword of the current partial product
|		a1 = least significant longword of the multiplicand
|		a2 = current longword of the multiplier
|		a3/a4 = working pointers (product / multiplicand)
|		d1 = current multiplier longword
|		d2 = low half of product, d3 = carry into the next longword
|		d4 = constant 0 (for addxl)
|		d5 = inner loop counter, reloaded from d7
|		d6 = outer loop counter (multiplier longwords to use, minus 2)
|		d7 = multiplicand longwords to use, minus 2
	.proc
	.globl	_P_DMUL
_P_DMUL:
|	movl	#L2005, a0	|%
|	jsr	mcount		|%
|	.bss			|%
|	.even			|%
|L2005:	.skip	4		|%
|	.text			|%
	moveml	#0x3f38, sp@-	| d2-d7/a2-a4; Sun's optimizer is really *DUMB*
|	Nine registers were pushed (0x24 bytes), so the arguments are now
|	at 0x28 and up.
	movl	sp@(0x28), a0	| claim arguments
	movl	sp@(0x2c), a1
	movl	sp@(0x30), a2
	movw	_global_precision, d0
	subqw	#2, d0	| global_precision - 2

	movl	a0, a4	| product

	movw	d0, d7	| count for multiplicand
	movl	a1, a3	| multiplicand
	tstl	a3@+
	bne	2f
	subqw	#1, d7
|	With a precision of 2 the count is now -1 and nothing may be
|	skipped.  dbne must not be entered with -1: it would count down
|	from there, clearing memory beyond the product.
	bmi	3f
1:
	clrl	a4@+
	tstl	a3@+
	dbne	d7, 1b
3:
	addqw	#1, d7	| d7 = multiplicand longwords to multiply, minus 2
2:
	movw	d0, d6	| count for multiplier
	movl	a2, a3	| multiplier
	tstl	a3@+
	bne	2f
	subqw	#1, d6
	bmi	3f	| precision 2: nothing may be skipped (see above)
1:
	clrl	a4@+
	tstl	a3@+
	dbne	d6, 1b
3:
	addqw	#1, d6	| d6 = multiplier longwords to multiply, minus 2
2:

	addqw	#1, d0	| global_precision - 1
	lslw	#2, d0
	addw	d0, a1	| pointer to LSB of the multiplicand
	addw	d0, a2	| pointer to LSB of the multiplier
	addw	d0, a0
	addw	d0, a0
	addql	#4, a0	| pointer to LSB of product - KLUDGE!

|	First partial product not handled in loop
|	Assumes that the X-bit is clear from the above contortions.
|	(The lslw above is the last instruction to set X; the adds to
|	address registers that follow it do not touch the flags.)
|	This pass stores into the product instead of accumulating, so the
|	product does not have to be cleared beforehand.
	clrl	d4

	movl	a0, a3	| product
	movl	a1, a4	| multiplicand
	movl	a2@, d1	| one longword of the multiplier
	movw	d7, d5	| loop count

	movl	a4@, d2
	mulul	d1, d3:d2	| d3 is carry
	movl	d2, a3@		| store product
1:
	movl	a4@-, d2
	mulul	d1, d0:d2
	addxl	d3, d2		| low half + previous carry + X-bit
	movl	d0, d3		| high half is the next carry
	movl	d2, a3@-
	dbf	d5, 1b

	addxl	d4, d3		| add X-bit to carry
	movl	d3, a3@-

| The other partial products
| Each pass moves one longword up the multiplier and the product, and
| accumulates into what the earlier passes left there.

2:
	movl	a1, a4		| multiplicand
	movl	a2@-, d1	| another longword of the multiplier
	movw	d7, d5		| loop count

	movl	a4@, d2
	mulul	d1, d3:d2	| d3 is carry
	addl	d2, a0@-	| accumulate

	movl	a0, a3		| product

1:
	movl	a3@-, d0
	addxl	d3, d0		| accumulate carry and X-bit
	movl	a4@-, d2
	mulul	d1, d3:d2	| d3 is carry
	addxl	d4, d3		| add X-bit to carry
	addl	d2, d0		| accumulate
	movl	d0, a3@
	dbf	d5, 1b

	addxl	d4, d3		| add X-bit to carry
	movl	d3, a3@-	| top longword of this pass is stored, not added

	dbf	d6, 2b

	moveml	sp@+, #0x1cfc	| d2-d7/a2-a4; Sun's optimizer is really *DUMB*
	rts

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.