--- pgp/src/3b168000.s	2018/04/24 16:40:44	1.1.1.1
+++ pgp/src/3b168000.s	2018/04/24 16:42:01	1.1.1.2
@@ -1,175 +1,175 @@
-# 3B1 Assembler - primitives for multi-precision math on the MC68010
-#
-# Written by Rob Stampfli  19-Oct-92 for 3B1
-# Assembler: 3B1 native assembler
-#
-# Note that the function P_SETP of the Intel primitives is not used.
-# `set_precision' has to be defined just like in `PORTABLE' mode.
-
-	    global global_precision
-	    global P_ADDC
-	    global P_SUBB
-	    global P_ROTL
-	    global P_SETP
-	    global P_SMUL
-
-	    text
-
-#boolean P_ADDC(unitptr r1, unitptr r2, boolean carry);
-# /* multiprecision add with carry r2 to r1, result in r1 */
-#Parameters: A0.l: r1, A1.l: r2, D0.b: carry
-#Result:     D0.b: new carry
-#Modifies: D0-D2/A0-A1
-
-P_ADDC:     mov.l   4(%sp),%a0	# fetch first argument: r1
-	    mov.l   8(%sp),%a1	# fetch second argument: r2
-	    mov.l   12(%sp),%d1	# fetch third argument: carry
-	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
-	    mov.w   global_precision,%d2 # fetch # of 16 bit units
-	    mov.w   %d2,%d0	# copy units
-	    lsl.w   &1,%d0	# convert units to bytes (1 unit = 2 bytes)
-	    add.w   %d0,%a0	# point r1 to 1 past least significant unit
-	    add.w   %d0,%a1	# point r2 to 1 past least significant unit
-	    lsr.w   &5,%d0	# conv bytes to (units/16); # times thru loop1
-	    and.w   &15,%d2	# = (units%16); # times thru loop2
-	    lsr.b   &1,%d1	# set X-bit as specified by carry arg
-	    bra     adbf1
-aloop1:	    #REPT    8		# 16 units per loop
-	    addx.l -(%a1),-(%a0)
-	    addx.l -(%a1),-(%a0)
-	    addx.l -(%a1),-(%a0)
-	    addx.l -(%a1),-(%a0)
-	    addx.l -(%a1),-(%a0)
-	    addx.l -(%a1),-(%a0)
-	    addx.l -(%a1),-(%a0)
-	    addx.l -(%a1),-(%a0)
-adbf1:	    dbf     %d0,aloop1
-	    bra adbf2
-aloop2:	    addx.w -(%a1),-(%a0)
-adbf2:	    dbf     %d2,aloop2
-	    scs     %d0		    #set returned carry
-	    mov.l   (%sp)+,%d2	    # restore d2
-	    rts
-
-#boolean P_SUBB(unitptr r1, unitptr r2, boolean borrow);
-# /* multiprecision subtract with borrow, r2 from r1, result in r1 */
-#Parameters: A0.l: r1, A1.l: r2, D0.b: borrow
-#Result:     D0.b: new borrow
-#Modifies: D0-D2/A0/A1
-
-P_SUBB:     mov.l   4(%sp),%a0	# fetch first argument: r1
-	    mov.l   8(%sp),%a1	# fetch second argument: r2
-	    mov.l   12(%sp),%d1	# fetch third argument: carry
-	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
-	    mov.w   global_precision,%d2 # fetch # of 16 bit units
-	    mov.w   %d2,%d0	# copy units
-	    lsl.w   &1,%d0	# convert units to bytes (1 unit = 2 bytes)
-	    add.w   %d0,%a0	# point r1 to 1 past least significant unit
-	    add.w   %d0,%a1	# point r2 to 1 past least significant unit
-	    lsr.w   &5,%d0	# conv bytes to (units/16); # times thru loop1
-	    and.w   &15,%d2	# = (units%16); # times thru loop2
-	    lsr.b   &1,%d1	# set X-bit as specified by carry arg
-	    bra     bdbf1
-bloop1:	    #REPT    8		# 16 units per loop
-	    subx.l  -(%a1),-(%a0)
-	    subx.l  -(%a1),-(%a0)
-	    subx.l  -(%a1),-(%a0)
-	    subx.l  -(%a1),-(%a0)
-	    subx.l  -(%a1),-(%a0)
-	    subx.l  -(%a1),-(%a0)
-	    subx.l  -(%a1),-(%a0)
-	    subx.l  -(%a1),-(%a0)
-bdbf1:	    dbf     %d0,bloop1
-	    bra bdbf2
-bloop2:	    subx.w -(%a1),-(%a0)
-bdbf2:	    dbf     %d2,bloop2
-	    scs     %d0		    # set returned carry
-	    mov.l   (%sp)+,%d2	    # restore d2
-	    rts
-
-#boolean P_ROTL(unitptr r1, boolean carry);
-# /* multiprecision rotate left 1 bit with carry, result in r1. */
-#Parameters: A0.l: r1, D0.b: carry
-#Result:     D0.b: new carry
-#Modifies: D0-D2/A0
-
-P_ROTL:     mov.l   4(%sp),%a0	# fetch first argument: r1
-	    mov.l   8(%sp),%d1	# fetch second argument: carry
-	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
-	    mov.w   global_precision,%d2 # fetch # of 16 bit units
-	    mov.w   %d2,%d0	# copy units
-	    lsl.w   &1,%d0	# convert units to bytes (1 unit = 2 bytes)
-	    add.w   %d0,%a0	# point r1 to 1 past least significant unit
-	    lsr.w   &5,%d0	# conv bytes to (units/16); # times thru loop1
-	    and.w   &15,%d2	# = (units%16); # times thru loop2
-	    lsr.b   &1,%d1	# set X-bit as specified by carry arg
-	    bra     cdbf1
-cloop1:	    #REPT    16		# note roxl.l not valid on 68010
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-	    roxl.w  &1,-(%a0)
-cdbf1:	    dbf     %d0,cloop1
-	    bra     cdbf2
-cloop2:	    roxl.w  &1,-(%a0)
-cdbf2:	    dbf     %d2,cloop2
-	    scs     %d0		    # set returned carry
-	    mov.l   (%sp)+,%d2	    # restore d2
-	    rts
-
-#void P_SETP(short nbits);
-# /* sets working precision to specified number of bits. */
-# /* only to minimize portation differences		 */
-#Parameters: --
-#Result:     --
-
-P_SETP:	    rts
-
-#void P_SMUL(MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier)
-# /* multiprecision multiply */
-#Parameters: A0.l: prod, A1.l: multiplicand, D0.w: multiplier
-#Modifies: D0-D4/A0-A1
-#Result:     --
-#Note: prod and multiplicand have already been adjusted to point to LSB
-# prior to making the call.
-
-P_SMUL:     mov.l   4(%sp),%a0	# fetch first argument: prod
-	    mov.l   8(%sp),%a1	# fetch second argument: multiplicand
-	    mov.l   12(%sp),%d0	# fetch third argument: multiplier
-	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
-	    mov.l   %d3,-(%sp)  # save d3 -- not a Unix-PC scratch reg
-	    mov.l   %d4,-(%sp)  # save d4 -- not a Unix-PC scratch reg
-	clr.l	%d2		# clear the carry register
-	clr.l	%d4		# clear upper half of temp reg for prod
-	add.w	&2,%a0		# position prod to 1 beyond LSB
-	add.w	&2,%a1		# position multiplicand to 1 beyond LSB
-	mov.w	global_precision,%d3  # fetch size of multiplicand
-	bra	ddbf1
-
-dloop:	mov.w	-(%a1),%d1	# fetch multiplicand
-	mulu.w	%d0,%d1		# multiply by multiplier
-	mov.w	-(%a0),%d4	# fetch prod
-	add.l	%d4,%d1		# add in prod
-	add.l	%d2,%d1		# add in carry
-	mov.w	%d1,(%a0)	# store result back to prod
-	swap.w	%d1		# fetch carry info (upper 16 bits of mult)
-	mov.w	%d1,%d2		# and move it into carry
-ddbf1:	dbf	%d3,dloop
-
-	mov.w	%d2,-(%a0)	# store carry
-	mov.l	(%sp)+,%d4
-	mov.l	(%sp)+,%d3
-	mov.l	(%sp)+,%d2
-	rts
+# 3B1 Assembler - primitives for multi-precision math on the MC68010
+#
+# Written by Rob Stampfli  19-Oct-92 for 3B1
+# Assembler: 3B1 native assembler
+#
+# Note that the function P_SETP of the Intel primitives is not used.
+# `set_precision' has to be defined just like in `PORTABLE' mode.
+
+	    global global_precision
+	    global P_ADDC
+	    global P_SUBB
+	    global P_ROTL
+	    global P_SETP
+	    global P_SMUL
+
+	    text
+
+#boolean P_ADDC(unitptr r1, unitptr r2, boolean carry);
+# /* multiprecision add with carry: r1 = r1 + r2 + carry, result in r1 */
+#Parameters: read from the stack: 4(%sp): r1, 8(%sp): r2, 12(%sp): carry
+#Result:     D0.b: new carry (written by scs: all ones if set, else zero)
+#Modifies: D0-D1/A0-A1 (D2 is used as a counter but saved and restored)
+
+P_ADDC:     mov.l   4(%sp),%a0	# fetch first argument: r1
+	    mov.l   8(%sp),%a1	# fetch second argument: r2
+	    mov.l   12(%sp),%d1	# fetch third argument: carry
+	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
+	    mov.w   global_precision,%d2 # fetch number of 16 bit units
+	    mov.w   %d2,%d0	# copy units
+	    lsl.w   &1,%d0	# convert units to bytes (1 unit = 2 bytes)
+	    add.w   %d0,%a0	# point r1 to 1 past least significant unit
+	    add.w   %d0,%a1	# point r2 to 1 past least significant unit
+	    lsr.w   &5,%d0	# conv bytes to (units/16) = passes thru aloop1
+	    and.w   &15,%d2	# = (units%16) = passes thru aloop2
+	    lsr.b   &1,%d1	# shift bit 0 of carry arg out into the X-bit
+	    bra     adbf1	# enter at the dbf so a zero count runs no pass
+aloop1:	    #REPT    8		# 8 long adds = 16 units per pass
+	    addx.l -(%a1),-(%a0)	# predecrement: walks toward the high units
+	    addx.l -(%a1),-(%a0)
+	    addx.l -(%a1),-(%a0)
+	    addx.l -(%a1),-(%a0)
+	    addx.l -(%a1),-(%a0)
+	    addx.l -(%a1),-(%a0)
+	    addx.l -(%a1),-(%a0)
+	    addx.l -(%a1),-(%a0)
+adbf1:	    dbf     %d0,aloop1	# dbf leaves the flags alone, X carries over
+	    bra adbf2	# again enter at the dbf: zero leftover units is ok
+aloop2:	    addx.w -(%a1),-(%a0)	# one leftover 16 bit unit per pass
+adbf2:	    dbf     %d2,aloop2
+	    scs     %d0		    # return final carry flag in low byte of d0
+	    mov.l   (%sp)+,%d2	    # restore d2
+	    rts
+
+#boolean P_SUBB(unitptr r1, unitptr r2, boolean borrow);
+# /* multiprecision subtract with borrow: r1 = r1 - r2 - borrow, result in r1 */
+#Parameters: read from the stack: 4(%sp): r1, 8(%sp): r2, 12(%sp): borrow
+#Result:     D0.b: new borrow (written by scs: all ones if set, else zero)
+#Modifies: D0-D1/A0-A1 (D2 is used as a counter but saved and restored)
+
+P_SUBB:     mov.l   4(%sp),%a0	# fetch first argument: r1
+	    mov.l   8(%sp),%a1	# fetch second argument: r2
+	    mov.l   12(%sp),%d1	# fetch third argument: borrow
+	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
+	    mov.w   global_precision,%d2 # fetch number of 16 bit units
+	    mov.w   %d2,%d0	# copy units
+	    lsl.w   &1,%d0	# convert units to bytes (1 unit = 2 bytes)
+	    add.w   %d0,%a0	# point r1 to 1 past least significant unit
+	    add.w   %d0,%a1	# point r2 to 1 past least significant unit
+	    lsr.w   &5,%d0	# conv bytes to (units/16) = passes thru bloop1
+	    and.w   &15,%d2	# = (units%16) = passes thru bloop2
+	    lsr.b   &1,%d1	# shift bit 0 of borrow arg out into the X-bit
+	    bra     bdbf1	# enter at the dbf so a zero count runs no pass
+bloop1:	    #REPT    8		# 8 long subtracts = 16 units per pass
+	    subx.l  -(%a1),-(%a0)	# predecrement: walks toward the high units
+	    subx.l  -(%a1),-(%a0)
+	    subx.l  -(%a1),-(%a0)
+	    subx.l  -(%a1),-(%a0)
+	    subx.l  -(%a1),-(%a0)
+	    subx.l  -(%a1),-(%a0)
+	    subx.l  -(%a1),-(%a0)
+	    subx.l  -(%a1),-(%a0)
+bdbf1:	    dbf     %d0,bloop1	# dbf leaves the flags alone, X carries over
+	    bra bdbf2	# again enter at the dbf: zero leftover units is ok
+bloop2:	    subx.w -(%a1),-(%a0)	# one leftover 16 bit unit per pass
+bdbf2:	    dbf     %d2,bloop2
+	    scs     %d0		    # return final borrow flag in low byte of d0
+	    mov.l   (%sp)+,%d2	    # restore d2
+	    rts
+
+#boolean P_ROTL(unitptr r1, boolean carry);
+# /* multiprecision rotate left 1 bit thru carry, result in r1. */
+#Parameters: read from the stack: 4(%sp): r1, 8(%sp): carry
+#Result:     D0.b: new carry (written by scs: all ones if set, else zero)
+#Modifies: D0-D1/A0 (D2 is used as a counter but saved and restored)
+
+P_ROTL:     mov.l   4(%sp),%a0	# fetch first argument: r1
+	    mov.l   8(%sp),%d1	# fetch second argument: carry
+	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
+	    mov.w   global_precision,%d2 # fetch number of 16 bit units
+	    mov.w   %d2,%d0	# copy units
+	    lsl.w   &1,%d0	# convert units to bytes (1 unit = 2 bytes)
+	    add.w   %d0,%a0	# point r1 to 1 past least significant unit
+	    lsr.w   &5,%d0	# conv bytes to (units/16) = passes thru cloop1
+	    and.w   &15,%d2	# = (units%16) = passes thru cloop2
+	    lsr.b   &1,%d1	# shift bit 0 of carry arg out into the X-bit
+	    bra     cdbf1	# enter at the dbf so a zero count runs no pass
+cloop1:	    #REPT    16		# 16 units per pass; roxl.l not valid on 68010
+	    roxl.w  &1,-(%a0)	# X enters bit 0, bit 15 leaves into X and C
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+	    roxl.w  &1,-(%a0)
+cdbf1:	    dbf     %d0,cloop1	# dbf leaves the flags alone, X carries over
+	    bra     cdbf2	# again enter at the dbf: zero leftover units is ok
+cloop2:	    roxl.w  &1,-(%a0)	# one leftover 16 bit unit per pass
+cdbf2:	    dbf     %d2,cloop2
+	    scs     %d0		    # return final carry flag in low byte of d0
+	    mov.l   (%sp)+,%d2	    # restore d2
+	    rts
+
+#void P_SETP(short nbits);
+# /* nominally sets working precision to specified number of bits;	 */
+# /* here an empty stub kept only to minimize porting differences */
+#Parameters: none read (the nbits argument is ignored)
+#Result:     none; returns immediately, no registers or memory touched
+
+P_SETP:	    rts
+
+#void P_SMUL(MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier)
+# /* multiprecision multiply-accumulate: prod += multiplicand * multiplier */
+#Parameters: read from the stack: 4(%sp): prod, 8(%sp): multiplicand, 12(%sp): multiplier (low word used)
+#Modifies: D0-D1/A0-A1 (D2-D4 are used but saved and restored)
+#Result:     none in registers; prod is updated in memory
+#Note: prod and multiplicand must already point at their least significant
+# 16 bit unit when the call is made; global_precision gives the unit count.
+
+P_SMUL:     mov.l   4(%sp),%a0	# fetch first argument: prod
+	    mov.l   8(%sp),%a1	# fetch second argument: multiplicand
+	    mov.l   12(%sp),%d0	# fetch third argument: multiplier
+	    mov.l   %d2,-(%sp)	# save d2 -- not a Unix-PC scratch reg
+	    mov.l   %d3,-(%sp)  # save d3 -- not a Unix-PC scratch reg
+	    mov.l   %d4,-(%sp)  # save d4 -- not a Unix-PC scratch reg
+	clr.l	%d2		# clear the carry register (all 32 bits)
+	clr.l	%d4		# clear upper half of temp reg for prod
+	add.w	&2,%a0		# position prod to 1 beyond least significant unit
+	add.w	&2,%a1		# position multiplicand to 1 beyond least significant unit
+	mov.w	global_precision,%d3  # fetch size of multiplicand in units
+	bra	ddbf1		# enter at the dbf so a zero count runs no pass
+
+dloop:	mov.w	-(%a1),%d1	# fetch next multiplicand unit
+	mulu.w	%d0,%d1		# 16x16 unsigned multiply, 32 bit result in d1
+	mov.w	-(%a0),%d4	# fetch prod unit (upper half of d4 stays zero)
+	add.l	%d4,%d1		# add in prod
+	add.l	%d2,%d1		# add in carry from the previous unit
+	mov.w	%d1,(%a0)	# store low 16 bits back to prod
+	swap.w	%d1		# bring upper 16 bits (the carry) into the low word
+	mov.w	%d1,%d2		# and move it into carry (upper half stays zero)
+ddbf1:	dbf	%d3,dloop
+
+	mov.w	%d2,-(%a0)	# store final carry: overwrites next higher prod unit
+	mov.l	(%sp)+,%d4	# restore saved registers in reverse order
+	mov.l	(%sp)+,%d3
+	mov.l	(%sp)+,%d2
+	rts