--- pgp/src/vax.mar 2018/04/24 16:37:52 1.1 +++ pgp/src/vax.mar 2018/04/24 16:38:23 1.1.1.2 @@ -1,689 +1,689 @@ -;Last Modified: 16-APR-1992 09:06:30.46 - .title fprims Fast Multiple Precision Primitives - .ident /V1.7B/ -;+ -; **-FPRIMS-Fast Multiple Precision Primitives -; -; Facility: PGP -; -; Language: Macro-32 -; -; Functional Description: -; -; This module contains fast multiple precision routines for operating on arrays -; of long words. Error checking is minimised at the expense of speed. -; -; Restrictions: -; -; This code is shareable but NOT reentrant as written because of static data. -; A reentrant version of this module could be written but it would be slower! -; -; Version: 1 -; -; Original: 00A Date: 17-Sep-1991 Author: Hugh A.J. Kennedy -; -; Based on FPRIMS.ASM written by Zhahai Stewart for the Intel 8086 -; architecture. -; -; Modification: 02A Date: 27-Sep-1991 Author: Hugh A.J. Kennedy. -; -; Add fast multiply routine, P_SMUL. -; Re-organise code slightly. -; Ammend/clarify copyright and license statement. -; Add checking for maximum precision exceeded, display a warning message -; and bomb! -; -; Modification: 03A Date: 16-Mar-1992 Author: Hugh A.J. Kennedy. -; -; Sniff for MSB in P_SMUL. In this way, avoid multiplies by leading zeroes -; (not efficient). -; -; Modification: 05A Date: 17-Mar-1992 Author: Hugh A.J. Kennedy. -; -; Encode entire double precision multiply in VAX assembler. -; Correct some minor problems with handling embedded zeroes. -; -; Modification: 06A Date: 17-Mar-1992 Author: Hugh A.J. Kennedy -; -; Align everything for speed. VAXen like stuff on 64-bit, or at least 32-bit -; boundaries. Therefore, we align the add, subtract and rotate tables and then -; we align the multiply loops. The extra NOPs used to pad these loops are of -; negligable cost because they already exist in the memory buffer. When the -; following instruction is aligned, it executes MUCH faster. -; -; Modification: 07A Date: 24-Mar-1991 Author: Hugh A.J. 
Kennedy. -; -; Implement fast compare. -;- - - .sbttl Copyright Notice And License To Use -; -; Copyright (c) 1991-1992, All Rights Reserved by -; Hugh A.J. Kennedy. -; -; A license to use and adapt this software without payment is hereby -; granted subject to the following conditions: -; -; 1) It may only be copied with the inclusion of this copyright -; notice in the program source with these associated conditions. -; -; 2) No title to or ownership of this software is hereby -; transferred. -; -; 3) The information in this software is subject to change -; without notice and should not be construed as a commitment by -; Hugh Kennedy. -; -; 4) The author assumes no liability for any damages arising from the -; use of this software, even if said damages arises from defects in -; this software. -; -; 5) No warranty as to merchantability or fitness of purpose is -; expressed or implied. -; -; 6) Any modifications to this source must be clearly identified as -; such and added to the modification history. -; -; 7) These routines may not be incorporated in a commercial cryptographic -; product. -; -; If you can not comply with these conditions, you *must* contact the author -; and obtain permission other wise you are in violation of copyright. - - .sbttl Misc Macros & Definitions -; -; Assembly Parameters -; -max_unit_prec = 72 ; Maximum unit precision -supersniffer = 1 ; Enable bit msb locator. -; -; The following parameter is dependent on the kind of VAX you are running on -; and should be defined if the execution time of the SOBGTR loop control -; instruction and the appropriate operation (ADWC or SBWC) from cache is much -; less than the execution time in main memory. If you have a slow VAX you -; should comment the following line out to use a vector of instructions. -; -novector = 1 ; Use loops rather than vectors. 
- -.macro ascid .string -;+ -; *-ASCID-Build An ASCII String Referenced By Descriptor -; -; Functional Description: -; -; This macro is a little like the system supplied .ASCID directive -; but it uses a separate program section to store the ASCII data. -; -; Arguments: -; -; STRING String to create -;- - .nocross - - .save_psect - - .psect puret - -$$$t0 = . - .ascii @.string@ -$$$t1 = .-$$$t0 - - .restore_psect - - .word $$$t1 - .byte dsc$k_dtype_t - .byte dsc$k_class_s - .address - - $$$t0 - .cross - -.endm ascid - - .sbttl Misc Data Areas -; -; Misc. Data Areas -; - .psect impurd,con,lcl,noshr,exe,rd,wrt,long - -; -; This data is static and is used to hold the current precision established -; by P_SETP for other calls to this library. -; -.if not_defined novector - -addoff: ; Offset into add table. - .blkl 1 ; also for sub and rot. -.endc - -precis: ; Precision in longwords. - .blkl 1 - - .psect pure,con,rel,shr,exe,rd,nowrt,quad - - .align quad - -.ifndef novector - -prectoobig: - ascid - -.endc - - .sbttl Start of Code - - .sbttl P_CMP Compare two very long integers -;+ -; **-P_COMP-Compare two very long integers -; -; Functional Description: -; -; This procedure is invoked to compare two extended precision unsigned -; integers. -; -; Calling Sequence -; -; short P_CMP ( r1, r2) -; -; Parameters: -; -; R1 -> Extended Precision Integer 1 -; R2 -> Extended Precision Integer 2 -; -; Implicit Inputs: -; -; PRECIS lr*r Precision expresses in longs. -; -; Returns: -; -; -1 if r1 < r2 -; 0 if r1 = r2 -; +1 if r1 > r2 -; - -;- - - .align long - - .entry p_cmp,^m - - movl 4(ap),r1 ; R1 -> Sum. - movl 8(ap),r2 ; R2 -> Addend. - movl precis,r0 ; R0 = Precision. - moval (r1)[r0],r1 ; Get MS longwords. - moval (r2)[r0],r2 ; Get MS longwords. -.align long 1 ; Align loop with NOPS. -10$: cmpl -(r1),-(r2) ; Compare. - bnequ 20$ ; If ne, then exit loop. - sobgtr r0,10$ ; Loop until done. - ret ; R0 = zero so R1 = R2. -20$: - bgtru 30$ ; If R1 > R2 then branch. 
- movw #-1,r0 ; Flag <. - ret -30$: - movw #1,r0 ; Flag >. - ret - - .sbttl P_ADDC Add two very long integers with carry -;+ -; **-P_ADDC-Add very long integers -; -; Functional Description: -; -; This procedure is invoked to add two very long integers with carry. Each -; integer is represented as an array of longwords, least significant first. -; -; Calling Sequence: -; -; P_ADDC sum,addend,carry -; -; Parameters: -; -; sum lm*r Sum. -; addend lr*r Addend. -; carry lr*v Carry bit. -; -; Implicit Inputs: -; -; Addoff This is used as an offset into the various tables -; of adds, subtracts and rotates to implement the -; operation to the requested precsion. -; -; Status Returns: -; -; R0 Resulting carry bit. -;- - - .align long - - .entry p_addc,^m - - movl 4(ap),r1 ; R1 -> Sum. - movl 8(ap),r2 ; R2 -> Addend. - -.if defined novector - - movl precis,r3 ; R3 = Precision. - subl3 12(ap),#0,r0 ; Set carry bit. - .align quad,1 ; Align loop with NOPs -10$: adwc (r2)+,(r1)+ ; Add with carry one longword. - .align quad,1 ; Align next instruction. - sobgtr r3,10$ ; Loop until done. - -.iff ; novector - - moval 10$,r3 - addl2 addoff,r3 ; Jump into table. - subl3 12(ap),#0,r0 ; Set carry bit. - jmp (r3) - - .align quad - -10$: - .rept max_unit_prec -$$$ = . - adwc (r2)+,(r1)+ ; Add with carry one longword. - nop -addsiz = .-$$$ - .endr - -.endc ; novector - - clrl r0 ; Assume carry clear. - bcc 20$ ; Carry set? - incl r0 ; Flag carry was set. -20$: ret - - .sbttl P_SUBB Subtract very long integers with borrow -;+ -; **-P_SUBB-Subtract very long integers -; -; Functional Description: -; -; This procedure is invoked to add subtract very long integers with carry. Each -; integer is represented as an array of longwords, least significant first. -; -; Calling Sequence: -; -; P_SUBB diff,sub,borrow -; -; Parameters: -; -; diff lm*r Difference -; sub lr*r Subtrahend. -; borrow lr*v Borrow bit. 
-; -; Implicit Inputs: -; -; Addoff This is used as an offset into the various tables -; of adds, subtracts and rotates to implement the -; operation to the requested precsion. -; -; Status Returns: -; -; R0 Resulting carry bit. -;- - - .align long - - .entry p_subb,^m - - movl 4(ap),r1 ; R1 -> Difference. - movl 8(ap),r2 ; R2 -> Minuend. - -.if defined novector - - movl precis,r3 ; R3 = No. of longs. - subl3 12(ap),#0,r0 ; Set borrow bit. - .align quad,1 ; Align loop with NOPs. -10$: sbwc (r2)+,(r1)+ ; Subtract with borrow one long. - .align quad,1 ; Align with NOPs. - sobgtr r3,10$ ; Loop through. - -.iff ; novector - - moval 10$,r3 - addl2 addoff,r3 ; Jump into table. - subl3 12(ap),#0,r0 ; Set borrow bit. - jmp (r3) - - .align quad -10$: - .rept max_unit_prec - sbwc (r2)+,(r1)+ ; Subtract w/carry one longword. - nop - .endr - -.endc ; novector - - clrl r0 ; Assume carry clear. - bcc 20$ ; Carry set? - incl r0 ; Flag carry was set. -20$: ret - - .sbttl P_ROTL Rotate left a very long integer with carry. -;+ -; **-P_ROTL-Rotate left one bit very long integers -; -; Functional Description: -; -; This procedure is invoked to rotate left one bit (e.g. divide by 2) very -; long integers with carry. Each integer is represented as an array of -; longwords, least significant first. Note that we use the add with carry -; instruction here because the VAX (unlike the dear old PDP-11) lacks a -; rotate instruction that includes the carry bit. -; -; Calling Sequence: -; -; P_ROTL num,carry -; -; Parameters: -; -; num lm*r Number to be shifted -; carry lr*v Carry bit. -; -; Implicit Inputs: -; -; Addoff This is used as an offset into the various tables -; of adds, subtracts and rotates to implement the -; operation to the requested precsion. -; -; Status Returns: -; -; R0 Resulting carry bit. -;- - - .align long - - .entry p_rotl,^m - - movl 4(ap),r1 ; R1 -> Sum. - -.if defined novector - - movl precis,r3 ; R3 = No. of longwords. - subl3 8(ap),#0,r0 ; Set carry bit. 
- .align quad,1 ; Align loop with NOPs -10$: adwc (r1),(r1)+ ; Add to itself with carry. - .align quad,1 ; Align with NOPs. - sobgtr r3,10$ ; Loop until done. - -.iff ; novector - - moval 10$,r3 - addl2 addoff,r3 ; Jump into table. - subl3 8(ap),#0,r0 ; Set carry bit. - jmp (r3) - - .align quad -10$: - .rept max_unit_prec - adwc (r1),(r1)+ ; *2+carry. - nop - .endr - -.endc ; novector - clrl r0 ; Assume carry clear. - bcc 20$ ; Carry set? - incl r0 ; Flag carry was set. -20$: ret - - .sbttl P_DMUL Extended Multiple Precision Multiply -;* -; **-P_DMUL-Extended Multiple Precision Multiply -; -; Functional Description: -; -; This procedure multiplies an unsigned single precision multiplier by a -; single precision multiplicand. The product register is double precision. -; It is expected that the length of the single precision multiplier and -; multiplicand has been previously set by a call to P_SETP. Note that the -; entire length of the product register is zeroed - so it must be a full -; double precision size. -; -; Calling Sequence: -; -; P_DMUL prod, multiplicand, multiplier -; -; Parameters: -; -; prod lw*r Product. -; multuplicand lr*r Multiplicand -; multiplier lr*r Multiplier -; -; Implicit Inputs: -; -; PRECIS lr*r Precision expresses in longs. -; -; Status Returns: -; -; None. -;- - - .align long - - .entry p_dmul,^m - - movl 4(ap),r8 ; R8 -> Product. - beql 49$ ; If eq, not specified. - movl precis,r10 ; R10 = Precision (longs) - ashl #3,r10,r2 ; R0 = No. of bytes to zero. - movc5 #0,#0,#0,r2,(r8) ; Zero product buffer. - movl 8(ap),r3 ; R3 -> Multiplicand. - beql 49$ ; If eq, not specified. - pushl r3 ; Save for posterity. - movl 12(ap),r11 ; R11 -> Multiplier. - beql 49$ ; If eq, not specified. - movl r10,r12 ; R12 = Multiplicand prec. - -.if defined SUPERSNIFFER - -; -; Here we calculate the effective maximum precision for the multiply by -; locating the long containing the most significant bit of the multiplier -; and the multiplicand. 
-; - moval (r11)[r10],r0 ; Supersniffer... - .align quad,1 ; Align with nops -45$: tstl -(r0) ; Examine next long. - bneq 50$ ; If ne, then we found msb. - sobgtr r10,45$ ; Loop until done. -49$: ret ; Multiplier = 0! -50$: - moval (r3)[r12],r0 ; Supersniffer... - .align quad,1 ; Align with nops -55$: tstl -(r0) ; Examine next long. - bneq 200$ ; If ne, then we found msb. - sobgtr r12,55$ ; Loop until done. - ret ; Multiplicand = 0! -.iff - - brb 200$ -49$: - ret - -.endc ; SUPERSNIFFER - -; -; Multiplier Loop -; -; R12 = Count of multiplicand longs to process. -; R11 -> Next long of multiplier. -; R10 = Count of multiplier longs to process. -; R8 -> Next long of product. -; - .align quad,1 ; Align with nops -200$: movl r12,r5 ; Multiplicand precision. - moval (r8)+,r4 ; R4 -> Next long of product. - movl (sp),r3 ; R3 -> 1st multiplier long. - movl (r4),r0 ; R0,R1 = Partial Sum. - movl 4(r4),r1 - clrl r7 ; Zero look-ahead carry. -; -; Perform an extended multiply of two unsigned numbers. This means that -; we have to compensate the hi-order product because either the multiplier -; or the multiplicand may be apparently a negative number. EMUL is a signed -; multiply - so we must be careful. Also, the EMUL longword addend is sign -; extended before adding into the product so we have to add the hard way. -; -; R6 = Current Multiplicand -; R2 = Multiplier -; R4 -> Current quadword of partial product. -; R0,R1 = Partial sum to which product is added -; R7 = Lookahead carry. This gets set if we try to carry after adding -; the partial product to the partial sum. This gets a little more -; complicated because here we are setting the high-order long of -; the next quadword to be operated on. -; -; Essentially the algorithm is as follows: -; -; 0) R0,R1 = (R4) ; Save current partial sum. -; 1) R6 = Next longword of multiplicand. -; 2) (R4) = R6 * R2 ; quad result compensating for negative numbers) -; 3) (R4) = (R4) + R0,R1 ; add back partial sum. -; 4) R7 = Carry bit. 
-; 5) R4 = R4 + 4 ; Point to next long. -; 6) R1 = 4(R4) + R7 ; Propagate carry to high order of next partial -; ; sum. -; 7) Loop back to step 1 until multiplicand completely processed. -; - movl (r11)+,r2 ; R2 = Multiplier. - beql 999$ ; If eq, not specified. - blss 1500$ ; This unfolds the compensation - ; test out of the loop. -; -; This version of the multiply loop is entered when the multiplier is positive -; saving three instructions per unit of precision. -; - .align quad,1 ; Align with NOPs. -500$: movl (r3)+,r6 ; R6 = Current multplicand. - emul r2,r6,#0,(r4) ; Multiply (64-bit result). -; -; Because we have removed leading zeroes, multiplication by zero is very -; unlikely, 1 in 2^32 or so. It is therefore easier to perform the test after -; the EMUL (looking at the zero product) that the multiplicand was zero so we -; don't need any special case logic later to adjust the product pointer. -; - beql 550$ ; If result eq, skip. - tstl r6 ; Was multiplicand negative? - bgeq 550$ ; No, skip. - addl2 r2,4(r4) ; Yes, compensate. -550$: addl2 r0,(r4)+ ; Accumulate. - adwc r1,(r4)+ - movl (r4),r1 ; R1 = Next hi-end partial sum. - adwc r7,r1 ; Add carry if needed. - clrl r7 ; Reset lookahead register. - adwc #0,r7 ; Save lookahead carry. - movl -(r4),r0 ; R0 = Next lo-end partial. - sobgtr r5,500$ ; More units? -999$: sobgtr r10,200$ ; Nope, go get next multiplier - ret -; -; This version of the above multiply loop is entered when the multiplier is -; negative - and we must compensate by adding the multiplicand to the hi-order -; product. This saves a test and a conditional branch per unit of precision. -; - .align quad,1 ; Align with NOPs. -1500$: - movl (r3)+,r6 ; R6 = Current multplicand. - emul r2,r6,#0,(r4) ; Multiply (64-bit result). -; -; Because we have removed leading zeroes, multiplication by zero is very -; unlikely, 1 in 2^32 or so. 
It is therefore easier to perform the test after -; the EMUL (looking at the zero product) that the multiplicand was zero so we -; don't need any special case logic later to adjust the product pointer. -; - beql 1560$ ; If result eq, skip. - tstl r6 ; Was multiplicand negative? - bgeq 1550$ ; No, skip. - addl2 r2,4(r4) ; Yes, compensate. -1550$: -; As documented above, we unfolded the following to save instructions -; tstl r2 ; Multiplier negative? -; bgeq 1560$ ; No, skip. - addl2 r6,4(r4) ; Yes, compensate. -1560$: addl2 r0,(r4)+ ; Accumulate. - adwc r1,(r4)+ ; R1 = High-end partial sum. - movl (r4),r1 ; R1 = Next hi-end partial sum. - adwc r7,r1 ; Add carry if needed. - clrl r7 ; Reset lookahead register. - adwc #0,r7 ; Save lookahead carry. - movl -(r4),r0 ; R0 = Next lo-end partial. - sobgtr r5,1500$ ; More units? - sobgtr r10,200$ ; Nope, go get next multiplier - ret - - .sbttl P_SETP Set Precison. -;+ -; **-P_SETP-Set Precision -; -; Functional Description: -; -; This procedure is invoked to set the operating precision of the package. -; -; Calling Sequence: -; -; P_SETP nbits -; -; Parameters: -; -; nbits rw*v Number of bits in number. -; -; Implicit Outputs: -; -; Precis Set to the number of longwords required to implement -; the requested precision. -; Addoff This is used as an offset into the various tables -; of adds, subtracts and rotates to implement the -; operation to the requested precsion. -; -; Status Returns: -; -; None. -; -; Side Effects: -; -; If the maximum precision set in 32-bit units by the assembly -; parameter "max_unit_prec" is exceeded, a message to that effect will -; be displayed and the program will terminate with a fatal error. -;- - - .entry p_setp,^m<> - - movzwl 4(ap),r1 ; R1 = No. of bits. - addl2 #31,r1 ; Round up to next long word. - ashl #-5,r1,r1 ; R1 = No. of 32 bit words. - movl r1,precis ; Save precision. - -.if not_defined novector - - subl3 r1,#max_unit_prec,r0 ; R0 = Number of steps reqd. 
- blss 10$ ; If > 0 then exit. - mull3 #addsiz,r0,addoff ; Get add table offset. - -.iftf ; novector - - ret - -.ift ; novector - -10$: ; Table size exceeded! - movab -80(sp),sp ; Output buffer. - pushab (sp) ; Build descriptor - movzwl #80,-(sp) - clrl -(sp) ; Receive return length. - pushl #max_unit_prec ; Compiled max table size. - pushl r1 ; Requested table size. - pushaq 8+4(sp) ; -> Output buffer descriptor. - pushaw 12(sp) ; -> Returned length. - pushaq prectoobig ; -> FAO control string. - calls #5,g^sys$fao ; Format output string. - movl (sp)+,(sp) ; Set actual buffer size. - pushaq (sp) ; -> Output buffer descr. - calls #1,g^lib$put_output ; Output message. - $exit_s - ; Exit with severe error. - code=#4 - -.endc ; novector - - .end +;Last Modified: 16-APR-1992 09:06:30.46 + .title fprims Fast Multiple Precision Primitives + .ident /V1.7B/ +;+ +; **-FPRIMS-Fast Multiple Precision Primitives +; +; Facility: PGP +; +; Language: Macro-32 +; +; Functional Description: +; +; This module contains fast multiple precision routines for operating on arrays +; of long words. Error checking is minimised at the expense of speed. +; +; Restrictions: +; +; This code is shareable but NOT reentrant as written because of static data. +; A reentrant version of this module could be written but it would be slower! +; +; Version: 1 +; +; Original: 00A Date: 17-Sep-1991 Author: Hugh A.J. Kennedy +; +; Based on FPRIMS.ASM written by Zhahai Stewart for the Intel 8086 +; architecture. +; +; Modification: 02A Date: 27-Sep-1991 Author: Hugh A.J. Kennedy. +; +; Add fast multiply routine, P_SMUL. +; Re-organise code slightly. +; Ammend/clarify copyright and license statement. +; Add checking for maximum precision exceeded, display a warning message +; and bomb! +; +; Modification: 03A Date: 16-Mar-1992 Author: Hugh A.J. Kennedy. +; +; Sniff for MSB in P_SMUL. In this way, avoid multiplies by leading zeroes +; (not efficient). +; +; Modification: 05A Date: 17-Mar-1992 Author: Hugh A.J. 
Kennedy. +; +; Encode entire double precision multiply in VAX assembler. +; Correct some minor problems with handling embedded zeroes. +; +; Modification: 06A Date: 17-Mar-1992 Author: Hugh A.J. Kennedy +; +; Align everything for speed. VAXen like stuff on 64-bit, or at least 32-bit +; boundaries. Therefore, we align the add, subtract and rotate tables and then +; we align the multiply loops. The extra NOPs used to pad these loops are of +; negligible cost because they already exist in the memory buffer. When the +; following instruction is aligned, it executes MUCH faster. +; +; Modification: 07A Date: 24-Mar-1991 Author: Hugh A.J. Kennedy. +; +; Implement fast compare. +;- + + .sbttl Copyright Notice And License To Use +; +; Copyright (c) 1991-1992, All Rights Reserved by +; Hugh A.J. Kennedy. +; +; A license to use and adapt this software without payment is hereby +; granted subject to the following conditions: +; +; 1) It may only be copied with the inclusion of this copyright +; notice in the program source with these associated conditions. +; +; 2) No title to or ownership of this software is hereby +; transferred. +; +; 3) The information in this software is subject to change +; without notice and should not be construed as a commitment by +; Hugh Kennedy. +; +; 4) The author assumes no liability for any damages arising from the +; use of this software, even if said damages arises from defects in +; this software. +; +; 5) No warranty as to merchantability or fitness of purpose is +; expressed or implied. +; +; 6) Any modifications to this source must be clearly identified as +; such and added to the modification history. +; +; 7) These routines may not be incorporated in a commercial cryptographic +; product. +; +; If you can not comply with these conditions, you *must* contact the author +; and obtain permission other wise you are in violation of copyright.
+ + .sbttl Misc Macros & Definitions +; +; Assembly Parameters +; +max_unit_prec = 72 ; Maximum unit precision +supersniffer = 1 ; Enable bit msb locator. +; +; The following parameter is dependent on the kind of VAX you are running on +; and should be defined if the execution time of the SOBGTR loop control +; instruction and the appropriate operation (ADWC or SBWC) from cache is much +; less than the execution time in main memory. If you have a slow VAX you +; should comment the following line out to use a vector of instructions. +; +novector = 1 ; Use loops rather than vectors. + +.macro ascid .string +;+ +; *-ASCID-Build An ASCII String Referenced By Descriptor +; +; Functional Description: +; +; This macro is a little like the system supplied .ASCID directive +; but it uses a separate program section to store the ASCII data. +; +; Arguments: +; +; STRING String to create +;- + .nocross + + .save_psect + + .psect puret + +$$$t0 = . + .ascii @.string@ +$$$t1 = .-$$$t0 + + .restore_psect + + .word $$$t1 + .byte dsc$k_dtype_t + .byte dsc$k_class_s + .address - + $$$t0 + .cross + +.endm ascid + + .sbttl Misc Data Areas +; +; Misc. Data Areas +; + .psect impurd,con,lcl,noshr,exe,rd,wrt,long + +; +; This data is static and is used to hold the current precision established +; by P_SETP for other calls to this library. +; +.if not_defined novector + +addoff: ; Offset into add table. + .blkl 1 ; also for sub and rot. +.endc + +precis: ; Precision in longwords. + .blkl 1 + + .psect pure,con,rel,shr,exe,rd,nowrt,quad + + .align quad + +.if not_defined novector + +prectoobig: + ascid + +.endc + + .sbttl Start of Code + + .sbttl P_CMP Compare two very long integers +;+ +; **-P_COMP-Compare two very long integers +; +; Functional Description: +; +; This procedure is invoked to compare two extended precision unsigned +; integers. 
+; +; Calling Sequence +; +; short P_CMP ( r1, r2) +; +; Parameters: +; +; R1 -> Extended Precision Integer 1 +; R2 -> Extended Precision Integer 2 +; +; Implicit Inputs: +; +; PRECIS lr*r Precision expresses in longs. +; +; Returns: +; +; -1 if r1 < r2 +; 0 if r1 = r2 +; +1 if r1 > r2 +; + +;- + + .align long + + .entry p_cmp,^m + + movl 4(ap),r1 ; R1 -> Sum. + movl 8(ap),r2 ; R2 -> Addend. + movl precis,r0 ; R0 = Precision. + moval (r1)[r0],r1 ; Get MS longwords. + moval (r2)[r0],r2 ; Get MS longwords. +.align long 1 ; Align loop with NOPS. +10$: cmpl -(r1),-(r2) ; Compare. + bnequ 20$ ; If ne, then exit loop. + sobgtr r0,10$ ; Loop until done. + ret ; R0 = zero so R1 = R2. +20$: + bgtru 30$ ; If R1 > R2 then branch. + movw #-1,r0 ; Flag <. + ret +30$: + movw #1,r0 ; Flag >. + ret + + .sbttl P_ADDC Add two very long integers with carry +;+ +; **-P_ADDC-Add very long integers +; +; Functional Description: +; +; This procedure is invoked to add two very long integers with carry. Each +; integer is represented as an array of longwords, least significant first. +; +; Calling Sequence: +; +; P_ADDC sum,addend,carry +; +; Parameters: +; +; sum lm*r Sum. +; addend lr*r Addend. +; carry lr*v Carry bit. +; +; Implicit Inputs: +; +; Addoff This is used as an offset into the various tables +; of adds, subtracts and rotates to implement the +; operation to the requested precsion. +; +; Status Returns: +; +; R0 Resulting carry bit. +;- + + .align long + + .entry p_addc,^m + + movl 4(ap),r1 ; R1 -> Sum. + movl 8(ap),r2 ; R2 -> Addend. + +.if defined novector + + movl precis,r3 ; R3 = Precision. + subl3 12(ap),#0,r0 ; Set carry bit. + .align quad,1 ; Align loop with NOPs +10$: adwc (r2)+,(r1)+ ; Add with carry one longword. + .align quad,1 ; Align next instruction. + sobgtr r3,10$ ; Loop until done. + +.iff ; novector + + moval 10$,r3 + addl2 addoff,r3 ; Jump into table. + subl3 12(ap),#0,r0 ; Set carry bit. + jmp (r3) + + .align quad + +10$: + .rept max_unit_prec +$$$ = . 
+ adwc (r2)+,(r1)+ ; Add with carry one longword. + nop +addsiz = .-$$$ + .endr + +.endc ; novector + + clrl r0 ; Assume carry clear. + bcc 20$ ; Carry set? + incl r0 ; Flag carry was set. +20$: ret + + .sbttl P_SUBB Subtract very long integers with borrow +;+ +; **-P_SUBB-Subtract very long integers +; +; Functional Description: +; +; This procedure is invoked to add subtract very long integers with carry. Each +; integer is represented as an array of longwords, least significant first. +; +; Calling Sequence: +; +; P_SUBB diff,sub,borrow +; +; Parameters: +; +; diff lm*r Difference +; sub lr*r Subtrahend. +; borrow lr*v Borrow bit. +; +; Implicit Inputs: +; +; Addoff This is used as an offset into the various tables +; of adds, subtracts and rotates to implement the +; operation to the requested precsion. +; +; Status Returns: +; +; R0 Resulting carry bit. +;- + + .align long + + .entry p_subb,^m + + movl 4(ap),r1 ; R1 -> Difference. + movl 8(ap),r2 ; R2 -> Minuend. + +.if defined novector + + movl precis,r3 ; R3 = No. of longs. + subl3 12(ap),#0,r0 ; Set borrow bit. + .align quad,1 ; Align loop with NOPs. +10$: sbwc (r2)+,(r1)+ ; Subtract with borrow one long. + .align quad,1 ; Align with NOPs. + sobgtr r3,10$ ; Loop through. + +.iff ; novector + + moval 10$,r3 + addl2 addoff,r3 ; Jump into table. + subl3 12(ap),#0,r0 ; Set borrow bit. + jmp (r3) + + .align quad +10$: + .rept max_unit_prec + sbwc (r2)+,(r1)+ ; Subtract w/carry one longword. + nop + .endr + +.endc ; novector + + clrl r0 ; Assume carry clear. + bcc 20$ ; Carry set? + incl r0 ; Flag carry was set. +20$: ret + + .sbttl P_ROTL Rotate left a very long integer with carry. +;+ +; **-P_ROTL-Rotate left one bit very long integers +; +; Functional Description: +; +; This procedure is invoked to rotate left one bit (e.g. divide by 2) very +; long integers with carry. Each integer is represented as an array of +; longwords, least significant first. 
Note that we use the add with carry +; instruction here because the VAX (unlike the dear old PDP-11) lacks a +; rotate instruction that includes the carry bit. +; +; Calling Sequence: +; +; P_ROTL num,carry +; +; Parameters: +; +; num lm*r Number to be shifted +; carry lr*v Carry bit. +; +; Implicit Inputs: +; +; Addoff This is used as an offset into the various tables +; of adds, subtracts and rotates to implement the +; operation to the requested precsion. +; +; Status Returns: +; +; R0 Resulting carry bit. +;- + + .align long + + .entry p_rotl,^m + + movl 4(ap),r1 ; R1 -> Sum. + +.if defined novector + + movl precis,r3 ; R3 = No. of longwords. + subl3 8(ap),#0,r0 ; Set carry bit. + .align quad,1 ; Align loop with NOPs +10$: adwc (r1),(r1)+ ; Add to itself with carry. + .align quad,1 ; Align with NOPs. + sobgtr r3,10$ ; Loop until done. + +.iff ; novector + + moval 10$,r3 + addl2 addoff,r3 ; Jump into table. + subl3 8(ap),#0,r0 ; Set carry bit. + jmp (r3) + + .align quad +10$: + .rept max_unit_prec + adwc (r1),(r1)+ ; *2+carry. + nop + .endr + +.endc ; novector + clrl r0 ; Assume carry clear. + bcc 20$ ; Carry set? + incl r0 ; Flag carry was set. +20$: ret + + .sbttl P_DMUL Extended Multiple Precision Multiply +;* +; **-P_DMUL-Extended Multiple Precision Multiply +; +; Functional Description: +; +; This procedure multiplies an unsigned single precision multiplier by a +; single precision multiplicand. The product register is double precision. +; It is expected that the length of the single precision multiplier and +; multiplicand has been previously set by a call to P_SETP. Note that the +; entire length of the product register is zeroed - so it must be a full +; double precision size. +; +; Calling Sequence: +; +; P_DMUL prod, multiplicand, multiplier +; +; Parameters: +; +; prod lw*r Product. +; multuplicand lr*r Multiplicand +; multiplier lr*r Multiplier +; +; Implicit Inputs: +; +; PRECIS lr*r Precision expresses in longs. +; +; Status Returns: +; +; None. 
+;-
+
+	.align	long
+
+	.entry	p_dmul,^m<r2,r3,r4,r5,r6,r7,r8,r10,r11>
+;
+; Arguments (all passed as addresses):
+;	 4(AP) -> Product buffer.
+;	 8(AP) -> Multiplicand.
+;	12(AP) -> Multiplier.
+;
+; The entry mask preserves every register in R2-R11 that is modified below
+; (MOVC5 alters R0-R5). R12 is the argument pointer; it is re-used as a
+; counter once the last argument has been fetched, which is safe because
+; the call frame holds the caller's AP and RET restores it.
+;
+; Note that the product buffer is zeroed before the multiplicand and
+; multiplier pointers are checked.
+;
+	movl	4(ap),r8		; R8 -> Product.
+	beql	49$			; If eq, not specified.
+	movl	precis,r10		; R10 = Precision (longs)
+	ashl	#3,r10,r2		; R2 = No. of bytes to zero
+					; (8 per unit of precision).
+	movc5	#0,#0,#0,r2,(r8)	; Zero product buffer.
+	movl	8(ap),r3		; R3 -> Multiplicand.
+	beql	49$			; If eq, not specified.
+	pushl	r3			; Save; reloaded from (SP) at 200$.
+	movl	12(ap),r11		; R11 -> Multiplier.
+	beql	49$			; If eq, not specified.
+	movl	r10,r12			; R12 = Multiplicand prec.
+
+.if defined SUPERSNIFFER
+
+;
+; Here we calculate the effective maximum precision for the multiply by
+; locating the long containing the most significant bit of the multiplier
+; and the multiplicand. Each scan starts one long past the end of the
+; number and walks downwards, decrementing the count for every zero long.
+;
+	moval	(r11)[r10],r0		; R0 -> Past end of multiplier.
+	.align	quad,1			; Align with nops
+45$:	tstl	-(r0)			; Examine next long.
+	bneq	50$			; If ne, then we found msb.
+	sobgtr	r10,45$			; Loop until done.
+49$:	ret				; Multiplier = 0!
+50$:
+	moval	(r3)[r12],r0		; R0 -> Past end of multiplicand.
+	.align	quad,1			; Align with nops
+55$:	tstl	-(r0)			; Examine next long.
+	bneq	200$			; If ne, then we found msb.
+	sobgtr	r12,55$			; Loop until done.
+	ret				; Multiplicand = 0!
+.iff
+
+	brb	200$			; Skip the error return below.
+49$:
+	ret				; An argument was not specified.
+
+.endc					; SUPERSNIFFER
+
+;
+; Multiplier Loop
+;
+; R12 = Count of multiplicand longs to process.
+; R11 -> Next long of multiplier.
+; R10 = Count of multiplier longs to process.
+; R8 -> Next long of product.
+;
+	.align	quad,1			; Align with nops
+200$:	movl	r12,r5			; Multiplicand precision.
+	moval	(r8)+,r4		; R4 -> Next long of product.
+	movl	(sp),r3			; R3 -> 1st multiplicand long.
+	movl	(r4),r0			; R0,R1 = Partial Sum.
+	movl	4(r4),r1
+	clrl	r7			; Zero look-ahead carry.
+;
+; Perform an extended multiply of two unsigned numbers. This means that
+; we have to compensate the hi-order product because either the multiplier
+; or the multiplicand may be apparently a negative number. EMUL is a signed
+; multiply - so we must be careful. Also, the EMUL longword addend is sign
+; extended before adding into the product so we have to add the hard way.
+;
+; R6 = Current Multiplicand
+; R2 = Multiplier
+; R4 -> Current quadword of partial product.
+; R0,R1 = Partial sum to which product is added
+; R7 = Lookahead carry. This gets set if we try to carry after adding
+;	the partial product to the partial sum. This gets a little more
+;	complicated because here we are setting the high-order long of
+;	the next quadword to be operated on.
+;
+; Essentially the algorithm is as follows:
+;
+; 0) R0,R1 = (R4)		; Save current partial sum.
+; 1) R6 = Next longword of multiplicand.
+; 2) (R4) = R6 * R2		; (quad result, compensating for negative
+;				; numbers)
+; 3) (R4) = (R4) + R0,R1	; add back partial sum.
+; 4) R7 = Carry bit.
+; 5) R4 = R4 + 4		; Point to next long.
+; 6) R1 = 4(R4) + R7		; Propagate carry to high order of next partial
+;				; sum.
+; 7) Loop back to step 1 until multiplicand completely processed.
+;
+	movl	(r11)+,r2		; R2 = Multiplier.
+	beql	999$			; If eq, this long adds nothing.
+	blss	1500$			; This unfolds the compensation
+					; test out of the loop.
+;
+; This version of the multiply loop is entered when the multiplier is positive
+; saving three instructions per unit of precision.
+;
+	.align	quad,1			; Align with NOPs.
+500$:	movl	(r3)+,r6		; R6 = Current multiplicand.
+	emul	r2,r6,#0,(r4)		; Multiply (64-bit result).
+;
+; Because we have removed leading zeroes, multiplication by zero is very
+; unlikely, 1 in 2^32 or so. It is therefore easier to test after the EMUL
+; (looking for a zero product) whether the multiplicand was zero, so we
+; don't need any special case logic later to adjust the product pointer.
+;
+	beql	550$			; If result eq, skip.
+	tstl	r6			; Was multiplicand negative?
+	bgeq	550$			; No, skip.
+	addl2	r2,4(r4)		; Yes, compensate.
+550$:	addl2	r0,(r4)+		; Accumulate.
+	adwc	r1,(r4)+
+;
+; MOVL and CLRL leave the C bit untouched, so the carry out of the ADWC
+; above is still available to the two ADWCs that follow.
+;
+	movl	(r4),r1			; R1 = Next hi-end partial sum.
+	adwc	r7,r1			; Add carry if needed.
+	clrl	r7			; Reset lookahead register.
+	adwc	#0,r7			; Save lookahead carry.
+	movl	-(r4),r0		; R0 = Next lo-end partial.
+	sobgtr	r5,500$			; More units?
+999$:	sobgtr	r10,200$		; Nope, go get next multiplier
+	ret
+;
+; This version of the above multiply loop is entered when the multiplier is
+; negative - and we must compensate by adding the multiplicand to the hi-order
+; product. This saves a test and a conditional branch per unit of precision.
+;
+	.align	quad,1			; Align with NOPs.
+1500$:
+	movl	(r3)+,r6		; R6 = Current multiplicand.
+	emul	r2,r6,#0,(r4)		; Multiply (64-bit result).
+;
+; Because we have removed leading zeroes, multiplication by zero is very
+; unlikely, 1 in 2^32 or so. It is therefore easier to test after the EMUL
+; (looking for a zero product) whether the multiplicand was zero, so we
+; don't need any special case logic later to adjust the product pointer.
+;
+	beql	1560$			; If result eq, skip both fix-ups.
+	tstl	r6			; Was multiplicand negative?
+	bgeq	1550$			; No, skip.
+	addl2	r2,4(r4)		; Yes, compensate.
+1550$:
+; As documented above, we unfolded the following to save instructions
+;	tstl	r2			; Multiplier negative?
+;	bgeq	1560$			; No, skip.
+	addl2	r6,4(r4)		; Yes, compensate.
+1560$:	addl2	r0,(r4)+		; Accumulate.
+	adwc	r1,(r4)+		; Add high-end partial sum.
+	movl	(r4),r1			; R1 = Next hi-end partial sum.
+	adwc	r7,r1			; Add carry if needed.
+	clrl	r7			; Reset lookahead register.
+	adwc	#0,r7			; Save lookahead carry.
+	movl	-(r4),r0		; R0 = Next lo-end partial.
+	sobgtr	r5,1500$		; More units?
+	sobgtr	r10,200$		; Nope, go get next multiplier
+	ret
+
+	.sbttl	P_SETP Set Precison.
+;+
+; **-P_SETP-Set Precision
+;
+; Functional Description:
+;
+; This procedure is invoked to set the operating precision of the package.
+;
+; Calling Sequence:
+;
+;	P_SETP nbits
+;
+; Parameters:
+;
+;	nbits	rw*v	Number of bits in number.
+;
+; Implicit Outputs:
+;
+;	Precis	Set to the number of longwords required to implement
+;		the requested precision.
+;	Addoff	This is used as an offset into the various tables
+;		of adds, subtracts and rotates to implement the
+;		operation to the requested precision.
+;
+; Status Returns:
+;
+;	None.
+;
+; Side Effects:
+;
+;	If the maximum precision set in 32-bit units by the assembly
+;	parameter "max_unit_prec" is exceeded, a message to that effect will
+;	be displayed and the program will terminate with a fatal error.
+;-
+
+	.entry	p_setp,^m<>
+
+	movzwl	4(ap),r1		; R1 = No. of bits (low word of
+					; the argument only).
+	addl2	#31,r1			; Round up to next long word.
+	ashl	#-5,r1,r1		; R1 = No. of 32 bit words.
+	movl	r1,precis		; Save precision (stored before
+					; the range check below).
+
+.if not_defined novector
+
+	subl3	r1,#max_unit_prec,r0	; R0 = max_unit_prec - precision.
+	blss	10$			; If lt, maximum exceeded.
+	mull3	#addsiz,r0,addoff	; Get add table offset.
+
+.iftf	; novector
+
+	ret				; Assembled in either case.
+
+.ift	; novector
+
+;
+; Build the error message on the stack and display it. Layout after the
+; pushes below, from high to low address:
+;
+;	80-byte output buffer
+;	buffer descriptor (longword holding length 80, then buffer address)
+;	longword to receive the returned length
+;	the two FAO parameters (max_unit_prec, requested size)
+;	addresses of the descriptor, the length cell and the control string
+;
+10$:					; Table size exceeded!
+	movab	-80(sp),sp		; Output buffer.
+	pushab	(sp)			; Build descriptor
+	movzwl	#80,-(sp)
+	clrl	-(sp)			; Receive return length.
+	pushl	#max_unit_prec		; Compiled max table size.
+	pushl	r1			; Requested table size.
+	pushaq	8+4(sp)			; -> Output buffer descriptor.
+	pushaw	12(sp)			; -> Returned length.
+	pushaq	prectoobig		; -> FAO control string.
+	calls	#5,g^sys$fao		; Format output string.
+	movl	(sp)+,(sp)		; Set actual buffer size (pops the
+					; length cell into the descriptor).
+	pushaq	(sp)			; -> Output buffer descr.
+	calls	#1,g^lib$put_output	; Output message.
+	$exit_s -			; Exit with severe error.
+		code=#4
+
+.endc	; novector
+
+	.end