pgp/src/vax.mar - annotate

Return to vax.mar CVS log
Up to [PGP] / pgp / src
Annotation of pgp/src/vax.mar, revision 1.1.1.1

1.1       root        1: ;Last Modified:  16-APR-1992 09:06:30.46
                      2:        .title  fprims  Fast Multiple Precision Primitives
                      3:        .ident  /V1.7B/
                      4: ;+
                      5: ; **-FPRIMS-Fast Multiple Precision Primitives
                      6: ;
                      7: ; Facility:     PGP
                      8: ;
                      9: ; Language:     Macro-32
                     10: ;
                     11: ; Functional Description:
                     12: ;
                     13: ; This module contains fast multiple precision routines for operating on arrays
                     14: ; of long words. Error checking is minimised at the expense of speed.
                     15: ;
                     16: ; Restrictions:
                     17: ;
                     18: ; This code is shareable but NOT reentrant as written because of static data.
                     19: ; A reentrant version of this module could be written but it would be slower!
                     20: ;
                     21: ; Version:      1
                     22: ;
                     23: ; Original:     00A     Date:   17-Sep-1991     Author: Hugh A.J. Kennedy
                     24: ;
                     25: ; Based on FPRIMS.ASM written by Zhahai Stewart for the Intel 8086
                     26: ; architecture.
                     27: ;
                     28: ; Modification: 02A     Date:   27-Sep-1991     Author: Hugh A.J. Kennedy.
                     29: ;
                     30: ; Add fast multiply routine, P_SMUL.
                     31: ; Re-organise code slightly.
                     32: ; Ammend/clarify copyright and license statement.
                     33: ; Add checking for maximum precision exceeded, display a warning message
                     34: ; and bomb!
                     35: ;
                     36: ; Modification: 03A     Date:   16-Mar-1992     Author: Hugh A.J. Kennedy.
                     37: ;
                     38: ; Sniff for MSB in P_SMUL. In this way, avoid multiplies by leading zeroes
                     39: ; (not efficient).
                     40: ;
                     41: ; Modification: 05A     Date:   17-Mar-1992     Author: Hugh A.J. Kennedy.
                     42: ;
                     43: ; Encode entire double precision multiply in VAX assembler.
                     44: ; Correct some minor problems with handling embedded zeroes.
                     45: ;
                     46: ; Modification: 06A     Date:   17-Mar-1992     Author: Hugh A.J. Kennedy
                     47: ;
                     48: ; Align everything for speed. VAXen like stuff on 64-bit, or at least 32-bit
                     49: ; boundaries. Therefore, we align the add, subtract and rotate tables and then
                     50: ; we align the multiply loops. The extra NOPs used to pad these loops are of
                     51: ; negligable cost because they already exist in the memory buffer. When the
                     52: ; following instruction is aligned, it executes MUCH faster.
                     53: ;
                     54: ; Modification: 07A     Date:   24-Mar-1991     Author: Hugh A.J. Kennedy.
                     55: ;
                     56: ; Implement fast compare.
                     57: ;-
                     58: 
                     59:        .sbttl  Copyright Notice And License To Use
                     60: ;
                     61: ;                 Copyright (c) 1991-1992, All Rights Reserved by
                     62: ;                            Hugh A.J. Kennedy.
                     63: ;
                     64: ;       A license to use and adapt this software without payment is hereby 
                     65: ;       granted subject to the following conditions:
                     66: ;
                     67: ;       1) It may only be copied with the inclusion of this copyright
                     68: ;       notice in the program source with these associated conditions.
                     69: ;
                     70: ;       2) No title to or ownership of this software is hereby 
                     71: ;       transferred.
                     72: ;
                     73: ;       3) The  information  in this software is subject  to change 
                     74: ;       without notice and should  not be construed as a  commitment  by  
                     75: ;       Hugh Kennedy.
                     76: ;
                     77: ;       4) The author assumes no liability for any damages arising from the 
                     78: ;       use of this software, even if said damages  arises from defects in 
                     79: ;       this software.
                     80: ;
                     81: ;       5) No warranty as to merchantability or fitness of purpose is 
                     82: ;       expressed or implied.
                     83: ;
                     84: ;       6) Any modifications to this source must be clearly identified as
                     85: ;       such and added to the modification history.
                     86: ;
                     87: ;       7) These routines may not be incorporated in a commercial cryptographic
                     88: ;       product.
                     89: ;
                     90: ; If you can not comply with these conditions, you *must* contact the author
                     91: ; and obtain permission other wise you are in violation of copyright.
                     92: 
                     93:        .sbttl  Misc Macros & Definitions
                     94: ;
                     95: ; Assembly Parameters
                     96: ;
                     97: max_unit_prec   =       72                      ; Maximum unit precision
                     98: supersniffer    =       1                       ; Enable bit msb locator.
                     99: ;
                    100: ; The following parameter is dependent on the kind of VAX you are running on
                    101: ; and should be defined if the execution time of the SOBGTR loop control
                    102: ; instruction and the appropriate operation (ADWC or SBWC) from cache is much
                    103: ; less than the execution time in main memory. If you have a slow VAX you
                    104: ; should comment the following line out to use a vector of instructions.
                    105: ;
                    106: novector        =       1                       ; Use loops rather than vectors.
                    107: 
                    108: .macro  ascid   .string
                    109: ;+
                    110: ; *-ASCID-Build An ASCII String Referenced By Descriptor
                    111: ;
                    112: ; Functional Description:
                    113: ;
                    114: ;       This macro is a little like the system supplied .ASCID directive
                    115: ; but it uses a separate program section to store the ASCII data.
                    116: ;
                    117: ; Arguments:
                    118: ;
                    119: ;       STRING          String to create
                    120: ;-
                    121:        .nocross
                    122: 
                    123:        .save_psect
                    124: 
                    125:        .psect  puret
                    126: 
                    127: $$$t0   =       .
                    128:        .ascii  @.string@
                    129: $$$t1   =       .-$$$t0
                    130: 
                    131:        .restore_psect
                    132: 
                    133:        .word   $$$t1
                    134:        .byte   dsc$k_dtype_t
                    135:        .byte   dsc$k_class_s
                    136:        .address -
                    137:                $$$t0
                    138:        .cross
                    139: 
                    140: .endm   ascid
                    141: 
                    142:        .sbttl  Misc Data Areas
                    143: ;
                    144: ; Misc. Data Areas
                    145: ;
                    146:        .psect  impurd,con,lcl,noshr,exe,rd,wrt,long
                    147: 
                    148: ;
                    149: ; This data is static and is used to hold the current precision established
                    150: ; by P_SETP for other calls to this library.
                    151: ;
                    152: .if not_defined novector
                    153: 
                    154: addoff:                                         ; Offset into add table.
                    155:        .blkl   1                               ; also for sub and rot.
                    156: .endc
                    157: 
                    158: precis:                                         ; Precision in longwords.
                    159:        .blkl   1
                    160: 
                    161:        .psect  pure,con,rel,shr,exe,rd,nowrt,quad
                    162: 
                    163:        .align  quad
                    164: 
                    165: .ifndef novector
                    166: 
                    167: prectoobig:
                    168:        ascid   <PGP (FPRIMS) - Requested precision (!ZL) exceeds capacity (!ZL)>
                    169: 
                    170: .endc
                    171: 
                    172:        .sbttl  Start of Code
                    173: 
                    174:        .sbttl  P_CMP           Compare two very long integers
                    175: ;+
                    176: ; **-P_COMP-Compare two very long integers
                    177: ;
                    178: ; Functional Description:
                    179: ;
                    180: ; This procedure is invoked to compare two extended precision unsigned
                    181: ; integers.
                    182: ;
                    183: ; Calling Sequence
                    184: ;
                    185: ;       short P_CMP ( r1, r2)
                    186: ;
                    187: ; Parameters:
                    188: ;
                    189: ;       R1      ->      Extended Precision Integer 1
                    190: ;       R2      ->      Extended Precision Integer 2
                    191: ;
                    192: ; Implicit Inputs:
                    193: ;
                    194: ;       PRECIS          lr*r    Precision expresses in longs.
                    195: ;
                    196: ; Returns:
                    197: ;
                    198: ;       -1      if r1 < r2
                    199: ;        0      if r1 = r2
                    200: ;       +1      if r1 > r2
                    201: ;
                    202: 
                    203: ;-
                    204: 
                    205:        .align  long
                    206: 
                    207:        .entry  p_cmp,^m<r2>
                    208: 
                    209:        movl    4(ap),r1                        ; R1 -> Sum.
                    210:        movl    8(ap),r2                        ; R2 -> Addend.
                    211:        movl    precis,r0                       ; R0 = Precision.
                    212:        moval   (r1)[r0],r1                     ; Get MS longwords.
                    213:        moval   (r2)[r0],r2                     ; Get MS longwords.
                    214: .align  long    1                               ; Align loop with NOPS.
                    215: 10$:    cmpl    -(r1),-(r2)                     ; Compare.
                    216:        bnequ   20$                             ; If ne, then exit loop.
                    217:        sobgtr  r0,10$                          ; Loop until done.
                    218:        ret                                     ; R0 = zero so R1 = R2.
                    219: 20$:
                    220:        bgtru   30$                             ; If R1 > R2 then branch.
                    221:        movw    #-1,r0                          ; Flag <.
                    222:        ret
                    223: 30$:
                    224:        movw    #1,r0                           ; Flag >.
                    225:        ret
                    226: 
                    227:        .sbttl  P_ADDC          Add two very long integers with carry
                    228: ;+
                    229: ; **-P_ADDC-Add very long integers
                    230: ;
                    231: ; Functional Description:
                    232: ;
                    233: ; This procedure is invoked to add two very long integers with carry. Each
                    234: ; integer is represented as an array of longwords, least significant first.
                    235: ;
                    236: ; Calling Sequence:
                    237: ;
                    238: ;       P_ADDC  sum,addend,carry
                    239: ;
                    240: ; Parameters:
                    241: ;
                    242: ;       sum             lm*r            Sum.
                    243: ;       addend          lr*r            Addend.
                    244: ;       carry           lr*v            Carry bit.
                    245: ;
                    246: ; Implicit Inputs:
                    247: ;
                    248: ;       Addoff          This is used as an offset into the various tables
                    249: ;                       of adds, subtracts and rotates to implement the
                    250: ;                       operation to the requested precsion.
                    251: ;
                    252: ; Status Returns:
                    253: ;
                    254: ;       R0      Resulting carry bit.
                    255: ;-
                    256: 
                    257:        .align  long
                    258: 
                    259:        .entry  p_addc,^m<r2,r3>
                    260: 
                    261:        movl    4(ap),r1                        ; R1 -> Sum.
                    262:        movl    8(ap),r2                        ; R2 -> Addend.
                    263: 
                    264: .if defined novector
                    265: 
                    266:        movl    precis,r3                       ; R3 = Precision.
                    267:        subl3   12(ap),#0,r0                    ; Set carry bit.
                    268:        .align  quad,1                          ; Align loop with NOPs
                    269: 10$:    adwc    (r2)+,(r1)+                     ; Add with carry one longword.
                    270:        .align  quad,1                          ; Align next instruction.
                    271:        sobgtr  r3,10$                          ; Loop until done.
                    272: 
                    273: .iff ; novector
                    274: 
                    275:        moval   10$,r3
                    276:        addl2   addoff,r3                       ; Jump into table.
                    277:        subl3   12(ap),#0,r0                    ; Set carry bit.
                    278:        jmp     (r3)
                    279: 
                    280:        .align  quad
                    281: 
                    282: 10$:
                    283:        .rept   max_unit_prec
                    284: $$$     =       .
                    285:        adwc    (r2)+,(r1)+                     ; Add with carry one longword.
                    286:        nop
                    287: addsiz  =       .-$$$
                    288:        .endr
                    289: 
                    290: .endc ; novector
                    291: 
                    292:        clrl    r0                              ; Assume carry clear.
                    293:        bcc     20$                             ; Carry set?
                    294:        incl    r0                              ; Flag carry was set.
                    295: 20$:    ret
                    296: 
                    297:        .sbttl  P_SUBB  Subtract very long integers with borrow
                    298: ;+
                    299: ; **-P_SUBB-Subtract very long integers
                    300: ;
                    301: ; Functional Description:
                    302: ;
                    303: ; This procedure is invoked to add subtract very long integers with carry. Each
                    304: ; integer is represented as an array of longwords, least significant first.
                    305: ;
                    306: ; Calling Sequence:
                    307: ;
                    308: ;       P_SUBB  diff,sub,borrow
                    309: ;
                    310: ; Parameters:
                    311: ;
                    312: ;       diff            lm*r            Difference
                    313: ;       sub             lr*r            Subtrahend.
                    314: ;       borrow          lr*v            Borrow bit.
                    315: ;
                    316: ; Implicit Inputs:
                    317: ;
                    318: ;       Addoff          This is used as an offset into the various tables
                    319: ;                       of adds, subtracts and rotates to implement the
                    320: ;                       operation to the requested precsion.
                    321: ;
                    322: ; Status Returns:
                    323: ;
                    324: ;       R0      Resulting carry bit.
                    325: ;-
                    326: 
                    327:        .align  long
                    328: 
                    329:        .entry  p_subb,^m<r2,r3>
                    330: 
                    331:        movl    4(ap),r1                        ; R1 -> Difference.
                    332:        movl    8(ap),r2                        ; R2 -> Minuend.
                    333: 
                    334: .if defined novector
                    335: 
                    336:        movl    precis,r3                       ; R3 = No. of longs.
                    337:        subl3   12(ap),#0,r0                    ; Set borrow bit.
                    338:        .align  quad,1                          ; Align loop with NOPs.
                    339: 10$:    sbwc    (r2)+,(r1)+                     ; Subtract with borrow one long.
                    340:        .align  quad,1                          ; Align with NOPs.
                    341:        sobgtr  r3,10$                          ; Loop through.
                    342: 
                    343: .iff ; novector
                    344: 
                    345:        moval   10$,r3
                    346:        addl2   addoff,r3                       ; Jump into table.
                    347:        subl3   12(ap),#0,r0                    ; Set borrow bit.
                    348:        jmp     (r3)
                    349: 
                    350:        .align  quad
                    351: 10$:
                    352:        .rept   max_unit_prec
                    353:        sbwc    (r2)+,(r1)+                     ; Subtract w/carry one longword.
                    354:        nop
                    355:        .endr
                    356: 
                    357: .endc ; novector
                    358: 
                    359:        clrl    r0                              ; Assume carry clear.
                    360:        bcc     20$                             ; Carry set?
                    361:        incl    r0                              ; Flag carry was set.
                    362: 20$:    ret
                    363: 
                    364:        .sbttl  P_ROTL  Rotate left a very long integer with carry.
                    365: ;+
                    366: ; **-P_ROTL-Rotate left one bit very long integers
                    367: ;
                    368: ; Functional Description:
                    369: ;
                    370: ; This procedure is invoked to rotate left one bit (e.g. divide by 2) very 
                    371: ; long integers with carry. Each integer is represented as an array of 
                    372: ; longwords, least significant first. Note that we use the add with carry
                    373: ; instruction here because the VAX (unlike the dear old PDP-11) lacks a
                    374: ; rotate instruction that includes the carry bit.
                    375: ;
                    376: ; Calling Sequence:
                    377: ;
                    378: ;       P_ROTL  num,carry
                    379: ;
                    380: ; Parameters:
                    381: ;
                    382: ;       num             lm*r            Number to be shifted
                    383: ;       carry           lr*v            Carry bit.
                    384: ;
                    385: ; Implicit Inputs:
                    386: ;
                    387: ;       Addoff          This is used as an offset into the various tables
                    388: ;                       of adds, subtracts and rotates to implement the
                    389: ;                       operation to the requested precsion.
                    390: ;
                    391: ; Status Returns:
                    392: ;
                    393: ;       R0      Resulting carry bit.
                    394: ;-
                    395: 
                    396:        .align  long
                    397: 
                    398:        .entry  p_rotl,^m<r3>
                    399: 
                    400:        movl    4(ap),r1                        ; R1 -> Sum.
                    401: 
                    402: .if defined novector
                    403: 
                    404:        movl    precis,r3                       ; R3 = No. of longwords.
                    405:        subl3   8(ap),#0,r0                     ; Set carry bit.
                    406:        .align  quad,1                          ; Align loop with NOPs
                    407: 10$:    adwc    (r1),(r1)+                      ; Add to itself with carry.
                    408:        .align  quad,1                          ; Align with NOPs.
                    409:        sobgtr  r3,10$                          ; Loop until done.
                    410: 
                    411: .iff ; novector
                    412: 
                    413:        moval   10$,r3
                    414:        addl2   addoff,r3                       ; Jump into table.
                    415:        subl3   8(ap),#0,r0                     ; Set carry bit.
                    416:        jmp     (r3)
                    417: 
                    418:        .align  quad
                    419: 10$:
                    420:        .rept   max_unit_prec
                    421:        adwc    (r1),(r1)+                      ; *2+carry.
                    422:        nop
                    423:        .endr
                    424: 
                    425: .endc ; novector
                    426:        clrl    r0                              ; Assume carry clear.
                    427:        bcc     20$                             ; Carry set?
                    428:        incl    r0                              ; Flag carry was set.
                    429: 20$:    ret
                    430: 
                    431:        .sbttl  P_DMUL  Extended Multiple Precision Multiply
                    432: ;*
                    433: ; **-P_DMUL-Extended Multiple Precision Multiply
                    434: ;
                    435: ; Functional Description:
                    436: ;
                    437: ; This procedure multiplies an unsigned single precision multiplier by a 
                    438: ; single precision multiplicand. The product register is double precision.
                    439: ; It is expected that the length of the single precision multiplier and
                    440: ; multiplicand has been previously set by a call to P_SETP. Note that the
                    441: ; entire length of the product register is zeroed - so it must be a full
                    442: ; double precision size.
                    443: ;
                    444: ; Calling Sequence:
                    445: ;
                    446: ;       P_DMUL  prod, multiplicand, multiplier
                    447: ;
                    448: ; Parameters:
                    449: ;
                    450: ;       prod            lw*r    Product.
                    451: ;       multuplicand    lr*r    Multiplicand
                    452: ;       multiplier      lr*r    Multiplier
                    453: ;
                    454: ; Implicit Inputs:
                    455: ;
                    456: ;       PRECIS          lr*r    Precision expresses in longs.
                    457: ;
                    458: ; Status Returns:
                    459: ;
                    460: ;       None.
                    461: ;-
                    462: 
                    463:        .align  long
                    464: 
                    465:        .entry  p_dmul,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
                    466: 
                    467:        movl    4(ap),r8                        ; R8 -> Product.
                    468:        beql    49$                             ; If eq, not specified.
                    469:        movl    precis,r10                      ; R10 = Precision (longs)
                    470:        ashl    #3,r10,r2                       ; R0 = No. of bytes to zero.
                    471:        movc5   #0,#0,#0,r2,(r8)                ; Zero product buffer.
                    472:        movl    8(ap),r3                        ; R3 -> Multiplicand.
                    473:        beql    49$                             ; If eq, not specified.
                    474:        pushl   r3                              ; Save for posterity.
                    475:        movl    12(ap),r11                      ; R11 -> Multiplier.
                    476:        beql    49$                             ; If eq, not specified.
                    477:        movl    r10,r12                         ; R12 = Multiplicand prec.
                    478: 
                    479: .if     defined SUPERSNIFFER
                    480: 
                    481: ;
                    482: ; Here we calculate the effective maximum precision for the multiply by
                    483: ; locating the long containing the most significant bit of the multiplier
                    484: ; and the multiplicand.
                    485: ;
                    486:        moval   (r11)[r10],r0                   ; Supersniffer...
                    487:        .align  quad,1                          ; Align with nops
                    488: 45$:    tstl    -(r0)                           ; Examine next long.
                    489:        bneq    50$                             ; If ne, then we found msb.
                    490:        sobgtr  r10,45$                         ; Loop until done.
                    491: 49$:    ret                                     ; Multiplier = 0!
                    492: 50$:
                    493:        moval   (r3)[r12],r0                    ; Supersniffer...
                    494:        .align  quad,1                          ; Align with nops
                    495: 55$:    tstl    -(r0)                           ; Examine next long.
                    496:        bneq    200$                            ; If ne, then we found msb.
                    497:        sobgtr  r12,55$                         ; Loop until done.
                    498:        ret                                     ; Multiplicand = 0!
                    499: .iff
                    500: 
                    501:        brb     200$
                    502: 49$:
                    503:        ret
                    504: 
                    505: .endc   ; SUPERSNIFFER
                    506: 
                    507: ;
                    508: ; Multiplier Loop
                    509: ;
                    510: ; R12 = Count of multiplicand longs to process.
                    511: ; R11 -> Next long of multiplier.
                    512: ; R10 = Count of multiplier longs to process.
                    513: ; R8 -> Next long of product.
                    514: ;
                    515:        .align  quad,1                          ; Align with nops
                    516: 200$:   movl    r12,r5                          ; Multiplicand precision.
                    517:        moval   (r8)+,r4                        ; R4 -> Next long of product.
                    518:        movl    (sp),r3                         ; R3 -> 1st multiplier long.
                    519:        movl    (r4),r0                         ; R0,R1 = Partial Sum.
                    520:        movl    4(r4),r1
                    521:        clrl    r7                              ; Zero look-ahead carry.
                    522: ;
                    523: ; Perform an extended multiply of two unsigned numbers. This means that
                    524: ; we have to compensate the hi-order product because either the multiplier
                    525: ; or the multiplicand may be apparently a negative number. EMUL is a signed
                    526: ; multiply - so we must be careful. Also, the EMUL longword addend is sign 
                    527: ; extended before adding into the product so we have to add the hard way.
                    528: ;
                    529: ; R6 =          Current Multiplicand
                    530: ; R2 =          Multiplier
                    531: ; R4 ->         Current quadword of partial product.
                    532: ; R0,R1 =       Partial sum to which product is added
                    533: ; R7 =          Lookahead carry. This gets set if we try to carry after adding
                    534: ;               the partial product to the partial sum. This gets a little more
                    535: ;               complicated because here we are setting the high-order long of
                    536: ;               the next quadword to be operated on.
                    537: ;
                    538: ; Essentially the algorithm is as follows:
                    539: ;
                    540: ; 0) R0,R1 = (R4)               ; Save current partial sum.
                    541: ; 1) R6 = Next longword of multiplicand.
                    542: ; 2) (R4) = R6 * R2             ; quad result compensating for negative numbers)
                    543: ; 3) (R4) = (R4) + R0,R1        ; add back partial sum.
                    544: ; 4) R7 = Carry bit.
                    545: ; 5) R4 = R4 + 4                ; Point to next long.
                    546: ; 6) R1 = 4(R4) + R7            ; Propagate carry to high order of next partial
                    547: ;                               ; sum.
                    548: ; 7) Loop back to step 1 until multiplicand completely processed.
                    549: ;
                    550:        movl    (r11)+,r2                       ; R2 = Multiplier.
                    551:        beql    999$                            ; If eq, not specified.
                    552:        blss    1500$                           ; This unfolds the compensation
                    553:                                                ; test out of the loop.
                    554: ;
                    555: ; This version of the multiply loop is entered when the multiplier is positive
                    556: ; saving three instructions per unit of precision.
                    557: ;
                    558:        .align  quad,1                          ; Align with NOPs.
                    559: 500$:   movl    (r3)+,r6                        ; R6 = Current multplicand.
                    560:        emul    r2,r6,#0,(r4)                   ; Multiply (64-bit result).
                    561: ;
                    562: ; Because we have removed leading zeroes, multiplication by zero is very
                    563: ; unlikely, 1 in 2^32 or so. It is therefore easier to perform the test after
                    564: ; the EMUL (looking at the zero product) that the multiplicand was zero so we 
                    565: ; don't need any special case logic later to adjust the product pointer.
                    566: ;
                    567:        beql    550$                            ; If result eq, skip.
                    568:        tstl    r6                              ; Was multiplicand negative?
                    569:        bgeq    550$                            ; No, skip.
                    570:        addl2   r2,4(r4)                        ; Yes, compensate.
                    571: 550$:   addl2   r0,(r4)+                        ; Accumulate.
                    572:        adwc    r1,(r4)+
                    573:        movl    (r4),r1                         ; R1 = Next hi-end partial sum.
                    574:        adwc    r7,r1                           ; Add carry if needed.
                    575:        clrl    r7                              ; Reset lookahead register.
                    576:        adwc    #0,r7                           ; Save lookahead carry.
                    577:        movl    -(r4),r0                        ; R0 = Next lo-end partial.
                    578:        sobgtr  r5,500$                         ; More units?
                    579: 999$:   sobgtr  r10,200$                        ; Nope, go get next multiplier
                    580:        ret
                    581: ;
                    582: ; This version of the above multiply loop is entered when the multiplier is
                    583: ; negative - and we must compensate by adding the multiplicand to the hi-order
                    584: ; product. This saves a test and a conditional branch per unit of precision.
                    585: ;
                    586:        .align  quad,1                          ; Align with NOPs.
                    587: 1500$:
                    588:        movl    (r3)+,r6                        ; R6 = Current multplicand.
                    589:        emul    r2,r6,#0,(r4)                   ; Multiply (64-bit result).
                    590: ;
                    591: ; Because we have removed leading zeroes, multiplication by zero is very
                    592: ; unlikely, 1 in 2^32 or so. It is therefore easier to perform the test after
                    593: ; the EMUL (looking at the zero product) that the multiplicand was zero so we 
                    594: ; don't need any special case logic later to adjust the product pointer.
                    595: ;
                    596:        beql    1560$                           ; If result eq, skip.
                    597:        tstl    r6                              ; Was multiplicand negative?
                    598:        bgeq    1550$                           ; No, skip.
                    599:        addl2   r2,4(r4)                        ; Yes, compensate.
                    600: 1550$:
                    601: ; As documented above, we unfolded the following to save instructions
                    602: ;       tstl    r2                              ; Multiplier negative?
                    603: ;       bgeq    1560$                           ; No, skip.
                    604:        addl2   r6,4(r4)                        ; Yes, compensate.
                    605: 1560$:  addl2   r0,(r4)+                        ; Accumulate.
                    606:        adwc    r1,(r4)+                        ; R1 = High-end partial sum.
                    607:        movl    (r4),r1                         ; R1 = Next hi-end partial sum.
                    608:        adwc    r7,r1                           ; Add carry if needed.
                    609:        clrl    r7                              ; Reset lookahead register.
                    610:        adwc    #0,r7                           ; Save lookahead carry.
                    611:        movl    -(r4),r0                        ; R0 = Next lo-end partial.
                    612:        sobgtr  r5,1500$                        ; More units?
                    613:        sobgtr  r10,200$                        ; Nope, go get next multiplier
                    614:        ret
                    615: 
                    616:        .sbttl  P_SETP          Set Precison.
                    617: ;+
                    618: ; **-P_SETP-Set Precision
                    619: ;
                    620: ; Functional Description:
                    621: ;
                    622: ; This procedure is invoked to set the operating precision of the package.
                    623: ;
                    624: ; Calling Sequence:
                    625: ;
                    626: ;       P_SETP  nbits
                    627: ;
                    628: ; Parameters:
                    629: ;
                    630: ;       nbits           rw*v    Number of bits in number.
                    631: ;
                    632: ; Implicit Outputs:
                    633: ;
                    634: ;       Precis          Set to the number of longwords required to implement
                    635: ;                       the requested precision.
                    636: ;       Addoff          This is used as an offset into the various tables
                    637: ;                       of adds, subtracts and rotates to implement the
                    638: ;                       operation to the requested precsion.
                    639: ;
                    640: ; Status Returns:
                    641: ;
                    642: ;       None.
                    643: ;
                    644: ; Side Effects:
                    645: ;
                    646: ;       If the maximum precision set in 32-bit units by the assembly
                    647: ;       parameter "max_unit_prec" is exceeded, a message to that effect will
                    648: ;       be displayed and the program will terminate with a fatal error.
                    649: ;-
                    650: 
                    651:        .entry  p_setp,^m<>
                    652: 
                    653:        movzwl  4(ap),r1                        ; R1 = No. of bits.
                    654:        addl2   #31,r1                          ; Round up to next long word.
                    655:        ashl    #-5,r1,r1                       ; R1 = No. of 32 bit words.
                    656:        movl    r1,precis                       ; Save precision.
                    657: 
                    658: .if not_defined novector
                    659: 
                    660:        subl3   r1,#max_unit_prec,r0            ; R0 = Number of steps reqd.
                    661:        blss    10$                             ; If > 0 then exit.
                    662:        mull3   #addsiz,r0,addoff               ; Get add table offset.
                    663: 
                    664: .iftf ; novector
                    665: 
                    666:        ret
                    667: 
                    668: .ift ; novector
                    669: 
                    670: 10$:                                            ; Table size exceeded!
                    671:        movab   -80(sp),sp                      ; Output buffer.
                    672:        pushab  (sp)                            ; Build descriptor
                    673:        movzwl  #80,-(sp)
                    674:        clrl    -(sp)                           ; Receive return length.
                    675:        pushl   #max_unit_prec                  ; Compiled max table size.
                    676:        pushl   r1                              ; Requested table size.
                    677:        pushaq  8+4(sp)                         ; -> Output buffer descriptor.
                    678:        pushaw  12(sp)                          ; -> Returned length.
                    679:        pushaq  prectoobig                      ; -> FAO control string.
                    680:        calls   #5,g^sys$fao                    ; Format output string.
                    681:        movl    (sp)+,(sp)                      ; Set actual buffer size.
                    682:        pushaq  (sp)                            ; -> Output buffer descr.
                    683:        calls   #1,g^lib$put_output             ; Output message.
                    684:        $exit_s -                               ; Exit with severe error.
                    685:                code=#4
                    686: 
                    687: .endc ; novector
                    688: 
                    689:        .end
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.