|
|
; Assembly primitives for RSA multiprecision library
;
; Tested with Turbo Assembler 1.0 and masm 1.00
;
; Written by Branko Lankester ([email protected])        10/10/91
;
; Modified to add, rather than store carry bit to allow using a
; smaller precision for long division.
;
; Syntax: MASM/TASM, 16-bit 8086 real mode.
; Calling convention: C, arguments on the stack, result in AX.
; Exported names carry a leading underscore.

; define LDATA and LCODE as follows:
;       model:  small   compact medium  large
;       LDATA     0       1       0       1
;       LCODE     0       0       1       1

LDATA   equ     1
LCODE   equ     1

; Note: Only the large memory model has been implemented for P_SMULA,
; P_SETRECIP and P_QUO_DIGIT.  Those three routines are always assembled
; as PROC FAR with far data pointers, whatever LDATA/LCODE say.

; Destination operand of the unrolled add/subtract steps.  With far data
; the destination is addressed through ES, which costs one extra prefix
; byte per instruction (see the size correction in _P_SETP).
IF LDATA
DSTPTR  equ     es:[bx+si]
ELSE
DSTPTR  equ     [bx+si]
ENDIF

; Stack offsets of the C arguments relative to BP.  A far return address
; takes 4 bytes (first arg at bp+6), a near one 2 bytes (first arg at
; bp+4); far data pointers take 4 bytes each, near ones 2 bytes.
IF LCODE
prec    equ     [bp+6]          ; 1st arg
r1      equ     [bp+6]          ; 1st arg
  IF LDATA
r2      equ     [bp+10]         ; 2nd arg
carry   equ     [bp+14]         ; 3rd arg
scarry  equ     [bp+10]         ; carry for shift (arg 2)
  ELSE
r2      equ     [bp+8]
carry   equ     [bp+10]
scarry  equ     [bp+8]
  ENDIF
ELSE                            ; small code model
prec    equ     [bp+4]
r1      equ     [bp+4]
  IF LDATA
r2      equ     [bp+8]
carry   equ     [bp+12]
scarry  equ     [bp+8]
  ELSE
r2      equ     [bp+6]
carry   equ     [bp+8]
scarry  equ     [bp+6]
  ENDIF
ENDIF

; IFE ("if expression equals zero") is used here rather than IF NOT:
; the assembler's NOT operator is a bitwise complement, so NOT 1 is
; 0FFFEh, which is still non-zero, and "IF NOT LCODE" was true in every
; memory model.
IFE LCODE
UPTON_TEXT = _TEXT
ENDIF

_TEXT   segment byte public 'CODE'
DGROUP  group   _DATA,_BSS
        assume  cs:_TEXT,ds:DGROUP
_TEXT   ends

_DATA   segment word public 'DATA'
_DATA   ends

; State written by _P_SETP and read by the arithmetic primitives.
_BSS    segment word public 'BSS'
prec16   dw     ?               ; number of full 16-unit groups
                                ;   (unitprec / 16, i.e. bits / 256)
unitprec dw     ?               ; precision in 16-bit units (bits / 16,
                                ;   rounded up)
addp    dw      ?               ; jump offset into the ADDU 15 tail
subp    dw      ?               ; jump offset into the SUBU 15 tail
rotp    dw      ?               ; jump offset into the rotate tail
mulp    dw      ?               ; jump offset into the MULU 15 tail
_BSS    ends

_TEXT   segment byte public 'CODE'

        public  _P_SETP
        public  _P_ADDC
        public  _P_SUBB
        public  _P_MUSUBB
        public  _P_ROTL

; Dummy procedure wrapping the primitives below; its distance attribute
; follows the code model selected by LCODE.
IF LCODE
fprims  proc    far             ; dummy proc
ELSE
fprims  proc    near
ENDIF

;
; ********************  set precision  ********************
;
; P_SETP (prec)
;   In:   prec = precision in bits (1st stack argument).
;   Out:  unitprec, prec16, addp, subp, rotp and mulp are stored.
;
; Each primitive processes prec16 blocks of 16 units in a loop and then
; jumps into the middle of a 15-step unrolled tail so that exactly
; (unitprec mod 16) further steps run.  The entry point is the label
; following the tail minus (steps * bytes per step), so the byte sizes
; used here must match the encoded size of each unrolled step.
;
_P_SETP:
        push    bp
        mov     bp,sp
        mov     ax, prec        ; precision in bits
        add     ax, 0fh         ; round up to a whole unit
        mov     cl,4
        shr     ax,cl           ; prec. in units
        mov     unitprec,ax
        push    ax
        shr     ax,cl
        mov     prec16,ax       ; units / 16 = number of 16-unit blocks
        pop     ax
        and     ax,0fh          ; ax = units % 16 = steps left for the tail
        mov     bx,ax
        mov     cx,ax           ; cx = tail steps (used for mulp below)
        shl     bx,1            ; multiply by 4 (=number of bytes
        shl     bx,1            ;  in instruction sequence)
        mov     dx,bx
IFE LDATA
        sub     dx,ax           ; small model only 3 for add/sub
ENDIF
        mov     ax,offset add_ref
        sub     ax,dx
        mov     addp,ax

        mov     ax,offset sub_ref
        sub     ax,dx
        mov     subp,ax

        mov     ax,offset rot_ref
        sub     ax,bx           ; rotate step is 4 bytes in every model
        mov     rotp,ax

        mov     ax,offset mul_ref
        shl     bx,1            ; MULU macro is 17 bytes for large data
        shl     bx,1            ; bx = 16 * steps
        sub     ax,bx
        sub     ax,cx           ; ... plus 1 * steps = 17 * steps
        mov     mulp,ax

        pop     bp
        ret



;
; ********************  mpi add with carry  ********************
;
; ADDU n: n unrolled steps of "load a source unit, add it with carry
; into the destination unit".  lodsw advances SI, and BX holds the
; (destination - source - 2) offset, so bx+si addresses the destination
; unit that matches the source unit just loaded.
;
ADDU    macro   n
        rept    n
                lodsw
                adc     DSTPTR,ax
        endm
endm

;
; P_ADDC (r1, r2, carry)
;   In:   r1 = destination, r2 = source, carry = incoming carry (bit 0).
;   Out:  r1 += r2 + carry over unitprec units; AX = carry out (0 or 1).
;   Note: the carry argument's stack slot is shifted in place to load CF.
;
_P_ADDC:
        push    bp
        mov     bp,sp
        push    si
        mov     cx, prec16      ; read the statics before DS is changed
        mov     dx, addp
IF LDATA
        push    ds
        lds     si, dword ptr r2
        les     bx, dword ptr r1
ELSE
        mov     si, r2
        mov     bx, r1
ENDIF
        sub     bx, si          ; calculate relative offset
        dec     bx              ; minus 2: lodsw has already advanced SI
        dec     bx              ;  when the destination is addressed
        cld
        shr     byte ptr carry,1        ; load carry
        jcxz    add_units       ; no full 16-unit block
add_16u:
        ADDU    16
        loop    add_16u         ; loop leaves CF untouched
add_units:
        jmp     dx              ; enter the tail at the computed step
        ADDU    15
add_ref:
        rcl     ax,1            ; return carry
        and     ax,1
IF LDATA
        pop     ds
ENDIF
        pop     si
        pop     bp
        ret



;
; ********************  mpi subtract with borrow  ********************
;
; SUBU n: n unrolled steps of "load a source unit, subtract it with
; borrow from the destination unit".  Addressing is the same as in ADDU.
;
SUBU    macro   n
        rept    n
                lodsw
                sbb     DSTPTR,ax
        endm
endm

;
; P_SUBB (r1, r2, carry) / P_MUSUBB (r1, r2, carry)
;   In:   r1 = destination, r2 = source, carry = incoming borrow (bit 0).
;   Out:  r1 -= r2 + borrow over unitprec units; AX = borrow out (0 or 1).
;
_P_MUSUBB:                      ; MULTUNIT is same size as unit
_P_SUBB:
        push    bp
        mov     bp,sp
        push    si
        mov     cx, prec16      ; read the statics before DS is changed
        mov     dx, subp
IF LDATA
        push    ds
        lds     si, dword ptr r2
        les     bx, dword ptr r1
ELSE
        mov     si, r2
        mov     bx, r1
ENDIF
        sub     bx, si          ; calculate relative offset
        dec     bx              ; minus 2, see _P_ADDC
        dec     bx
        cld
        shr     byte ptr carry,1        ; load borrow into CF
        jcxz    sub_units       ; no full 16-unit block
sub_16u:
        SUBU    16
        loop    sub_16u
sub_units:
        jmp     dx              ; enter the tail at the computed step
        SUBU    15
sub_ref:
        rcl     ax,1            ; return carry
        and     ax,1
IF LDATA
        pop     ds
ENDIF
        pop     si
        pop     bp
        ret



;
; ********************  mpi rotate left  ********************
;
; P_ROTL (r1, scarry)
;   In:   r1 = number to rotate, scarry = bit shifted into the lowest
;         unit (bit 0 of the argument).
;   Out:  r1 rotated left by one bit through carry over unitprec units;
;         AX = bit shifted out of the last unit (0 or 1).
;
_P_ROTL:
        push    bp
        mov     bp,sp
        mov     cx, prec16      ; read the statics before DS is changed
        mov     dx, rotp
IF LDATA
        push    ds
        lds     bx, dword ptr r1
ELSE
        mov     bx, r1
ENDIF
        shr     byte ptr scarry,1       ; load incoming bit into CF
        jcxz    rot_units       ; no full 16-unit block
rot_16u:
i = 0
        rept    16
                rcl     word ptr [bx + i],1
                i = i + 2
        endm
        lahf                    ; keep CF across the pointer update
        add     bx,32           ; 16 units * 2 bytes
        sahf
        loop    rot_16u
rot_units:
        jmp     dx              ; enter the tail at the computed step
        rept    15
                rcl     word ptr [bx],1
                inc     bx      ; inc leaves CF untouched
                inc     bx
        endm
rot_ref:

        rcl     ax,1            ; return carry
        and     ax,1
IF LDATA
        pop     ds
ENDIF
        pop     bp
        ret

fprims  endp




; ***************************************************************
; P_SMULA (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier)
;       mp_smul routine from Upton's modmult, converted to assembler
;
;       Multiply the single-word multiplier times the multiprecision integer
;       in multiplicand, accumulating result in prod.  The resulting
;       multiprecision prod will be 1 word longer than the multiplicand.
;       multiplicand is unit_prec words long.  We add into prod, so caller
;       should zero it out first.
;
;       NOTE:  Unlike other functions in the multiprecision arithmetic
;       library, both multiplicand and prod are pointing at the LSB,
;       regardless of byte order of the machine.  On an 80x86, this makes
;       no difference.  But if this assembly function is implemented
;       on a 680x0, it becomes important.
;
;       This version differs from P_SMUL by adding in, rather than storing,
;       the final carry.  This better supports use by Smith's modmult.
; ***************************************************************
; Variable assignments:
;       multiplier   = bp       (loaded from [bp+14])
;       multiplicand = ds:si    32-bit pointer (from [bp+10])
;       prod         = es:di    32-bit pointer (from [bp+6])
;       block count  = cx       (prec16; the tail handles the rest)
;       p            = dx:ax
;       carry        = bx

        PUBLIC  _P_SMULA

; MULU n: n unrolled multiply-accumulate steps.  Each step computes
; multiplicand unit * multiplier + carry + prod unit, stores the low
; word back into prod and keeps the high word as the next carry.
MULU    macro   n
        rept    n
                lodsw                   ;multiplicand
                mul     bp              ;multiplier, results (p) to AX/DX
                add     ax,bx           ;carry
                adc     dx,0
                add     ax,WORD PTR es:[di]
                adc     dx,0
                mov     bx,dx           ;carry
                stosw
        endm
endm

_P_SMULA        PROC    FAR
        push    bp
        mov     bp,sp
        push    di
        push    si
        push    ds
        mov     cx,prec16       ; read the statics before DS is changed
        mov     ax,mulp
        push    ax              ; tail entry point, popped at mul_units

        sub     bx,bx                   ;carry = 0, store in bx

        les     di,DWORD PTR [bp+6]     ;prod in es:di
        lds     si,DWORD PTR [bp+10]    ;multiplicand in ds:si
        cld
        mov     bp,[bp+14]      ; multiplier; BP is no longer a frame
                                ;  pointer from here on

        or      cx,cx
        jnz     mul_16u
        jmp     mul_units       ; no full 16-unit block
mul_16u:
        MULU    16
        dec     cx
        jz      mul_units
        jmp     mul_16u
mul_units:
        pop     cx              ; cx = mulp
        jmp     cx              ; enter the tail at the computed step
        MULU    15
mul_ref:

        add     WORD PTR es:[di],bx     ;add final carry

        pop     ds
        pop     si
        pop     di
        pop     bp
        ret
_P_SMULA        ENDP

; ***************************************************************
; void P_SETRECIP (MULTUNIT reciph, MULTUNIT recipl, short mshift)
;       Specify reciprocal factors for use by P_QUO_DIGIT.
;
;       This implementation is for 16-bit MULTUNIT.
;
; ***************************************************************

DGROUP  group   _DATA,_BSS
        assume  ds:DGROUP
_BSS    segment word public 'BSS'
reciph  dw      ?               ; recip msw
recipl  dw      ?               ; recip lsw
mshift  dw      ?               ; shift adjust
_BSS    ends

        PUBLIC  _P_SETRECIP

_P_SETRECIP     PROC    FAR
        push    bp
        mov     bp,sp

        mov     ax,6[bp]        ; reciph
        mov     reciph,ax
        mov     ax,8[bp]        ; recipl
        mov     recipl,ax
        mov     ax,10[bp]       ; mshift
        mov     mshift,ax

        pop     bp
        ret
_P_SETRECIP     endp

; ***************************************************************
; MULTUNIT quo_digit (MULTUNIT *dividend)
;       Determine the next quotient digit.
;       (routine for modmult, converted to assembler)
;
;       This implementation is for 16-bit MULTUNIT.
;
;       The following items have already been set by calling
;       P_SETRECIP:
;               reciph, recipl  - reciprocal of divisor
;               mshift          - scaling factor
;
;       The dividend parameter points to the most significant word
;       of the dividend.
;
;       Returns the quotient digit in AX; if the shifted result does
;       not fit in 16 bits, AX is set to 0ffffh.
;
; ***************************************************************
; Register assignments:
;       dx:ax = product
;       cx:bx = temp long
;       es:si = dividend pointer
;       di    = MS word of q0
;       bp    = lsb factor
;
; Comments reference the C implementation variables.

DGROUP  group   _DATA,_BSS
        assume  ds:DGROUP

        PUBLIC  _P_QUO_DIGIT


_P_QUO_DIGIT    PROC    FAR
        push    bp
        mov     bp,sp
        push    di
        push    si

        les     si,6[bp]        ; dividend
        mov     ax,es:[si-4]    ; dividend[-2]
        not     ax
        mul     reciph
        add     ax,reciph
        adc     dx,0
        mov     bx,ax
        mov     di,dx           ; di:bx = q1

        mov     ax,es:[si-2]    ; dividend[-1]
        not     ax
        mul     recipl
        inc     dx              ; dx:ax = q2

        mov     bp,dx           ; BP is reused as a scratch register;
        and     bp,di           ;  the frame is not needed any more
        and     bp,1            ; bp = lsb_factor

        add     ax,bx
        adc     di,dx
        rcr     di,1            ; di = MS word of q0

        mov     ax,es:[si-2]    ; dividend [-1]
        not     ax
        mul     reciph
        mov     bx,ax
        mov     cx,dx           ; cx:bx = q1

        mov     ax,es:[si]      ; dividend[0]
        not     ax
        mul     recipl          ; dx:ax = q2
        xor     ax,bx
        and     bp,ax           ; lsb correction
        xor     ax,bx           ; restore ax

        add     ax,bx
        adc     dx,cx
        rcr     dx,1
        rcr     ax,1            ; dx:ax = q

        add     ax,di           ; + scaled q0
        adc     dx,0
        add     ax,bp           ; + lsb correction
        adc     dx,0            ; q

        shl     ax,1            ; rotate the top two bits of dx:ax
        rcl     dx,1            ;  round into the low bits of ax
        rcl     ax,1
        rcl     dx,1
        rcl     ax,1
        and     ax,3
        mov     cx,ax
        mov     bx,dx           ; cx:bx = q >> 14

        mov     ax,es:[si]      ; dividend[0]
        not     ax
        mul     reciph
        shl     ax,1
        rcl     dx,1
        add     ax,bx
        adc     dx,cx           ; q

        mov     cx,mshift
        shr     ax,cl
        mov     bx,dx
        shr     dx,cl
        neg     cx
        add     cx,16           ; cl = 16 - mshift
        shl     bx,cl           ; bits of dx that move down into ax
        add     ax,bx           ; dx:ax = q >> mshift

        or      dx,dx
        jz      no_overflow
        mov     ax,0ffffh       ; saturate when the digit overflows
no_overflow:
        pop     si
        pop     di
        pop     bp
        ret
_P_QUO_DIGIT    ENDP
_TEXT   ends

        end
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.