|
|
1.1 root 1: ; Assembly primitives for RSA multiprecision library
2: ;
3: ; Tested with Turbo Assembler 1.0 and masm 1.00
4: ;
5: ; Written by Branko Lankester ([email protected]) 10/10/91
6:
7: ; define LDATA and LCODE as follows:
8: ; model: small compact medium large
9: ; LDATA 0 1 0 1
10: ; LCODE 0 0 1 1
11:
; ---------------------------------------------------------------------
; Memory-model switches and the stack-frame equates derived from them.
;
;   LDATA <> 0 : pointer arguments are loaded with lds/les (4 bytes each)
;   LCODE <> 0 : first argument sits at [bp+6] instead of [bp+4]
;
; All names below are text equates; they are substituted wherever the
; routines mention prec, r1, r2, carry, scarry or DSTPTR.
; ---------------------------------------------------------------------
LDATA   equ     1
LCODE   equ     1

; Destination operand of the add/subtract macros.
IFE LDATA
DSTPTR  equ     [bx+si]
ELSE
DSTPTR  equ     es:[bx+si]
ENDIF

IFE LCODE
        ; ---- small code model: arguments begin at [bp+4] ----
r1      equ     [bp+4]          ; 1st arg
prec    equ     [bp+4]          ; 1st arg (shares the slot with r1)
  IFE LDATA
r2      equ     [bp+6]          ; 2nd arg
scarry  equ     [bp+6]          ; carry for shift (arg 2)
carry   equ     [bp+8]          ; 3rd arg
  ELSE
r2      equ     [bp+8]          ; 2nd arg
scarry  equ     [bp+8]          ; carry for shift (arg 2)
carry   equ     [bp+12]         ; 3rd arg
  ENDIF
ELSE
        ; ---- large code model: arguments begin at [bp+6] ----
r1      equ     [bp+6]          ; 1st arg
prec    equ     [bp+6]          ; 1st arg (shares the slot with r1)
  IFE LDATA
r2      equ     [bp+8]          ; 2nd arg
scarry  equ     [bp+8]          ; carry for shift (arg 2)
carry   equ     [bp+10]         ; 3rd arg
  ELSE
r2      equ     [bp+10]         ; 2nd arg
scarry  equ     [bp+10]         ; carry for shift (arg 2)
carry   equ     [bp+14]         ; 3rd arg
  ENDIF
ENDIF
46:
47:
; Segment layout: code goes in _TEXT; _DATA and _BSS are grouped as DGROUP,
; and DS is assumed to address DGROUP.
48: _TEXT segment byte public 'CODE'
49: DGROUP group _DATA,_BSS
50: assume cs:_TEXT,ds:DGROUP
51: _TEXT ends
52:
53: _DATA segment word public 'DATA'
54: _DATA ends
55:
; State written by _P_SETP and read by the arithmetic routines.
; A "unit" is one 16-bit word.
56: _BSS segment word public 'BSS'
57: prec16 dw ? ; number of whole 16-unit groups: ((bits + 15) / 16) / 16
58: unitprec dw ? ; precision in units: (bits + 15) / 16; written by _P_SETP, not read by any routine shown here
59: addp dw ? ; entry offset into the ADDU 15 run of _P_ADDC (covers the leftover units)
60: subp dw ? ; same, for the SUBU 15 run of _P_SUBB
61: rotp dw ? ; same, for the 15 single-word rotates of _P_ROTL
62: mulp dw ? ; same, for the MULU 15 run of _P_SMUL
63: _BSS ends
64:
65: _TEXT segment byte public 'CODE'
66:
; Entry points made public from this segment (_P_SMUL is declared public
; separately, in its own segment further down).
67: public _P_SETP
68: public _P_ADDC
69: public _P_SUBB
70: public _P_ROTL
71:
; fprims is not public and is never called; it only encloses the
; entry-point labels that follow.
; NOTE(review): presumably this makes the assembler emit a far or near
; `ret` for them according to LCODE -- confirm against the assembler's
; PROC documentation.
72: IF LCODE
73: fprims proc far ; dummy proc
74: ELSE
75: fprims proc near
76: ENDIF
77:
78: ;
79: ; ******************** set precision ********************
80: ;
; _P_SETP -- record the working precision for the other primitives.
;
;   prec (arg 1): precision in bits
;
; Stores:
;   unitprec = (prec + 15) / 16        precision in 16-bit units
;   prec16   = unitprec / 16           number of whole 16-unit groups
;   addp, subp, rotp, mulp             entry offsets into the unrolled
;                                      15-unit runs, so that exactly
;                                      (unitprec mod 16) units execute
;
; Each offset is <label>_ref minus (leftover units * bytes per unit):
;   add/sub : 4 bytes per unit with LDATA, 3 without
;   rotate  : 4 bytes per unit
;   multiply: 17 bytes per unit
;
; Uses ax, bx, cx, dx without saving them.
_P_SETP:
        push    bp
        mov     bp,sp

        mov     ax,prec                 ; ax = precision in bits
        add     ax,15                   ; round up to a whole unit
        mov     cl,4
        shr     ax,cl                   ; ax = precision in units
        mov     unitprec,ax

        mov     dx,ax
        shr     dx,cl                   ; dx = whole groups of 16 units
        mov     prec16,dx

        and     ax,15                   ; ax = leftover units (0..15)
        mov     cx,ax                   ; cx = leftover, kept for later
        mov     bx,ax
        add     bx,bx
        add     bx,bx                   ; bx = 4 * leftover

        ; rotate: 4 bytes per unit
        mov     dx,offset rot_ref
        sub     dx,bx
        mov     rotp,dx

        ; add / subtract: 4 bytes per unit, or 3 without the es: prefix
        mov     dx,bx
IFE LDATA
        sub     dx,cx                   ; small data: 3 * leftover
ENDIF
        mov     ax,offset add_ref
        sub     ax,dx
        mov     addp,ax

        mov     ax,offset sub_ref
        sub     ax,dx
        mov     subp,ax

        ; multiply: 17 bytes per unit = 16 * leftover + leftover
        add     bx,bx
        add     bx,bx                   ; bx = 16 * leftover
        add     bx,cx                   ; bx = 17 * leftover
        mov     ax,offset mul_ref
        sub     ax,bx
        mov     mulp,ax

        pop     bp
        ret
123:
124:
125:
126: ;
127: ; ******************** mpi add with carry ********************
128: ;
; ADDU n -- n copies of: load the next source word from ds:si into ax
; (lodsw), then add it with carry into the destination word at DSTPTR.
; lodsw leaves the flags alone, so CF ripples from one copy to the next.
; _P_SETP's offset arithmetic counts 4 bytes per copy when LDATA is set and
; 3 bytes otherwise; changing the instructions here would break addp.
129: ADDU macro n
130: rept n
131: lodsw
132: adc DSTPTR,ax
133: endm
134: endm
135:
136:
; _P_ADDC -- add the number at r2 into the number at r1, with carry, over
; the precision recorded by _P_SETP.
;   r1    (arg 1): pointer to the destination number, updated in place
;   r2    (arg 2): pointer to the number being added
;   carry (arg 3): bit 0 is the carry in
;   returns ax = carry out (0 or 1)
; Units are processed upward in memory from both pointers (cld + lodsw).
; ax, bx, cx, dx (and es when LDATA is set) are used without being saved;
; bp, si and ds are restored.
137: _P_ADDC:
138: push bp
139: mov bp,sp
140: push si
141: mov cx, prec16 ; cx = number of whole 16-unit groups
142: mov dx, addp ; read now: ds is replaced by lds below when LDATA is set
143: IF LDATA
144: push ds
145: lds si, dword ptr r2
146: les bx, dword ptr r1
147: ELSE
148: mov si, r2
149: mov bx, r1
150: ENDIF
151: sub bx, si ; bx = offset of r1 relative to r2
152: dec bx ; minus 2: lodsw has already advanced si by one word
153: dec bx ; by the time adc addresses [bx+si]
154: cld
155: shr byte ptr carry,1 ; CF = bit 0 of the carry argument (shifts the stack copy)
156: jcxz add_units ; no whole group: only the leftover units remain
157: add_16u:
158: ADDU 16
159: loop add_16u ; loop does not change CF
160: add_units:
161: jmp dx ; enter the run below so that only the leftover units execute
162: ADDU 15
163: add_ref:
164: rcl ax,1 ; bit 0 of ax = final CF
165: and ax,1 ; ax = 0 or 1
166: IF LDATA
167: pop ds
168: ENDIF
169: pop si
170: pop bp
171: ret
172:
173:
174:
175: ;
176: ; ******************** mpi subtract with borrow ********************
177: ;
; SUBU n -- n copies of: load the next source word from ds:si into ax
; (lodsw), then subtract it with borrow from the destination word at DSTPTR.
; lodsw leaves the flags alone, so the borrow in CF ripples between copies.
; _P_SETP's offset arithmetic counts 4 bytes per copy when LDATA is set and
; 3 bytes otherwise; changing the instructions here would break subp.
178: SUBU macro n
179: rept n
180: lodsw
181: sbb DSTPTR,ax
182: endm
183: endm
184:
185:
; _P_SUBB -- subtract the number at r2 from the number at r1, with borrow,
; over the precision recorded by _P_SETP.
;   r1    (arg 1): pointer to the destination number, updated in place
;   r2    (arg 2): pointer to the number being subtracted
;   carry (arg 3): bit 0 is the borrow in
;   returns ax = borrow out (0 or 1)
; Units are processed upward in memory from both pointers (cld + lodsw).
; ax, bx, cx, dx (and es when LDATA is set) are used without being saved;
; bp, si and ds are restored.
186: _P_SUBB:
187: push bp
188: mov bp,sp
189: push si
190: mov cx, prec16 ; cx = number of whole 16-unit groups
191: mov dx, subp ; read now: ds is replaced by lds below when LDATA is set
192: IF LDATA
193: push ds
194: lds si, dword ptr r2
195: les bx, dword ptr r1
196: ELSE
197: mov si, r2
198: mov bx, r1
199: ENDIF
200: sub bx, si ; bx = offset of r1 relative to r2
201: dec bx ; minus 2: lodsw has already advanced si by one word
202: dec bx ; by the time sbb addresses [bx+si]
203: cld
204: shr byte ptr carry,1 ; CF = bit 0 of the borrow argument (shifts the stack copy)
205: jcxz sub_units ; no whole group: only the leftover units remain
206: sub_16u:
207: SUBU 16
208: loop sub_16u ; loop does not change CF
209: sub_units:
210: jmp dx ; enter the run below so that only the leftover units execute
211: SUBU 15
212: sub_ref:
213: rcl ax,1 ; bit 0 of ax = final CF (the borrow)
214: and ax,1 ; ax = 0 or 1
215: IF LDATA
216: pop ds
217: ENDIF
218: pop si
219: pop bp
220: ret
221:
222:
223:
224: ;
225: ; ******************** mpi rotate left ********************
226: ;
; _P_ROTL -- rotate the number at r1 left by one bit through carry, over
; the precision recorded by _P_SETP.
;   r1     (arg 1): pointer to the number, updated in place
;   scarry (arg 2): bit 0 is shifted into the first word processed
;   returns ax = the bit shifted out of the last word (0 or 1)
; Words are processed upward in memory from r1.
; ax, bx, cx, dx are used without being saved; bp and ds are restored.
227: _P_ROTL:
228: push bp
229: mov bp,sp
230: mov cx, prec16 ; cx = number of whole 16-unit groups
231: mov dx, rotp ; read now: ds is replaced by lds below when LDATA is set
232: IF LDATA
233: push ds
234: lds bx, dword ptr r1
235: ELSE
236: mov bx, r1
237: ENDIF
238: shr byte ptr scarry,1 ; CF = bit 0 of the carry argument (shifts the stack copy)
239: jcxz rot_units ; no whole group: only the leftover units remain
240: rot_16u:
241: i = 0
242: rept 16
243: rcl word ptr [bx + i],1
244: i = i + 2
245: endm
246: lahf ; add changes CF, so park the flags in ah
247: add bx,32 ; step past the 16 words just rotated
248: sahf ; bring CF back for the next word
249: loop rot_16u ; loop does not change CF
250: rot_units:
251: jmp dx ; enter the run below so that only the leftover units execute
252: rept 15
253: rcl word ptr [bx],1
254: inc bx ; inc leaves CF untouched, unlike add
255: inc bx ; _P_SETP counts 4 bytes for each rcl/inc/inc copy
256: endm
257: rot_ref:
258:
259: rcl ax,1 ; bit 0 of ax = final CF
260: and ax,1 ; ax = 0 or 1
261: IF LDATA
262: pop ds
263: ENDIF
264: pop bp
265: ret
266:
267: fprims endp
268:
269: _TEXT ends
270:
271:
272:
273: ; ***************************************************************
274: ; P_SMUL (MULTUNIT *prod, MULTUNIT *multiplicand, MULTUNIT multiplier)
275: ; mp_smul routine from Upton's modmult, converted to assembler
276: ;
277: ; Multiply the single-word multiplier times the multiprecision integer
278: ; in multiplicand, accumulating result in prod. The resulting
279: ; multiprecision prod will be 1 word longer than the multiplicand.
280: ; multiplicand is unit_prec words long. We add into prod, so caller
281: ; should zero it out first.
282: ;
283: ; NOTE: Unlike other functions in the multiprecision arithmetic
284: ; library, both multiplicand and prod are pointing at the LSB,
285: ; regardless of byte order of the machine. On an 80x86, this makes
286: ; no difference. But if this assembly function is implemented
287: ; on a 680x0, it becomes important.
288: ; ***************************************************************
289: ; Variable assignments:
290: ; multiplier = [bp+14]
291: ; multiplicand = [ds:si] 32-bit pointer
292: ; prod = [es:di] 32-bit pointer
293: ; prec16 (number of 16-word groups) = cx; leftover words are reached via mulp
294: ; p = dx:ax
295: ; carry = bx
; _P_SMUL is assembled into its own code segment. mul_ref lives here too:
; _P_SETP takes `offset mul_ref` to compute mulp, and _P_SMUL uses that
; value for a near indirect jump inside this segment.
296: UPTON_TEXT SEGMENT WORD PUBLIC 'CODE'
297: UPTON_TEXT ENDS
298: UPTON_TEXT SEGMENT
299: ASSUME CS: UPTON_TEXT
300: ASSUME DS: DGROUP
301: PUBLIC _P_SMUL
302:
; MULU n -- n copies of one multiply-accumulate step:
;   dx:ax = [ds:si] * bp + bx + es:[di]
;   es:[di] = low word (stosw, which also advances di); bx = high word
; Register roles: bp = multiplier, bx = carry between steps.
; _P_SETP's offset arithmetic counts 17 bytes per copy; changing the
; instructions here would break mulp.
303: MULU macro n
304: rept n
305: lodsw ;ax = next multiplicand word, si advances
306: mul bp ;multiplier, results (p) to dx:ax
307: add ax,bx ;add the carry from the previous step
308: adc dx,0
309: add ax,WORD PTR es:[di] ;accumulate the word already in prod
310: adc dx,0
311: mov bx,dx ;high word is the carry for the next step
312: stosw ;store low word to prod, di advances
313: endm
314: endm
315:
; _P_SMUL -- prod += multiplicand * multiplier, over the precision recorded
; by _P_SETP, then store the final carry in the word that follows.
;   [bp+6]  : far pointer to prod
;   [bp+10] : far pointer to multiplicand
;   [bp+14] : multiplier (one word)
; Always FAR with far-pointer arguments at fixed offsets: LCODE and LDATA
; are not consulted here.
; ax, bx, cx, dx and es are used without being saved; bp, di, si and ds
; are restored.
316: _P_SMUL PROC FAR
317: push bp
318: mov bp,sp
319: push di
320: push si
321: push ds
322: mov cx,prec16 ;cx = number of whole 16-word groups
323: mov ax,mulp ;read now: ds is replaced by lds below
324: push ax ;park the entry offset; ax, bx, cx, dx, si, di, bp are all in use below
325:
326: sub bx,bx ;carry = 0, store in bx
327:
328: les di,DWORD PTR [bp+6] ;prod in es:di
329: lds si,DWORD PTR [bp+10] ;multiplicand in ds:si
330: cld
331: mov bp,[bp+14] ;bp = multiplier; the frame pointer is gone from here on
332:
333: or cx,cx
334: jnz mul_16u
335: jmp mul_units ;no whole group: only the leftover words remain
336: mul_16u:
337: MULU 16
338: dec cx ;16 copies of MULU (17 bytes each, per _P_SETP) are out of
339: jz mul_units ;reach for `loop` or a conditional jump back to mul_16u,
340: jmp mul_16u ;so the backward branch is a plain jmp
341: mul_units:
342: pop cx ;cx = mulp, pushed on entry
343: jmp cx ;enter the run below so that only the leftover words execute
344: MULU 15
345: mul_ref:
346:
347: ; We know that the high-order word of prod will always be 0
348: mov WORD PTR es:[di],bx ;store carry in prod empty high word (overwrites, does not add)
349:
350: pop ds
351: pop si
352: pop di
353: pop bp
354: ret
355:
356: _P_SMUL ENDP
357: UPTON_TEXT ends
358: end
359:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.