|
|
1.1 root 1: .file "wm_shrx.S"
2: /*---------------------------------------------------------------------------+
3: | wm_shrx.S |
4: | |
5: | 64 bit right shift functions |
6: | |
7: | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8: | Australia. E-mail [email protected] |
9: | |
10: | Call from C as: |
11: | unsigned shrx(void *arg1, unsigned arg2) |
12: | and |
13: | unsigned shrxs(void *arg1, unsigned arg2) |
14: | |
15: +---------------------------------------------------------------------------*/
16:
17: #include "fpu_asm.h"
18:
19: .text
20: .align 2,144
21:
22: /*---------------------------------------------------------------------------+
23: | unsigned shrx(void *arg1, unsigned arg2) |
24: | |
25: | Extended shift right function. |
26: | Fastest for small shifts. |
27: | Shifts the 64 bit quantity pointed to by the first arg (arg1) |
28: | right by the number of bits specified by the second arg (arg2). |
29: | Forms a 96 bit quantity from the 64 bit arg and eax: |
30: | [ 64 bit arg ][ eax ] |
31: | shift right ---------> |
32: | The eax register is initialized to 0 before the shifting. |
33: | Results returned in the 64 bit arg and eax. |
34: +---------------------------------------------------------------------------*/
35:
36: .globl shrx
37:
38: shrx: /* entry: shift the 64-bit value at arg1 right by arg2 bits; eax returns the 32-bit extension */
39: push %ebp
40: movl %esp,%ebp /* frame pointer; undone by `leave` on every exit path */
41: pushl %esi /* esi is used as the operand pointer and restored before each ret */
42: movl PARAM2,%ecx /* ecx = shift count (PARAM2 is defined outside this file; presumably arg2) */
43: movl PARAM1,%esi /* esi = address of the 64-bit operand (PARAM1 presumably arg1) */
44: cmpl $32,%ecx /* shrd only works for 0..31 bits */
45: jnc L_more_than_31 /* unsigned compare: a count >= 32 goes to the wider cases */
46:
47: /* Shift by [0..31] bits: all three 32-bit words take part */
48: pushl %ebx /* ebx is needed only on this path, so it is saved only here */
49: movl (%esi),%ebx // lsl: 32-bit word at offset 0 (less significant half)
50: movl 4(%esi),%edx // msl: 32-bit word at offset 4 (more significant half)
51: xorl %eax,%eax // extension starts as 0
52: shrd %cl,%ebx,%eax /* eax receives, at its top, the bits leaving the bottom of lsl */
53: shrd %cl,%edx,%ebx /* lsl shifted right, refilled from the bottom of msl */
54: shr %cl,%edx /* msl shifted right, zero filled */
55: movl %ebx,(%esi) /* write back lsl */
56: movl %edx,4(%esi) /* write back msl */
57: popl %ebx /* restore in reverse push order */
58: popl %esi
59: leave
60: ret
61:
62: L_more_than_31: /* reached with count >= 32 */
63: cmpl $64,%ecx
64: jnc L_more_than_63 /* count >= 64 */
65: /* Shift by [32..63] bits */
66: subb $32,%cl /* leaves 0..31; the other 32 bits are done by moving whole words */
67: movl (%esi),%eax // lsl, loaded straight into the extension register
68: movl 4(%esi),%edx // msl
69: shrd %cl,%edx,%eax /* extension = old lsl shifted right, refilled from msl; bits leaving the bottom of eax are dropped */
70: shr %cl,%edx /* msl shifted right, zero filled */
71: movl %edx,(%esi) /* shifted msl is stored as the new lsl */
72: movl $0,4(%esi) /* new msl is zero */
73: popl %esi /* ebx was never pushed on this path */
74: leave
75: ret
76:
77: L_more_than_63: /* reached with count >= 64 */
78: cmpl $96,%ecx
79: jnc L_more_than_95 /* count >= 96 */
80: /* Shift by [64..95] bits */
81: subb $64,%cl /* leaves 0..31 */
82: movl 4(%esi),%eax // msl is the only word that can still reach the extension
83: shr %cl,%eax /* extension = msl shifted right, zero filled */
84: xorl %edx,%edx
85: movl %edx,(%esi) /* both words of the operand become zero */
86: movl %edx,4(%esi)
87: popl %esi
88: leave
89: ret
90:
91: L_more_than_95: /* Shift by 96 bits or more: operand and extension are all zero */
92: xorl %eax,%eax /* returned extension = 0 */
93: movl %eax,(%esi) /* lsl = 0 */
94: movl %eax,4(%esi) /* msl = 0 */
95: popl %esi
96: leave
97: ret
98:
99:
100: /*---------------------------------------------------------------------------+
101: | unsigned shrxs(void *arg1, unsigned arg2) |
102: | |
103: | Extended shift right function (optimized for small floating point |
104: | integers). |
105: | Shifts the 64 bit quantity pointed to by the first arg (arg1) |
106: | right by the number of bits specified by the second arg (arg2). |
107: | Forms a 96 bit quantity from the 64 bit arg and eax: |
108: | [ 64 bit arg ][ eax ] |
109: | shift right ---------> |
110: | The eax register is initialized to 0 before the shifting. |
111: | The lower 8 bits of eax are lost and replaced by a flag which is |
112: | set (to 0x01) if any bit, apart from the first one, is set in the |
113: | part which has been shifted out of the arg. |
114: | Results returned in the 64 bit arg and eax. |
115: +---------------------------------------------------------------------------*/
116: .globl shrxs
117: shrxs: /* entry: like a 64-bit right shift with extension in eax, but al is replaced by a 0/1 flag for lost bits */
118: push %ebp
119: movl %esp,%ebp /* frame pointer; undone by `leave` on every exit path */
120: pushl %esi /* esi and ebx are saved on entry and restored before each ret */
121: pushl %ebx
122: movl PARAM2,%ecx /* ecx = shift count (PARAM2 is defined outside this file; presumably arg2) */
123: movl PARAM1,%esi /* esi = address of the 64-bit operand (PARAM1 presumably arg1) */
124: cmpl $64,%ecx /* counts of 64 or more are handled separately */
125: jnc Ls_more_than_63 /* unsigned compare: count >= 64 */
126:
127: cmpl $32,%ecx /* shrd only works for 0..31 bits */
128: jc Ls_less_than_32 /* count < 32 */
129:
130: /* We got here without jumps by assuming that the most common requirement
131: is for small integers */
132: /* Shift by [32..63] bits */
133: subb $32,%cl /* leaves 0..31; the other 32 bits are done by moving whole words */
134: movl (%esi),%eax // lsl: 32-bit word at offset 0
135: movl 4(%esi),%edx // msl: 32-bit word at offset 4
136: xorl %ebx,%ebx /* ebx collects the bits that fall below the extension */
137: shrd %cl,%eax,%ebx /* ebx = bits leaving the bottom of the old lsl */
138: shrd %cl,%edx,%eax /* extension = old lsl shifted right, refilled from msl */
139: shr %cl,%edx /* msl shifted right, zero filled */
140: orl %ebx,%ebx /* test these 32 bits */
141: setne %bl /* bl = 1 if any bit fell below the extension */
142: test $0x7fffffff,%eax /* and 31 bits here */
143: setne %bh /* bh = 1 if any extension bit other than bit 31 is set */
144: orw %bx,%bx /* Any of the 63 bit set ? */
145: setne %al /* low byte of eax becomes the 0/1 flag; bits 8..31 of eax are untouched */
146: movl %edx,(%esi) /* shifted msl is stored as the new lsl (movl leaves flags alone) */
147: movl $0,4(%esi) /* new msl is zero */
148: popl %ebx /* restore in reverse push order */
149: popl %esi
150: leave
151: ret
152:
153: /* Shift by [0..31] bits */
154: Ls_less_than_32:
155: movl (%esi),%ebx // lsl: 32-bit word at offset 0
156: movl 4(%esi),%edx // msl: 32-bit word at offset 4
157: xorl %eax,%eax // extension starts as 0
158: shrd %cl,%ebx,%eax /* eax receives, at its top, the bits leaving the bottom of lsl */
159: shrd %cl,%edx,%ebx /* lsl shifted right, refilled from the bottom of msl */
160: shr %cl,%edx /* msl shifted right, zero filled */
161: test $0x7fffffff,%eax /* only need to look at eax here */
162: setne %al /* al = 1 if any extension bit other than bit 31 is set, else 0 */
163: movl %ebx,(%esi) /* write back lsl */
164: movl %edx,4(%esi) /* write back msl */
165: popl %ebx
166: popl %esi
167: leave
168: ret
169:
170: /* Shift by [64..95] bits */
171: Ls_more_than_63:
172: cmpl $96,%ecx
173: jnc Ls_more_than_95 /* count >= 96 */
174:
175: subb $64,%cl /* leaves 0..31 */
176: movl (%esi),%ebx // lsl: 32-bit word at offset 0
177: movl 4(%esi),%eax // msl, loaded straight into the extension register
178: xorl %edx,%edx // collects the bits leaving the bottom of lsl
179: shrd %cl,%ebx,%edx /* edx = bits leaving the bottom of the old lsl */
180: shrd %cl,%eax,%ebx /* ebx = old lsl shifted right, refilled from msl */
181: shr %cl,%eax /* extension = msl shifted right, zero filled */
182: orl %ebx,%edx /* everything that ended up below the extension */
183: setne %bl /* bl = 1 if any of those bits is set */
184: test $0x7fffffff,%eax /* only need to look at eax here */
185: setne %bh /* bh = 1 if any extension bit other than bit 31 is set */
186: orw %bx,%bx /* combine the two flags */
187: setne %al /* low byte of eax becomes the 0/1 flag; bits 8..31 of eax are untouched */
188: xorl %edx,%edx
189: movl %edx,(%esi) // set to zero
190: movl %edx,4(%esi) // set to zero
191: popl %ebx
192: popl %esi
193: leave
194: ret
195:
196: Ls_more_than_95:
197: /* Shift by [96..inf) bits */
198: xorl %eax,%eax /* extension = 0 before the flag is written */
199: movl (%esi),%ebx /* ebx = lsl */
200: orl 4(%esi),%ebx /* ebx = lsl | msl; ZF is clear if the operand was non-zero */
201: setne %al /* eax = 1 if any operand bit was set, else 0 */
202: xorl %ebx,%ebx
203: movl %ebx,(%esi) /* both words of the operand become zero */
204: movl %ebx,4(%esi)
205: popl %ebx
206: popl %esi
207: leave
208: ret
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.