|
|
1.1 root 1: /*---------------------------------------------------------------------------+
2: | polynomial.S |
3: | |
4: | Fixed point arithmetic polynomial evaluation. |
5: | |
6: | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
7: | Australia. E-mail [email protected] |
8: | |
9: | Call from C as: |
10: | void polynomial(unsigned accum[], unsigned x[], unsigned terms[][2], |
11: | int n) |
12: | |
13: | Computes: |
14: | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
15: | The result is returned in accum. |
16: | |
17: +---------------------------------------------------------------------------*/
18:
19: .file "fpolynom.s"
20:
21: #include "fpu_asm.h"
22:
23:
24: // #define EXTRA_PRECISE
25:
26: #define TERM_SIZE $8
27:
28:
29: .text
30: .align 2,144
31: .globl polynomial
32: polynomial:
33: pushl %ebp
34: movl %esp,%ebp
35: subl $32,%esp
36: pushl %esi
37: pushl %edi
38: pushl %ebx
39:
40: movl PARAM1,%esi // accum
41: movl PARAM2,%edi // x
42: movl PARAM3,%ebx // terms
43: movl PARAM4,%ecx // n
44:
45: movl TERM_SIZE,%eax
46: mull %ecx
47: movl %eax,%ecx
48:
49: movl 4(%ebx,%ecx,1),%edx // terms[n]
50: movl %edx,-20(%ebp)
51: movl (%ebx,%ecx,1),%edx // terms[n]
52: movl %edx,-24(%ebp)
53: xor %eax,%eax
54: movl %eax,-28(%ebp)
55:
56: subl TERM_SIZE,%ecx
57: js xL_accum_done
58:
59: xL_accum_loop:
60: xor %eax,%eax
61: movl %eax,-4(%ebp)
62: movl %eax,-8(%ebp)
63:
64: #ifdef EXTRA_PRECISE
65: movl -28(%ebp),%eax
66: mull 4(%edi) // x ms long
67: movl %edx,-12(%ebp)
68: #endif EXTRA_PRECISE
69:
70: movl -24(%ebp),%eax
71: mull (%edi) // x ls long
72: // movl %eax,-16(%ebp) // Not needed
73: addl %edx,-12(%ebp)
74: adcl $0,-8(%ebp)
75:
76: movl -24(%ebp),%eax
77: mull 4(%edi) // x ms long
78: addl %eax,-12(%ebp)
79: adcl %edx,-8(%ebp)
80: adcl $0,-4(%ebp)
81:
82: movl -20(%ebp),%eax
83: mull (%edi)
84: addl %eax,-12(%ebp)
85: adcl %edx,-8(%ebp)
86: adcl $0,-4(%ebp)
87:
88: movl -20(%ebp),%eax
89: mull 4(%edi)
90: addl %eax,-8(%ebp)
91: adcl %edx,-4(%ebp)
92:
93: /* Now add the next term */
94: movl (%ebx,%ecx,1),%eax
95: addl %eax,-8(%ebp)
96: movl 4(%ebx,%ecx,1),%eax
97: adcl %eax,-4(%ebp)
98:
99: /* And put into the second register */
100: movl -4(%ebp),%eax
101: movl %eax,-20(%ebp)
102: movl -8(%ebp),%eax
103: movl %eax,-24(%ebp)
104:
105: #ifdef EXTRA_PRECISE
106: movl -12(%ebp),%eax
107: movl %eax,-28(%ebp)
108: #else
109: testb $128,-25(%ebp)
110: je xL_no_poly_round
111:
112: addl $1,-24(%ebp)
113: adcl $0,-20(%ebp)
114: xL_no_poly_round:
115: #endif EXTRA_PRECISE
116:
117: subl TERM_SIZE,%ecx
118: jns xL_accum_loop
119:
120: xL_accum_done:
121: #ifdef EXTRA_PRECISE
122: /* And round the result */
123: testb $128,-25(%ebp)
124: je xL_poly_done
125:
126: addl $1,-24(%ebp)
127: adcl $0,-20(%ebp)
128: #endif EXTRA_PRECISE
129:
130: xL_poly_done:
131: movl -24(%ebp),%eax
132: movl %eax,(%esi)
133: movl -20(%ebp),%eax
134: movl %eax,4(%esi)
135:
136: popl %ebx
137: popl %edi
138: popl %esi
139: leave
140: ret
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.