|
|
1.1 root 1: /* Multiprecision multiply routine. . . This assumes the ability to
2: * do a 32x32->64 multiply. I have put in dummy code for the multiplies
3: * which should be easily identifiable in the resulting source.
4: * This code was designed for the MIPS series processors, however
5: * should work pretty well on any machine with hardware multiply.
6: * The lack of a carry bit in the R3000 resulted in some of the
7: * machinations which take place.
8: *
9: * See code in dmul_proto.c for the prototype, and appropriate
10: * documentation.
11: *
12: * Note that this has hooks for both byte orderings of MIPS processors,
13: * and that if you have the wrong flags being set in your cpp,
14: * you may wind up with no code at all. (DEC's version will, when
15: * compiling for the other byte ordering, leave BOTH things defined,
16: * which is bad. . . )
17: *
18: * - Castor Fu Wed Sep 30 11:48:38 PDT 1992
19: */
20:
21: #if defined(MIPSEL) && !defined(MIPSEB)
# p_dmul, MIPSEL variant. The interleaved "# NN ..." lines are the C
# statements that each instruction group implements.
# In this variant a "lower unit" step is -4 bytes and a "higher unit"
# step is +4 bytes (see the pp updates below).
# Values kept in registers / stack slots, as used below:
#   $4        prod on entry; overwritten as scratch once pp is set up
#   172($sp)  multiplicand (stored from $5)
#   176($sp)  multiplier (stored from $6)
#   164($sp)  preca          160($sp)  precb
#   72($sp)   preca + precb; overwritten with precb - 1 before label $55
#   68($sp)   preca + precb - 1 (bound of the last outer loop)
#   $20 pp    $21 i    $13 ma    $16 mb    $17 k    $31 j
#   $8 al     $12 ah   $19 carryl   $18 carryh   $9 ml   $10 mh
# NOTE(review): $31 is reused for j and the routine returns with "j $31"
# after "ld $30, 32($sp)"; this presumably relies on sd/ld handling the
# register pair $30/$31 (the .mask value names $16-$23, $30, $31).
# Confirm against the assembler documentation.
22: .verstamp 2 10
23: .extern global_precision 2
24: .text
25: .align 2
26: .file 2 "ndmulprotoEL.c"
27: .globl p_dmul
28: .loc 2 69
29: # 69 {
30: .ent p_dmul 2
31: p_dmul:
32: .option O2
33: subu $sp, 168  # reserve the 168-byte frame declared by .frame below
34: sd $30, 32($sp)  # register saves; see the sd/ld review note above
35: sd $22, 24($sp)
36: sd $20, 16($sp)
37: sd $18, 8($sp)
38: sd $16, 0($sp)
39: .mask 0xC0FF0000, -132
40: .frame $sp, 168, $31
41: sw $5, 172($sp)  # multiplicand pointer, stored just past the 168-byte frame
42: sw $6, 176($sp)  # multiplier pointer, stored just past the 168-byte frame
43: .loc 2 76
44: # 76 preca = global_precision;
45: lh $14, global_precision  # global_precision is read as a halfword
46: sw $14, 164($sp)  # preca
47: .loc 2 77
48: # 77 pp=msbptr(multiplicand,preca);
49: .loc 2 78
50: # 78 while(*post_lowerunit(pp) == 0 && preca > 0) preca--;
51: mul $2, $14, 4  # $2 = global_precision * 4; used again for the multiplier scan
52: lw $15, 172($sp)
53: addu $3, $15, $2  # $3 = multiplicand + global_precision * 4
54: lw $11, -4($3)  # first word examined: the one at the highest address
55: addu $20, $3, -8  # pp now points one word lower
56: bne $11, 0, $33  # nonzero word found: preca stays as is
57: ble $14, 0, $33
58: $32:
59: lw $24, 164($sp)
60: addu $25, $24, -1
61: sw $25, 164($sp)  # preca--
62: lw $11, 0($20)  # next lower word is loaded before preca > 0 is tested
63: addu $20, $20, -4
64: bne $11, 0, $33
65: bgt $25, 0, $32  # NOTE(review): for an all-zero operand the last load above is one word below the operand - confirm this is tolerated
66: $33:
67: .loc 2 80
68: # 79
69: # 80 precb = global_precision;
70: lh $15, global_precision
71: sw $15, 160($sp)  # precb
72: .loc 2 81
73: # 81 pp = msbptr(multiplier,precb);
74: .loc 2 82
75: # 82 while(*post_lowerunit(pp) == 0 && precb > 0) precb--;
76: lw $14, 176($sp)
77: addu $3, $14, $2  # $2 still holds global_precision * 4
78: lw $11, -4($3)
79: addu $20, $3, -8
80: bne $11, 0, $35
81: ble $15, 0, $35
82: $34:
83: lw $24, 160($sp)
84: addu $25, $24, -1
85: sw $25, 160($sp)  # precb--
86: lw $11, 0($20)  # same load-before-test order as the multiplicand scan
87: addu $20, $20, -4
88: bne $11, 0, $35
89: bgt $25, 0, $34
90: $35:
91: .loc 2 84
92: # 83
93: # 84 prec2 = global_precision<<1;
94: lh $3, global_precision
95: sll $3, $3, 1
96: move $2, $3  # prec2 is held in both $2 and $3
97: .loc 2 86
98: # 85
99: # 86 pp = msbptr(prod,prec2);
100: mul $14, $3, 4
101: addu $20, $4, $14
102: addu $20, $20, -4  # pp = prod + prec2 * 4 - 4
103: .loc 2 87
104: # 87 for (i= preca+precb; i < prec2 ; i++) {
105: lw $15, 164($sp)
106: lw $24, 160($sp)
107: addu $25, $15, $24
108: sw $25, 72($sp)  # 72($sp) = preca + precb
109: move $21, $25
110: bge $25, $3, $37  # skip the clearing loop when nothing lies above preca + precb
111: $36:
112: .loc 2 88
113: # 88 *post_lowerunit(pp) = 0;
114: sw $0, 0($20)
115: addu $20, $20, -4
116: .loc 2 89
117: # 89 }
118: .loc 2 89
119: addu $21, $21, 1
120: blt $21, $2, $36
121: $37:
122: .loc 2 90
123: # 90 if (preca == 0 || precb == 0) {
124: lw $14, 164($sp)
125: beq $14, 0, $38
126: lw $15, 160($sp)
127: bne $15, 0, $41
128: $38:
129: .loc 2 91
130: # 91 pp = lsbptr(prod,prec2);
131: move $20, $4
132: .loc 2 92
133: # 92 for (i=0; i < preca + precb; i++)
134: move $21, $0
135: lw $24, 72($sp)
136: ble $24, 0, $61  # nothing to clear: go straight to the epilogue
137: and $2, $24, 3  # (preca + precb) & 3 single stores precede the 4-way unrolled loop
138: beq $2, 0, $40
139: $39:
140: sw $0, 0($20)
141: addu $20, $20, 4
142: addu $21, $21, 1
143: bne $2, $21, $39
144: lw $25, 72($sp)
145: beq $21, $25, $61  # done if the count was below 4
146: $40:
147: .loc 2 93
148: # 93 *(post_higherunit(pp)) = 0;
149: sw $0, 0($20)
150: addu $20, $20, 4
151: sw $0, 0($20)
152: addu $20, $20, 4
153: sw $0, 0($20)
154: addu $20, $20, 4
155: sw $0, 0($20)
156: addu $20, $20, 4
157: addu $21, $21, 4
158: lw $14, 72($sp)
159: bne $21, $14, $40
160: .loc 2 94
161: # 94 return;
162: b $61
163: $41:
164: .loc 2 97
165: # 95 }
166: # 96 /* Canonicalize length(a) >= length(b) */
167: # 97 if (preca < precb) {
168: lw $15, 164($sp)
169: lw $24, 160($sp)
170: bge $15, $24, $42
171: .loc 2 98
172: # 98 pp = multiplicand;
173: lw $20, 172($sp)
174: .loc 2 99
175: # 99 multiplicand = multiplier;
176: lw $25, 176($sp)
177: sw $25, 172($sp)
178: .loc 2 100
179: # 100 multiplier = pp;
180: sw $20, 176($sp)
181: .loc 2 101
182: # 101 i = preca;
183: move $21, $15
184: .loc 2 102
185: # 102 preca = precb;
186: sw $24, 164($sp)
187: .loc 2 103
188: # 103 precb = i;
189: sw $21, 160($sp)
190: .loc 2 104
191: # 104 }
192: lw $14, 164($sp)
193: lw $25, 160($sp)
194: addu $15, $14, $25
195: sw $15, 72($sp)  # preca + precb is stored again, on the swap path only
196: $42:
197: .loc 2 107
198: # 105 make_lsbptr(multiplicand, global_precision);
199: # 106 make_lsbptr(multiplier, global_precision);
200: # 107 pp = lsbptr(prod,prec2);
201: move $20, $4  # no instructions are emitted for C lines 105-106 in this variant
202: .loc 2 108
203: # 108 ah = 0;
204: move $12, $0
205: .loc 2 109
206: # 109 carryl = 0;
207: move $19, $0
208: .loc 2 110
209: # 110 carryh =0;
210: move $18, $0
211: .loc 2 111
212: # 111 al = 0;
213: .loc 2 114
214: # 112
215: # 113
216: # 114 amin = 0;
217: .loc 2 115
218: # 115 for (i=0; i < precb; i++) {
219: move $21, $0
220: lw $24, 160($sp)
221: ble $24, 0, $48
222: lw $23, 172($sp)  # $23 tracks the multiplicand word at index i (+4 per outer iteration)
223: addu $30, $23, -4  # $30 tracks index i - 1; copied into ma each iteration
224: lw $14, 176($sp)
225: addu $25, $14, 4
226: sw $25, 56($sp)  # 56($sp) = multiplier + 4, the starting mb
227: $43:
228: .loc 2 116
229: # 116 amax = i;
230: .loc 2 117
231: # 117 ma = word_index(multiplicand,i -1);
232: move $13, $30
233: .loc 2 118
234: # 118 mb = word_index(multiplier,1);
235: lw $16, 56($sp)
236: .loc 2 119
237: # 119 lmul(word_v(ma,1),word_v(mb,-1),ml, mh);
238: lw $2, 0($23)
239: lw $15, 176($sp)
240: lw $3, 0($15)
241: multu $2, $3
242: mflo $9  # ml
243: mfhi $10  # mh
244: .loc 2 120
245: # 120 al = ah+ carryl;
246: addu $4, $12, $19  # $4 is scratch from here on; pp already holds the product pointer
247: move $8, $4
248: .loc 2 121
249: # 121 carryl = carryh + (al < ah);
250: sltu $24, $4, $12  # unsigned compare detects wraparound of the add
251: addu $19, $18, $24
252: .loc 2 122
253: # 122 ah = 0;
254: move $12, $0
255: .loc 2 123
256: # 123 carryh = 0;
257: move $18, $0
258: .loc 2 124
259: # 124 j = amax - amin;
260: move $5, $21  # amin is 0 here, so j is simply i
261: move $31, $5
262: .loc 2 125
263: # 125 k = j & 3;
264: .loc 2 126
265: # 126 while (k--) {
266: and $6, $5, 3
267: addu $17, $6, -1
268: beq $6, $0, $45
269: $44:
270: .loc 2 127
271: # 127 lmul( *ma, *mb, nml, nmh);
272: lw $2, 0($13)
273: lw $3, 0($16)
274: multu $2, $3  # started now; result is fetched by mflo/mfhi at the end of this iteration
275: .loc 2 128
276: # 128 al += ml;
277: addu $8, $8, $9  # accumulates the previous product while the new multiply is outstanding
278: .loc 2 129
279: # 129 carryl += (al < ml);
280: sltu $14, $8, $9
281: addu $19, $19, $14
282: .loc 2 130
283: # 130 ah += mh;
284: addu $12, $12, $10
285: .loc 2 131
286: # 131 carryh += (ah < mh);
287: sltu $25, $12, $10
288: addu $18, $18, $25
289: .loc 2 132
290: # 132 post_lowerunit(ma);
291: addu $13, $13, -4
292: .loc 2 133
293: # 133 post_higherunit(mb);
294: addu $16, $16, 4
295: .loc 2 134
296: # 134 ml = nml;
297: mflo $9
298: .loc 2 135
299: # 135 mh = nmh;
300: mfhi $10
301: .loc 2 136
302: # 136 }
303: .loc 2 136
304: move $11, $17  # test the old k, then decrement (k-- semantics)
305: addu $17, $17, -1
306: bne $11, 0, $44
307: $45:
308: .loc 2 137
309: # 137 k = j >> 2;
310: .loc 2 138
311: # 138 while (k--) {
312: sra $2, $31, 2
313: addu $17, $2, -1
314: beq $2, $0, $47
315: $46:
316: .loc 2 139
317: # 139 lmul( word_v(ma,0), word_v(mb,0), nml, nmh);
318: lw $2, 0($13)
319: lw $3, 0($16)
320: multu $2, $3
321: .loc 2 140
322: # 140 al += ml;
323: addu $8, $8, $9
324: .loc 2 141
325: # 141 carryl += (al < ml);
326: sltu $15, $8, $9
327: addu $19, $19, $15
328: .loc 2 142
329: # 142 ah += mh;
330: addu $12, $12, $10
331: .loc 2 143
332: # 143 carryh += (ah < mh);
333: sltu $24, $12, $10
334: addu $18, $18, $24
335: .loc 2 144
336: # 144 ml = nml;
337: mflo $9
338: .loc 2 145
339: # 145 mh = nmh;
340: mfhi $10
341: .loc 2 147
342: # 146
343: # 147 lmul( word_v(ma,-1), word_v(mb,1), nml, nmh);
344: lw $4, -4($13)
345: lw $5, 4($16)
346: multu $4, $5
347: .loc 2 148
348: # 148 al += ml;
349: addu $8, $8, $9
350: .loc 2 149
351: # 149 carryl += (al < ml);
352: sltu $14, $8, $9
353: addu $19, $19, $14
354: .loc 2 150
355: # 150 ah += mh;
356: addu $12, $12, $10
357: .loc 2 151
358: # 151 carryh += (ah < mh);
359: sltu $25, $12, $10
360: addu $18, $18, $25
361: .loc 2 152
362: # 152 ml = nml;
363: mflo $9
364: .loc 2 153
365: # 153 mh = nmh;
366: mfhi $10
367: .loc 2 155
368: # 154
369: # 155 lmul( word_v(ma,-2), word_v(mb,2), nml, nmh);
370: lw $2, -8($13)
371: lw $3, 8($16)
372: multu $2, $3
373: .loc 2 156
374: # 156 al += ml;
375: addu $8, $8, $9
376: .loc 2 157
377: # 157 carryl += (al < ml);
378: sltu $15, $8, $9
379: addu $19, $19, $15
380: .loc 2 158
381: # 158 ah += mh;
382: addu $12, $12, $10
383: .loc 2 159
384: # 159 carryh += (ah < mh);
385: sltu $24, $12, $10
386: addu $18, $18, $24
387: .loc 2 160
388: # 160 ml = nml;
389: mflo $9
390: .loc 2 161
391: # 161 mh = nmh;
392: mfhi $10
393: .loc 2 163
394: # 162
395: # 163 lmul( word_v(ma,-3), word_v(mb,3), nml, nmh);
396: lw $4, -12($13)
397: lw $5, 12($16)
398: multu $4, $5
399: .loc 2 164
400: # 164 al += ml;
401: addu $8, $8, $9
402: .loc 2 165
403: # 165 carryl += (al < ml);
404: sltu $14, $8, $9
405: addu $19, $19, $14
406: .loc 2 166
407: # 166 ah += mh;
408: addu $12, $12, $10
409: .loc 2 167
410: # 167 carryh += (ah < mh);
411: sltu $25, $12, $10
412: addu $18, $18, $25
413: .loc 2 168
414: # 168 ml = nml;
415: mflo $9
416: .loc 2 169
417: # 169 mh = nmh;
418: mfhi $10
419: .loc 2 171
420: # 170
421: # 171 nlowerunit(ma,4); nhigherunit(mb,4);
422: addu $13, $13, -16
423: addu $16, $16, 16
424: .loc 2 173
425: # 172
426: # 173 }
427: .loc 2 173
428: move $11, $17
429: addu $17, $17, -1
430: bne $11, 0, $46
431: $47:
432: .loc 2 175
433: # 174
434: # 175 al += ml;
435: addu $8, $8, $9  # fold in the last outstanding product for this column
436: .loc 2 176
437: # 176 carryl += (al < ml);
438: sltu $15, $8, $9
439: addu $19, $19, $15
440: .loc 2 177
441: # 177 *pp = al;
442: sw $8, 0($20)
443: .loc 2 178
444: # 178 ah += mh;
445: addu $12, $12, $10
446: .loc 2 179
447: # 179 carryh += (ah < mh);
448: sltu $24, $12, $10
449: addu $18, $18, $24
450: .loc 2 180
451: # 180 post_higherunit(pp);
452: addu $20, $20, 4
453: .loc 2 181
454: # 181 }
455: .loc 2 181
456: addu $21, $21, 1
457: addu $23, $23, 4
458: addu $30, $30, 4
459: lw $14, 160($sp)
460: bne $21, $14, $43
461: $48:
462: .loc 2 182
463: # 182 amax = precb-1;
464: .loc 2 183
465: # 183 for (i=precb; i < preca; i++) {
466: lw $25, 160($sp)
467: move $21, $25
468: lw $15, 164($sp)
469: bge $25, $15, $54
470: mul $22, $21, 4  # $22 = i * 4; this byte count is the loop counter for this loop
471: lw $24, 172($sp)
472: addu $23, $24, $22  # $23 = address of multiplicand word i
473: addu $30, $23, -4  # $30 = address of multiplicand word i - 1
474: lw $14, 176($sp)
475: addu $24, $14, 4
476: sw $24, 56($sp)  # 56($sp) = multiplier + 4, the starting mb
477: mul $14, $15, 4
478: sw $14, 52($sp)  # 52($sp) = preca * 4, the bound compared against $22
479: addu $2, $25, -1
480: move $31, $2  # j = precb - 1 is the same for every iteration of this loop
481: and $24, $2, 3
482: sw $24, 44($sp)  # 44($sp) = j & 3
483: addu $15, $24, -1
484: sw $15, 40($sp)  # 40($sp) = (j & 3) - 1, the initial k for the remainder loop
485: $49:
486: .loc 2 184
487: # 184 ma = word_index( multiplicand,i-1);
488: move $13, $30
489: .loc 2 185
490: # 185 mb = word_index(multiplier,1);
491: lw $16, 56($sp)
492: .loc 2 186
493: # 186 lmul(word_v(ma,1),word_v(mb,-1),ml, mh);
494: lw $2, 0($23)
495: lw $14, 176($sp)
496: lw $3, 0($14)
497: multu $2, $3
498: mflo $9
499: mfhi $10
500: .loc 2 187
501: # 187 al = ah+ carryl;
502: addu $4, $12, $19
503: move $8, $4
504: .loc 2 188
505: # 188 carryl = carryh + (al < ah);
506: sltu $25, $4, $12
507: addu $19, $18, $25
508: .loc 2 189
509: # 189 ah = 0;
510: move $12, $0
511: .loc 2 190
512: # 190 carryh = 0;
513: move $18, $0
514: .loc 2 191
515: # 191 j = amax - amin;
516: .loc 2 192
517: # 192 k = j & 3;
518: .loc 2 193
519: # 193 while (k--) {
520: lw $17, 40($sp)  # reload the precomputed (j & 3) - 1
521: lw $24, 44($sp)  # reload the precomputed j & 3
522: beq $24, $0, $51
523: $50:
524: .loc 2 194
525: # 194 lmul( *ma, *mb, nml, nmh);
526: lw $2, 0($13)
527: lw $3, 0($16)
528: multu $2, $3
529: .loc 2 195
530: # 195 al += ml;
531: addu $8, $8, $9
532: .loc 2 196
533: # 196 carryl += (al < ml);
534: sltu $15, $8, $9
535: addu $19, $19, $15
536: .loc 2 197
537: # 197 ah += mh;
538: addu $12, $12, $10
539: .loc 2 198
540: # 198 carryh += (ah < mh);
541: sltu $14, $12, $10
542: addu $18, $18, $14
543: .loc 2 199
544: # 199 post_lowerunit(ma);
545: addu $13, $13, -4
546: .loc 2 200
547: # 200 post_higherunit(mb);
548: addu $16, $16, 4
549: .loc 2 201
550: # 201 ml = nml;
551: mflo $9
552: .loc 2 202
553: # 202 mh = nmh;
554: mfhi $10
555: .loc 2 203
556: # 203 }
557: .loc 2 203
558: move $11, $17
559: addu $17, $17, -1
560: bne $11, 0, $50
561: $51:
562: .loc 2 204
563: # 204 k = j >> 2;
564: .loc 2 205
565: # 205 while (k--) {
566: sra $2, $31, 2
567: addu $17, $2, -1
568: beq $2, $0, $53
569: $52:
570: .loc 2 206
571: # 206 lmul( word_v(ma,0), word_v(mb,0), nml, nmh);
572: lw $2, 0($13)
573: lw $3, 0($16)
574: multu $2, $3
575: .loc 2 207
576: # 207 al += ml;
577: addu $8, $8, $9
578: .loc 2 208
579: # 208 carryl += (al < ml);
580: sltu $25, $8, $9
581: addu $19, $19, $25
582: .loc 2 209
583: # 209 ah += mh;
584: addu $12, $12, $10
585: .loc 2 210
586: # 210 carryh += (ah < mh);
587: sltu $24, $12, $10
588: addu $18, $18, $24
589: .loc 2 211
590: # 211 ml = nml;
591: mflo $9
592: .loc 2 212
593: # 212 mh = nmh;
594: mfhi $10
595: .loc 2 214
596: # 213
597: # 214 lmul( word_v(ma,-1), word_v(mb,1), nml, nmh);
598: lw $4, -4($13)
599: lw $5, 4($16)
600: multu $4, $5
601: .loc 2 215
602: # 215 al += ml;
603: addu $8, $8, $9
604: .loc 2 216
605: # 216 carryl += (al < ml);
606: sltu $15, $8, $9
607: addu $19, $19, $15
608: .loc 2 217
609: # 217 ah += mh;
610: addu $12, $12, $10
611: .loc 2 218
612: # 218 carryh += (ah < mh);
613: sltu $14, $12, $10
614: addu $18, $18, $14
615: .loc 2 219
616: # 219 ml = nml;
617: mflo $9
618: .loc 2 220
619: # 220 mh = nmh;
620: mfhi $10
621: .loc 2 222
622: # 221
623: # 222 lmul( word_v(ma,-2), word_v(mb,2), nml, nmh);
624: lw $2, -8($13)
625: lw $3, 8($16)
626: multu $2, $3
627: .loc 2 223
628: # 223 al += ml;
629: addu $8, $8, $9
630: .loc 2 224
631: # 224 carryl += (al < ml);
632: sltu $25, $8, $9
633: addu $19, $19, $25
634: .loc 2 225
635: # 225 ah += mh;
636: addu $12, $12, $10
637: .loc 2 226
638: # 226 carryh += (ah < mh);
639: sltu $24, $12, $10
640: addu $18, $18, $24
641: .loc 2 227
642: # 227 ml = nml;
643: mflo $9
644: .loc 2 228
645: # 228 mh = nmh;
646: mfhi $10
647: .loc 2 230
648: # 229
649: # 230 lmul( word_v(ma,-3), word_v(mb,3), nml, nmh);
650: lw $4, -12($13)
651: lw $5, 12($16)
652: multu $4, $5
653: .loc 2 231
654: # 231 al += ml;
655: addu $8, $8, $9
656: .loc 2 232
657: # 232 carryl += (al < ml);
658: sltu $15, $8, $9
659: addu $19, $19, $15
660: .loc 2 233
661: # 233 ah += mh;
662: addu $12, $12, $10
663: .loc 2 234
664: # 234 carryh += (ah < mh);
665: sltu $14, $12, $10
666: addu $18, $18, $14
667: .loc 2 235
668: # 235 ml = nml;
669: mflo $9
670: .loc 2 236
671: # 236 mh = nmh;
672: mfhi $10
673: .loc 2 238
674: # 237
675: # 238 nlowerunit(ma,4);
676: addu $13, $13, -16
677: .loc 2 239
678: # 239 nhigherunit(mb,4);
679: addu $16, $16, 16
680: .loc 2 240
681: # 240 }
682: .loc 2 240
683: move $11, $17
684: addu $17, $17, -1
685: bne $11, 0, $52
686: $53:
687: .loc 2 242
688: # 241
689: # 242 al += ml;
690: addu $8, $8, $9
691: .loc 2 243
692: # 243 carryl += (al < ml);
693: sltu $25, $8, $9
694: addu $19, $19, $25
695: .loc 2 244
696: # 244 *pp = al;
697: sw $8, 0($20)
698: .loc 2 245
699: # 245 ah += mh;
700: addu $12, $12, $10
701: .loc 2 246
702: # 246 carryh += (ah < mh);
703: sltu $24, $12, $10
704: addu $18, $18, $24
705: .loc 2 247
706: # 247 post_higherunit(pp);
707: addu $20, $20, 4
708: .loc 2 248
709: # 248 }
710: .loc 2 248
711: addu $22, $22, 4  # advance the byte counter that stands in for i
712: addu $23, $23, 4
713: addu $30, $30, 4
714: lw $15, 52($sp)
715: blt $22, $15, $49  # continue while i * 4 < preca * 4
716: $54:
717: .loc 2 249
718: # 249 amax = precb-1;
719: .loc 2 250
720: # 250 for (i=preca; i < preca + precb -1; i++) {
721: lw $14, 164($sp)
722: move $21, $14
723: lw $25, 72($sp)
724: addu $24, $25, -1
725: sw $24, 68($sp)  # 68($sp) = preca + precb - 1, the loop bound
726: bge $14, $24, $60
727: lw $15, 160($sp)
728: addu $25, $15, -1
729: sw $25, 72($sp)  # slot 72($sp) is reused: it now holds precb - 1 (amax)
730: lw $24, 172($sp)
731: mul $15, $14, 4
732: addu $30, $24, $15  # $30 = multiplicand + preca * 4
733: addu $25, $30, -8
734: sw $25, 60($sp)  # 60($sp) = address of multiplicand word preca - 2, the starting ma
735: lw $24, 176($sp)
736: subu $15, $21, $14  # i - preca, which is 0 on entry because $21 was copied from $14
737: mul $25, $15, 4
738: addu $22, $24, $25  # $22 = multiplier + (i - preca) * 4
739: addu $23, $22, 8  # $23 = starting mb for this iteration
740: $55:
741: .loc 2 251
742: # 251 amin = i-preca + 1;
743: .loc 2 252
744: # 252 ma = word_index(multiplicand,preca-2);
745: lw $13, 60($sp)
746: .loc 2 253
747: # 253 mb = word_index(multiplier, - preca + 2 +i) ;
748: move $16, $23
749: .loc 2 254
750: # 254 lmul(word_v(ma,1),word_v(mb,-1),ml, mh);
751: lw $2, -4($30)
752: lw $3, 4($22)
753: multu $2, $3
754: mflo $9
755: mfhi $10
756: .loc 2 255
757: # 255 al = ah+ carryl;
758: addu $4, $12, $19
759: move $8, $4
760: .loc 2 256
761: # 256 carryl = carryh + (al < ah);
762: sltu $14, $4, $12
763: addu $19, $18, $14
764: .loc 2 257
765: # 257 ah = 0;
766: move $12, $0
767: .loc 2 258
768: # 258 carryh = 0;
769: move $18, $0
770: .loc 2 259
771: # 259 j = amax - amin;
772: lw $15, 72($sp)  # precb - 1
773: lw $24, 164($sp)
774: subu $25, $21, $24  # i - preca
775: subu $5, $15, $25
776: addu $5, $5, -1  # j = (precb - 1) - (i - preca) - 1
777: move $31, $5
778: .loc 2 260
779: # 260 k = j & 3;
780: .loc 2 261
781: # 261 while (k--) {
782: and $6, $5, 3
783: addu $17, $6, -1
784: beq $6, $0, $57
785: $56:
786: .loc 2 262
787: # 262 lmul( *ma, *mb, nml, nmh);
788: lw $2, 0($13)
789: lw $3, 0($16)
790: multu $2, $3
791: .loc 2 263
792: # 263 al += ml;
793: addu $8, $8, $9
794: .loc 2 264
795: # 264 carryl += (al < ml);
796: sltu $14, $8, $9
797: addu $19, $19, $14
798: .loc 2 265
799: # 265 ah += mh;
800: addu $12, $12, $10
801: .loc 2 266
802: # 266 carryh += (ah < mh);
803: sltu $24, $12, $10
804: addu $18, $18, $24
805: .loc 2 267
806: # 267 post_lowerunit(ma);
807: addu $13, $13, -4
808: .loc 2 268
809: # 268 post_higherunit(mb);
810: addu $16, $16, 4
811: .loc 2 269
812: # 269 ml = nml;
813: mflo $9
814: .loc 2 270
815: # 270 mh = nmh;
816: mfhi $10
817: .loc 2 271
818: # 271 }
819: .loc 2 271
820: move $11, $17
821: addu $17, $17, -1
822: bne $11, 0, $56
823: $57:
824: .loc 2 272
825: # 272 k = j >> 2;
826: .loc 2 273
827: # 273 while (k--) {
828: sra $2, $31, 2
829: addu $17, $2, -1
830: beq $2, $0, $59
831: $58:
832: .loc 2 274
833: # 274 lmul( word_v(ma,0), word_v(mb,0), nml, nmh);
834: lw $2, 0($13)
835: lw $3, 0($16)
836: multu $2, $3
837: .loc 2 275
838: # 275 al += ml;
839: addu $8, $8, $9
840: .loc 2 276
841: # 276 carryl += (al < ml);
842: sltu $15, $8, $9
843: addu $19, $19, $15
844: .loc 2 277
845: # 277 ah += mh;
846: addu $12, $12, $10
847: .loc 2 278
848: # 278 carryh += (ah < mh);
849: sltu $25, $12, $10
850: addu $18, $18, $25
851: .loc 2 279
852: # 279 ml = nml;
853: mflo $9
854: .loc 2 280
855: # 280 mh = nmh;
856: mfhi $10
857: .loc 2 282
858: # 281
859: # 282 lmul( word_v(ma,-1), word_v(mb,1), nml, nmh);
860: lw $4, -4($13)
861: lw $5, 4($16)
862: multu $4, $5
863: .loc 2 283
864: # 283 al += ml;
865: addu $8, $8, $9
866: .loc 2 284
867: # 284 carryl += (al < ml);
868: sltu $14, $8, $9
869: addu $19, $19, $14
870: .loc 2 285
871: # 285 ah += mh;
872: addu $12, $12, $10
873: .loc 2 286
874: # 286 carryh += (ah < mh);
875: sltu $24, $12, $10
876: addu $18, $18, $24
877: .loc 2 287
878: # 287 ml = nml;
879: mflo $9
880: .loc 2 288
881: # 288 mh = nmh;
882: mfhi $10
883: .loc 2 290
884: # 289
885: # 290 lmul( word_v(ma,-2), word_v(mb,2), nml, nmh);
886: lw $2, -8($13)
887: lw $3, 8($16)
888: multu $2, $3
889: .loc 2 291
890: # 291 al += ml;
891: addu $8, $8, $9
892: .loc 2 292
893: # 292 carryl += (al < ml);
894: sltu $15, $8, $9
895: addu $19, $19, $15
896: .loc 2 293
897: # 293 ah += mh;
898: addu $12, $12, $10
899: .loc 2 294
900: # 294 carryh += (ah < mh);
901: sltu $25, $12, $10
902: addu $18, $18, $25
903: .loc 2 295
904: # 295 ml = nml;
905: mflo $9
906: .loc 2 296
907: # 296 mh = nmh;
908: mfhi $10
909: .loc 2 298
910: # 297
911: # 298 lmul( word_v(ma,-3), word_v(mb,3), nml, nmh);
912: lw $4, -12($13)
913: lw $5, 12($16)
914: multu $4, $5
915: .loc 2 299
916: # 299 al += ml;
917: addu $8, $8, $9
918: .loc 2 300
919: # 300 carryl += (al < ml);
920: sltu $14, $8, $9
921: addu $19, $19, $14
922: .loc 2 301
923: # 301 ah += mh;
924: addu $12, $12, $10
925: .loc 2 302
926: # 302 carryh += (ah < mh);
927: sltu $24, $12, $10
928: addu $18, $18, $24
929: .loc 2 303
930: # 303 ml = nml;
931: mflo $9
932: .loc 2 304
933: # 304 mh = nmh;
934: mfhi $10
935: .loc 2 306
936: # 305
937: # 306 nlowerunit(ma,4);
938: addu $13, $13, -16
939: .loc 2 307
940: # 307 nhigherunit(mb,4);
941: addu $16, $16, 16
942: .loc 2 308
943: # 308 }
944: .loc 2 308
945: move $11, $17
946: addu $17, $17, -1
947: bne $11, 0, $58
948: $59:
949: .loc 2 310
950: # 309
951: # 310 al += ml;
952: addu $8, $8, $9
953: .loc 2 311
954: # 311 carryl += (al < ml);
955: sltu $15, $8, $9
956: addu $19, $19, $15
957: .loc 2 312
958: # 312 *pp = al;
959: sw $8, 0($20)
960: .loc 2 313
961: # 313 ah += mh;
962: addu $12, $12, $10
963: .loc 2 314
964: # 314 carryh += (ah < mh);
965: sltu $25, $12, $10
966: addu $18, $18, $25
967: .loc 2 315
968: # 315 post_higherunit(pp);
969: addu $20, $20, 4
970: .loc 2 316
971: # 316 }
972: .loc 2 316
973: addu $21, $21, 1
974: addu $22, $22, 4
975: addu $23, $23, 4
976: lw $14, 68($sp)
977: blt $21, $14, $55
978: $60:
979: .loc 2 317
980: # 317 al = ah + carryl;
981: .loc 2 318
982: # 318 carryh += (al < ah);
983: .loc 2 319
984: # 319 *pp = al;
985: addu $24, $12, $19  # al = ah + carryl; no instruction is emitted for the carryh update of C line 318
986: sw $24, 0($20)  # final product word written through pp
987: .loc 2 320
988: # 320 }
989: $61:
990: ld $16, 0($sp)  # common epilogue: undo the saves made at entry
991: ld $18, 8($sp)
992: ld $20, 16($sp)
993: ld $22, 24($sp)
994: ld $30, 32($sp)
995: addu $sp, 168  # release the frame
996: j $31
997: .end p_dmul
998: #endif
999:
1000: #if defined(MIPSEB) && !defined(MIPSEL)
1001: .verstamp 2 10
1002: .extern global_precision 2
1003: .text
1004: .align 2
1005: .file 2 "ndmulprotoEB.c"
1006: .globl p_dmul
1007: .loc 2 69
1008: # 69 {
1009: .ent p_dmul 2
1010: p_dmul:
1011: .option O2
1012: subu $sp, 168
1013: sd $30, 32($sp)
1014: sd $22, 24($sp)
1015: sd $20, 16($sp)
1016: sd $18, 8($sp)
1017: sd $16, 0($sp)
1018: .mask 0xC0FF0000, -132
1019: .frame $sp, 168, $31
1020: sw $5, 172($sp)
1021: sw $6, 176($sp)
1022: .loc 2 76
1023: # 76 preca = global_precision;
1024: lh $14, global_precision
1025: sw $14, 164($sp)
1026: .loc 2 77
1027: # 77 pp=msbptr(multiplicand,preca);
1028: .loc 2 78
1029: # 78 while(*post_lowerunit(pp) == 0 && preca > 0) preca--;
1030: lw $15, 172($sp)
1031: lw $11, 0($15)
1032: addu $20, $15, 4
1033: bne $11, 0, $33
1034: ble $14, 0, $33
1035: $32:
1036: lw $24, 164($sp)
1037: addu $25, $24, -1
1038: sw $25, 164($sp)
1039: lw $11, 0($20)
1040: addu $20, $20, 4
1041: bne $11, 0, $33
1042: bgt $25, 0, $32
1043: $33:
1044: .loc 2 80
1045: # 79
1046: # 80 precb = global_precision;
1047: lh $15, global_precision
1048: sw $15, 160($sp)
1049: .loc 2 81
1050: # 81 pp = msbptr(multiplier,precb);
1051: .loc 2 82
1052: # 82 while(*post_lowerunit(pp) == 0 && precb > 0) precb--;
1053: lw $14, 176($sp)
1054: lw $11, 0($14)
1055: addu $20, $14, 4
1056: bne $11, 0, $35
1057: ble $15, 0, $35
1058: $34:
1059: lw $24, 160($sp)
1060: addu $25, $24, -1
1061: sw $25, 160($sp)
1062: lw $11, 0($20)
1063: addu $20, $20, 4
1064: bne $11, 0, $35
1065: bgt $25, 0, $34
1066: $35:
1067: .loc 2 84
1068: # 83
1069: # 84 prec2 = global_precision<<1;
1070: lh $3, global_precision
1071: sll $3, $3, 1
1072: move $2, $3
1073: .loc 2 86
1074: # 85
1075: # 86 pp = msbptr(prod,prec2);
1076: move $20, $4
1077: .loc 2 87
1078: # 87 for (i= preca+precb; i < prec2 ; i++) {
1079: lw $14, 164($sp)
1080: lw $15, 160($sp)
1081: addu $24, $14, $15
1082: sw $24, 72($sp)
1083: move $21, $24
1084: bge $24, $3, $37
1085: $36:
1086: .loc 2 88
1087: # 88 *post_lowerunit(pp) = 0;
1088: sw $0, 0($20)
1089: addu $20, $20, 4
1090: .loc 2 89
1091: # 89 }
1092: .loc 2 89
1093: addu $21, $21, 1
1094: blt $21, $2, $36
1095: $37:
1096: .loc 2 90
1097: # 90 if (preca == 0 || precb == 0) {
1098: lw $25, 164($sp)
1099: beq $25, 0, $38
1100: lw $14, 160($sp)
1101: bne $14, 0, $41
1102: $38:
1103: .loc 2 91
1104: # 91 pp = lsbptr(prod,prec2);
1105: mul $15, $2, 4
1106: addu $20, $4, $15
1107: addu $20, $20, -4
1108: .loc 2 92
1109: # 92 for (i=0; i < preca + precb; i++)
1110: move $21, $0
1111: lw $24, 72($sp)
1112: ble $24, 0, $61
1113: and $2, $24, 3
1114: beq $2, 0, $40
1115: $39:
1116: sw $0, 0($20)
1117: addu $20, $20, -4
1118: addu $21, $21, 1
1119: bne $2, $21, $39
1120: lw $25, 72($sp)
1121: beq $21, $25, $61
1122: $40:
1123: .loc 2 93
1124: # 93 *(post_higherunit(pp)) = 0;
1125: sw $0, 0($20)
1126: addu $20, $20, -4
1127: sw $0, 0($20)
1128: addu $20, $20, -4
1129: sw $0, 0($20)
1130: addu $20, $20, -4
1131: sw $0, 0($20)
1132: addu $20, $20, -4
1133: addu $21, $21, 4
1134: lw $14, 72($sp)
1135: bne $21, $14, $40
1136: .loc 2 94
1137: # 94 return;
1138: b $61
1139: $41:
1140: .loc 2 97
1141: # 95 }
1142: # 96 /* Canonicalize length(a) >= length(b) */
1143: # 97 if (preca < precb) {
1144: lw $15, 164($sp)
1145: lw $24, 160($sp)
1146: bge $15, $24, $42
1147: .loc 2 98
1148: # 98 pp = multiplicand;
1149: lw $20, 172($sp)
1150: .loc 2 99
1151: # 99 multiplicand = multiplier;
1152: lw $25, 176($sp)
1153: sw $25, 172($sp)
1154: .loc 2 100
1155: # 100 multiplier = pp;
1156: sw $20, 176($sp)
1157: .loc 2 101
1158: # 101 i = preca;
1159: move $21, $15
1160: .loc 2 102
1161: # 102 preca = precb;
1162: sw $24, 164($sp)
1163: .loc 2 103
1164: # 103 precb = i;
1165: sw $21, 160($sp)
1166: .loc 2 104
1167: # 104 }
1168: lw $14, 164($sp)
1169: lw $25, 160($sp)
1170: addu $15, $14, $25
1171: sw $15, 72($sp)
1172: $42:
1173: .loc 2 105
1174: # 105 make_lsbptr(multiplicand, global_precision);
1175: lh $3, global_precision
1176: mul $3, $3, 4
1177: lw $24, 172($sp)
1178: addu $14, $24, $3
1179: addu $25, $14, -4
1180: sw $25, 172($sp)
1181: .loc 2 106
1182: # 106 make_lsbptr(multiplier, global_precision);
1183: lw $15, 176($sp)
1184: addu $24, $15, $3
1185: addu $14, $24, -4
1186: sw $14, 176($sp)
1187: .loc 2 107
1188: # 107 pp = lsbptr(prod,prec2);
1189: mul $15, $2, 4
1190: addu $20, $4, $15
1191: addu $20, $20, -4
1192: .loc 2 108
1193: # 108 ah = 0;
1194: move $12, $0
1195: .loc 2 109
1196: # 109 carryl = 0;
1197: move $19, $0
1198: .loc 2 110
1199: # 110 carryh =0;
1200: move $18, $0
1201: .loc 2 111
1202: # 111 al = 0;
1203: .loc 2 114
1204: # 112
1205: # 113
1206: # 114 amin = 0;
1207: .loc 2 115
1208: # 115 for (i=0; i < precb; i++) {
1209: move $21, $0
1210: lw $24, 160($sp)
1211: ble $24, 0, $48
1212: move $23, $25
1213: addu $30, $23, 4
1214: addu $15, $14, -4
1215: sw $15, 56($sp)
1216: $43:
1217: .loc 2 116
1218: # 116 amax = i;
1219: .loc 2 117
1220: # 117 ma = word_index(multiplicand,i -1);
1221: move $13, $30
1222: .loc 2 118
1223: # 118 mb = word_index(multiplier,1);
1224: lw $16, 56($sp)
1225: .loc 2 119
1226: # 119 lmul(word_v(ma,1),word_v(mb,-1),ml, mh);
1227: lw $3, 0($23)
1228: lw $24, 176($sp)
1229: lw $2, 0($24)
1230: multu $3,$2
1231: mflo $9
1232: mfhi $10
1233: .loc 2 120
1234: # 120 al = ah+ carryl;
1235: addu $4, $12, $19
1236: move $8, $4
1237: .loc 2 121
1238: # 121 carryl = carryh + (al < ah);
1239: sltu $25, $4, $12
1240: addu $19, $18, $25
1241: .loc 2 122
1242: # 122 ah = 0;
1243: move $12, $0
1244: .loc 2 123
1245: # 123 carryh = 0;
1246: move $18, $0
1247: .loc 2 124
1248: # 124 j = amax - amin;
1249: move $5, $21
1250: move $31, $5
1251: .loc 2 125
1252: # 125 k = j & 3;
1253: .loc 2 126
1254: # 126 while (k--) {
1255: and $6, $5, 3
1256: addu $17, $6, -1
1257: beq $6, $0, $45
1258: $44:
1259: .loc 2 127
1260: # 127 lmul( *ma, *mb, nml, nmh);
1261: lw $2, 0($13)
1262: lw $3, 0($16)
1263: multu $2, $3
1264: .loc 2 128
1265: # 128 al += ml;
1266: addu $8, $8, $9
1267: .loc 2 129
1268: # 129 carryl += (al < ml);
1269: sltu $14, $8, $9
1270: addu $19, $19, $14
1271: .loc 2 130
1272: # 130 ah += mh;
1273: addu $12, $12, $10
1274: .loc 2 131
1275: # 131 carryh += (ah < mh);
1276: sltu $15, $12, $10
1277: addu $18, $18, $15
1278: .loc 2 132
1279: # 132 post_lowerunit(ma);
1280: addu $13, $13, 4
1281: .loc 2 133
1282: # 133 post_higherunit(mb);
1283: addu $16, $16, -4
1284: .loc 2 134
1285: # 134 ml = nml;
1286: mflo $9
1287: .loc 2 135
1288: # 135 mh = nmh;
1289: mfhi $10
1290: .loc 2 136
1291: # 136 }
1292: .loc 2 136
1293: move $11, $17
1294: addu $17, $17, -1
1295: bne $11, 0, $44
1296: $45:
1297: .loc 2 137
1298: # 137 k = j >> 2;
1299: .loc 2 138
1300: # 138 while (k--) {
1301: sra $2, $31, 2
1302: addu $17, $2, -1
1303: beq $2, $0, $47
1304: $46:
1305: .loc 2 139
1306: # 139 lmul( word_v(ma,0), word_v(mb,0), nml, nmh);
1307: lw $2, 0($13)
1308: lw $3, 0($16)
1309: multu $2, $3
1310: .loc 2 140
1311: # 140 al += ml;
1312: addu $8, $8, $9
1313: .loc 2 141
1314: # 141 carryl += (al < ml);
1315: sltu $24, $8, $9
1316: addu $19, $19, $24
1317: .loc 2 142
1318: # 142 ah += mh;
1319: addu $12, $12, $10
1320: .loc 2 143
1321: # 143 carryh += (ah < mh);
1322: sltu $25, $12, $10
1323: addu $18, $18, $25
1324: .loc 2 144
1325: # 144 ml = nml;
1326: mflo $9
1327: .loc 2 145
1328: # 145 mh = nmh;
1329: mfhi $10
1330: .loc 2 147
1331: # 146
1332: # 147 lmul( word_v(ma,-1), word_v(mb,1), nml, nmh);
1333: lw $4, 4($13)
1334: lw $5, -4($16)
1335: multu $4, $5
1336: .loc 2 148
1337: # 148 al += ml;
1338: addu $8, $8, $9
1339: .loc 2 149
1340: # 149 carryl += (al < ml);
1341: sltu $14, $8, $9
1342: addu $19, $19, $14
1343: .loc 2 150
1344: # 150 ah += mh;
1345: addu $12, $12, $10
1346: .loc 2 151
1347: # 151 carryh += (ah < mh);
1348: sltu $15, $12, $10
1349: addu $18, $18, $15
1350: .loc 2 152
1351: # 152 ml = nml;
1352: mflo $9
1353: .loc 2 153
1354: # 153 mh = nmh;
1355: mfhi $10
1356: .loc 2 155
1357: # 154
1358: # 155 lmul( word_v(ma,-2), word_v(mb,2), nml, nmh);
1359: lw $2, 8($13)
1360: lw $3, -8($16)
1361: multu $2, $3
1362: .loc 2 156
1363: # 156 al += ml;
1364: addu $8, $8, $9
1365: .loc 2 157
1366: # 157 carryl += (al < ml);
1367: sltu $24, $8, $9
1368: addu $19, $19, $24
1369: .loc 2 158
1370: # 158 ah += mh;
1371: addu $12, $12, $10
1372: .loc 2 159
1373: # 159 carryh += (ah < mh);
1374: sltu $25, $12, $10
1375: addu $18, $18, $25
1376: .loc 2 160
1377: # 160 ml = nml;
1378: mflo $9
1379: .loc 2 161
1380: # 161 mh = nmh;
1381: mfhi $10
1382: .loc 2 163
1383: # 162
1384: # 163 lmul( word_v(ma,-3), word_v(mb,3), nml, nmh);
1385: lw $4, 12($13)
1386: lw $5, -12($16)
1387: multu $4, $5
1388: .loc 2 164
1389: # 164 al += ml;
1390: addu $8, $8, $9
1391: .loc 2 165
1392: # 165 carryl += (al < ml);
1393: sltu $14, $8, $9
1394: addu $19, $19, $14
1395: .loc 2 166
1396: # 166 ah += mh;
1397: addu $12, $12, $10
1398: .loc 2 167
1399: # 167 carryh += (ah < mh);
1400: sltu $15, $12, $10
1401: addu $18, $18, $15
1402: .loc 2 168
1403: # 168 ml = nml;
1404: mflo $9
1405: .loc 2 169
1406: # 169 mh = nmh;
1407: mfhi $10
1408: .loc 2 171
1409: # 170
1410: # 171 nlowerunit(ma,4); nhigherunit(mb,4);
1411: addu $13, $13, 16
1412: addu $16, $16, -16
1413: .loc 2 173
1414: # 172
1415: # 173 }
1416: .loc 2 173
1417: move $11, $17
1418: addu $17, $17, -1
1419: bne $11, 0, $46
1420: $47:
1421: .loc 2 175
1422: # 174
1423: # 175 al += ml;
1424: addu $8, $8, $9
1425: .loc 2 176
1426: # 176 carryl += (al < ml);
1427: sltu $24, $8, $9
1428: addu $19, $19, $24
1429: .loc 2 177
1430: # 177 *pp = al;
1431: sw $8, 0($20)
1432: .loc 2 178
1433: # 178 ah += mh;
1434: addu $12, $12, $10
1435: .loc 2 179
1436: # 179 carryh += (ah < mh);
1437: sltu $25, $12, $10
1438: addu $18, $18, $25
1439: .loc 2 180
1440: # 180 post_higherunit(pp);
1441: addu $20, $20, -4
1442: .loc 2 181
1443: # 181 }
1444: .loc 2 181
1445: addu $21, $21, 1
1446: addu $23, $23, -4
1447: addu $30, $30, -4
1448: lw $14, 160($sp)
1449: bne $21, $14, $43
1450: $48:
1451: .loc 2 182
1452: # 182 amax = precb-1;
1453: .loc 2 183
1454: # 183 for (i=precb; i < preca; i++) {
1455: lw $15, 160($sp)
1456: move $21, $15
1457: lw $24, 164($sp)
1458: bge $15, $24, $54
1459: negu $22, $21
1460: mul $22, $22, 4
1461: lw $25, 172($sp)
1462: addu $23, $25, $22
1463: addu $30, $23, 4
1464: lw $14, 176($sp)
1465: addu $25, $14, -4
1466: sw $25, 56($sp)
1467: mul $14, $24, -4
1468: sw $14, 52($sp)
1469: addu $2, $15, -1
1470: move $31, $2
1471: and $25, $2, 3
1472: sw $25, 44($sp)
1473: addu $24, $25, -1
1474: sw $24, 40($sp)
1475: $49:
1476: .loc 2 184
1477: # 184 ma = word_index( multiplicand,i-1);
1478: move $13, $30
1479: .loc 2 185
1480: # 185 mb = word_index(multiplier,1);
1481: lw $16, 56($sp)
1482: .loc 2 186
1483: # 186 lmul(word_v(ma,1),word_v(mb,-1),ml, mh);
1484: lw $3, 0($23)
1485: lw $14, 176($sp)
1486: lw $2, 0($14)
1487: multu $3,$2
1488: mflo $9
1489: mfhi $10
1490: .loc 2 187
1491: # 187 al = ah+ carryl;
1492: addu $4, $12, $19
1493: move $8, $4
1494: .loc 2 188
1495: # 188 carryl = carryh + (al < ah);
1496: sltu $15, $4, $12
1497: addu $19, $18, $15
1498: .loc 2 189
1499: # 189 ah = 0;
1500: move $12, $0
1501: .loc 2 190
1502: # 190 carryh = 0;
1503: move $18, $0
1504: .loc 2 191
1505: # 191 j = amax - amin;
1506: .loc 2 192
1507: # 192 k = j & 3;
1508: .loc 2 193
1509: # 193 while (k--) {
1510: lw $17, 40($sp)
1511: lw $25, 44($sp)
1512: beq $25, $0, $51
1513: $50:
1514: .loc 2 194
1515: # 194 lmul( *ma, *mb, nml, nmh);
1516: lw $2, 0($13)
1517: lw $3, 0($16)
1518: multu $2, $3
1519: .loc 2 195
1520: # 195 al += ml;
1521: addu $8, $8, $9
1522: .loc 2 196
1523: # 196 carryl += (al < ml);
1524: sltu $24, $8, $9
1525: addu $19, $19, $24
1526: .loc 2 197
1527: # 197 ah += mh;
1528: addu $12, $12, $10
1529: .loc 2 198
1530: # 198 carryh += (ah < mh);
1531: sltu $14, $12, $10
1532: addu $18, $18, $14
1533: .loc 2 199
1534: # 199 post_lowerunit(ma);
1535: addu $13, $13, 4
1536: .loc 2 200
1537: # 200 post_higherunit(mb);
1538: addu $16, $16, -4
1539: .loc 2 201
1540: # 201 ml = nml;
1541: mflo $9
1542: .loc 2 202
1543: # 202 mh = nmh;
1544: mfhi $10
1545: .loc 2 203
1546: # 203 }
1547: .loc 2 203
1548: move $11, $17
1549: addu $17, $17, -1
1550: bne $11, 0, $50
1551: $51:
1552: .loc 2 204
1553: # 204 k = j >> 2;
1554: .loc 2 205
1555: # 205 while (k--) {
1556: sra $2, $31, 2
1557: addu $17, $2, -1
1558: beq $2, $0, $53
1559: $52:
1560: .loc 2 206
1561: # 206 lmul( word_v(ma,0), word_v(mb,0), nml, nmh);
1562: lw $2, 0($13)
1563: lw $3, 0($16)
1564: multu $2, $3
1565: .loc 2 207
1566: # 207 al += ml;
1567: addu $8, $8, $9
1568: .loc 2 208
1569: # 208 carryl += (al < ml);
1570: sltu $15, $8, $9
1571: addu $19, $19, $15
1572: .loc 2 209
1573: # 209 ah += mh;
1574: addu $12, $12, $10
1575: .loc 2 210
1576: # 210 carryh += (ah < mh);
1577: sltu $25, $12, $10
1578: addu $18, $18, $25
1579: .loc 2 211
1580: # 211 ml = nml;
1581: mflo $9
1582: .loc 2 212
1583: # 212 mh = nmh;
1584: mfhi $10
1585: .loc 2 214
1586: # 213
1587: # 214 lmul( word_v(ma,-1), word_v(mb,1), nml, nmh);
1588: lw $4, 4($13)
1589: lw $5, -4($16)
1590: multu $4, $5
1591: .loc 2 215
1592: # 215 al += ml;
1593: addu $8, $8, $9
1594: .loc 2 216
1595: # 216 carryl += (al < ml);
1596: sltu $24, $8, $9
1597: addu $19, $19, $24
1598: .loc 2 217
1599: # 217 ah += mh;
1600: addu $12, $12, $10
1601: .loc 2 218
1602: # 218 carryh += (ah < mh);
1603: sltu $14, $12, $10
1604: addu $18, $18, $14
1605: .loc 2 219
1606: # 219 ml = nml;
1607: mflo $9
1608: .loc 2 220
1609: # 220 mh = nmh;
1610: mfhi $10
1611: .loc 2 222
1612: # 221
1613: # 222 lmul( word_v(ma,-2), word_v(mb,2), nml, nmh);
1614: lw $2, 8($13)
1615: lw $3, -8($16)
1616: multu $2, $3
1617: .loc 2 223
1618: # 223 al += ml;
1619: addu $8, $8, $9
1620: .loc 2 224
1621: # 224 carryl += (al < ml);
1622: sltu $15, $8, $9
1623: addu $19, $19, $15
1624: .loc 2 225
1625: # 225 ah += mh;
1626: addu $12, $12, $10
1627: .loc 2 226
1628: # 226 carryh += (ah < mh);
1629: sltu $25, $12, $10
1630: addu $18, $18, $25
1631: .loc 2 227
1632: # 227 ml = nml;
1633: mflo $9
1634: .loc 2 228
1635: # 228 mh = nmh;
1636: mfhi $10
1637: .loc 2 230
1638: # 229
1639: # 230 lmul( word_v(ma,-3), word_v(mb,3), nml, nmh);
1640: lw $4, 12($13)
1641: lw $5, -12($16)
1642: multu $4, $5
1643: .loc 2 231
1644: # 231 al += ml;
1645: addu $8, $8, $9
1646: .loc 2 232
1647: # 232 carryl += (al < ml);
1648: sltu $24, $8, $9
1649: addu $19, $19, $24
1650: .loc 2 233
1651: # 233 ah += mh;
1652: addu $12, $12, $10
1653: .loc 2 234
1654: # 234 carryh += (ah < mh);
1655: sltu $14, $12, $10
1656: addu $18, $18, $14
1657: .loc 2 235
1658: # 235 ml = nml;
1659: mflo $9
1660: .loc 2 236
1661: # 236 mh = nmh;
1662: mfhi $10
1663: .loc 2 238
1664: # 237
1665: # 238 nlowerunit(ma,4);
1666: addu $13, $13, 16
1667: .loc 2 239
1668: # 239 nhigherunit(mb,4);
1669: addu $16, $16, -16
1670: .loc 2 240
1671: # 240 }
1672: .loc 2 240
1673: move $11, $17
1674: addu $17, $17, -1
1675: bne $11, 0, $52
1676: $53:
1677: .loc 2 242
1678: # 241
1679: # 242 al += ml;
1680: addu $8, $8, $9
1681: .loc 2 243
1682: # 243 carryl += (al < ml);
1683: sltu $15, $8, $9
1684: addu $19, $19, $15
1685: .loc 2 244
1686: # 244 *pp = al;
1687: sw $8, 0($20)
1688: .loc 2 245
1689: # 245 ah += mh;
1690: addu $12, $12, $10
1691: .loc 2 246
1692: # 246 carryh += (ah < mh);
1693: sltu $25, $12, $10
1694: addu $18, $18, $25
1695: .loc 2 247
1696: # 247 post_higherunit(pp);
1697: addu $20, $20, -4
1698: .loc 2 248
1699: # 248 }
1700: .loc 2 248
1701: addu $22, $22, -4
1702: addu $23, $23, -4
1703: addu $30, $30, -4
1704: lw $24, 52($sp)
1705: blt $24, $22, $49
1706: $54:
1707: .loc 2 249
1708: # 249 amax = precb-1;
1709: .loc 2 250
1710: # 250 for (i=preca; i < preca + precb -1; i++) {
1711: lw $14, 164($sp)
1712: move $21, $14
1713: lw $15, 72($sp)
1714: addu $25, $15, -1
1715: sw $25, 68($sp)
1716: bge $14, $25, $60
1717: lw $24, 160($sp)
1718: addu $15, $24, -1
1719: sw $15, 72($sp)
1720: negu $2, $14
1721: lw $25, 172($sp)
1722: mul $24, $2, 4
1723: addu $30, $25, $24
1724: addu $15, $30, 8
1725: sw $15, 56($sp)
1726: lw $14, 176($sp)
1727: addu $25, $2, $21
1728: negu $24, $25
1729: mul $15, $24, 4
1730: addu $22, $14, $15
1731: addu $23, $22, -8
1732: $55:
1733: .loc 2 251
1734: # 251 amin = i-preca + 1;
1735: .loc 2 252
1736: # 252 ma = word_index(multiplicand,preca-2);
1737: lw $13, 56($sp)
1738: .loc 2 253
1739: # 253 mb = word_index(multiplier, - preca + 2 +i) ;
1740: move $16, $23
1741: .loc 2 254
1742: # 254 lmul(word_v(ma,1),word_v(mb,-1),ml, mh);
1743: lw $2, 4($30)
1744: lw $3, -4($22)
1745: multu $3,$2
1746: mflo $9
1747: mfhi $10
1748: .loc 2 255
1749: # 255 al = ah+ carryl;
1750: addu $4, $12, $19
1751: move $8, $4
1752: .loc 2 256
1753: # 256 carryl = carryh + (al < ah);
1754: sltu $25, $4, $12
1755: addu $19, $18, $25
1756: .loc 2 257
1757: # 257 ah = 0;
1758: move $12, $0
1759: .loc 2 258
1760: # 258 carryh = 0;
1761: move $18, $0
1762: .loc 2 259
1763: # 259 j = amax - amin;
1764: lw $24, 72($sp)
1765: lw $14, 164($sp)
1766: subu $15, $21, $14
1767: subu $5, $24, $15
1768: addu $5, $5, -1
1769: move $31, $5
1770: .loc 2 260
1771: # 260 k = j & 3;
1772: .loc 2 261
1773: # 261 while (k--) {
1774: and $6, $5, 3
1775: addu $17, $6, -1
1776: beq $6, $0, $57
1777: $56:
1778: .loc 2 262
1779: # 262 lmul( *ma, *mb, nml, nmh);
1780: lw $2, 0($13)
1781: lw $3, 0($16)
1782: multu $2, $3
1783: .loc 2 263
1784: # 263 al += ml;
1785: addu $8, $8, $9
1786: .loc 2 264
1787: # 264 carryl += (al < ml);
1788: sltu $25, $8, $9
1789: addu $19, $19, $25
1790: .loc 2 265
1791: # 265 ah += mh;
1792: addu $12, $12, $10
1793: .loc 2 266
1794: # 266 carryh += (ah < mh);
1795: sltu $14, $12, $10
1796: addu $18, $18, $14
1797: .loc 2 267
1798: # 267 post_lowerunit(ma);
1799: addu $13, $13, 4
1800: .loc 2 268
1801: # 268 post_higherunit(mb);
1802: addu $16, $16, -4
1803: .loc 2 269
1804: # 269 ml = nml;
1805: mflo $9
1806: .loc 2 270
1807: # 270 mh = nmh;
1808: mfhi $10
1809: .loc 2 271
1810: # 271 }
1811: .loc 2 271
1812: move $11, $17
1813: addu $17, $17, -1
1814: bne $11, 0, $56
1815: $57:
1816: .loc 2 272
1817: # 272 k = j >> 2;
1818: .loc 2 273
1819: # 273 while (k--) {
1820: sra $2, $31, 2
1821: addu $17, $2, -1
1822: beq $2, $0, $59
1823: $58:
1824: .loc 2 274
1825: # 274 lmul( word_v(ma,0), word_v(mb,0), nml, nmh);
1826: lw $2, 0($13)
1827: lw $3, 0($16)
1828: multu $2, $3
1829: .loc 2 275
1830: # 275 al += ml;
1831: addu $8, $8, $9
1832: .loc 2 276
1833: # 276 carryl += (al < ml);
1834: sltu $24, $8, $9
1835: addu $19, $19, $24
1836: .loc 2 277
1837: # 277 ah += mh;
1838: addu $12, $12, $10
1839: .loc 2 278
1840: # 278 carryh += (ah < mh);
1841: sltu $15, $12, $10
1842: addu $18, $18, $15
1843: .loc 2 279
1844: # 279 ml = nml;
1845: mflo $9
1846: .loc 2 280
1847: # 280 mh = nmh;
1848: mfhi $10
1849: .loc 2 282
1850: # 281
1851: # 282 lmul( word_v(ma,-1), word_v(mb,1), nml, nmh);
1852: lw $4, 4($13)
1853: lw $5, -4($16)
1854: multu $4, $5
1855: .loc 2 283
1856: # 283 al += ml;
1857: addu $8, $8, $9
1858: .loc 2 284
1859: # 284 carryl += (al < ml);
1860: sltu $25, $8, $9
1861: addu $19, $19, $25
1862: .loc 2 285
1863: # 285 ah += mh;
1864: addu $12, $12, $10
1865: .loc 2 286
1866: # 286 carryh += (ah < mh);
1867: sltu $14, $12, $10
1868: addu $18, $18, $14
1869: .loc 2 287
1870: # 287 ml = nml;
1871: mflo $9
1872: .loc 2 288
1873: # 288 mh = nmh;
1874: mfhi $10
1875: .loc 2 290
1876: # 289
1877: # 290 lmul( word_v(ma,-2), word_v(mb,2), nml, nmh);
1878: lw $2, 8($13)
1879: lw $3, -8($16)
1880: multu $2, $3
1881: .loc 2 291
1882: # 291 al += ml;
1883: addu $8, $8, $9
1884: .loc 2 292
1885: # 292 carryl += (al < ml);
1886: sltu $24, $8, $9
1887: addu $19, $19, $24
1888: .loc 2 293
1889: # 293 ah += mh;
1890: addu $12, $12, $10
1891: .loc 2 294
1892: # 294 carryh += (ah < mh);
1893: sltu $15, $12, $10
1894: addu $18, $18, $15
1895: .loc 2 295
1896: # 295 ml = nml;
1897: mflo $9
1898: .loc 2 296
1899: # 296 mh = nmh;
1900: mfhi $10
1901: .loc 2 298
1902: # 297
1903: # 298 lmul( word_v(ma,-3), word_v(mb,3), nml, nmh);
1904: lw $4, 12($13)
1905: lw $5, -12($16)
1906: multu $4, $5
1907: .loc 2 299
1908: # 299 al += ml;
1909: addu $8, $8, $9
1910: .loc 2 300
1911: # 300 carryl += (al < ml);
1912: sltu $25, $8, $9
1913: addu $19, $19, $25
1914: .loc 2 301
1915: # 301 ah += mh;
1916: addu $12, $12, $10
1917: .loc 2 302
1918: # 302 carryh += (ah < mh);
1919: sltu $14, $12, $10
1920: addu $18, $18, $14
1921: .loc 2 303
1922: # 303 ml = nml;
1923: mflo $9
1924: .loc 2 304
1925: # 304 mh = nmh;
1926: mfhi $10
1927: .loc 2 306
1928: # 305
1929: # 306 nlowerunit(ma,4);
1930: addu $13, $13, 16
1931: .loc 2 307
1932: # 307 nhigherunit(mb,4);
1933: addu $16, $16, -16
1934: .loc 2 308
1935: # 308 }
1936: .loc 2 308
1937: move $11, $17
1938: addu $17, $17, -1
1939: bne $11, 0, $58
1940: $59:
1941: .loc 2 310
1942: # 309
1943: # 310 al += ml;
1944: addu $8, $8, $9
1945: .loc 2 311
1946: # 311 carryl += (al < ml);
1947: sltu $24, $8, $9
1948: addu $19, $19, $24
1949: .loc 2 312
1950: # 312 *pp = al;
1951: sw $8, 0($20)
1952: .loc 2 313
1953: # 313 ah += mh;
1954: addu $12, $12, $10
1955: .loc 2 314
1956: # 314 carryh += (ah < mh);
1957: sltu $15, $12, $10
1958: addu $18, $18, $15
1959: .loc 2 315
1960: # 315 post_higherunit(pp);
1961: addu $20, $20, -4
1962: .loc 2 316
1963: # 316 }
1964: .loc 2 316
1965: addu $21, $21, 1
1966: addu $22, $22, -4
1967: addu $23, $23, -4
1968: lw $25, 68($sp)
1969: blt $21, $25, $55
1970: $60:
1971: .loc 2 317
1972: # 317 al = ah + carryl;
1973: .loc 2 318
1974: # 318 carryh += (al < ah);
1975: .loc 2 319
1976: # 319 *pp = al;
1977: addu $14, $12, $19
1978: sw $14, 0($20)
1979: .loc 2 320
1980: # 320 }
1981: $61:
1982: ld $16, 0($sp)
1983: ld $18, 8($sp)
1984: ld $20, 16($sp)
1985: ld $22, 24($sp)
1986: ld $30, 32($sp)
1987: addu $sp, 168
1988: j $31
1989: .end p_dmul
1990: #endif /* MIPSEB */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.