|
|
1.1 root 1: /*
2: Copyright (C) 1996-1997 Id Software, Inc.
3:
4: This program is free software; you can redistribute it and/or
5: modify it under the terms of the GNU General Public License
6: as published by the Free Software Foundation; either version 2
7: of the License, or (at your option) any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12:
13: See the GNU General Public License for more details.
14:
15: You should have received a copy of the GNU General Public License
16: along with this program; if not, write to the Free Software
17: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18:
19: */
20: //
21: // d_parta.s
22: // x86 assembly-language 8-bpp particle-drawing code.
23: //
24:
25: #include "asm_i386.h"
26: #include "quakeasm.h"
27: #include "d_ifacea.h"
28: #include "asm_draw.h"
29:
30: #if id386
31:
32: //----------------------------------------------------------------------
33: // 8-bpp particle drawing code.
34: //----------------------------------------------------------------------
35:
36: //FIXME: comments, full optimization
37:
38: //----------------------------------------------------------------------
39: // 8-bpp particle drawing routine (D_DrawParticle).
40: //----------------------------------------------------------------------
41:
42: .text
43:
// Offset from %esp to the single stack argument once the prologue has
// run: 12 bytes of pushed registers (%ebp, %edi, %ebx) plus the 4-byte
// return address.
#define P   12+4

//----------------------------------------------------------------------
// D_DrawParticle
//
// Transforms one particle into view space, projects it to screen
// coordinates, clips it against the particle view rectangle and then
// draws it as a z-buffered block of pixels of a single color.
//
// Syntax: GNU as, AT&T operand order, i386, run through cpp.
//
// In:      first stack argument (4(%esp) on entry, P(%esp) after the
//          prologue) = pointer to the particle; the pt_org and pt_color
//          fields are read through it.
// Returns: nothing meaningful; %eax is used as scratch.
// Saved:   %ebp, %edi, %ebx (and %esi in the 2x2, 3x3 and 4x4 paths)
//          are pushed and restored.
// Clobbers: %eax, %ecx, %edx, flags, x87 status word.
// x87:     uses up to seven x87 stack slots; every exit path pops
//          whatever it pushed, so the x87 stack depth on return equals
//          the depth on entry.
// Memory:  writes the scratch variables DP_u, DP_v, DP_Color, DP_Pix
//          and izi, plus the frame buffer and z-buffer pixels covered
//          by the particle.
//
// NOTE(review): the size dispatch and the generic loop both assume
// pix >= 1 (pix == 0 would index one entry before DP_EntryTable, or
// make the column counter wrap). That relies on d_pix_min being at
// least 1, which is set outside this view -- confirm.
//----------------------------------------------------------------------
    .align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
    pushl   %ebp                    // preserve caller's stack frame
    pushl   %edi                    // preserve register variables
    pushl   %ebx

    movl    P(%esp),%edi            // %edi = particle pointer argument

// FIXME: better FP overlap in general here

// transform point
//  VectorSubtract (p->org, r_origin, local);
    flds    C(r_origin)
    fsubrs  pt_org(%edi)
    flds    pt_org+4(%edi)
    fsubs   C(r_origin)+4
    flds    pt_org+8(%edi)
    fsubs   C(r_origin)+8
    fxch    %st(2)                  // local[0] | local[1] | local[2]

//  transformed[2] = DotProduct(local, r_ppn);
    flds    C(r_ppn)                // r_ppn[0] | local[0] | local[1] | local[2]
    fmul    %st(1),%st(0)           // dot0 | local[0] | local[1] | local[2]
    flds    C(r_ppn)+4              // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
    fmul    %st(3),%st(0)           // dot1 | dot0 | local[0] | local[1] | local[2]
    flds    C(r_ppn)+8              // r_ppn[2] | dot1 | dot0 | local[0] |
                                    //  local[1] | local[2]
    fmul    %st(5),%st(0)           // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
    fxch    %st(2)                  // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
    faddp   %st(0),%st(1)           // dot0 + dot1 | dot2 | local[0] | local[1] |
                                    //  local[2]
    faddp   %st(0),%st(1)           // z | local[0] | local[1] | local[2]
    fld     %st(0)                  // z | z | local[0] | local[1] |
                                    //  local[2]
    // 1/z is computed before the clip test below; if the test fails it
    // is simply discarded by LPop6AndDone.
    fdivrs  float_1                 // 1/z | z | local[0] | local[1] | local[2]
    fxch    %st(1)                  // z | 1/z | local[0] | local[1] | local[2]

//  if (transformed[2] < PARTICLE_Z_CLIP)
//      return;
    fcomps  float_particle_z_clip   // 1/z | local[0] | local[1] | local[2]
    fxch    %st(3)                  // local[2] | local[0] | local[1] | 1/z

    // Start the r_pup dot product while the compare result is fetched.
    flds    C(r_pup)                // r_pup[0] | local[2] | local[0] | local[1] | 1/z
    fmul    %st(2),%st(0)           // dot0 | local[2] | local[0] | local[1] | 1/z
    flds    C(r_pup)+4              // r_pup[1] | dot0 | local[2] | local[0] |
                                    //  local[1] | 1/z

    // C0 of the x87 status word lands in bit 0 of %ah; fcomps sets it
    // when z < float_particle_z_clip. Six values are on the x87 stack
    // at this point, which is what LPop6AndDone discards.
    fnstsw  %ax
    testb   $1,%ah
    jnz     LPop6AndDone

//  transformed[1] = DotProduct(local, r_pup);
    fmul    %st(4),%st(0)           // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
    flds    C(r_pup)+8              // r_pup[2] | dot1 | dot0 | local[2] |
                                    //  local[0] | local[1] | 1/z
    fmul    %st(3),%st(0)           // dot2 | dot1 | dot0 | local[2] | local[0] |
                                    //  local[1] | 1/z
    fxch    %st(2)                  // dot0 | dot1 | dot2 | local[2] | local[0] |
                                    //  local[1] | 1/z
    faddp   %st(0),%st(1)           // dot0 + dot1 | dot2 | local[2] | local[0] |
                                    //  local[1] | 1/z
    faddp   %st(0),%st(1)           // y | local[2] | local[0] | local[1] | 1/z
    fxch    %st(3)                  // local[1] | local[2] | local[0] | y | 1/z

//  transformed[0] = DotProduct(local, r_pright);
    fmuls   C(r_pright)+4           // dot1 | local[2] | local[0] | y | 1/z
    fxch    %st(2)                  // local[0] | local[2] | dot1 | y | 1/z
    fmuls   C(r_pright)             // dot0 | local[2] | dot1 | y | 1/z
    fxch    %st(1)                  // local[2] | dot0 | dot1 | y | 1/z
    fmuls   C(r_pright)+8           // dot2 | dot0 | dot1 | y | 1/z
    fxch    %st(2)                  // dot1 | dot0 | dot2 | y | 1/z
    faddp   %st(0),%st(1)           // dot1 + dot0 | dot2 | y | 1/z

    faddp   %st(0),%st(1)           // x | y | 1/z
    fxch    %st(1)                  // y | x | 1/z

// project the point
    fmul    %st(2),%st(0)           // y/z | x | 1/z
    fxch    %st(1)                  // x | y/z | 1/z
    fmul    %st(2),%st(0)           // x/z | y/z | 1/z
    fxch    %st(1)                  // y/z | x/z | 1/z
    fsubrs  C(ycenter)              // v | x/z | 1/z
    fxch    %st(1)                  // x/z | v | 1/z
    fadds   C(xcenter)              // u | v | 1/z
// FIXME: preadjust xcenter and ycenter
    fxch    %st(1)                  // v | u | 1/z
    fadds   float_point5            // v | u | 1/z
    fxch    %st(1)                  // u | v | 1/z
    fadds   float_point5            // u | v | 1/z
    fxch    %st(2)                  // 1/z | v | u
    fmuls   DP_32768                // 1/z * 0x8000 | v | u
    fxch    %st(2)                  // u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
    fistpl  DP_u                    // v | 1/z * 0x8000
    fistpl  DP_v                    // 1/z * 0x8000

    movl    DP_u,%eax               // %eax = u
    movl    DP_v,%edx               // %edx = v

//  if ((v > d_vrectbottom_particle) ||
//      (u > d_vrectright_particle) ||
//      (v < d_vrecty) ||
//      (u < d_vrectx))
//  {
//      continue;
//  }

    // One value (1/z * 0x8000) is still on the x87 stack here, so the
    // rejected cases leave through LPop1AndDone.
    movl    C(d_vrectbottom_particle),%ebx
    movl    C(d_vrectright_particle),%ecx
    cmpl    %ebx,%edx
    jg      LPop1AndDone
    cmpl    %ecx,%eax
    jg      LPop1AndDone
    movl    C(d_vrecty),%ebx
    movl    C(d_vrectx),%ecx
    cmpl    %ebx,%edx
    jl      LPop1AndDone

    cmpl    %ecx,%eax
    jl      LPop1AndDone

    flds    pt_color(%edi)          // color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
    fistpl  DP_Color                // 1/z * 0x8000

    movl    C(d_viewbuffer),%ebx

    addl    %eax,%ebx               // %ebx = d_viewbuffer + u
    movl    C(d_scantable)(,%edx,4),%edi    // point to the pixel

    imull   C(d_zrowbytes),%edx     // point to the z pixel

    leal    (%edx,%eax,2),%edx      // %edx = v * d_zrowbytes + u * 2
    movl    C(d_pzbuffer),%eax

    fistpl  izi                     // x87 stack is empty from here on

    addl    %ebx,%edi               // %edi = frame buffer address of the pixel
    addl    %eax,%edx               // %edx = z-buffer address of the pixel

// pix = izi >> d_pix_shift;

    movl    izi,%eax
    movl    C(d_pix_shift),%ecx
    shrl    %cl,%eax
    movl    izi,%ebp                // %bp = low 16 bits of izi for the z tests

// if (pix < d_pix_min)
//      pix = d_pix_min;
// else if (pix > d_pix_max)
//      pix = d_pix_max;

    movl    C(d_pix_min),%ebx
    movl    C(d_pix_max),%ecx
    cmpl    %ebx,%eax
    jnl     LTestPixMax
    movl    %ebx,%eax
    jmp     LTestDone

LTestPixMax:
    cmpl    %ecx,%eax
    jng     LTestDone
    movl    %ecx,%eax
LTestDone:

    movb    DP_Color,%ch            // %ch = color byte written to the frame buffer

    // The unrolled paths are only used when d_y_aspect_shift is zero
    // and pix is at most 4; everything else goes through LDefault.
    movl    C(d_y_aspect_shift),%ebx
    testl   %ebx,%ebx
    jnz     LDefault

    cmpl    $4,%eax
    ja      LDefault

    // Indirect jump through the table entry for this pix value
    // (entry for pix N is at DP_EntryTable + 4*(N-1)). The leading
    // asterisk is the AT&T marker for an indirect jump target.
    // DP_EntryTable is defined outside this view; presumably it lists
    // DP_1x1, DP_2x2, DP_3x3, DP_4x4 in that order -- confirm.
    jmp     *DP_EntryTable-4(,%eax,4)

// Register roles shared by all drawing paths below:
//   %edx = z-buffer address of the first pixel (16-bit entries)
//   %edi = frame buffer address of the first pixel (8-bit entries)
//   %bp  = izi, %ch = color
// Each pixel is written only when the stored z value is not greater
// than izi (signed 16-bit compare); the z-buffer entry is updated too.

// 1x1
.globl DP_1x1
DP_1x1:
    cmpw    %bp,(%edx)              // just one pixel to do
    jg      LDone
    movw    %bp,(%edx)
    movb    %ch,(%edi)
    jmp     LDone

// 2x2
.globl DP_2x2
DP_2x2:
    pushl   %esi
    movl    C(screenwidth),%ebx     // %ebx = frame buffer row stride
    movl    C(d_zrowbytes),%esi     // %esi = z-buffer row stride in bytes

    // row 0
    cmpw    %bp,(%edx)
    jg      L2x2_1
    movw    %bp,(%edx)
    movb    %ch,(%edi)
L2x2_1:
    cmpw    %bp,2(%edx)
    jg      L2x2_2
    movw    %bp,2(%edx)
    movb    %ch,1(%edi)
L2x2_2:
    // row 1
    cmpw    %bp,(%edx,%esi,1)
    jg      L2x2_3
    movw    %bp,(%edx,%esi,1)
    movb    %ch,(%edi,%ebx,1)
L2x2_3:
    cmpw    %bp,2(%edx,%esi,1)
    jg      L2x2_4
    movw    %bp,2(%edx,%esi,1)
    movb    %ch,1(%edi,%ebx,1)
L2x2_4:

    popl    %esi
    jmp     LDone

// 3x3
.globl DP_3x3
DP_3x3:
    pushl   %esi
    movl    C(screenwidth),%ebx     // %ebx = frame buffer row stride
    movl    C(d_zrowbytes),%esi     // %esi = z-buffer row stride in bytes

    // row 0
    cmpw    %bp,(%edx)
    jg      L3x3_1
    movw    %bp,(%edx)
    movb    %ch,(%edi)
L3x3_1:
    cmpw    %bp,2(%edx)
    jg      L3x3_2
    movw    %bp,2(%edx)
    movb    %ch,1(%edi)
L3x3_2:
    cmpw    %bp,4(%edx)
    jg      L3x3_3
    movw    %bp,4(%edx)
    movb    %ch,2(%edi)
L3x3_3:

    // row 1
    cmpw    %bp,(%edx,%esi,1)
    jg      L3x3_4
    movw    %bp,(%edx,%esi,1)
    movb    %ch,(%edi,%ebx,1)
L3x3_4:
    cmpw    %bp,2(%edx,%esi,1)
    jg      L3x3_5
    movw    %bp,2(%edx,%esi,1)
    movb    %ch,1(%edi,%ebx,1)
L3x3_5:
    cmpw    %bp,4(%edx,%esi,1)
    jg      L3x3_6
    movw    %bp,4(%edx,%esi,1)
    movb    %ch,2(%edi,%ebx,1)
L3x3_6:

    // row 2 (stride scaled by 2 in the addressing mode)
    cmpw    %bp,(%edx,%esi,2)
    jg      L3x3_7
    movw    %bp,(%edx,%esi,2)
    movb    %ch,(%edi,%ebx,2)
L3x3_7:
    cmpw    %bp,2(%edx,%esi,2)
    jg      L3x3_8
    movw    %bp,2(%edx,%esi,2)
    movb    %ch,1(%edi,%ebx,2)
L3x3_8:
    cmpw    %bp,4(%edx,%esi,2)
    jg      L3x3_9
    movw    %bp,4(%edx,%esi,2)
    movb    %ch,2(%edi,%ebx,2)
L3x3_9:

    popl    %esi
    jmp     LDone


// 4x4
.globl DP_4x4
DP_4x4:
    pushl   %esi
    movl    C(screenwidth),%ebx     // %ebx = frame buffer row stride
    movl    C(d_zrowbytes),%esi     // %esi = z-buffer row stride in bytes

    // row 0
    cmpw    %bp,(%edx)
    jg      L4x4_1
    movw    %bp,(%edx)
    movb    %ch,(%edi)
L4x4_1:
    cmpw    %bp,2(%edx)
    jg      L4x4_2
    movw    %bp,2(%edx)
    movb    %ch,1(%edi)
L4x4_2:
    cmpw    %bp,4(%edx)
    jg      L4x4_3
    movw    %bp,4(%edx)
    movb    %ch,2(%edi)
L4x4_3:
    cmpw    %bp,6(%edx)
    jg      L4x4_4
    movw    %bp,6(%edx)
    movb    %ch,3(%edi)
L4x4_4:

    // row 1
    cmpw    %bp,(%edx,%esi,1)
    jg      L4x4_5
    movw    %bp,(%edx,%esi,1)
    movb    %ch,(%edi,%ebx,1)
L4x4_5:
    cmpw    %bp,2(%edx,%esi,1)
    jg      L4x4_6
    movw    %bp,2(%edx,%esi,1)
    movb    %ch,1(%edi,%ebx,1)
L4x4_6:
    cmpw    %bp,4(%edx,%esi,1)
    jg      L4x4_7
    movw    %bp,4(%edx,%esi,1)
    movb    %ch,2(%edi,%ebx,1)
L4x4_7:
    cmpw    %bp,6(%edx,%esi,1)
    jg      L4x4_8
    movw    %bp,6(%edx,%esi,1)
    movb    %ch,3(%edi,%ebx,1)
L4x4_8:

    // Advance both pointers two rows so rows 2 and 3 can reuse the
    // same addressing forms as rows 0 and 1.
    leal    (%edx,%esi,2),%edx
    leal    (%edi,%ebx,2),%edi

    // row 2
    cmpw    %bp,(%edx)
    jg      L4x4_9
    movw    %bp,(%edx)
    movb    %ch,(%edi)
L4x4_9:
    cmpw    %bp,2(%edx)
    jg      L4x4_10
    movw    %bp,2(%edx)
    movb    %ch,1(%edi)
L4x4_10:
    cmpw    %bp,4(%edx)
    jg      L4x4_11
    movw    %bp,4(%edx)
    movb    %ch,2(%edi)
L4x4_11:
    cmpw    %bp,6(%edx)
    jg      L4x4_12
    movw    %bp,6(%edx)
    movb    %ch,3(%edi)
L4x4_12:

    // row 3
    cmpw    %bp,(%edx,%esi,1)
    jg      L4x4_13
    movw    %bp,(%edx,%esi,1)
    movb    %ch,(%edi,%ebx,1)
L4x4_13:
    cmpw    %bp,2(%edx,%esi,1)
    jg      L4x4_14
    movw    %bp,2(%edx,%esi,1)
    movb    %ch,1(%edi,%ebx,1)
L4x4_14:
    cmpw    %bp,4(%edx,%esi,1)
    jg      L4x4_15
    movw    %bp,4(%edx,%esi,1)
    movb    %ch,2(%edi,%ebx,1)
L4x4_15:
    cmpw    %bp,6(%edx,%esi,1)
    jg      L4x4_16
    movw    %bp,6(%edx,%esi,1)
    movb    %ch,3(%edi,%ebx,1)
L4x4_16:

    popl    %esi
    jmp     LDone

// default case, handling any size particle
LDefault:

// count = pix << d_y_aspect_shift;

    movl    %eax,%ebx
    movl    %eax,DP_Pix             // saved so each row can reload the column count
    movb    C(d_y_aspect_shift),%cl // only %cl is written; %ch keeps the color
    shll    %cl,%ebx                // %ebx = count (rows to draw)

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
//      for (i=0 ; i<pix ; i++)
//      {
//          if (pz[i] <= izi)
//          {
//              pz[i] = izi;
//              pdest[i] = color;
//          }
//      }
// }

LGenRowLoop:
    movl    DP_Pix,%eax             // %eax = columns left in this row

LGenColLoop:
    // %eax counts down from pix to 1, so the element touched is
    // index %eax - 1 (hence the -2 and -1 displacements).
    cmpw    %bp,-2(%edx,%eax,2)
    jg      LGSkip
    movw    %bp,-2(%edx,%eax,2)
    movb    %ch,-1(%edi,%eax,1)
LGSkip:
    decl    %eax                    // --pix
    jnz     LGenColLoop

    addl    C(d_zrowbytes),%edx     // next z-buffer row
    addl    C(screenwidth),%edi     // next frame buffer row

    decl    %ebx                    // --count
    jnz     LGenRowLoop

LDone:
    popl    %ebx                    // restore register variables
    popl    %edi
    popl    %ebp                    // restore the caller's stack frame
    ret

// Early-out paths: discard what is still on the x87 stack (six values
// for the z-clip rejection, one for the screen-rectangle rejection)
// and then leave through the common epilogue.
LPop6AndDone:
    fstp    %st(0)
    fstp    %st(0)
    fstp    %st(0)
    fstp    %st(0)
    fstp    %st(0)
LPop1AndDone:
    fstp    %st(0)
    jmp     LDone
476:
477: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.