|
|
1.1 root 1: //
2: // d_parta.s
3: // x86 assembly-language 8-bpp particle-drawing code.
4: //
5:
6: #include "asm_i386.h"
7: #include "quakeasm.h"
8: #include "d_ifacea.h"
9: #include "asm_draw.h"
10:
1.1.1.2 ! root 11: #if id386
1.1 root 12:
13: //----------------------------------------------------------------------
14: // 8-bpp particle drawing code.
15: //----------------------------------------------------------------------
16:
17: //FIXME: comments, full optimization
18:
19: //----------------------------------------------------------------------
20: // 8-bpp particle queueing code.
21: //----------------------------------------------------------------------
22:
23: .text
24:
// P is the byte offset from %esp to the first stacked argument once the
// three pushl's at the top of D_DrawParticle have executed:
// 12 bytes of saved registers + 4 bytes of return address.
25: #define P 12+4
26:
//----------------------------------------------------------------------
// D_DrawParticle
//
// In:    first stacked argument = pointer to the particle; its pt_org
//        (three floats) and pt_color (float) fields are read here.
// Saves: %ebp, %edi, %ebx (restored at LDone).
// Uses:  %eax, %ecx, %edx and the x87 stack as scratch.
//
// Flow of this part of the routine:
//   1. local = p->org - r_origin
//   2. z = local . r_ppn; bail out if z < float_particle_z_clip
//   3. y = local . r_pup, x = local . r_pright
//   4. u = xcenter + x/z + 0.5, v = ycenter - y/z + 0.5, stored as ints
//   5. reject if (u,v) lies outside the d_vrect* particle rectangle
//   6. build the frame-buffer and z-buffer pointers, izi and pix
//
// Register state when control reaches LTestDone:
//   %eax = pix (clamped to d_pix_min..d_pix_max)
//   %edx = address of the particle's z-buffer entry
//   %edi = address of the particle's frame-buffer pixel
//   %ebp = izi (only %bp is compared against the 16-bit z-buffer)
//----------------------------------------------------------------------
27: .align 4
28: .globl C(D_DrawParticle)
29: C(D_DrawParticle):
30: pushl %ebp // preserve caller's stack frame
31: pushl %edi // preserve register variables
32: pushl %ebx
33:
// %edi = particle pointer (first argument)
34: movl P(%esp),%edi
35:
36: // FIXME: better FP overlap in general here
37:
38: // transform point
39: // VectorSubtract (p->org, r_origin, local);
// fsubrs computes mem - st(0), so the first component is loaded from
// r_origin and reverse-subtracted from org[0]; the other two load org[n]
// and subtract r_origin[n] directly.
40: flds C(r_origin)
41: fsubrs pt_org(%edi)
42: flds pt_org+4(%edi)
43: fsubs C(r_origin)+4
44: flds pt_org+8(%edi)
45: fsubs C(r_origin)+8
46: fxch %st(2) // local[0] | local[1] | local[2]
47:
48: // transformed[2] = DotProduct(local, r_ppn);
49: flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2]
1.1.1.2 ! root 50: fmul %st(1) // dot0 | local[0] | local[1] | local[2]
1.1 root 51: flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
1.1.1.2 ! root 52: fmul %st(3) // dot1 | dot0 | local[0] | local[1] | local[2]
1.1 root 53: flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] |
54: // local[1] | local[2]
1.1.1.2 ! root 55: fmul %st(5) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
1.1 root 56: fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
57: faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
58: // local[2]
59: faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
// The reciprocal is started before z has been range-checked; the result is
// simply discarded on the clip path.
60: fld %st(0) // z | z | local[0] | local[1] |
61: // local[2]
62: fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
63: fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
64:
65: // if (transformed[2] < PARTICLE_Z_CLIP)
66: // return;
// The compare is issued here, but its result is only read by the fnstsw
// further down, after the first r_pup multiplies have been started.
67: fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
68: fxch %st(3) // local[2] | local[0] | local[1] | 1/z
69:
70: flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
1.1.1.2 ! root 71: fmul %st(2) // dot0 | local[2] | local[0] | local[1] | 1/z
1.1 root 72: flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] |
73: // local[1] | 1/z
74:
// Bit 0 of %ah is condition code C0 of the FPU status word, which fcomps
// sets when st(0) < operand (and for unordered operands). Six values are on
// the FPU stack at this point, hence the six-pop exit.
// NOTE(review): relies on the flds/fmul/fxch issued since the fcomps
// leaving C0 untouched.
75: fnstsw %ax
76: testb $1,%ah
77: jnz LPop6AndDone
78:
79: // transformed[1] = DotProduct(local, r_pup);
1.1.1.2 ! root 80: fmul %st(4) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
1.1 root 81: flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] |
82: // local[0] | local[1] | 1/z
1.1.1.2 ! root 83: fmul %st(3) // dot2 | dot1 | dot0 | local[2] | local[0] |
1.1 root 84: // local[1] | 1/z
85: fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] |
86: // local[1] | 1/z
87: faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
88: // local[1] | 1/z
89: faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
90: fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z
91:
92: // transformed[0] = DotProduct(local, r_pright);
// The local[] values are no longer needed after this dot product, so they
// are multiplied in place instead of loading r_pright onto the stack.
93: fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z
94: fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z
95: fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z
96: fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z
97: fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z
98: fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z
99: faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
100:
101: faddp %st(0),%st(1) // x | y | 1/z
102: fxch %st(1) // y | x | 1/z
103:
104: // project the point
// v = ycenter - y/z, u = xcenter + x/z; 0.5 is added to each before the
// float->int stores below.
1.1.1.2 ! root 105: fmul %st(2) // y/z | x | 1/z
1.1 root 106: fxch %st(1) // x | y/z | 1/z
1.1.1.2 ! root 107: fmul %st(2) // x/z | y/z | 1/z
1.1 root 108: fxch %st(1) // y/z | x/z | 1/z
109: fsubrs C(ycenter) // v | x/z | 1/z
110: fxch %st(1) // x/z | v | 1/z
111: fadds C(xcenter) // u | v | 1/z
112: // FIXME: preadjust xcenter and ycenter
113: fxch %st(1) // v | u | 1/z
114: fadds float_point5 // v | u | 1/z
115: fxch %st(1) // u | v | 1/z
116: fadds float_point5 // u | v | 1/z
117: fxch %st(2) // 1/z | v | u
1.1.1.2 ! root 118: fmuls DP_0x8000 // 1/z * 0x8000 | v | u
1.1 root 119: fxch %st(2) // u | v | 1/z * 0x8000
120:
121: // FIXME: use Terje's fp->int trick here?
122: // FIXME: check we're getting proper rounding here
123: fistpl DP_u // v | 1/z * 0x8000
124: fistpl DP_v // 1/z * 0x8000
125:
// %eax = u, %edx = v for the integer rectangle test below
126: movl DP_u,%eax
127: movl DP_v,%edx
128:
129: // if ((v > d_vrectbottom_particle) ||
130: // (u > d_vrectright_particle) ||
131: // (v < d_vrecty) ||
132: // (u < d_vrectx))
133: // {
134: // continue;
135: // }
136:
// All four tests are signed compares. One value (1/z * 0x8000) is still on
// the FPU stack, so rejected particles leave through LPop1AndDone.
137: movl C(d_vrectbottom_particle),%ebx
138: movl C(d_vrectright_particle),%ecx
139: cmpl %ebx,%edx
140: jg LPop1AndDone
141: cmpl %ecx,%eax
142: jg LPop1AndDone
143: movl C(d_vrecty),%ebx
144: movl C(d_vrectx),%ecx
145: cmpl %ebx,%edx
146: jl LPop1AndDone
147:
148: cmpl %ecx,%eax
149: jl LPop1AndDone
150:
// pt_color is read as a float and converted to an integer in DP_Color.
// This is the last use of %edi as the particle pointer; it is reused below
// as the frame-buffer pointer.
151: flds pt_color(%edi) // color | 1/z * 0x8000
152: // FIXME: use Terje's fast fp->int trick?
153: fistpl DP_Color // 1/z * 0x8000
154:
// %edi = d_scantable[v] + d_viewbuffer + u           (frame-buffer pixel)
// %edx = d_pzbuffer + v * d_zrowbytes + u * 2        (z-buffer entry)
// The integer work is interleaved with the fistpl that stores izi and
// empties the FPU stack.
155: movl C(d_viewbuffer),%ebx
156:
157: addl %eax,%ebx
158: movl C(d_scantable)(,%edx,4),%edi // point to the pixel
159:
160: imull C(d_zrowbytes),%edx // point to the z pixel
161:
162: leal (%edx,%eax,2),%edx
163: movl C(d_pzbuffer),%eax
164:
165: fistpl izi
166:
167: addl %ebx,%edi
168: addl %eax,%edx
169:
170: // pix = izi >> d_pix_shift;
171:
// shrl is a logical (unsigned) shift. %ebp keeps the unshifted izi for the
// z-buffer compares performed by the drawing code.
172: movl izi,%eax
173: movl C(d_pix_shift),%ecx
174: shrl %cl,%eax
175: movl izi,%ebp
176:
177: // if (pix < d_pix_min)
178: // pix = d_pix_min;
179: // else if (pix > d_pix_max)
180: // pix = d_pix_max;
181:
// Signed clamp of %eax into [d_pix_min, d_pix_max]
182: movl C(d_pix_min),%ebx
183: movl C(d_pix_max),%ecx
184: cmpl %ebx,%eax
185: jnl LTestPixMax
186: movl %ebx,%eax
187: jmp LTestDone
188:
189: LTestPixMax:
190: cmpl %ecx,%eax
191: jng LTestDone
192: movl %ecx,%eax
// Size dispatch.
// On entry: %eax = clamped pix, %edx = z-buffer entry, %edi = frame-buffer
// pixel, %ebp = izi.
// The unrolled DP_1x1..DP_4x4 handlers are used only when
// d_y_aspect_shift is zero and pix <= 4; everything else goes to LDefault.
193: LTestDone:
194:
// %ch carries the colour byte through every drawing path; %cl stays free
// for use as a shift count.
195: movb DP_Color,%ch
196:
// Load the full 32-bit value: a byte-sized move cannot target %ebx, and
// loading only %bl would leave the upper 24 bits of %ebx holding stale
// d_pix_min bits for the testl below.
// NOTE(review): assumes d_y_aspect_shift is a 32-bit int - confirm against
// its C declaration.
1.1.1.2 ! root 197: movl C(d_y_aspect_shift),%ebx
1.1 root 198: testl %ebx,%ebx
199: jnz LDefault
200:
// Unsigned compare: any pix above 4 takes the generic loop.
// NOTE(review): pix == 0 would pass this test and index one entry before
// DP_EntryTable - presumably d_pix_min >= 1 rules that out; verify.
201: cmpl $4,%eax
202: ja LDefault
203:
// Indirect jump through entry (pix - 1) of DP_EntryTable
204: jmp DP_EntryTable-4(,%eax,4)
205:
206: // 1x1
// Draws a single pixel.
// On entry: %edx = z-buffer entry, %edi = frame-buffer pixel,
// %bp = izi, %ch = colour.
// The pixel is written only when the stored 16-bit z value is <= izi
// (signed compare); the z-buffer entry is updated together with the colour.
207: .globl DP_1x1
208: DP_1x1:
209: cmpw %bp,(%edx) // just one pixel to do
210: jg LDone
211: movw %bp,(%edx)
212: movb %ch,(%edi)
213: jmp LDone
214:
215: // 2x2
// Draws a 2x2 block of pixels, z-testing each one individually.
// On entry: %edx = z-buffer entry of the top-left pixel, %edi = its
// frame-buffer address, %bp = izi, %ch = colour.
// Each pixel is written only when the stored 16-bit z value is <= izi
// (signed compare).
//
// Row pitches: the frame buffer advances by screenwidth bytes (%ebx) and
// the z-buffer by d_zrowbytes bytes (%eax). The z pointer in %edx was
// built from d_zrowbytes, so the second z row must use that same pitch
// rather than screenwidth*2. %eax (pix) is dead after the table jump and
// is not needed at LDone, so it is reused here.
216: .globl DP_2x2
217: DP_2x2:
218: movl C(screenwidth),%ebx
movl C(d_zrowbytes),%eax
219:
// row 0
220: cmpw %bp,(%edx)
221: jg L2x2_1
222: movw %bp,(%edx)
223: movb %ch,(%edi)
224: L2x2_1:
225: cmpw %bp,2(%edx)
226: jg L2x2_2
227: movw %bp,2(%edx)
228: movb %ch,1(%edi)
229: L2x2_2:
// row 1
1.1.1.2 ! root 230: cmpw %bp,(%edx,%eax,1)
1.1 root 231: jg L2x2_3
1.1.1.2 ! root 232: movw %bp,(%edx,%eax,1)
1.1 root 233: movb %ch,(%edi,%ebx,1)
234: L2x2_3:
1.1.1.2 ! root 235: cmpw %bp,2(%edx,%eax,1)
! 236: jg LDone
! 237: movw %bp,2(%edx,%eax,1)
1.1 root 238: movb %ch,1(%edi,%ebx,1)
239: jmp LDone
240:
241: // 3x3
// Draws a 3x3 block of pixels, z-testing each one individually.
// On entry: %edx = z-buffer entry of the top-left pixel, %edi = its
// frame-buffer address, %bp = izi, %ch = colour.
// Each pixel is written only when the stored 16-bit z value is <= izi
// (signed compare).
//
// Row pitches: the frame buffer advances by screenwidth bytes (%ebx) and
// the z-buffer by d_zrowbytes bytes (%eax). The z pointer in %edx was
// built from d_zrowbytes, so the lower z rows must use that same pitch
// rather than screenwidth*2. %eax (pix) is dead after the table jump and
// is not needed at LDone, so it is reused here.
242: .globl DP_3x3
243: DP_3x3:
244: movl C(screenwidth),%ebx
movl C(d_zrowbytes),%eax
245:
// row 0
246: cmpw %bp,(%edx)
247: jg L3x3_1
248: movw %bp,(%edx)
249: movb %ch,(%edi)
250: L3x3_1:
251: cmpw %bp,2(%edx)
252: jg L3x3_2
253: movw %bp,2(%edx)
254: movb %ch,1(%edi)
255: L3x3_2:
256: cmpw %bp,4(%edx)
257: jg L3x3_3
258: movw %bp,4(%edx)
259: movb %ch,2(%edi)
260: L3x3_3:
261:
// row 1: z at +d_zrowbytes, pixels at +screenwidth
1.1.1.2 ! root 262: cmpw %bp,(%edx,%eax,1)
1.1 root 263: jg L3x3_4
1.1.1.2 ! root 264: movw %bp,(%edx,%eax,1)
1.1 root 265: movb %ch,(%edi,%ebx,1)
266: L3x3_4:
1.1.1.2 ! root 267: cmpw %bp,2(%edx,%eax,1)
1.1 root 268: jg L3x3_5
1.1.1.2 ! root 269: movw %bp,2(%edx,%eax,1)
1.1 root 270: movb %ch,1(%edi,%ebx,1)
271: L3x3_5:
1.1.1.2 ! root 272: cmpw %bp,4(%edx,%eax,1)
1.1 root 273: jg L3x3_6
1.1.1.2 ! root 274: movw %bp,4(%edx,%eax,1)
1.1 root 275: movb %ch,2(%edi,%ebx,1)
276: L3x3_6:
277:
// row 2: z at +2*d_zrowbytes, pixels at +2*screenwidth
1.1.1.2 ! root 278: cmpw %bp,(%edx,%eax,2)
1.1 root 279: jg L3x3_7
1.1.1.2 ! root 280: movw %bp,(%edx,%eax,2)
1.1 root 281: movb %ch,(%edi,%ebx,2)
282: L3x3_7:
1.1.1.2 ! root 283: cmpw %bp,2(%edx,%eax,2)
1.1 root 284: jg L3x3_8
1.1.1.2 ! root 285: movw %bp,2(%edx,%eax,2)
1.1 root 286: movb %ch,1(%edi,%ebx,2)
287: L3x3_8:
1.1.1.2 ! root 288: cmpw %bp,4(%edx,%eax,2)
! 289: jg LDone
! 290: movw %bp,4(%edx,%eax,2)
1.1 root 291: movb %ch,2(%edi,%ebx,2)
292: L3x3_9:
293: jmp LDone
294:
295:
296: // 4x4
// Draws a 4x4 block of pixels, z-testing each one individually.
// On entry: %edx = z-buffer entry of the top-left pixel, %edi = its
// frame-buffer address, %bp = izi, %ch = colour.
// Each pixel is written only when the stored 16-bit z value is <= izi
// (signed compare).
//
// Row pitches: the frame buffer advances by screenwidth bytes (%ebx) and
// the z-buffer by d_zrowbytes bytes (%eax). The z pointer in %edx was
// built from d_zrowbytes, so the lower z rows must use that same pitch
// rather than screenwidth*2. %eax (pix) is dead after the table jump and
// is not needed at LDone, so it is reused here.
//
// The block is drawn as two pairs of rows; both base pointers are moved
// down two rows between the pairs.
297: .globl DP_4x4
298: DP_4x4:
299: movl C(screenwidth),%ebx
movl C(d_zrowbytes),%eax
300:
// row 0
301: cmpw %bp,(%edx)
302: jg L4x4_1
303: movw %bp,(%edx)
304: movb %ch,(%edi)
305: L4x4_1:
306: cmpw %bp,2(%edx)
307: jg L4x4_2
308: movw %bp,2(%edx)
309: movb %ch,1(%edi)
310: L4x4_2:
311: cmpw %bp,4(%edx)
312: jg L4x4_3
313: movw %bp,4(%edx)
314: movb %ch,2(%edi)
315: L4x4_3:
316: cmpw %bp,6(%edx)
317: jg L4x4_4
318: movw %bp,6(%edx)
319: movb %ch,3(%edi)
320: L4x4_4:
321:
// row 1
1.1.1.2 ! root 322: cmpw %bp,(%edx,%eax,1)
1.1 root 323: jg L4x4_5
1.1.1.2 ! root 324: movw %bp,(%edx,%eax,1)
1.1 root 325: movb %ch,(%edi,%ebx,1)
326: L4x4_5:
1.1.1.2 ! root 327: cmpw %bp,2(%edx,%eax,1)
1.1 root 328: jg L4x4_6
1.1.1.2 ! root 329: movw %bp,2(%edx,%eax,1)
1.1 root 330: movb %ch,1(%edi,%ebx,1)
331: L4x4_6:
1.1.1.2 ! root 332: cmpw %bp,4(%edx,%eax,1)
1.1 root 333: jg L4x4_7
1.1.1.2 ! root 334: movw %bp,4(%edx,%eax,1)
1.1 root 335: movb %ch,2(%edi,%ebx,1)
336: L4x4_7:
1.1.1.2 ! root 337: cmpw %bp,6(%edx,%eax,1)
1.1 root 338: jg L4x4_8
1.1.1.2 ! root 339: movw %bp,6(%edx,%eax,1)
1.1 root 340: movb %ch,3(%edi,%ebx,1)
341: L4x4_8:
342:
// step both pointers down two rows
1.1.1.2 ! root 343: leal (%edx,%eax,2),%edx
1.1 root 344: leal (%edi,%ebx,2),%edi
345:
// row 2
346: cmpw %bp,(%edx)
347: jg L4x4_9
348: movw %bp,(%edx)
349: movb %ch,(%edi)
350: L4x4_9:
351: cmpw %bp,2(%edx)
352: jg L4x4_10
353: movw %bp,2(%edx)
354: movb %ch,1(%edi)
355: L4x4_10:
356: cmpw %bp,4(%edx)
357: jg L4x4_11
358: movw %bp,4(%edx)
359: movb %ch,2(%edi)
360: L4x4_11:
361: cmpw %bp,6(%edx)
362: jg L4x4_12
363: movw %bp,6(%edx)
364: movb %ch,3(%edi)
365: L4x4_12:
366:
// row 3
1.1.1.2 ! root 367: cmpw %bp,(%edx,%eax,1)
1.1 root 368: jg L4x4_13
1.1.1.2 ! root 369: movw %bp,(%edx,%eax,1)
1.1 root 370: movb %ch,(%edi,%ebx,1)
371: L4x4_13:
1.1.1.2 ! root 372: cmpw %bp,2(%edx,%eax,1)
1.1 root 373: jg L4x4_14
1.1.1.2 ! root 374: movw %bp,2(%edx,%eax,1)
1.1 root 375: movb %ch,1(%edi,%ebx,1)
376: L4x4_14:
1.1.1.2 ! root 377: cmpw %bp,4(%edx,%eax,1)
1.1 root 378: jg L4x4_15
1.1.1.2 ! root 379: movw %bp,4(%edx,%eax,1)
1.1 root 380: movb %ch,2(%edi,%ebx,1)
381: L4x4_15:
1.1.1.2 ! root 382: cmpw %bp,6(%edx,%eax,1)
! 383: jg LDone
! 384: movw %bp,6(%edx,%eax,1)
1.1 root 385: movb %ch,3(%edi,%ebx,1)
386: L4x4_16:
387: jmp LDone
388:
389: // default case, handling any size particle
// On entry: %eax = pix, %edx = z-buffer entry of the top-left pixel,
// %edi = its frame-buffer address, %bp = izi, %ch = colour.
// Reached when d_y_aspect_shift is non-zero or pix > 4.
// NOTE(review): both loops are count-down-to-zero, so pix is presumably
// always >= 1 here - verify against how d_pix_min is set.
390: LDefault:
391:
392: // count = pix << d_y_aspect_shift;
393:
// %ebx = row count; pix is parked in DP_Pix so %eax can serve as the column
// counter. Only %cl is written for the shift count, which leaves the
// colour in %ch intact.
394: movl %eax,%ebx
395: movl %eax,DP_Pix
396: movb C(d_y_aspect_shift),%cl
397: shll %cl,%ebx
398:
399: // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
400: // {
401: // for (i=0 ; i<pix ; i++)
402: // {
403: // if (pz[i] <= izi)
404: // {
405: // pz[i] = izi;
406: // pdest[i] = color;
407: // }
408: // }
409: // }
410:
411: LGenRowLoop:
412: movl DP_Pix,%eax
413:
// %eax runs from pix down to 1; the -2 / -1 displacements turn that into
// column indices pix-1 .. 0, so each row is filled right to left.
414: LGenColLoop:
415: cmpw %bp,-2(%edx,%eax,2)
416: jg LGSkip
417: movw %bp,-2(%edx,%eax,2)
418: movb %ch,-1(%edi,%eax,1)
419: LGSkip:
420: decl %eax // --pix
421: jnz LGenColLoop
422:
// advance to the next row: d_zrowbytes bytes in the z-buffer, screenwidth
// bytes in the frame buffer
423: addl C(d_zrowbytes),%edx
424: addl C(screenwidth),%edi
425:
426: decl %ebx // --count
427: jnz LGenRowLoop
428:
// Common exit: restores the registers pushed at entry, in reverse order,
// and returns. Every path that reaches LDone directly has already emptied
// the FPU stack.
429: LDone:
430: popl %ebx // restore register variables
431: popl %edi
432: popl %ebp // restore the caller's stack frame
433: ret
434:
// Early-out for the z-clip test, taken with six values on the FPU stack:
// five are popped here and the sixth by falling through to LPop1AndDone.
435: LPop6AndDone:
436: fstp %st(0)
437: fstp %st(0)
438: fstp %st(0)
439: fstp %st(0)
440: fstp %st(0)
// Early-out for the screen-rectangle test, taken with only 1/z * 0x8000
// left on the FPU stack.
441: LPop1AndDone:
442: fstp %st(0)
443: jmp LDone
444:
445: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.