|
|
1.1 root 1: //
2: // d_parta.s
3: // x86 assembly-language 8-bpp particle-drawing code.
4: //
5:
6: #include "asm_i386.h"
7: #include "quakeasm.h"
8: #include "d_ifacea.h"
9: #include "asm_draw.h"
10:
1.1.1.3 ! root 11: #if id386
1.1 root 12:
13: //----------------------------------------------------------------------
14: // 8-bpp particle drawing code.
15: //----------------------------------------------------------------------
16:
17: //FIXME: comments, full optimization
18:
19: //----------------------------------------------------------------------
20: // 8-bpp particle queueing code.
21: //----------------------------------------------------------------------
22:
23: .text
24:
// D_DrawParticle: draws one particle into the 8-bpp frame buffer and the
// z-buffer (2 bytes per entry).  This first part transforms the particle
// origin into view space, projects it to integer screen coordinates (u, v),
// rejects the particle when z is below float_particle_z_clip or when (u, v)
// lies outside the particle view rectangle, and then sets up the registers
// used by the drawing code that follows:
//   %edi = d_viewbuffer + d_scantable[v] + u     (destination pixel)
//   %edx = d_pzbuffer + v * d_zrowbytes + u * 2  (z-buffer entry)
//   %ebp = izi, i.e. 1/z * 0x8000 converted to an integer
//   %eax = pix = izi >> d_pix_shift, clamped to [d_pix_min, d_pix_max]
// P is the stack offset of the single argument (pointer to the particle):
// 12 bytes for the three registers pushed below plus 4 for the return address.
25: #define P 12+4
26:
27: .align 4
28: .globl C(D_DrawParticle)
29: C(D_DrawParticle):
30: pushl %ebp // preserve caller's stack frame
31: pushl %edi // preserve register variables
32: pushl %ebx
33:
// %edi = pointer to the particle (the function's argument)
34: movl P(%esp),%edi
35:
36: // FIXME: better FP overlap in general here
37:
38: // transform point
39: // VectorSubtract (p->org, r_origin, local);
// fsubrs computes memory - st(0), so the first component is
// p->org[0] - r_origin[0]; the other two load p->org first and use fsubs.
40: flds C(r_origin)
41: fsubrs pt_org(%edi)
42: flds pt_org+4(%edi)
43: fsubs C(r_origin)+4
44: flds pt_org+8(%edi)
45: fsubs C(r_origin)+8
46: fxch %st(2) // local[0] | local[1] | local[2]
47:
48: // transformed[2] = DotProduct(local, r_ppn);
49: flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2]
1.1.1.3 ! root 50: fmul %st(1),%st(0) // dot0 | local[0] | local[1] | local[2]
1.1 root 51: flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
1.1.1.3 ! root 52: fmul %st(3),%st(0) // dot1 | dot0 | local[0] | local[1] | local[2]
1.1 root 53: flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] |
54: // local[1] | local[2]
1.1.1.3 ! root 55: fmul %st(5),%st(0) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
1.1 root 56: fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
57: faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
58: // local[2]
59: faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
60: fld %st(0) // z | z | local[0] | local[1] |
61: // local[2]
// The divide is started before the z-clip test below has been resolved.
62: fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
63: fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
64:
65: // if (transformed[2] < PARTICLE_Z_CLIP)
66: // return;
// fcomps compares z with float_particle_z_clip and pops z; the resulting
// condition codes are only read by the fnstsw further down.
67: fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
68: fxch %st(3) // local[2] | local[0] | local[1] | 1/z
69:
70: flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
1.1.1.3 ! root 71: fmul %st(2),%st(0) // dot0 | local[2] | local[0] | local[1] | 1/z
1.1 root 72: flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] |
73: // local[1] | 1/z
74:
// C0 is status-word bit 8, i.e. bit 0 of %ah; fcomps sets it when z is below
// the clip value (or the compare is unordered).  Six values are on the FP
// stack at this point, so the early-out goes through LPop6AndDone.
75: fnstsw %ax
76: testb $1,%ah
77: jnz LPop6AndDone
78:
79: // transformed[1] = DotProduct(local, r_pup);
1.1.1.3 ! root 80: fmul %st(4),%st(0) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
1.1 root 81: flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] |
82: // local[0] | local[1] | 1/z
1.1.1.3 ! root 83: fmul %st(3),%st(0) // dot2 | dot1 | dot0 | local[2] | local[0] |
1.1 root 84: // local[1] | 1/z
85: fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] |
86: // local[1] | 1/z
87: faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
88: // local[1] | 1/z
89: faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
90: fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z
91:
92: // transformed[0] = DotProduct(local, r_pright);
// The local[] values are no longer needed afterwards, so they are multiplied
// in place instead of loading the r_pright components onto the stack.
93: fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z
94: fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z
95: fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z
96: fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z
97: fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z
98: fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z
99: faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
100:
101: faddp %st(0),%st(1) // x | y | 1/z
102: fxch %st(1) // y | x | 1/z
103:
104: // project the point
// v = ycenter - y/z + 0.5, u = xcenter + x/z + 0.5
1.1.1.3 ! root 105: fmul %st(2),%st(0) // y/z | x | 1/z
1.1 root 106: fxch %st(1) // x | y/z | 1/z
1.1.1.3 ! root 107: fmul %st(2),%st(0) // x/z | y/z | 1/z
1.1 root 108: fxch %st(1) // y/z | x/z | 1/z
109: fsubrs C(ycenter) // v | x/z | 1/z
110: fxch %st(1) // x/z | v | 1/z
111: fadds C(xcenter) // u | v | 1/z
112: // FIXME: preadjust xcenter and ycenter
113: fxch %st(1) // v | u | 1/z
114: fadds float_point5 // v | u | 1/z
115: fxch %st(1) // u | v | 1/z
116: fadds float_point5 // u | v | 1/z
117: fxch %st(2) // 1/z | v | u
1.1.1.3 ! root 118: fmuls DP_32768 // 1/z * 0x8000 | v | u
1.1 root 119: fxch %st(2) // u | v | 1/z * 0x8000
120:
121: // FIXME: use Terje's fp->int trick here?
122: // FIXME: check we're getting proper rounding here
123: fistpl DP_u // v | 1/z * 0x8000
124: fistpl DP_v // 1/z * 0x8000
125:
// %eax = u, %edx = v (integer screen coordinates)
126: movl DP_u,%eax
127: movl DP_v,%edx
128:
129: // if ((v > d_vrectbottom_particle) ||
130: // (u > d_vrectright_particle) ||
131: // (v < d_vrecty) ||
132: // (u < d_vrectx))
133: // {
134: // continue;
135: // }
136:
// All four tests are signed compares.  One value (1/z * 0x8000) is still on
// the FP stack here, so rejection goes through LPop1AndDone.
137: movl C(d_vrectbottom_particle),%ebx
138: movl C(d_vrectright_particle),%ecx
139: cmpl %ebx,%edx
140: jg LPop1AndDone
141: cmpl %ecx,%eax
142: jg LPop1AndDone
143: movl C(d_vrecty),%ebx
144: movl C(d_vrectx),%ecx
145: cmpl %ebx,%edx
146: jl LPop1AndDone
147:
148: cmpl %ecx,%eax
149: jl LPop1AndDone
150:
// The particle's color is loaded as a float and converted to an integer in
// DP_Color; this is the last use of %edi as the particle pointer.
151: flds pt_color(%edi) // color | 1/z * 0x8000
152: // FIXME: use Terje's fast fp->int trick?
153: fistpl DP_Color // 1/z * 0x8000
154:
155: movl C(d_viewbuffer),%ebx
156:
// %ebx = d_viewbuffer + u; %edi = d_scantable[v]; their sum is formed below.
157: addl %eax,%ebx
158: movl C(d_scantable)(,%edx,4),%edi // point to the pixel
159:
160: imull C(d_zrowbytes),%edx // point to the z pixel
161:
// %edx = v * d_zrowbytes + u * 2 (each z-buffer entry is 2 bytes)
162: leal (%edx,%eax,2),%edx
163: movl C(d_pzbuffer),%eax
164:
// Stores 1/z * 0x8000 as an integer; the FP stack is empty from here on.
165: fistpl izi
166:
167: addl %ebx,%edi
168: addl %eax,%edx
169:
170: // pix = izi >> d_pix_shift;
171:
// %eax = pix (logical shift); %ebp keeps the unshifted izi, whose low 16
// bits are what the drawing code compares with and stores to the z-buffer.
172: movl izi,%eax
173: movl C(d_pix_shift),%ecx
174: shrl %cl,%eax
175: movl izi,%ebp
176:
177: // if (pix < d_pix_min)
178: // pix = d_pix_min;
179: // else if (pix > d_pix_max)
180: // pix = d_pix_max;
181:
// Signed compares (jnl / jng).
182: movl C(d_pix_min),%ebx
183: movl C(d_pix_max),%ecx
184: cmpl %ebx,%eax
185: jnl LTestPixMax
186: movl %ebx,%eax
187: jmp LTestDone
188:
189: LTestPixMax:
190: cmpl %ecx,%eax
191: jng LTestDone
192: movl %ecx,%eax
// Size dispatch.  On entry %eax = pix (already clamped), %edi = destination
// pixel, %edx = z-buffer entry, %ebp = izi.
//
// The unrolled DP_1x1..DP_4x4 code is used only when d_y_aspect_shift is zero
// and pix <= 4 (unsigned compare); everything else goes to LDefault.  The
// jump is an absolute indirect jump through a table of 4-byte entries, so it
// carries the '*' prefix AT&T syntax requires for indirect jump operands.
// The -4 bias makes pix == 1 select the first table entry.
// NOTE(review): DP_EntryTable is defined outside this file; presumably it
// holds the addresses of DP_1x1, DP_2x2, DP_3x3 and DP_4x4 in that order.
// NOTE(review): pix == 0 would index one entry before the table; this
// assumes d_pix_min >= 1 -- confirm against the code that sets d_pix_min.
193: LTestDone:
194:
// %ch = particle color (first byte of DP_Color); %cl remains free for shifts.
195: movb DP_Color,%ch
196:
1.1.1.3 ! root 197: movl C(d_y_aspect_shift),%ebx
1.1 root 198: testl %ebx,%ebx
199: jnz LDefault
200:
201: cmpl $4,%eax
202: ja LDefault
203:
204: jmp *DP_EntryTable-4(,%eax,4)
205:
206: // 1x1
// Exported entry point for a one-pixel particle.
// In: %edx = z-buffer entry, %edi = destination pixel, %bp = particle z
// value (low 16 bits of izi), %ch = color.
// The pixel is written only when the stored z value is not greater (signed
// 16-bit compare) than %bp; the z-buffer is updated together with the pixel.
207: .globl DP_1x1
208: DP_1x1:
209: cmpw %bp,(%edx) // just one pixel to do
210: jg LDone
211: movw %bp,(%edx)
212: movb %ch,(%edi)
213: jmp LDone
214:
215: // 2x2
// Exported entry point for a 2x2-pixel particle, fully unrolled.
// In: %edx = z-buffer entry of the first pixel, %edi = destination pixel,
// %bp = particle z value (low 16 bits of izi), %ch = color.
// Each of the four pixels is tested on its own: it is written, and its
// z-buffer entry updated, only when the stored z value is not greater
// (signed 16-bit compare) than %bp.
// %ebx = screenwidth steps the frame buffer by one row and %esi =
// d_zrowbytes steps the z-buffer by one row; %esi is saved and restored
// around the block.
216: .globl DP_2x2
217: DP_2x2:
1.1.1.3 ! root 218: pushl %esi
1.1 root 219: movl C(screenwidth),%ebx
1.1.1.3 ! root 220: movl C(d_zrowbytes),%esi
1.1 root 221:
// row 0
222: cmpw %bp,(%edx)
223: jg L2x2_1
224: movw %bp,(%edx)
225: movb %ch,(%edi)
226: L2x2_1:
227: cmpw %bp,2(%edx)
228: jg L2x2_2
229: movw %bp,2(%edx)
230: movb %ch,1(%edi)
231: L2x2_2:
// row 1
1.1.1.3 ! root 232: cmpw %bp,(%edx,%esi,1)
1.1 root 233: jg L2x2_3
1.1.1.3 ! root 234: movw %bp,(%edx,%esi,1)
1.1 root 235: movb %ch,(%edi,%ebx,1)
236: L2x2_3:
1.1.1.3 ! root 237: cmpw %bp,2(%edx,%esi,1)
! 238: jg L2x2_4
! 239: movw %bp,2(%edx,%esi,1)
1.1 root 240: movb %ch,1(%edi,%ebx,1)
1.1.1.3 ! root 241: L2x2_4:
! 242:
! 243: popl %esi
1.1 root 244: jmp LDone
245:
246: // 3x3
// Exported entry point for a 3x3-pixel particle, fully unrolled.
// In: %edx = z-buffer entry of the first pixel, %edi = destination pixel,
// %bp = particle z value (low 16 bits of izi), %ch = color.
// Each of the nine pixels is tested on its own: it is written, and its
// z-buffer entry updated, only when the stored z value is not greater
// (signed 16-bit compare) than %bp.
// %ebx = screenwidth and %esi = d_zrowbytes are the row strides of the frame
// buffer and the z-buffer; rows 1 and 2 are reached with index scales 1 and
// 2, so %edx and %edi are left unmodified.  %esi is saved and restored
// around the block.
247: .globl DP_3x3
248: DP_3x3:
1.1.1.3 ! root 249: pushl %esi
1.1 root 250: movl C(screenwidth),%ebx
1.1.1.3 ! root 251: movl C(d_zrowbytes),%esi
1.1 root 252:
// row 0
253: cmpw %bp,(%edx)
254: jg L3x3_1
255: movw %bp,(%edx)
256: movb %ch,(%edi)
257: L3x3_1:
258: cmpw %bp,2(%edx)
259: jg L3x3_2
260: movw %bp,2(%edx)
261: movb %ch,1(%edi)
262: L3x3_2:
263: cmpw %bp,4(%edx)
264: jg L3x3_3
265: movw %bp,4(%edx)
266: movb %ch,2(%edi)
267: L3x3_3:
268:
// row 1
1.1.1.3 ! root 269: cmpw %bp,(%edx,%esi,1)
1.1 root 270: jg L3x3_4
1.1.1.3 ! root 271: movw %bp,(%edx,%esi,1)
1.1 root 272: movb %ch,(%edi,%ebx,1)
273: L3x3_4:
1.1.1.3 ! root 274: cmpw %bp,2(%edx,%esi,1)
1.1 root 275: jg L3x3_5
1.1.1.3 ! root 276: movw %bp,2(%edx,%esi,1)
1.1 root 277: movb %ch,1(%edi,%ebx,1)
278: L3x3_5:
1.1.1.3 ! root 279: cmpw %bp,4(%edx,%esi,1)
1.1 root 280: jg L3x3_6
1.1.1.3 ! root 281: movw %bp,4(%edx,%esi,1)
1.1 root 282: movb %ch,2(%edi,%ebx,1)
283: L3x3_6:
284:
// row 2
1.1.1.3 ! root 285: cmpw %bp,(%edx,%esi,2)
1.1 root 286: jg L3x3_7
1.1.1.3 ! root 287: movw %bp,(%edx,%esi,2)
1.1 root 288: movb %ch,(%edi,%ebx,2)
289: L3x3_7:
1.1.1.3 ! root 290: cmpw %bp,2(%edx,%esi,2)
1.1 root 291: jg L3x3_8
1.1.1.3 ! root 292: movw %bp,2(%edx,%esi,2)
1.1 root 293: movb %ch,1(%edi,%ebx,2)
294: L3x3_8:
1.1.1.3 ! root 295: cmpw %bp,4(%edx,%esi,2)
! 296: jg L3x3_9
! 297: movw %bp,4(%edx,%esi,2)
1.1 root 298: movb %ch,2(%edi,%ebx,2)
299: L3x3_9:
1.1.1.3 ! root 300:
! 301: popl %esi
1.1 root 302: jmp LDone
303:
304:
305: // 4x4
// Exported entry point for a 4x4-pixel particle, fully unrolled.
// In: %edx = z-buffer entry of the first pixel, %edi = destination pixel,
// %bp = particle z value (low 16 bits of izi), %ch = color.
// Each of the sixteen pixels is tested on its own: it is written, and its
// z-buffer entry updated, only when the stored z value is not greater
// (signed 16-bit compare) than %bp.
// %ebx = screenwidth and %esi = d_zrowbytes are the row strides of the frame
// buffer and the z-buffer.  Rows 0 and 1 are addressed from the incoming
// pointers; both pointers are then advanced by two rows and the same
// addressing is reused for rows 2 and 3.  %esi is saved and restored around
// the block.
306: .globl DP_4x4
307: DP_4x4:
1.1.1.3 ! root 308: pushl %esi
1.1 root 309: movl C(screenwidth),%ebx
1.1.1.3 ! root 310: movl C(d_zrowbytes),%esi
1.1 root 311:
// row 0
312: cmpw %bp,(%edx)
313: jg L4x4_1
314: movw %bp,(%edx)
315: movb %ch,(%edi)
316: L4x4_1:
317: cmpw %bp,2(%edx)
318: jg L4x4_2
319: movw %bp,2(%edx)
320: movb %ch,1(%edi)
321: L4x4_2:
322: cmpw %bp,4(%edx)
323: jg L4x4_3
324: movw %bp,4(%edx)
325: movb %ch,2(%edi)
326: L4x4_3:
327: cmpw %bp,6(%edx)
328: jg L4x4_4
329: movw %bp,6(%edx)
330: movb %ch,3(%edi)
331: L4x4_4:
332:
// row 1
1.1.1.3 ! root 333: cmpw %bp,(%edx,%esi,1)
1.1 root 334: jg L4x4_5
1.1.1.3 ! root 335: movw %bp,(%edx,%esi,1)
1.1 root 336: movb %ch,(%edi,%ebx,1)
337: L4x4_5:
1.1.1.3 ! root 338: cmpw %bp,2(%edx,%esi,1)
1.1 root 339: jg L4x4_6
1.1.1.3 ! root 340: movw %bp,2(%edx,%esi,1)
1.1 root 341: movb %ch,1(%edi,%ebx,1)
342: L4x4_6:
1.1.1.3 ! root 343: cmpw %bp,4(%edx,%esi,1)
1.1 root 344: jg L4x4_7
1.1.1.3 ! root 345: movw %bp,4(%edx,%esi,1)
1.1 root 346: movb %ch,2(%edi,%ebx,1)
347: L4x4_7:
1.1.1.3 ! root 348: cmpw %bp,6(%edx,%esi,1)
1.1 root 349: jg L4x4_8
1.1.1.3 ! root 350: movw %bp,6(%edx,%esi,1)
1.1 root 351: movb %ch,3(%edi,%ebx,1)
352: L4x4_8:
353:
// advance both pointers by two rows
1.1.1.3 ! root 354: leal (%edx,%esi,2),%edx
1.1 root 355: leal (%edi,%ebx,2),%edi
356:
// row 2
357: cmpw %bp,(%edx)
358: jg L4x4_9
359: movw %bp,(%edx)
360: movb %ch,(%edi)
361: L4x4_9:
362: cmpw %bp,2(%edx)
363: jg L4x4_10
364: movw %bp,2(%edx)
365: movb %ch,1(%edi)
366: L4x4_10:
367: cmpw %bp,4(%edx)
368: jg L4x4_11
369: movw %bp,4(%edx)
370: movb %ch,2(%edi)
371: L4x4_11:
372: cmpw %bp,6(%edx)
373: jg L4x4_12
374: movw %bp,6(%edx)
375: movb %ch,3(%edi)
376: L4x4_12:
377:
// row 3
1.1.1.3 ! root 378: cmpw %bp,(%edx,%esi,1)
1.1 root 379: jg L4x4_13
1.1.1.3 ! root 380: movw %bp,(%edx,%esi,1)
1.1 root 381: movb %ch,(%edi,%ebx,1)
382: L4x4_13:
1.1.1.3 ! root 383: cmpw %bp,2(%edx,%esi,1)
1.1 root 384: jg L4x4_14
1.1.1.3 ! root 385: movw %bp,2(%edx,%esi,1)
1.1 root 386: movb %ch,1(%edi,%ebx,1)
387: L4x4_14:
1.1.1.3 ! root 388: cmpw %bp,4(%edx,%esi,1)
1.1 root 389: jg L4x4_15
1.1.1.3 ! root 390: movw %bp,4(%edx,%esi,1)
1.1 root 391: movb %ch,2(%edi,%ebx,1)
392: L4x4_15:
1.1.1.3 ! root 393: cmpw %bp,6(%edx,%esi,1)
! 394: jg L4x4_16
! 395: movw %bp,6(%edx,%esi,1)
1.1 root 396: movb %ch,3(%edi,%ebx,1)
397: L4x4_16:
1.1.1.3 ! root 398:
! 399: popl %esi
1.1 root 400: jmp LDone
401:
402: // default case, handling any size particle
// In: %eax = pix, %edx = z-buffer entry of the first pixel, %edi =
// destination pixel, %bp = particle z value (low 16 bits of izi),
// %ch = color.
// Draws pix columns by (pix << d_y_aspect_shift) rows.  Falls through into
// LDone when the last row has been drawn.
// NOTE(review): both loops test their counter only after the body has run,
// so unlike the C loops quoted below they assume pix >= 1 and count >= 1 --
// confirm that d_pix_min is never below 1.
403: LDefault:
404:
405: // count = pix << d_y_aspect_shift;
406:
// %ebx = row count; pix is kept in DP_Pix so it can be reloaded for each
// row.  Only %cl is overwritten by the shift count, so the color in %ch
// survives.
407: movl %eax,%ebx
408: movl %eax,DP_Pix
409: movb C(d_y_aspect_shift),%cl
410: shll %cl,%ebx
411:
412: // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
413: // {
414: // for (i=0 ; i<pix ; i++)
415: // {
416: // if (pz[i] <= izi)
417: // {
418: // pz[i] = izi;
419: // pdest[i] = color;
420: // }
421: // }
422: // }
423:
424: LGenRowLoop:
425: movl DP_Pix,%eax
426:
// The column loop counts %eax down from pix to 1 and addresses element
// %eax - 1, so each row is drawn from its last pixel to its first.
427: LGenColLoop:
428: cmpw %bp,-2(%edx,%eax,2)
429: jg LGSkip
430: movw %bp,-2(%edx,%eax,2)
431: movb %ch,-1(%edi,%eax,1)
432: LGSkip:
433: decl %eax // --pix
434: jnz LGenColLoop
435:
// step both pointers to the next row
436: addl C(d_zrowbytes),%edx
437: addl C(screenwidth),%edi
438:
439: decl %ebx // --count
440: jnz LGenRowLoop
441:
// Common exit: restores the three registers pushed at function entry, in
// reverse order of the pushes, and returns.
442: LDone:
443: popl %ebx // restore register variables
444: popl %edi
445: popl %ebp // restore the caller's stack frame
446: ret
447:
// Early-out paths that first discard values left on the x87 register stack.
// LPop6AndDone pops five values and falls through into LPop1AndDone, which
// pops one more (six in total) before jumping to the common exit.
448: LPop6AndDone:
449: fstp %st(0)
450: fstp %st(0)
451: fstp %st(0)
452: fstp %st(0)
453: fstp %st(0)
454: LPop1AndDone:
455: fstp %st(0)
456: jmp LDone
457:
458: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.