|
|
1.1 root 1: //
2: // d_spr8.s
3: // x86 assembly-language horizontal 8-bpp transparent span-drawing code.
4: //
5:
6: #include "asm_i386.h"
7: #include "quakeasm.h"
8: #include "asm_draw.h"
9:
10: #if id386
11:
12: //----------------------------------------------------------------------
13: // 8-bpp horizontal span drawing code for polygons, with transparency.
14: //----------------------------------------------------------------------
15:
16: .text
17:
18: // out-of-line, rarely-needed clamping code; each stub forces one
19: // out-of-range s or t value to a limit and jumps back to its LClampReentryN
20: LClampHigh0:
21: movl C(bbextents),%esi // s is above the limit: clamp to bbextents
22: jmp LClampReentry0
23: LClampHighOrLow0: // reached by an unsigned "ja" on s vs. bbextents
24: jg LClampHigh0 // signed test on the same flags: s really is too high
25: xorl %esi,%esi // otherwise s is negative: clamp to 0
26: jmp LClampReentry0
27:
28: LClampHigh1:
29: movl C(bbextentt),%edx // t is above the limit: clamp to bbextentt
30: jmp LClampReentry1
31: LClampHighOrLow1: // reached by an unsigned "ja" on t vs. bbextentt
32: jg LClampHigh1 // signed test on the same flags: t really is too high
33: xorl %edx,%edx // otherwise t is negative: clamp to 0
34: jmp LClampReentry1
35: // stubs 2-5 clamp the end-of-segment s/t: low limit 2048, high limit bbextents/bbextentt
36: LClampLow2:
37: movl $2048,%ebp // sadjust+snext (full segment) is below 2048
38: jmp LClampReentry2
39: LClampHigh2:
40: movl C(bbextents),%ebp // sadjust+snext (full segment) is above bbextents
41: jmp LClampReentry2
42:
43: LClampLow3:
44: movl $2048,%ecx // tadjust+tnext (full segment) is below 2048
45: jmp LClampReentry3
46: LClampHigh3:
47: movl C(bbextentt),%ecx // tadjust+tnext (full segment) is above bbextentt
48: jmp LClampReentry3
49:
50: LClampLow4:
51: movl $2048,%eax // sadjust+snext (last segment) is below 2048
52: jmp LClampReentry4
53: LClampHigh4:
54: movl C(bbextents),%eax // sadjust+snext (last segment) is above bbextents
55: jmp LClampReentry4
56:
57: LClampLow5:
58: movl $2048,%ebx // tadjust+tnext (last segment) is below 2048
59: jmp LClampReentry5
60: LClampHigh5:
61: movl C(bbextentt),%ebx // tadjust+tnext (last segment) is above bbextentt
62: jmp LClampReentry5
63:
64: // pspans: %esp offset of the span-list argument after the four pushes below (16 bytes) plus 4 more, presumably the return address
65: #define pspans 4+16
66: // D_SpriteDrawSpans: draws every span in the list with count > 0, z-testing each pixel and skipping TRANSPARENT_COLOR texels
67: .align 4
68: .globl C(D_SpriteDrawSpans)
69: C(D_SpriteDrawSpans):
70: pushl %ebp // preserve caller's stack frame
71: pushl %edi
72: pushl %esi // preserve register variables
73: pushl %ebx
74:
75: //
76: // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
77: // and span list pointers, and 1/z step in 0.32 fixed-point
78: //
79: // FIXME: any overlap from rearranging?
80: flds C(d_sdivzstepu)
81: fmuls fp_8 // sdivzstepu*8
82: movl C(cacheblock),%edx
83: flds C(d_tdivzstepu)
84: fmuls fp_8 // tdivzstepu*8 | sdivzstepu*8
85: movl pspans(%esp),%ebx // point to the first span descriptor
86: flds C(d_zistepu)
87: fmuls fp_8 // zistepu*8 | tdivzstepu*8 | sdivzstepu*8
88: movl %edx,pbase // pbase = cacheblock
89: flds C(d_zistepu)
90: fmuls fp_64kx64k // zistepu*64kx64k | zistepu*8 | tdivzstepu*8 | sdivzstepu*8
91: fxch %st(3) // sdivzstepu*8 | zistepu*8 | tdivzstepu*8 | zistepu*64kx64k
92: fstps sdivz8stepu
93: fstps zi8stepu
94: fstps tdivz8stepu
95: fistpl izistep // integer form of zistepu*64kx64k; FP stack is empty again
96: movl izistep,%eax
97: rorl $16,%eax // put upper 16 bits in low word
98: movl sspan_t_count(%ebx),%ecx
99: movl %eax,izistep // izistep stays in this rotated form, as izi does
100:
101: cmpl $0,%ecx // is the first span empty (count <= 0)?
102: jle LNextSpan // yes: skip straight to the next descriptor
103:
104: LSpanLoop:
105: // per-span entry: %ebx = span descriptor, %ecx = its pixel count (> 0)
106: //
107: // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
108: // initial s and t values
109: //
110: // FIXME: pipeline FILD?
111: fildl sspan_t_v(%ebx) // dv
112: fildl sspan_t_u(%ebx) // du | dv
113:
114: fld %st(1) // dv | du | dv
115: fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
116: fld %st(1) // du | dv*d_sdivzstepv | du | dv
117: fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
118: fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
119: fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
120: // dv*d_sdivzstepv | du | dv
121: fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
122: // dv*d_sdivzstepv | du | dv
123: faddp %st(0),%st(2) // du*d_tdivzstepu |
124: // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
125: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
126: // du*d_tdivzstepu | du | dv
127: fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
128: // du*d_tdivzstepu | du | dv
129: fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
130: // du*d_sdivzstepu + dv*d_sdivzstepv |
131: // du*d_tdivzstepu | du | dv
132: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
133: // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
134: fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
135: // du*d_sdivzstepu; stays in %st(2) at end
136: fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
137: // s/z
138: fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
139: // du*d_tdivzstepu | du | s/z
140: fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
141: // du*d_tdivzstepu | du | s/z
142: faddp %st(0),%st(2) // dv*d_zistepv |
143: // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
144: fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
145: // dv*d_zistepv | s/z
146: fmuls C(d_zistepu) // du*d_zistepu |
147: // dv*d_tdivzstepv + du*d_tdivzstepu |
148: // dv*d_zistepv | s/z
149: fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
150: // du*d_zistepu | dv*d_zistepv | s/z
151: fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
152: // du*d_tdivzstepu; stays in %st(1) at end
153: fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
154: faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
155:
156: flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
157: fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
158: fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
159: // du*d_zistepu; stays in %st(0) at end
160: // 1/z | fp_64k | t/z | s/z
161:
162: fld %st(0) // FIXME: get rid of stall on FMUL?
163: fmuls fp_64kx64k // 1/z*64kx64k | 1/z | fp_64k | t/z | s/z
164: fxch %st(1) // 1/z | 1/z*64kx64k | fp_64k | t/z | s/z
165:
166: //
167: // calculate and clamp s & t
168: //
169: fdivr %st(0),%st(2) // turns the fp_64k slot into z*64k: 1/z | 1/z*64kx64k | z*64k | t/z | s/z
170: fxch %st(1) // 1/z*64kx64k | 1/z | z*64k | t/z | s/z
171:
172: fistpl izi // 0.32 fixed-point 1/z; leaves 1/z | z*64k | t/z | s/z
173: movl izi,%ebp
174:
175: //
176: // set pz to point to the first z-buffer pixel in the span
177: //
178: rorl $16,%ebp // put upper 16 bits in low word
179: movl sspan_t_v(%ebx),%eax
180: movl %ebp,izi // izi is kept in this rotated form from here on
181: movl sspan_t_u(%ebx),%ebp
182: imull C(d_zrowbytes) // %eax = v * d_zrowbytes (one-operand form also overwrites %edx)
183: shll $1,%ebp // a word per pixel
184: addl C(d_pzbuffer),%eax
185: addl %ebp,%eax
186: movl %eax,pz // pz = d_pzbuffer + v*d_zrowbytes + u*2
187:
188: //
189: // point %edi to the first pixel in the span
190: //
191: movl C(d_viewbuffer),%ebp
192: movl sspan_t_v(%ebx),%eax
193: pushl %ebx // preserve spans pointer
194: movl C(tadjust),%edx // %edx and %esi get t and s added to them at LFDIVInFlight1
195: movl C(sadjust),%esi
196: movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
197: addl %ebp,%edi
198: movl sspan_t_u(%ebx),%ebp
199: addl %ebp,%edi // pdest = &pdestspan[scans->u];
200:
201: //
202: // now start the FDIV for the end of the span
203: //
204: cmpl $8,%ecx // more than 8 pixels in this span?
205: ja LSetupNotLast1 // yes: the first segment is a full 8 pixels
206:
207: decl %ecx // short span: %ecx is count-1 from here on
208: jz LCleanup1 // if only one pixel, no need to start an FDIV
209: movl %ecx,spancountminus1
210:
211: // finish up the s and t calcs
212: fxch %st(1) // z*64k | 1/z | t/z | s/z
213:
214: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.3 ! root 215: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
1.1 root 216: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.3 ! root 217: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
1.1 root 218: fxch %st(1) // s | t | 1/z | t/z | s/z
219: fistpl s // t | 1/z | t/z | s/z
220: fistpl t // 1/z | t/z | s/z
221:
222: fildl spancountminus1 // scm1 | 1/z | t/z | s/z (pictures below show only the new top entries)
223:
224: flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
225: flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
226: fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
227: fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
228: fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
229: fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
230: fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
231: // _d_tdivzstepu*scm1
232: fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
233: // _d_tdivzstepu*scm1
234: faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
235: fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
236: faddp %st(0),%st(3) // _d_sdivzstepu*scm1
237: faddp %st(0),%st(3) // 1/z | t/z | s/z, each advanced by scm1 steps
238:
239: flds fp_64k // fp_64k | 1/z | t/z | s/z
1.1.1.3 ! root 240: fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
1.1 root 241: // overlap
242: jmp LFDIVInFlight1
243:
244: LCleanup1:
245: // finish up the s and t calcs
246: fxch %st(1) // z*64k | 1/z | t/z | s/z
247:
248: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.3 ! root 249: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
1.1 root 250: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.3 ! root 251: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
1.1 root 252: fxch %st(1) // s | t | 1/z | t/z | s/z
253: fistpl s // t | 1/z | t/z | s/z
254: fistpl t // 1/z | t/z | s/z
255: jmp LFDIVInFlight1
256:
257: .align 4
258: LSetupNotLast1:
259: // finish up the s and t calcs
260: fxch %st(1) // z*64k | 1/z | t/z | s/z
261:
262: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.3 ! root 263: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
1.1 root 264: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.3 ! root 265: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
1.1 root 266: fxch %st(1) // s | t | 1/z | t/z | s/z
267: fistpl s // t | 1/z | t/z | s/z
268: fistpl t // 1/z | t/z | s/z
269:
270: fadds zi8stepu // 1/z advanced by the 8-pixel step
271: fxch %st(2) // s/z | t/z | 1/z
272: fadds sdivz8stepu // s/z advanced by the 8-pixel step
273: fxch %st(2) // 1/z | t/z | s/z
274: flds tdivz8stepu // tdivz8stepu | 1/z | t/z | s/z
275: faddp %st(0),%st(2) // 1/z | t/z | s/z, all at the end of the segment
276: flds fp_64k // fp_64k | 1/z | t/z | s/z
1.1.1.3 ! root 277: fdiv %st(1),%st(0) // z*64k = fp_64k / (1/z)
! 278: // this is what we've gone to all this trouble to
! 279: // overlap
1.1 root 280: LFDIVInFlight1:
281: // integer work from here on is what overlaps any FDIV started above
282: addl s,%esi // %esi = sadjust + s
283: addl t,%edx // %edx = tadjust + t
284: movl C(bbextents),%ebx
285: movl C(bbextentt),%ebp
286: cmpl %ebx,%esi // one unsigned compare catches both s < 0 and s > bbextents
287: ja LClampHighOrLow0
288: LClampReentry0:
289: movl %esi,s
290: movl pbase,%ebx
291: shll $16,%esi
292: cmpl %ebp,%edx // same unsigned range check for t
293: movl %esi,sfracf // low 16 bits of s, left-justified (mov leaves the flags for ja)
294: ja LClampHighOrLow1
295: LClampReentry1:
296: movl %edx,t
297: movl s,%esi // sfrac = scans->sfrac;
298: shll $16,%edx
299: movl t,%eax // tfrac = scans->tfrac;
300: sarl $16,%esi
301: movl %edx,tfracf // low 16 bits of t, left-justified
302:
303: //
304: // calculate the texture starting address
305: //
306: sarl $16,%eax
307: addl %ebx,%esi
308: imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
309: addl %eax,%esi // psource = pbase + (sfrac >> 16) +
310: // ((tfrac >> 16) * cachewidth);
311:
312: //
313: // determine whether last segment or not
314: //
315: cmpl $8,%ecx // %ecx is count for long spans, count-1 for short ones
316: jna LLastSegment
317:
318: //
319: // not the last segment; do full 8-wide segment
320: //
321: LNotLastSegment:
322:
323: //
324: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
325: // get there
326: //
327:
328: // pick up after the FDIV that was left in flight previously
329: // FP stack here: z*64k (end of segment) | 1/z | t/z | s/z
1.1.1.3 ! root 330: fld %st(0) // duplicate it
! 331: fmul %st(4),%st(0) // s = s/z * z
1.1 root 332: fxch %st(1)
1.1.1.3 ! root 333: fmul %st(3),%st(0) // t = t/z * z
1.1 root 334: fxch %st(1)
335: fistpl snext
336: fistpl tnext // FP stack is back to 1/z | t/z | s/z
337: movl snext,%eax
338: movl tnext,%edx
339:
340: subl $8,%ecx // count off this segment's pixels
341: movl C(sadjust),%ebp
1.1.1.3 ! root 342: pushl %ecx // remember count of remaining pixels
1.1 root 343: movl C(tadjust),%ecx
344:
345: addl %eax,%ebp // %ebp = sadjust + snext
346: addl %edx,%ecx // %ecx = tadjust + tnext
347:
348: movl C(bbextents),%eax
349: movl C(bbextentt),%edx
350:
351: cmpl $2048,%ebp // clamp snext to 2048..bbextents
352: jl LClampLow2
353: cmpl %eax,%ebp
354: ja LClampHigh2
355: LClampReentry2:
356:
357: cmpl $2048,%ecx // clamp tnext to 2048..bbextentt
358: jl LClampLow3
359: cmpl %edx,%ecx
360: ja LClampHigh3
361: LClampReentry3:
362:
363: movl %ebp,snext
364: movl %ecx,tnext
365:
366: subl s,%ebp // %ebp = snext - s, the s change across the segment
367: subl t,%ecx // %ecx = tnext - t, the t change across the segment
368:
369: //
370: // set up advancetable
371: //
372: movl %ecx,%eax
373: movl %ebp,%edx
374: sarl $19,%edx // sstep >>= 16; (19 = 16 + 3: the delta covers 8 pixels)
375: movl C(cachewidth),%ebx
376: sarl $19,%eax // tstep >>= 16; (same 16 + 3 shift)
377: jz LIsZero // whole part of tstep is 0: skip the multiply
378: imull %ebx,%eax // (tstep >> 16) * cachewidth;
379: LIsZero:
380: addl %edx,%eax // add in sstep
381: // (tstep >> 16) * cachewidth + (sstep >> 16);
382: movl tfracf,%edx
383: movl %eax,advancetable+4 // advance base in t
384: addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
385: // (sstep >> 16);
386: shll $13,%ebp // left-justify sstep fractional part
387: movl %ebp,sstep
388: movl sfracf,%ebx
389: shll $13,%ecx // left-justify tstep fractional part
390: movl %eax,advancetable // advance extra in t
391: movl %ecx,tstep
392:
393: movl pz,%ecx // pixel loop registers: %ecx = pz, %ebp = rotated izi,
394: movl izi,%ebp // %edx = tfracf, %ebx = sfracf, %esi = psource, %edi = pdest
395:
396: cmpw (%ecx),%bp // signed 16-bit compare of the new 1/z word with the z-buffer
1.1.1.3 ! root 397: jl Lp1
1.1 root 398: movb (%esi),%al // get first source texel
1.1.1.3 ! root 399: cmpb $(TRANSPARENT_COLOR),%al
! 400: jz Lp1
1.1 root 401: movw %bp,(%ecx) // visible and opaque: update the z-buffer
402: movb %al,(%edi) // store first dest pixel
1.1.1.3 ! root 403: Lp1:
1.1 root 404: addl izistep,%ebp // step 1/z; both values are in rotated form
405: adcl $0,%ebp // fold the carry out of bit 31 back into bit 0
406: addl tstep,%edx // advance tfrac fractional part by tstep frac
407:
408: sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
409: addl sstep,%ebx // advance sfrac fractional part by sstep frac
410: adcl advancetable+4(,%eax,4),%esi // point to next source texel
411: // pixel 2 (same test/draw/step sequence, hardwired offsets 2 and 1)
412: cmpw 2(%ecx),%bp
1.1.1.3 ! root 413: jl Lp2
1.1 root 414: movb (%esi),%al
1.1.1.3 ! root 415: cmpb $(TRANSPARENT_COLOR),%al
! 416: jz Lp2
1.1 root 417: movw %bp,2(%ecx)
418: movb %al,1(%edi)
1.1.1.3 ! root 419: Lp2:
1.1 root 420: addl izistep,%ebp
421: adcl $0,%ebp
422: addl tstep,%edx
423: sbbl %eax,%eax
424: addl sstep,%ebx
425: adcl advancetable+4(,%eax,4),%esi
426: // pixel 3
427: cmpw 4(%ecx),%bp
1.1.1.3 ! root 428: jl Lp3
1.1 root 429: movb (%esi),%al
1.1.1.3 ! root 430: cmpb $(TRANSPARENT_COLOR),%al
! 431: jz Lp3
1.1 root 432: movw %bp,4(%ecx)
433: movb %al,2(%edi)
1.1.1.3 ! root 434: Lp3:
1.1 root 435: addl izistep,%ebp
436: adcl $0,%ebp
437: addl tstep,%edx
438: sbbl %eax,%eax
439: addl sstep,%ebx
440: adcl advancetable+4(,%eax,4),%esi
441: // pixel 4
442: cmpw 6(%ecx),%bp
1.1.1.3 ! root 443: jl Lp4
1.1 root 444: movb (%esi),%al
1.1.1.3 ! root 445: cmpb $(TRANSPARENT_COLOR),%al
! 446: jz Lp4
1.1 root 447: movw %bp,6(%ecx)
448: movb %al,3(%edi)
1.1.1.3 ! root 449: Lp4:
1.1 root 450: addl izistep,%ebp
451: adcl $0,%ebp
452: addl tstep,%edx
453: sbbl %eax,%eax
454: addl sstep,%ebx
455: adcl advancetable+4(,%eax,4),%esi
456: // pixel 5
457: cmpw 8(%ecx),%bp
1.1.1.3 ! root 458: jl Lp5
1.1 root 459: movb (%esi),%al
1.1.1.3 ! root 460: cmpb $(TRANSPARENT_COLOR),%al
! 461: jz Lp5
1.1 root 462: movw %bp,8(%ecx)
463: movb %al,4(%edi)
1.1.1.3 ! root 464: Lp5:
1.1 root 465: addl izistep,%ebp
466: adcl $0,%ebp
467: addl tstep,%edx
468: sbbl %eax,%eax
469: addl sstep,%ebx
470: adcl advancetable+4(,%eax,4),%esi
471:
472: //
473: // start FDIV for end of next segment in flight, so it can overlap
474: //
475: popl %eax // remaining pixel count saved in LNotLastSegment
476: cmpl $8,%eax // more than one segment after this?
477: ja LSetupNotLast2 // yes
478:
479: decl %eax // %eax = remaining count - 1
480: jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
481: movl %eax,spancountminus1
482: fildl spancountminus1 // scm1 | 1/z | t/z | s/z (pictures below show only the new top entries)
483:
484: flds C(d_zistepu) // _d_zistepu | spancountminus1
485: fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
486: flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
487: fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
488: fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
489: faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
490: fxch %st(1) // scm1 | _d_tdivzstepu*scm1
491: fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
492: fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
493: faddp %st(0),%st(3) // _d_sdivzstepu*scm1
494: flds fp_64k // 64k | _d_sdivzstepu*scm1
495: fxch %st(1) // _d_sdivzstepu*scm1 | 64k
496: faddp %st(0),%st(4) // 64k
497: // full stack now: 64k | 1/z | t/z | s/z, each advanced by scm1 steps
1.1.1.3 ! root 498: fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
1.1 root 499: // overlap
500: jmp LFDIVInFlight2
501:
502: .align 4
503: LSetupNotLast2:
504: fadds zi8stepu // 1/z advanced by the 8-pixel step
505: fxch %st(2) // s/z | t/z | 1/z
506: fadds sdivz8stepu // s/z advanced by the 8-pixel step
507: fxch %st(2) // 1/z | t/z | s/z
508: flds tdivz8stepu // tdivz8stepu | 1/z | t/z | s/z
509: faddp %st(0),%st(2) // 1/z | t/z | s/z, all at the end of the next segment
510: flds fp_64k // fp_64k | 1/z | t/z | s/z
1.1.1.3 ! root 511: fdiv %st(1),%st(0) // z*64k = fp_64k / (1/z)
! 512: // this is what we've gone to all this trouble to
! 513: // overlap
1.1 root 514: LFDIVInFlight2:
515: pushl %eax // re-save the count (the sbbl below reuses %eax); already count-1 if 8 or fewer remain
516: // pixel 6
517: cmpw 10(%ecx),%bp
1.1.1.3 ! root 518: jl Lp6
1.1 root 519: movb (%esi),%al
1.1.1.3 ! root 520: cmpb $(TRANSPARENT_COLOR),%al
! 521: jz Lp6
1.1 root 522: movw %bp,10(%ecx)
523: movb %al,5(%edi)
1.1.1.3 ! root 524: Lp6:
1.1 root 525: addl izistep,%ebp
526: adcl $0,%ebp
527: addl tstep,%edx
528: sbbl %eax,%eax
529: addl sstep,%ebx
530: adcl advancetable+4(,%eax,4),%esi
531: // pixel 7
532: cmpw 12(%ecx),%bp
1.1.1.3 ! root 533: jl Lp7
1.1 root 534: movb (%esi),%al
1.1.1.3 ! root 535: cmpb $(TRANSPARENT_COLOR),%al
! 536: jz Lp7
1.1 root 537: movw %bp,12(%ecx)
538: movb %al,6(%edi)
1.1.1.3 ! root 539: Lp7:
1.1 root 540: addl izistep,%ebp
541: adcl $0,%ebp
542: addl tstep,%edx
543: sbbl %eax,%eax
544: addl sstep,%ebx
545: adcl advancetable+4(,%eax,4),%esi
546: // pixel 8
547: cmpw 14(%ecx),%bp
1.1.1.3 ! root 548: jl Lp8
1.1 root 549: movb (%esi),%al
1.1.1.3 ! root 550: cmpb $(TRANSPARENT_COLOR),%al
! 551: jz Lp8
1.1 root 552: movw %bp,14(%ecx)
553: movb %al,7(%edi)
1.1.1.3 ! root 554: Lp8:
1.1 root 555: addl izistep,%ebp
556: adcl $0,%ebp
557: addl tstep,%edx
558: sbbl %eax,%eax
559: addl sstep,%ebx
560: adcl advancetable+4(,%eax,4),%esi
561: // segment done: advance the pointers and make snext/tnext the new s/t
562: addl $8,%edi // 8 destination pixels
563: addl $16,%ecx // 8 z-buffer words
564: movl %edx,tfracf
565: movl snext,%edx
566: movl %ebx,sfracf
567: movl tnext,%ebx
568: movl %edx,s
569: movl %ebx,t
570:
571: movl %ecx,pz
572: movl %ebp,izi
573:
574: popl %ecx // retrieve count
575:
576: //
577: // determine whether last segment or not
578: //
579: cmpl $8,%ecx // are there multiple segments remaining?
580: ja LNotLastSegment // yes
581:
582: //
583: // last segment of scan
584: //
585: LLastSegment:
586: // on entry %ecx = pixels left in the span minus 1 (the number of steps)
587: //
588: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
589: // get there. The number of pixels left is variable, and we want to land on the
590: // last pixel, not step one past it, so we can't run into arithmetic problems
591: //
592: testl %ecx,%ecx
593: jz LNoSteps // just draw the last pixel and we're done
594:
595: // pick up after the FDIV that was left in flight previously
596:
597: // FP stack here: z*64k (at the last pixel) | 1/z | t/z | s/z
1.1.1.3 ! root 598: fld %st(0) // duplicate it
! 599: fmul %st(4),%st(0) // s = s/z * z
1.1 root 600: fxch %st(1)
1.1.1.3 ! root 601: fmul %st(3),%st(0) // t = t/z * z
1.1 root 602: fxch %st(1)
603: fistpl snext
604: fistpl tnext // FP stack is back to 1/z | t/z | s/z
605:
606: movl C(tadjust),%ebx
607: movl C(sadjust),%eax
608:
609: addl snext,%eax // %eax = sadjust + snext
610: addl tnext,%ebx // %ebx = tadjust + tnext
611:
612: movl C(bbextents),%ebp
613: movl C(bbextentt),%edx
614:
615: cmpl $2048,%eax // clamp snext to 2048..bbextents
616: jl LClampLow4
617: cmpl %ebp,%eax
618: ja LClampHigh4
619: LClampReentry4:
620: movl %eax,snext
621:
622: cmpl $2048,%ebx // clamp tnext to 2048..bbextentt
623: jl LClampLow5
624: cmpl %edx,%ebx
625: ja LClampHigh5
626: LClampReentry5:
627:
628: cmpl $1,%ecx // don't bother with the reciprocal multiply
629: je LOnlyOneStep // if two pixels in segment, there's only one step,
630: // of the segment length
631: subl s,%eax // %eax = snext - s
632: subl t,%ebx // %ebx = tnext - t
633:
634: addl %eax,%eax // convert to 15.17 format so multiply by 1.31
635: addl %ebx,%ebx // reciprocal yields 16.48
636: imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
637: movl %edx,%ebp // keep sstep (high half of the %edx:%eax product) in %ebp
638:
639: movl %ebx,%eax
640: imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
641: // here %ebp = sstep, %edx = tstep, %ecx = number of steps
642: LSetEntryvec:
643: //
644: // set up advancetable
645: //
1.1.1.3 ! root 646: movl spr8entryvec_table(,%ecx,4),%ebx
1.1 root 647: movl %edx,%eax
648: pushl %ebx // entry point into code for RET later
649: movl %ebp,%ecx
650: sarl $16,%ecx // sstep >>= 16;
651: movl C(cachewidth),%ebx
652: sarl $16,%edx // tstep >>= 16;
653: jz LIsZeroLast // whole part of tstep is 0: skip the multiply
654: imull %ebx,%edx // (tstep >> 16) * cachewidth;
655: LIsZeroLast:
656: addl %ecx,%edx // add in sstep
657: // (tstep >> 16) * cachewidth + (sstep >> 16);
658: movl tfracf,%ecx
659: movl %edx,advancetable+4 // advance base in t
660: addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
661: // (sstep >> 16);
662: shll $16,%ebp // left-justify sstep fractional part
663: movl sfracf,%ebx
664: shll $16,%eax // left-justify tstep fractional part
665: movl %edx,advancetable // advance extra in t
666:
667: movl %eax,tstep
668: movl %ebp,sstep
669: movl %ecx,%edx // %edx = tfracf, as the pixel code expects
670:
671: movl pz,%ecx
672: movl izi,%ebp
673:
674: ret // jump to the number-of-pixels handler
675:
676: //----------------------------------------
677: // one pixel left: bias %edi and %ecx so LEndSpan's hardwired 7 and 14 offsets hit it
678: LNoSteps:
679: movl pz,%ecx
680: subl $7,%edi // adjust for hardwired offset
681: subl $14,%ecx
682: jmp LEndSpan
683:
684: // two pixels left: the single step is the whole s/t change, so no reciprocal multiply
685: LOnlyOneStep:
686: subl s,%eax // %eax = snext - s
687: subl t,%ebx // %ebx = tnext - t
688: movl %eax,%ebp // LSetEntryvec expects sstep in %ebp
689: movl %ebx,%edx // and tstep in %edx
690: jmp LSetEntryvec
691:
692: //----------------------------------------
693: // Spr8EntryN_8: draw the last N pixels; each stub backs up %edi and %ecx by 8-N pixels to match the hardwired offsets
1.1.1.3 ! root 694: .globl Spr8Entry2_8
! 695: Spr8Entry2_8:
1.1 root 696: subl $6,%edi // adjust for hardwired offsets
697: subl $12,%ecx
698: movb (%esi),%al // NOTE(review): looks redundant; LLEntry2_8 reloads %al before using it
699: jmp LLEntry2_8
700:
701: //----------------------------------------
702:
1.1.1.3 ! root 703: .globl Spr8Entry3_8
! 704: Spr8Entry3_8:
1.1 root 705: subl $5,%edi // adjust for hardwired offsets
706: subl $10,%ecx
707: jmp LLEntry3_8
708:
709: //----------------------------------------
710:
1.1.1.3 ! root 711: .globl Spr8Entry4_8
! 712: Spr8Entry4_8:
1.1 root 713: subl $4,%edi // adjust for hardwired offsets
714: subl $8,%ecx
715: jmp LLEntry4_8
716:
717: //----------------------------------------
718:
1.1.1.3 ! root 719: .globl Spr8Entry5_8
! 720: Spr8Entry5_8:
1.1 root 721: subl $3,%edi // adjust for hardwired offsets
722: subl $6,%ecx
723: jmp LLEntry5_8
724:
725: //----------------------------------------
726:
1.1.1.3 ! root 727: .globl Spr8Entry6_8
! 728: Spr8Entry6_8:
1.1 root 729: subl $2,%edi // adjust for hardwired offsets
730: subl $4,%ecx
731: jmp LLEntry6_8
732:
733: //----------------------------------------
734:
1.1.1.3 ! root 735: .globl Spr8Entry7_8
! 736: Spr8Entry7_8:
1.1 root 737: decl %edi // adjust for hardwired offsets
738: subl $2,%ecx
739: jmp LLEntry7_8
740:
741: //----------------------------------------
742: // Spr8Entry8_8: draw a full 8 last pixels; the LLEntryN_8 labels below are the mid-chain entries used by the stubs above
1.1.1.3 ! root 743: .globl Spr8Entry8_8
! 744: Spr8Entry8_8:
1.1 root 745: cmpw (%ecx),%bp // signed 16-bit compare of the new 1/z word with the z-buffer
1.1.1.3 ! root 746: jl Lp9
1.1 root 747: movb (%esi),%al
1.1.1.3 ! root 748: cmpb $(TRANSPARENT_COLOR),%al
! 749: jz Lp9
1.1 root 750: movw %bp,(%ecx)
751: movb %al,(%edi)
1.1.1.3 ! root 752: Lp9:
1.1 root 753: addl izistep,%ebp // step the rotated 1/z
754: adcl $0,%ebp // fold the carry out of bit 31 back into bit 0
755: addl tstep,%edx // advance the t fraction
756: sbbl %eax,%eax // -1 if t carried, else 0
757: addl sstep,%ebx // advance the s fraction
758: adcl advancetable+4(,%eax,4),%esi // step the source pointer, plus the s carry
759: LLEntry7_8:
760: cmpw 2(%ecx),%bp
1.1.1.3 ! root 761: jl Lp10
1.1 root 762: movb (%esi),%al
1.1.1.3 ! root 763: cmpb $(TRANSPARENT_COLOR),%al
! 764: jz Lp10
1.1 root 765: movw %bp,2(%ecx)
766: movb %al,1(%edi)
1.1.1.3 ! root 767: Lp10:
1.1 root 768: addl izistep,%ebp
769: adcl $0,%ebp
770: addl tstep,%edx
771: sbbl %eax,%eax
772: addl sstep,%ebx
773: adcl advancetable+4(,%eax,4),%esi
774: LLEntry6_8:
775: cmpw 4(%ecx),%bp
1.1.1.3 ! root 776: jl Lp11
1.1 root 777: movb (%esi),%al
1.1.1.3 ! root 778: cmpb $(TRANSPARENT_COLOR),%al
! 779: jz Lp11
1.1 root 780: movw %bp,4(%ecx)
781: movb %al,2(%edi)
1.1.1.3 ! root 782: Lp11:
1.1 root 783: addl izistep,%ebp
784: adcl $0,%ebp
785: addl tstep,%edx
786: sbbl %eax,%eax
787: addl sstep,%ebx
788: adcl advancetable+4(,%eax,4),%esi
789: LLEntry5_8:
790: cmpw 6(%ecx),%bp
1.1.1.3 ! root 791: jl Lp12
1.1 root 792: movb (%esi),%al
1.1.1.3 ! root 793: cmpb $(TRANSPARENT_COLOR),%al
! 794: jz Lp12
1.1 root 795: movw %bp,6(%ecx)
796: movb %al,3(%edi)
1.1.1.3 ! root 797: Lp12:
1.1 root 798: addl izistep,%ebp
799: adcl $0,%ebp
800: addl tstep,%edx
801: sbbl %eax,%eax
802: addl sstep,%ebx
803: adcl advancetable+4(,%eax,4),%esi
804: LLEntry4_8:
805: cmpw 8(%ecx),%bp
1.1.1.3 ! root 806: jl Lp13
1.1 root 807: movb (%esi),%al
1.1.1.3 ! root 808: cmpb $(TRANSPARENT_COLOR),%al
! 809: jz Lp13
1.1 root 810: movw %bp,8(%ecx)
811: movb %al,4(%edi)
1.1.1.3 ! root 812: Lp13:
1.1 root 813: addl izistep,%ebp
814: adcl $0,%ebp
815: addl tstep,%edx
816: sbbl %eax,%eax
817: addl sstep,%ebx
818: adcl advancetable+4(,%eax,4),%esi
819: LLEntry3_8:
820: cmpw 10(%ecx),%bp
1.1.1.3 ! root 821: jl Lp14
1.1 root 822: movb (%esi),%al
1.1.1.3 ! root 823: cmpb $(TRANSPARENT_COLOR),%al
! 824: jz Lp14
1.1 root 825: movw %bp,10(%ecx)
826: movb %al,5(%edi)
1.1.1.3 ! root 827: Lp14:
1.1 root 828: addl izistep,%ebp
829: adcl $0,%ebp
830: addl tstep,%edx
831: sbbl %eax,%eax
832: addl sstep,%ebx
833: adcl advancetable+4(,%eax,4),%esi
834: LLEntry2_8:
835: cmpw 12(%ecx),%bp
1.1.1.3 ! root 836: jl Lp15
1.1 root 837: movb (%esi),%al
1.1.1.3 ! root 838: cmpb $(TRANSPARENT_COLOR),%al
! 839: jz Lp15
1.1 root 840: movw %bp,12(%ecx)
841: movb %al,6(%edi)
1.1.1.3 ! root 842: Lp15:
1.1 root 843: addl izistep,%ebp
844: adcl $0,%ebp
845: addl tstep,%edx
846: sbbl %eax,%eax
847: addl sstep,%ebx
848: adcl advancetable+4(,%eax,4),%esi
849: // final pixel of the span: drawn with no stepping afterwards
850: LEndSpan:
851: cmpw 14(%ecx),%bp
1.1.1.3 ! root 852: jl Lp16
1.1 root 853: movb (%esi),%al // load the texel for the span's final pixel
1.1.1.3 ! root 854: cmpb $(TRANSPARENT_COLOR),%al
! 855: jz Lp16
1.1 root 856: movw %bp,14(%ecx)
857: movb %al,7(%edi)
1.1.1.3 ! root 858: Lp16:
1.1 root 859:
860: //
861: // clear s/z, t/z, 1/z from FP stack
862: //
863: fstp %st(0)
864: fstp %st(0)
865: fstp %st(0)
866:
867: popl %ebx // restore spans pointer
868: LNextSpan:
1.1.1.3 ! root 869: addl $(sspan_t_size),%ebx // point to next span
1.1 root 870: movl sspan_t_count(%ebx),%ecx
871: cmpl $0,%ecx // any more spans?
872: jg LSpanLoop // yes
873: jz LNextSpan // yes, but this one's empty
874: // a negative count falls through: the span list is finished
875: popl %ebx // restore register variables
876: popl %esi
877: popl %edi
878: popl %ebp // restore the caller's stack frame
879: ret
880:
881: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.