|
|
1.1 root 1: //
2: // d_spr8.s
3: // x86 assembly-language horizontal 8-bpp transparent span-drawing code.
4: //
5:
6: #include "asm_i386.h"
7: #include "quakeasm.h"
8: #include "asm_draw.h"
9:
10: #if id386
11:
12: //----------------------------------------------------------------------
13: // 8-bpp horizontal span drawing code for polygons, with transparency.
14: //----------------------------------------------------------------------
15:
1.1.1.2 ! root 16: .data
! 17:
// Working storage, FP constants and lookup tables used by D_SpriteDrawSpans.
// The routine keeps its per-span state in these statics rather than on the
// stack.
! 18: .align 4
// s/t hold the clamped texture coordinates at the start of the current
// segment, snext/tnext those at its end; sfracf/tfracf hold the fractional
// parts shifted into the top 16 bits.
! 19: s: .long 0
! 20: t: .long 0
! 21: snext: .long 0
! 22: tnext: .long 0
! 23: sfracf: .long 0
! 24: tfracf: .long 0
! 25: pbase: .long 0
// NOTE(review): pdestspan is only mentioned in a comment in the visible code;
// no instruction here reads or writes it.
! 26: pdestspan: .long 0
// per-8-pixel steps of 1/z, s/z and t/z (stored as floats by the prologue)
! 27: zi8stepu: .long 0
! 28: sdivz8stepu: .long 0
! 29: tdivz8stepu: .long 0
! 30: fp_64k: .long 0x47800000 // (float)0x10000
! 31: fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000
! 32: fp_8: .long 0x41000000 // (float)8
! 33: spancountminus1: .long 0
// izistep/izi are stored rotated by 16 bits (see the rorl $16 at each store),
// so the upper half of the fixed-point value sits in the low word.
! 34: izistep: .long 0
! 35: izi: .long 0
! 36: pz: .long 0
! 37:
! 38: // 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 1.31 form (0x40000000 = 0.5).
// No dummy slot is stored: the user indexes this as
// reciprocal_table-8(,%ecx,4) with %ecx = 2..7, so index 2 lands on the
// first entry.
! 39: reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000
! 40: .long 0x19999999, 0x15555555, 0x12492492
! 41:
// Entry points into the unrolled last-segment pixel code, indexed by the
// number of steps left (pixels - 1); slot 0 is never used as a target because
// the zero-step case branches to LNoSteps instead.
! 42: entryvec_table: .long 0, LEntry2_8, LEntry3_8, LEntry4_8
! 43: .long LEntry5_8, LEntry6_8, LEntry7_8, LEntry8_8
! 44:
! 45: //
! 46: // advancetable is 8 bytes, but points to the middle of that range so negative
! 47: // offsets will work
! 48: //
// In practice the code addresses it as advancetable+4(,%eax,4) with %eax = 0
// or -1: advancetable+4 is the base source advance per pixel, advancetable+0
// is the advance used when the t fraction carries (one extra cachewidth).
! 49: advancetable: .long 0, 0
// fractional per-pixel steps in s and t, left-justified in 32 bits
! 50: sstep: .long 0
! 51: tstep: .long 0
! 52:
1.1 root 53: .text
54:
55: // out-of-line, rarely-needed clamping code
// Each stub loads the clamped value into the register being tested and jumps
// back to the matching LClampReentryN label in D_SpriteDrawSpans.
56:
// Stubs 0 and 1 clamp the start-of-span s (%esi) and t (%edx). They are
// entered through LClampHighOrLowN by a "ja" after an unsigned compare against
// the extent, with the flags of that compare still live: "jg" (signed greater)
// means the value is really above the extent, otherwise it was negative and is
// clamped to 0.
57: LClampHigh0:
58: movl C(bbextents),%esi
59: jmp LClampReentry0
60: LClampHighOrLow0:
61: jg LClampHigh0
62: xorl %esi,%esi
63: jmp LClampReentry0
64:
65: LClampHigh1:
66: movl C(bbextentt),%edx
67: jmp LClampReentry1
68: LClampHighOrLow1:
69: jg LClampHigh1
70: xorl %edx,%edx
71: jmp LClampReentry1
72:
// Stubs 2-5 clamp the end-of-segment snext/tnext values. The low bound is the
// constant 2048, the high bound is bbextents (s) or bbextentt (t).
// Stubs 2/3 serve the full 8-pixel segment path (%ebp = snext, %ecx = tnext).
73: LClampLow2:
74: movl $2048,%ebp
75: jmp LClampReentry2
76: LClampHigh2:
77: movl C(bbextents),%ebp
78: jmp LClampReentry2
79:
80: LClampLow3:
81: movl $2048,%ecx
82: jmp LClampReentry3
83: LClampHigh3:
84: movl C(bbextentt),%ecx
85: jmp LClampReentry3
86:
// Stubs 4/5 serve the last-segment path (%eax = snext, %ebx = tnext).
87: LClampLow4:
88: movl $2048,%eax
89: jmp LClampReentry4
90: LClampHigh4:
91: movl C(bbextents),%eax
92: jmp LClampReentry4
93:
94: LClampLow5:
95: movl $2048,%ebx
96: jmp LClampReentry5
97: LClampHigh5:
98: movl C(bbextentt),%ebx
99: jmp LClampReentry5
100:
101:
// Offset from %esp of the first stack argument (the span list pointer) once
// the four registers below have been pushed: 4 bytes of return address plus
// 16 bytes of saved registers.
102: #define pspans 4+16
103:
// D_SpriteDrawSpans: walks the span list passed on the stack and draws each
// span 8 pixels at a time, z-testing every pixel against the z-buffer and
// skipping texels equal to TRANSPARENT_COLOR.
// Saves and restores %ebp, %edi, %esi and %ebx; %eax, %ecx, %edx, the flags
// and the FP stack contents are used as scratch.
104: .align 4
105: .globl C(D_SpriteDrawSpans)
106: C(D_SpriteDrawSpans):
107: pushl %ebp // preserve caller's stack frame
108: pushl %edi
109: pushl %esi // preserve register variables
110: pushl %ebx
111:
112: //
113: // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
114: // and span list pointers, and 1/z step in 0.32 fixed-point
115: //
116: // FIXME: any overlap from rearranging?
// The integer moves are interleaved with the FP loads/multiplies. After the
// fourth multiply the FP stack is
//   zistepu*fp_64kx64k | zistepu*8 | tdivzstepu*8 | sdivzstepu*8
// and the fxch brings sdivzstepu*8 to the top so the four stores below pop
// the values in the order sdivz8stepu, zi8stepu, tdivz8stepu, izistep.
117: flds C(d_sdivzstepu)
118: fmuls fp_8
119: movl C(cacheblock),%edx
120: flds C(d_tdivzstepu)
121: fmuls fp_8
122: movl pspans(%esp),%ebx // point to the first span descriptor
123: flds C(d_zistepu)
124: fmuls fp_8
125: movl %edx,pbase // pbase = cacheblock
126: flds C(d_zistepu)
127: fmuls fp_64kx64k
128: fxch %st(3)
129: fstps sdivz8stepu
130: fstps zi8stepu
131: fstps tdivz8stepu
132: fistpl izistep
133: movl izistep,%eax
134: rorl $16,%eax // put upper 16 bits in low word
135: movl sspan_t_count(%ebx),%ecx
136: movl %eax,izistep
137:
// %ecx = pixel count of the first span; a count <= 0 skips straight to the
// span-advance code (nothing has been pushed for this span yet).
138: cmpl $0,%ecx
139: jle LNextSpan
140:
// Per-span setup. On entry %ebx points at the span descriptor and %ecx holds
// its pixel count (> 0). On exit the FP stack is 1/z | z*64k | t/z | s/z,
// izi and pz are set, %edi points at the first destination pixel,
// %esi = sadjust, %edx = tadjust, and the span pointer has been pushed.
141: LSpanLoop:
142:
143: //
144: // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
145: // initial s and t values
146: //
147: // FIXME: pipeline FILD?
148: fildl sspan_t_v(%ebx)
149: fildl sspan_t_u(%ebx)
150:
151: fld %st(1) // dv | du | dv
152: fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
153: fld %st(1) // du | dv*d_sdivzstepv | du | dv
154: fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
155: fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
156: fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
157: // dv*d_sdivzstepv | du | dv
158: fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
159: // dv*d_sdivzstepv | du | dv
160: faddp %st(0),%st(2) // du*d_tdivzstepu |
161: // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
162: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
163: // du*d_tdivzstepu | du | dv
164: fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
165: // du*d_tdivzstepu | du | dv
166: fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
167: // du*d_sdivzstepu + dv*d_sdivzstepv |
168: // du*d_tdivzstepu | du | dv
169: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
170: // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
171: fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
172: // du*d_sdivzstepu; stays in %st(2) at end
173: fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
174: // s/z
175: fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
176: // du*d_tdivzstepu | du | s/z
177: fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
178: // du*d_tdivzstepu | du | s/z
179: faddp %st(0),%st(2) // dv*d_zistepv |
180: // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
181: fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
182: // dv*d_zistepv | s/z
183: fmuls C(d_zistepu) // du*d_zistepu |
184: // dv*d_tdivzstepv + du*d_tdivzstepu |
185: // dv*d_zistepv | s/z
186: fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
187: // du*d_zistepu | dv*d_zistepv | s/z
188: fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
189: // du*d_tdivzstepu; stays in %st(1) at end
190: fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
191: faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
192:
193: flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
194: fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
195: fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
196: // du*d_zistepu; stays in %st(0) at end
197: // 1/z | fp_64k | t/z | s/z
198:
// Make a scaled copy of 1/z for the integer z-buffer value, then start the
// divide fp_64k / (1/z); the scaled copy is popped into izi by the fistpl.
199: fld %st(0) // FIXME: get rid of stall on FMUL?
200: fmuls fp_64kx64k
201: fxch %st(1)
202:
203: //
204: // calculate and clamp s & t
205: //
206: fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z
207: fxch %st(1)
208:
209: fistpl izi // 0.32 fixed-point 1/z
210: movl izi,%ebp
211:
212: //
213: // set pz to point to the first z-buffer pixel in the span
214: //
// pz = d_pzbuffer + v * d_zrowbytes + u * 2. The one-operand imull leaves the
// product in %edx:%eax, so %edx is clobbered here (it is reloaded below).
215: rorl $16,%ebp // put upper 16 bits in low word
216: movl sspan_t_v(%ebx),%eax
217: movl %ebp,izi
218: movl sspan_t_u(%ebx),%ebp
219: imull C(d_zrowbytes)
220: shll $1,%ebp // a word per pixel
221: addl C(d_pzbuffer),%eax
222: addl %ebp,%eax
223: movl %eax,pz
224:
225: //
226: // point %edi to the first pixel in the span
227: //
// %edi = d_viewbuffer + d_scantable[v] + u. %edx/%esi are preloaded with
// tadjust/sadjust for the clamp code at LFDIVInFlight1.
228: movl C(d_viewbuffer),%ebp
229: movl sspan_t_v(%ebx),%eax
230: pushl %ebx // preserve spans pointer
231: movl C(tadjust),%edx
232: movl C(sadjust),%esi
233: movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
234: addl %ebp,%edi
235: movl sspan_t_u(%ebx),%ebp
236: addl %ebp,%edi // pdest = &pdestspan[scans->u];
237:
238: //
239: // now start the FDIV for the end of the span
240: //
// Three cases, selected on the span's pixel count in %ecx:
//   count > 8  : LSetupNotLast1 - advance s/z, t/z, 1/z by the 8-pixel steps
//   count 2..8 : fall through - advance by (count-1) single-pixel steps
//   count == 1 : LCleanup1 - no end point needed, so no FDIV is started
// In the two <= 8 cases %ecx has been decremented, so it holds count-1 from
// here on. Every case first finishes s = s/z * z*64k and t = t/z * z*64k and
// stores them with fistpl. FP stack on entry: 1/z | z*64k | t/z | s/z.
241: cmpl $8,%ecx
242: ja LSetupNotLast1
243:
244: decl %ecx
245: jz LCleanup1 // if only one pixel, no need to start an FDIV
246: movl %ecx,spancountminus1
247:
248: // finish up the s and t calcs
249: fxch %st(1) // z*64k | 1/z | t/z | s/z
250:
251: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root 252: fmul %st(4),%st // s | z*64k | 1/z | t/z | s/z
1.1 root 253: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root 254: fmul %st(3),%st // t | s | 1/z | t/z | s/z
1.1 root 255: fxch %st(1) // s | t | 1/z | t/z | s/z
256: fistpl s // 1/z | t | t/z | s/z
257: fistpl t // 1/z | t/z | s/z
258:
// Step 1/z, t/z and s/z forward by (count-1) pixels; the stack comments below
// show only the temporaries pushed on top of 1/z | t/z | s/z, and the three
// faddp instructions fold them into those entries.
259: fildl spancountminus1
260:
261: flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
262: flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
263: fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
264: fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
265: fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
266: fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
267: fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
268: // _d_tdivzstepu*scm1
269: fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
270: // _d_tdivzstepu*scm1
271: faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
272: fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
273: faddp %st(0),%st(3) // _d_sdivzstepu*scm1
274: faddp %st(0),%st(3)
275:
276: flds fp_64k
1.1.1.2 ! root 277: fdiv %st(1),%st // this is what we've gone to all this trouble to
1.1 root 278: // overlap
279: jmp LFDIVInFlight1
280:
281: LCleanup1:
282: // finish up the s and t calcs
283: fxch %st(1) // z*64k | 1/z | t/z | s/z
284:
285: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root 286: fmul %st(4),%st // s | z*64k | 1/z | t/z | s/z
1.1 root 287: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root 288: fmul %st(3),%st // t | s | 1/z | t/z | s/z
1.1 root 289: fxch %st(1) // s | t | 1/z | t/z | s/z
290: fistpl s // 1/z | t | t/z | s/z
291: fistpl t // 1/z | t/z | s/z
292: jmp LFDIVInFlight1
293:
294: .align 4
295: LSetupNotLast1:
296: // finish up the s and t calcs
297: fxch %st(1) // z*64k | 1/z | t/z | s/z
298:
299: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root 300: fmul %st(4),%st // s | z*64k | 1/z | t/z | s/z
1.1 root 301: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root 302: fmul %st(3),%st // t | s | 1/z | t/z | s/z
1.1 root 303: fxch %st(1) // s | t | 1/z | t/z | s/z
304: fistpl s // 1/z | t | t/z | s/z
305: fistpl t // 1/z | t/z | s/z
306:
// advance 1/z, s/z and t/z by the precomputed 8-pixel steps
307: fadds zi8stepu
308: fxch %st(2)
309: fadds sdivz8stepu
310: fxch %st(2)
311: flds tdivz8stepu
312: faddp %st(0),%st(2)
313: flds fp_64k
1.1.1.2 ! root 314: fdiv %st(1),%st // z = 1/1/z
! 315: // this is what we've gone to all this trouble to
! 316: // overlap
// Integer work done while the FDIV (if one was started) completes: bias s and
// t by sadjust/tadjust (already in %esi/%edx), clamp them, split them into
// whole and fractional parts, and form the starting source address in %esi.
1.1 root 317: LFDIVInFlight1:
318:
319: addl s,%esi
320: addl t,%edx
321: movl C(bbextents),%ebx
322: movl C(bbextentt),%ebp
// Unsigned compare: a single "ja" catches both values above the extent and
// negative values; the out-of-line stub tells them apart with "jg".
323: cmpl %ebx,%esi
324: ja LClampHighOrLow0
325: LClampReentry0:
326: movl %esi,s
327: movl pbase,%ebx
328: shll $16,%esi
// movl does not alter the flags, so the "ja" below still tests the cmpl.
329: cmpl %ebp,%edx
330: movl %esi,sfracf
331: ja LClampHighOrLow1
332: LClampReentry1:
333: movl %edx,t
334: movl s,%esi // sfrac = scans->sfrac;
335: shll $16,%edx
336: movl t,%eax // tfrac = scans->tfrac;
337: sarl $16,%esi
338: movl %edx,tfracf
339:
340: //
341: // calculate the texture starting address
342: //
343: sarl $16,%eax
344: addl %ebx,%esi
345: imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
346: addl %eax,%esi // psource = pbase + (sfrac >> 16) +
347: // ((tfrac >> 16) * cachewidth);
348:
349: //
350: // determine whether last span or not
351: //
// %ecx is still the original count when it was > 8, otherwise count-1 (<= 7),
// so "not above 8" selects the last-segment path.
352: cmpl $8,%ecx
353: jna LLastSegment
354:
355: //
356: // not the last segment; do full 8-wide segment
357: //
// Entered with %ecx = remaining pixel count (> 8), %esi = source texel
// pointer, %edi = destination pixel pointer, and an FDIV result on top of the
// FP stack: z*64k | 1/z | t/z | s/z.
358: LNotLastSegment:
359:
360: //
361: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
362: // get there
363: //
364:
365: // pick up after the FDIV that was left in flight previously
366:
1.1.1.2 ! root 367: fld %st(0) // duplicate it
! 368: fmul %st(4),%st // s = s/z * z
1.1 root 369: fxch %st(1)
1.1.1.2 ! root 370: fmul %st(3),%st // t = t/z * z
1.1 root 371: fxch %st(1)
372: fistpl snext
373: fistpl tnext
374: movl snext,%eax
375: movl tnext,%edx
376:
377: subl $8,%ecx // count off this segment's pixels
378: movl C(sadjust),%ebp
1.1.1.2 ! root 379: push %ecx // remember count of remaining pixels
1.1 root 380: movl C(tadjust),%ecx
381:
382: addl %eax,%ebp
383: addl %edx,%ecx
384:
385: movl C(bbextents),%eax
386: movl C(bbextentt),%edx
387:
// clamp snext (%ebp) and tnext (%ecx) to [2048, bbextents] / [2048, bbextentt]
388: cmpl $2048,%ebp
389: jl LClampLow2
390: cmpl %eax,%ebp
391: ja LClampHigh2
392: LClampReentry2:
393:
394: cmpl $2048,%ecx
395: jl LClampLow3
396: cmpl %edx,%ecx
397: ja LClampHigh3
398: LClampReentry3:
399:
400: movl %ebp,snext
401: movl %ecx,tnext
402:
403: subl s,%ebp
404: subl t,%ecx
405:
406: //
407: // set up advancetable
408: //
// %ebp/%ecx hold the s/t deltas across the whole 8-pixel segment. Shifting
// right by 19 (= 16 + 3) yields the whole-texel step per pixel; shifting left
// by 13 (= 16 - 3) left-justifies the per-pixel fractional step.
409: movl %ecx,%eax
410: movl %ebp,%edx
411: sarl $19,%edx // sstep >>= 16;
412: movl C(cachewidth),%ebx
413: sarl $19,%eax // tstep >>= 16;
// the multiply is skipped when the whole-texel t step is zero
414: jz LIsZero
415: imull %ebx,%eax // (tstep >> 16) * cachewidth;
416: LIsZero:
417: addl %edx,%eax // add in sstep
418: // (tstep >> 16) * cachewidth + (sstep >> 16);
419: movl tfracf,%edx
420: movl %eax,advancetable+4 // advance base in t
421: addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
422: // (sstep >> 16);
423: shll $13,%ebp // left-justify sstep fractional part
424: movl %ebp,sstep
425: movl sfracf,%ebx
426: shll $13,%ecx // left-justify tstep fractional part
427: movl %eax,advancetable // advance extra in t
428: movl %ecx,tstep
429:
430: movl pz,%ecx
431: movl izi,%ebp
432:
// Unrolled pixel code. Register roles from here to the end of the segment:
//   %ecx = z-buffer pointer   %bp  = current 1/z (low word of rotated izi)
//   %esi = source texel       %edi = destination pixel
//   %edx = t fraction         %ebx = s fraction        %eax = scratch
// Each pixel is skipped when %bp is (signed) less than the stored z value or
// when the texel equals TRANSPARENT_COLOR; otherwise both z and colour are
// written. The step that follows adds izistep with end-around carry (the
// value is rotated by 16), then advances the t and s fractions; sbbl turns the
// t carry into an index of 0 or -1 into advancetable, and adcl adds the s
// carry to the source pointer along with the table entry.
433: cmpw (%ecx),%bp
1.1.1.2 ! root 434: jl 1f
1.1 root 435: movb (%esi),%al // get first source texel
1.1.1.2 ! root 436: cmpb $TRANSPARENT_COLOR,%al
! 437: jz 1f
1.1 root 438: movw %bp,(%ecx)
439: movb %al,(%edi) // store first dest pixel
1.1.1.2 ! root 440: 1:
1.1 root 441: addl izistep,%ebp
442: adcl $0,%ebp
443: addl tstep,%edx // advance tfrac fractional part by tstep frac
444:
445: sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
446: addl sstep,%ebx // advance sfrac fractional part by sstep frac
447: adcl advancetable+4(,%eax,4),%esi // point to next source texel
448:
449: cmpw 2(%ecx),%bp
1.1.1.2 ! root 450: jl 1f
1.1 root 451: movb (%esi),%al
1.1.1.2 ! root 452: cmpb $TRANSPARENT_COLOR,%al
! 453: jz 1f
1.1 root 454: movw %bp,2(%ecx)
455: movb %al,1(%edi)
1.1.1.2 ! root 456: 1:
1.1 root 457: addl izistep,%ebp
458: adcl $0,%ebp
459: addl tstep,%edx
460: sbbl %eax,%eax
461: addl sstep,%ebx
462: adcl advancetable+4(,%eax,4),%esi
463:
464: cmpw 4(%ecx),%bp
1.1.1.2 ! root 465: jl 1f
1.1 root 466: movb (%esi),%al
1.1.1.2 ! root 467: cmpb $TRANSPARENT_COLOR,%al
! 468: jz 1f
1.1 root 469: movw %bp,4(%ecx)
470: movb %al,2(%edi)
1.1.1.2 ! root 471: 1:
1.1 root 472: addl izistep,%ebp
473: adcl $0,%ebp
474: addl tstep,%edx
475: sbbl %eax,%eax
476: addl sstep,%ebx
477: adcl advancetable+4(,%eax,4),%esi
478:
479: cmpw 6(%ecx),%bp
1.1.1.2 ! root 480: jl 1f
1.1 root 481: movb (%esi),%al
1.1.1.2 ! root 482: cmpb $TRANSPARENT_COLOR,%al
! 483: jz 1f
1.1 root 484: movw %bp,6(%ecx)
485: movb %al,3(%edi)
1.1.1.2 ! root 486: 1:
1.1 root 487: addl izistep,%ebp
488: adcl $0,%ebp
489: addl tstep,%edx
490: sbbl %eax,%eax
491: addl sstep,%ebx
492: adcl advancetable+4(,%eax,4),%esi
493:
494: cmpw 8(%ecx),%bp
1.1.1.2 ! root 495: jl 1f
1.1 root 496: movb (%esi),%al
1.1.1.2 ! root 497: cmpb $TRANSPARENT_COLOR,%al
! 498: jz 1f
1.1 root 499: movw %bp,8(%ecx)
500: movb %al,4(%edi)
1.1.1.2 ! root 501: 1:
1.1 root 502: addl izistep,%ebp
503: adcl $0,%ebp
504: addl tstep,%edx
505: sbbl %eax,%eax
506: addl sstep,%ebx
507: adcl advancetable+4(,%eax,4),%esi
508:
509: //
510: // start FDIV for end of next segment in flight, so it can overlap
511: //
// The remaining-pixel count is popped into %eax only for this decision and is
// pushed back at LFDIVInFlight2. When 8 or fewer pixels remain, the value
// pushed back is count-1, which is what the last-segment code expects.
512: popl %eax
513: cmpl $8,%eax // more than one segment after this?
514: ja LSetupNotLast2 // yes
515:
516: decl %eax
517: jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
518: movl %eax,spancountminus1
519: fildl spancountminus1
520:
521: flds C(d_zistepu) // _d_zistepu | spancountminus1
522: fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
523: flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
524: fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
525: fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
526: faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
527: fxch %st(1) // scm1 | _d_tdivzstepu*scm1
528: fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
529: fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
530: faddp %st(0),%st(3) // _d_sdivzstepu*scm1
531: flds fp_64k // 64k | _d_sdivzstepu*scm1
532: fxch %st(1) // _d_sdivzstepu*scm1 | 64k
533: faddp %st(0),%st(4) // 64k
534:
1.1.1.2 ! root 535: fdiv %st(1),%st // this is what we've gone to all this trouble to
1.1 root 536: // overlap
537: jmp LFDIVInFlight2
538:
539: .align 4
540: LSetupNotLast2:
541: fadds zi8stepu
542: fxch %st(2)
543: fadds sdivz8stepu
544: fxch %st(2)
545: flds tdivz8stepu
546: faddp %st(0),%st(2)
547: flds fp_64k
1.1.1.2 ! root 548: fdiv %st(1),%st // z = 1/1/z
! 549: // this is what we've gone to all this trouble to
! 550: // overlap
1.1 root 551: LFDIVInFlight2:
552: pushl %eax
553:
// remaining three pixels of the segment, drawn while the FDIV runs
554: cmpw 10(%ecx),%bp
1.1.1.2 ! root 555: jl 1f
1.1 root 556: movb (%esi),%al
1.1.1.2 ! root 557: cmpb $TRANSPARENT_COLOR,%al
! 558: jz 1f
1.1 root 559: movw %bp,10(%ecx)
560: movb %al,5(%edi)
1.1.1.2 ! root 561: 1:
1.1 root 562: addl izistep,%ebp
563: adcl $0,%ebp
564: addl tstep,%edx
565: sbbl %eax,%eax
566: addl sstep,%ebx
567: adcl advancetable+4(,%eax,4),%esi
568:
569: cmpw 12(%ecx),%bp
1.1.1.2 ! root 570: jl 1f
1.1 root 571: movb (%esi),%al
1.1.1.2 ! root 572: cmpb $TRANSPARENT_COLOR,%al
! 573: jz 1f
1.1 root 574: movw %bp,12(%ecx)
575: movb %al,6(%edi)
1.1.1.2 ! root 576: 1:
1.1 root 577: addl izistep,%ebp
578: adcl $0,%ebp
579: addl tstep,%edx
580: sbbl %eax,%eax
581: addl sstep,%ebx
582: adcl advancetable+4(,%eax,4),%esi
583:
584: cmpw 14(%ecx),%bp
1.1.1.2 ! root 585: jl 1f
1.1 root 586: movb (%esi),%al
1.1.1.2 ! root 587: cmpb $TRANSPARENT_COLOR,%al
! 588: jz 1f
1.1 root 589: movw %bp,14(%ecx)
590: movb %al,7(%edi)
1.1.1.2 ! root 591: 1:
1.1 root 592: addl izistep,%ebp
593: adcl $0,%ebp
594: addl tstep,%edx
595: sbbl %eax,%eax
596: addl sstep,%ebx
597: adcl advancetable+4(,%eax,4),%esi
598:
// Advance the destination and z pointers past the 8 pixels just handled, make
// the end-of-segment s/t the new start values, and save the running state.
599: addl $8,%edi
600: addl $16,%ecx
601: movl %edx,tfracf
602: movl snext,%edx
603: movl %ebx,sfracf
604: movl tnext,%ebx
605: movl %edx,s
606: movl %ebx,t
607:
608: movl %ecx,pz
609: movl %ebp,izi
610:
611: popl %ecx // retrieve count
612:
613: //
614: // determine whether last span or not
615: //
616: cmpl $8,%ecx // are there multiple segments remaining?
617: ja LNotLastSegment // yes
618:
619: //
620: // last segment of scan
621: //
// Entered with %ecx = pixels left minus one (0..7), i.e. the number of steps
// between the first and last pixel of this segment.
622: LLastSegment:
623:
624: //
625: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
626: // get there. The number of pixels left is variable, and we want to land on the
627: // last pixel, not step one past it, so we can't run into arithmetic problems
628: //
629: testl %ecx,%ecx
630: jz LNoSteps // just draw the last pixel and we're done
631:
632: // pick up after the FDIV that was left in flight previously
633:
634:
1.1.1.2 ! root 635: fld %st(0) // duplicate it
! 636: fmul %st(4),%st // s = s/z * z
1.1 root 637: fxch %st(1)
1.1.1.2 ! root 638: fmul %st(3),%st // t = t/z * z
1.1 root 639: fxch %st(1)
640: fistpl snext
641: fistpl tnext
642:
643: movl C(tadjust),%ebx
644: movl C(sadjust),%eax
645:
646: addl snext,%eax
647: addl tnext,%ebx
648:
649: movl C(bbextents),%ebp
650: movl C(bbextentt),%edx
651:
// clamp snext (%eax) and tnext (%ebx) to [2048, bbextents] / [2048, bbextentt]
652: cmpl $2048,%eax
653: jl LClampLow4
654: cmpl %ebp,%eax
655: ja LClampHigh4
656: LClampReentry4:
657: movl %eax,snext
658:
659: cmpl $2048,%ebx
660: jl LClampLow5
661: cmpl %edx,%ebx
662: ja LClampHigh5
663: LClampReentry5:
664:
665: cmpl $1,%ecx // don't bother
666: je LOnlyOneStep // if two pixels in segment, there's only one step,
667: // of the segment length
668: subl s,%eax
669: subl t,%ebx
670:
// Divide the deltas by the step count via the reciprocal table. The
// one-operand imull leaves the 64-bit product in %edx:%eax; the high half in
// %edx is the per-pixel step. %ecx is 2..7 here, hence the -8 table bias.
671: addl %eax,%eax // convert to 15.17 format so multiply by 1.31
672: addl %ebx,%ebx // reciprocal yields 16.48
673: imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
674: movl %edx,%ebp
675:
676: movl %ebx,%eax
677: imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
678:
// On entry to LSetEntryvec: %ebp = per-pixel s step, %edx = per-pixel t step,
// %ecx = step count (1..7).
679: LSetEntryvec:
680: //
681: // set up advancetable
682: //
1.1.1.2 ! root 683: movl entryvec_table(,%ecx,4),%ebx
1.1 root 684: movl %edx,%eax
685: pushl %ebx // entry point into code for RET later
686: movl %ebp,%ecx
687: sarl $16,%ecx // sstep >>= 16;
688: movl C(cachewidth),%ebx
689: sarl $16,%edx // tstep >>= 16;
690: jz LIsZeroLast
691: imull %ebx,%edx // (tstep >> 16) * cachewidth;
692: LIsZeroLast:
693: addl %ecx,%edx // add in sstep
694: // (tstep >> 16) * cachewidth + (sstep >> 16);
695: movl tfracf,%ecx
696: movl %edx,advancetable+4 // advance base in t
697: addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
698: // (sstep >> 16);
699: shll $16,%ebp // left-justify sstep fractional part
700: movl sfracf,%ebx
701: shll $16,%eax // left-justify tstep fractional part
702: movl %edx,advancetable // advance extra in t
703:
704: movl %eax,tstep
705: movl %ebp,sstep
706: movl %ecx,%edx
707:
708: movl pz,%ecx
709: movl izi,%ebp
710:
// "ret" pops the entry address pushed above, so this is an indirect jump into
// the unrolled pixel code rather than a return to the caller.
711: ret // jump to the number-of-pixels handler
712:
713: //----------------------------------------
714:
// Single pixel left: LEndSpan addresses the pixel at 7(%edi) / 14(%ecx), so
// both pointers are moved back by that amount first.
715: LNoSteps:
716: movl pz,%ecx
717: subl $7,%edi // adjust for hardwired offset
718: subl $14,%ecx
719: jmp LEndSpan
720:
721:
// Exactly one step: the step is the whole delta, so no division is needed.
722: LOnlyOneStep:
723: subl s,%eax
724: subl t,%ebx
725: movl %eax,%ebp
726: movl %ebx,%edx
727: jmp LSetEntryvec
728:
729: //----------------------------------------
730:
// Last-segment pixel code. LEntryN_8 is reached through entryvec_table when N
// pixels remain. The unrolled ladder below uses fixed offsets as if the
// segment ended at 7(%edi) / 14(%ecx), so each entry stub first moves %edi and
// %ecx back by (8 - N) pixels and then jumps into the ladder at the matching
// LLEntryN_8 label. Registers are as set up at LSetEntryvec: %ecx = z-buffer
// pointer, %bp = 1/z, %esi = source, %edi = destination, %edx/%ebx = t/s
// fractions.
1.1.1.2 ! root 731: LEntry2_8:
1.1 root 732: subl $6,%edi // adjust for hardwired offsets
733: subl $12,%ecx
// NOTE(review): this load looks redundant - LLEntry2_8 reloads %al from
// (%esi) after its z test, and no other entry stub preloads it.
734: movb (%esi),%al
735: jmp LLEntry2_8
736:
737: //----------------------------------------
738:
1.1.1.2 ! root 739: LEntry3_8:
1.1 root 740: subl $5,%edi // adjust for hardwired offsets
741: subl $10,%ecx
742: jmp LLEntry3_8
743:
744: //----------------------------------------
745:
1.1.1.2 ! root 746: LEntry4_8:
1.1 root 747: subl $4,%edi // adjust for hardwired offsets
748: subl $8,%ecx
749: jmp LLEntry4_8
750:
751: //----------------------------------------
752:
1.1.1.2 ! root 753: LEntry5_8:
1.1 root 754: subl $3,%edi // adjust for hardwired offsets
755: subl $6,%ecx
756: jmp LLEntry5_8
757:
758: //----------------------------------------
759:
1.1.1.2 ! root 760: LEntry6_8:
1.1 root 761: subl $2,%edi // adjust for hardwired offsets
762: subl $4,%ecx
763: jmp LLEntry6_8
764:
765: //----------------------------------------
766:
1.1.1.2 ! root 767: LEntry7_8:
1.1 root 768: decl %edi // adjust for hardwired offsets
769: subl $2,%ecx
770: jmp LLEntry7_8
771:
772: //----------------------------------------
773:
// Eight pixels: no pointer adjustment needed, fall straight into the ladder.
// Every rung z-tests, skips TRANSPARENT_COLOR texels, writes z and colour,
// then steps 1/z and the source pointer exactly as in the full-segment code.
1.1.1.2 ! root 774: LEntry8_8:
1.1 root 775: cmpw (%ecx),%bp
1.1.1.2 ! root 776: jl 1f
1.1 root 777: movb (%esi),%al
1.1.1.2 ! root 778: cmpb $TRANSPARENT_COLOR,%al
! 779: jz 1f
1.1 root 780: movw %bp,(%ecx)
781: movb %al,(%edi)
1.1.1.2 ! root 782: 1:
1.1 root 783: addl izistep,%ebp
784: adcl $0,%ebp
785: addl tstep,%edx
786: sbbl %eax,%eax
787: addl sstep,%ebx
788: adcl advancetable+4(,%eax,4),%esi
789: LLEntry7_8:
790: cmpw 2(%ecx),%bp
1.1.1.2 ! root 791: jl 1f
1.1 root 792: movb (%esi),%al
1.1.1.2 ! root 793: cmpb $TRANSPARENT_COLOR,%al
! 794: jz 1f
1.1 root 795: movw %bp,2(%ecx)
796: movb %al,1(%edi)
1.1.1.2 ! root 797: 1:
1.1 root 798: addl izistep,%ebp
799: adcl $0,%ebp
800: addl tstep,%edx
801: sbbl %eax,%eax
802: addl sstep,%ebx
803: adcl advancetable+4(,%eax,4),%esi
804: LLEntry6_8:
805: cmpw 4(%ecx),%bp
1.1.1.2 ! root 806: jl 1f
1.1 root 807: movb (%esi),%al
1.1.1.2 ! root 808: cmpb $TRANSPARENT_COLOR,%al
! 809: jz 1f
1.1 root 810: movw %bp,4(%ecx)
811: movb %al,2(%edi)
1.1.1.2 ! root 812: 1:
1.1 root 813: addl izistep,%ebp
814: adcl $0,%ebp
815: addl tstep,%edx
816: sbbl %eax,%eax
817: addl sstep,%ebx
818: adcl advancetable+4(,%eax,4),%esi
819: LLEntry5_8:
820: cmpw 6(%ecx),%bp
1.1.1.2 ! root 821: jl 1f
1.1 root 822: movb (%esi),%al
1.1.1.2 ! root 823: cmpb $TRANSPARENT_COLOR,%al
! 824: jz 1f
1.1 root 825: movw %bp,6(%ecx)
826: movb %al,3(%edi)
1.1.1.2 ! root 827: 1:
1.1 root 828: addl izistep,%ebp
829: adcl $0,%ebp
830: addl tstep,%edx
831: sbbl %eax,%eax
832: addl sstep,%ebx
833: adcl advancetable+4(,%eax,4),%esi
834: LLEntry4_8:
835: cmpw 8(%ecx),%bp
1.1.1.2 ! root 836: jl 1f
1.1 root 837: movb (%esi),%al
1.1.1.2 ! root 838: cmpb $TRANSPARENT_COLOR,%al
! 839: jz 1f
1.1 root 840: movw %bp,8(%ecx)
841: movb %al,4(%edi)
1.1.1.2 ! root 842: 1:
1.1 root 843: addl izistep,%ebp
844: adcl $0,%ebp
845: addl tstep,%edx
846: sbbl %eax,%eax
847: addl sstep,%ebx
848: adcl advancetable+4(,%eax,4),%esi
849: LLEntry3_8:
850: cmpw 10(%ecx),%bp
1.1.1.2 ! root 851: jl 1f
1.1 root 852: movb (%esi),%al
1.1.1.2 ! root 853: cmpb $TRANSPARENT_COLOR,%al
! 854: jz 1f
1.1 root 855: movw %bp,10(%ecx)
856: movb %al,5(%edi)
1.1.1.2 ! root 857: 1:
1.1 root 858: addl izistep,%ebp
859: adcl $0,%ebp
860: addl tstep,%edx
861: sbbl %eax,%eax
862: addl sstep,%ebx
863: adcl advancetable+4(,%eax,4),%esi
864: LLEntry2_8:
865: cmpw 12(%ecx),%bp
1.1.1.2 ! root 866: jl 1f
1.1 root 867: movb (%esi),%al
1.1.1.2 ! root 868: cmpb $TRANSPARENT_COLOR,%al
! 869: jz 1f
1.1 root 870: movw %bp,12(%ecx)
871: movb %al,6(%edi)
1.1.1.2 ! root 872: 1:
1.1 root 873: addl izistep,%ebp
874: adcl $0,%ebp
875: addl tstep,%edx
876: sbbl %eax,%eax
877: addl sstep,%ebx
878: adcl advancetable+4(,%eax,4),%esi
879:
// Final pixel of the span: tested and written like the others, but with no
// step afterwards.
880: LEndSpan:
881: cmpw 14(%ecx),%bp
1.1.1.2 ! root 882: jl 1f
1.1 root 883: movb (%esi),%al // load first texel in segment
1.1.1.2 ! root 884: cmpb $TRANSPARENT_COLOR,%al
! 885: jz 1f
1.1 root 886: movw %bp,14(%ecx)
887: movb %al,7(%edi)
1.1.1.2 ! root 888: 1:
1.1 root 889:
890: //
891: // clear s/z, t/z, 1/z from FP stack
892: //
893: fstp %st(0)
894: fstp %st(0)
895: fstp %st(0)
896:
897: popl %ebx // restore spans pointer
// Step to the next span descriptor. A positive count draws that span, a zero
// count skips it, and a negative count ends the list and falls through to the
// epilogue.
898: LNextSpan:
1.1.1.2 ! root 899: addl $sspan_t_size,%ebx // point to next span
1.1 root 900: movl sspan_t_count(%ebx),%ecx
901: cmpl $0,%ecx // any more spans?
902: jg LSpanLoop // yes
903: jz LNextSpan // yes, but this one's empty
904:
905: popl %ebx // restore register variables
906: popl %esi
907: popl %edi
908: popl %ebp // restore the caller's stack frame
909: ret
910:
911: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.