|
|
1.1 root 1: //
2: // d_draw.s
3: // x86 assembly-language horizontal 8-bpp span-drawing code.
4: //
5:
6: #include "asm_i386.h"
7: #include "quakeasm.h"
8: #include "asm_draw.h"
9: #include "d_ifacea.h"
10:
1.1.1.3 ! root 11: #if id386
1.1 root 12:
13: //----------------------------------------------------------------------
14: // 8-bpp horizontal span drawing code for polygons, with no transparency.
15: //
16: // Assumes there is at least one span in pspans, and that every span
17: // contains at least one pixel
18: //----------------------------------------------------------------------
19:
20: .text
21:
22: // out-of-line, rarely-needed clamping code
// Each stub loads a clamped coordinate into the register the main code was
// testing and jumps back to the matching LClampReentryN label inside
// D_DrawSpans8. Stubs 0/1 handle the span-start s/t (%esi/%edx, lower bound
// 0); stubs 2-5 handle the segment-end s/t (lower bound 2048). The upper
// bound is always bbextents for s and bbextentt for t.
23:
24: LClampHigh0:
25: movl C(bbextents),%esi
26: jmp LClampReentry0
// Reached through a `ja` that follows `cmpl bbextents,s`; the flags from that
// compare are still live here, so a signed `jg` tells "above the extent"
// apart from "negative" (which the unsigned `ja` also catches).
27: LClampHighOrLow0:
28: jg LClampHigh0
29: xorl %esi,%esi
30: jmp LClampReentry0
31:
32: LClampHigh1:
33: movl C(bbextentt),%edx
34: jmp LClampReentry1
// same scheme as LClampHighOrLow0, for t in %edx
35: LClampHighOrLow1:
36: jg LClampHigh1
37: xorl %edx,%edx
38: jmp LClampReentry1
39:
// snext clamp for a full 8-pixel segment (value lives in %ebp)
40: LClampLow2:
41: movl $2048,%ebp
42: jmp LClampReentry2
43: LClampHigh2:
44: movl C(bbextents),%ebp
45: jmp LClampReentry2
46:
// tnext clamp for a full 8-pixel segment (value lives in %ecx)
47: LClampLow3:
48: movl $2048,%ecx
49: jmp LClampReentry3
50: LClampHigh3:
51: movl C(bbextentt),%ecx
52: jmp LClampReentry3
53:
// snext clamp for the last segment of a span (value lives in %eax)
54: LClampLow4:
55: movl $2048,%eax
56: jmp LClampReentry4
57: LClampHigh4:
58: movl C(bbextents),%eax
59: jmp LClampReentry4
60:
// tnext clamp for the last segment of a span (value lives in %ebx)
61: LClampLow5:
62: movl $2048,%ebx
63: jmp LClampReentry5
64: LClampHigh5:
65: movl C(bbextentt),%ebx
66: jmp LClampReentry5
67:
68:
69: #define pspans 4+16
// pspans is the %esp-relative offset of the single stack argument once the
// four registers below have been pushed (16 bytes of saved registers plus,
// presumably, the 4-byte return address).
70:
// D_DrawSpans8: walks the linked list of span descriptors starting at the
// pspans argument. For each span it copies 8-bit texels from the cacheblock
// surface into d_viewbuffer, recomputing s and t from s/z, t/z and 1/z at
// the end of every 8-pixel segment and stepping linearly in between.
// %ebp, %edi, %esi and %ebx are saved here and restored before the final ret.
71: .align 4
72: .globl C(D_DrawSpans8)
73: C(D_DrawSpans8):
74: pushl %ebp // preserve caller's stack frame
75: pushl %edi
76: pushl %esi // preserve register variables
77: pushl %ebx
78:
79: //
80: // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
81: // and span list pointers
82: //
83: // TODO: any overlap from rearranging?
// The three products are left on the FP stack and stored in reverse order
// below; the integer moves are interleaved between the FP instructions.
84: flds C(d_sdivzstepu)
85: fmuls fp_8
86: movl C(cacheblock),%edx
87: flds C(d_tdivzstepu)
88: fmuls fp_8
89: movl pspans(%esp),%ebx // point to the first span descriptor
90: flds C(d_zistepu)
91: fmuls fp_8
92: movl %edx,pbase // pbase = cacheblock
93: fstps zi8stepu
94: fstps tdivz8stepu
95: fstps sdivz8stepu
96:
97: LSpanLoop:
// Per-span setup. On entry %ebx points at the current span descriptor.
// When control reaches LFDIVInFlight1:
//   %edi      = address of the first destination pixel
//   %ecx      = pixel count if it is above 8, otherwise count-1
//   %esi/%edx = sadjust/tadjust
//   s, t      = start coordinates, not yet adjusted or clamped
//   FP stack  = [z*64k for the segment end |] 1/z | t/z | s/z, already
//               advanced to the end of the first segment; the bracketed
//               entry is absent when the span is a single pixel
98: //
99: // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
100: // initial s and t values
101: //
102: // FIXME: pipeline FILD?
103: fildl espan_t_v(%ebx)
104: fildl espan_t_u(%ebx)
105:
106: fld %st(1) // dv | du | dv
107: fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
108: fld %st(1) // du | dv*d_sdivzstepv | du | dv
109: fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
110: fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
111: fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
112: // dv*d_sdivzstepv | du | dv
113: fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
114: // dv*d_sdivzstepv | du | dv
115: faddp %st(0),%st(2) // du*d_tdivzstepu |
116: // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
117: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
118: // du*d_tdivzstepu | du | dv
119: fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
120: // du*d_tdivzstepu | du | dv
121: fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
122: // du*d_sdivzstepu + dv*d_sdivzstepv |
123: // du*d_tdivzstepu | du | dv
124: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
125: // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
126: fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
127: // du*d_sdivzstepu; stays in %st(2) at end
128: fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
129: // s/z
130: fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
131: // du*d_tdivzstepu | du | s/z
132: fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
133: // du*d_tdivzstepu | du | s/z
134: faddp %st(0),%st(2) // dv*d_zistepv |
135: // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
136: fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
137: // dv*d_zistepv | s/z
138: fmuls C(d_zistepu) // du*d_zistepu |
139: // dv*d_tdivzstepv + du*d_tdivzstepu |
140: // dv*d_zistepv | s/z
141: fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
142: // du*d_zistepu | dv*d_zistepv | s/z
143: fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
144: // du*d_tdivzstepu; stays in %st(1) at end
145: fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
146: faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
147:
148: flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
149: fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
150: fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
151: // du*d_zistepu; stays in %st(0) at end
152: // 1/z | fp_64k | t/z | s/z
153: //
154: // calculate and clamp s & t
155: //
156: fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
157:
158: //
159: // point %edi to the first pixel in the span
160: //
// These integer instructions have no dependency on the divide result, so
// they are placed here to execute while the divide above is in progress.
161: movl C(d_viewbuffer),%ecx
162: movl espan_t_v(%ebx),%eax
163: movl %ebx,pspantemp // preserve spans pointer
164:
165: movl C(tadjust),%edx
166: movl C(sadjust),%esi
167: movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
168: addl %ecx,%edi
169: movl espan_t_u(%ebx),%ecx
170: addl %ecx,%edi // pdest = &pdestspan[scans->u];
171: movl espan_t_count(%ebx),%ecx
172:
173: //
174: // now start the FDIV for the end of the span
175: //
// Three cases: more than 8 pixels (LSetupNotLast1, advance by the
// precomputed 8x steps), exactly one pixel (LCleanup1, no divide needed),
// or 2..8 pixels (fall through, advance by count-1 single steps).
176: cmpl $8,%ecx
177: ja LSetupNotLast1
178:
179: decl %ecx
180: jz LCleanup1 // if only one pixel, no need to start an FDIV
181: movl %ecx,spancountminus1
182:
183: // finish up the s and t calcs
184: fxch %st(1) // z*64k | 1/z | t/z | s/z
185:
186: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.3 ! root 187: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
1.1 root 188: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.3 ! root 189: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
1.1 root 190: fxch %st(1) // s | t | 1/z | t/z | s/z
191: fistpl s // 1/z | t | t/z | s/z
192: fistpl t // 1/z | t/z | s/z
193:
// Advance 1/z, t/z and s/z by (count-1) single-pixel steps so the next
// divide yields z at the last pixel of the span. The stack comments below
// omit the 1/z | t/z | s/z entries that sit underneath.
194: fildl spancountminus1
195:
196: flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
197: flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
198: fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
199: fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
200: fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
201: fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
202: fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
203: // C(d_tdivzstepu)*scm1
204: fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
205: // C(d_tdivzstepu)*scm1
206: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
207: fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
208: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
209: faddp %st(0),%st(3)
210:
211: flds fp_64k
1.1.1.3 ! root 212: fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
1.1 root 213: // overlap
214: jmp LFDIVInFlight1
215:
// Single-pixel span: only the start s and t are needed, so no further
// divide is started and the FP stack is left as 1/z | t/z | s/z.
216: LCleanup1:
217: // finish up the s and t calcs
218: fxch %st(1) // z*64k | 1/z | t/z | s/z
219:
220: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.3 ! root 221: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
1.1 root 222: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.3 ! root 223: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
1.1 root 224: fxch %st(1) // s | t | 1/z | t/z | s/z
225: fistpl s // 1/z | t | t/z | s/z
226: fistpl t // 1/z | t/z | s/z
227: jmp LFDIVInFlight1
228:
229: .align 4
// More than 8 pixels: compute the start s and t, then advance all three
// gradients by the precomputed 8-pixel steps and start the next divide.
230: LSetupNotLast1:
231: // finish up the s and t calcs
232: fxch %st(1) // z*64k | 1/z | t/z | s/z
233:
234: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.3 ! root 235: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
1.1 root 236: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.3 ! root 237: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
1.1 root 238: fxch %st(1) // s | t | 1/z | t/z | s/z
239: fistpl s // 1/z | t | t/z | s/z
240: fistpl t // 1/z | t/z | s/z
241:
// 1/z += zi8stepu; swap s/z to the top, s/z += sdivz8stepu, swap back;
// then t/z += tdivz8stepu via a load and faddp. The stack order afterwards
// is again 1/z | t/z | s/z, with fp_64k pushed on top for the divide.
242: fadds zi8stepu
243: fxch %st(2)
244: fadds sdivz8stepu
245: fxch %st(2)
246: flds tdivz8stepu
247: faddp %st(0),%st(2)
248: flds fp_64k
1.1.1.3 ! root 249: fdiv %st(1),%st(0) // z = 1/1/z
! 250: // this is what we've gone to all this trouble to
! 251: // overlap
1.1 root 252: LFDIVInFlight1:
// Integer work placed after the divide was started: add sadjust/tadjust
// (already in %esi/%edx) to s and t, clamp them to [0, bbextents] and
// [0, bbextentt], split each into an integer part and a left-justified
// 16-bit fraction (sfracf/tfracf), and form the first source texel address.
// %ecx (pixel count, or count-1 for short spans) is not modified here.
253:
254: addl s,%esi
255: addl t,%edx
256: movl C(bbextents),%ebx
257: movl C(bbextentt),%ebp
// the unsigned `ja` catches both s > bbextents and negative s; the stub
// re-tests the same flags with a signed jump to pick the clamp value
258: cmpl %ebx,%esi
259: ja LClampHighOrLow0
260: LClampReentry0:
261: movl %esi,s
262: movl pbase,%ebx
263: shll $16,%esi
// movl does not modify flags, so the `ja` below still tests this compare
264: cmpl %ebp,%edx
265: movl %esi,sfracf
266: ja LClampHighOrLow1
267: LClampReentry1:
268: movl %edx,t
269: movl s,%esi // sfrac = scans->sfrac;
270: shll $16,%edx
271: movl t,%eax // tfrac = scans->tfrac;
272: sarl $16,%esi
273: movl %edx,tfracf
274:
275: //
276: // calculate the texture starting address
277: //
278: sarl $16,%eax
279: movl C(cachewidth),%edx
1.1.1.3 ! root 280: imull %edx,%eax // (tfrac >> 16) * cachewidth
1.1 root 281: addl %ebx,%esi
282: addl %eax,%esi // psource = pbase + (sfrac >> 16) +
283: // ((tfrac >> 16) * cachewidth);
284:
285: //
286: // determine whether last span or not
287: //
// %ecx above 8 means at least one full 8-pixel segment remains and control
// falls through to LNotLastSegment; otherwise %ecx is already count-1.
288: cmpl $8,%ecx
289: jna LLastSegment
290:
291: //
292: // not the last segment; do full 8-wide segment
293: //
294: LNotLastSegment:
// Draws one full 8-pixel segment. On entry %esi = source texel address,
// %edi = destination address, %ecx = pixels remaining in the span (above 8),
// s/t = clamped coordinates at the segment start, sfracf/tfracf = their
// left-justified fractions, and the FP stack is z*64k | 1/z | t/z | s/z
// for the END of this segment.
295:
296: //
297: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
298: // get there
299: //
300:
301: // pick up after the FDIV that was left in flight previously
302:
1.1.1.3 ! root 303: fld %st(0) // duplicate it
! 304: fmul %st(4),%st(0) // s = s/z * z
1.1 root 305: fxch %st(1)
1.1.1.3 ! root 306: fmul %st(3),%st(0) // t = t/z * z
1.1 root 307: fxch %st(1)
308: fistpl snext
309: fistpl tnext
310: movl snext,%eax
311: movl tnext,%edx
312:
313: movb (%esi),%bl // get first source texel
314: subl $8,%ecx // count off this segments' pixels
315: movl C(sadjust),%ebp
316: movl %ecx,counttemp // remember count of remaining pixels
317:
318: movl C(tadjust),%ecx
319: movb %bl,(%edi) // store first dest pixel
320:
321: addl %eax,%ebp
322: addl %edx,%ecx
323:
324: movl C(bbextents),%eax
325: movl C(bbextentt),%edx
326:
// clamp snext (%ebp) to [2048, bbextents] and tnext (%ecx) to
// [2048, bbextentt]; the out-of-line stubs jump back to LClampReentry2/3
327: cmpl $2048,%ebp
328: jl LClampLow2
329: cmpl %eax,%ebp
330: ja LClampHigh2
331: LClampReentry2:
332:
333: cmpl $2048,%ecx
334: jl LClampLow3
335: cmpl %edx,%ecx
336: ja LClampHigh3
337: LClampReentry3:
338:
339: movl %ebp,snext
340: movl %ecx,tnext
341:
342: subl s,%ebp
343: subl t,%ecx
344:
345: //
346: // set up advancetable
347: //
// %ebp/%ecx now hold the s and t deltas across the whole 8-pixel segment.
// Shifting right by 19 (16 + 3) gives the whole-texel step per pixel, and
// the shll $13 (16 - 3) further down left-justifies the per-pixel fraction.
// When the whole-texel t step is zero the multiply by cachewidth is skipped.
348: movl %ecx,%eax
349: movl %ebp,%edx
350: sarl $19,%eax // tstep >>= 16;
351: jz LZero
352: sarl $19,%edx // sstep >>= 16;
353: movl C(cachewidth),%ebx
1.1.1.3 ! root 354: imull %ebx,%eax
1.1 root 355: jmp LSetUp1
356:
357: LZero:
358: sarl $19,%edx // sstep >>= 16;
359: movl C(cachewidth),%ebx
360:
361: LSetUp1:
362:
363: addl %edx,%eax // add in sstep
364: // (tstep >> 16) * cachewidth + (sstep >> 16);
365: movl tfracf,%edx
366: movl %eax,advancetable+4 // advance base in t
367: addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
368: // (sstep >> 16);
369: shll $13,%ebp // left-justify sstep fractional part
370: movl sfracf,%ebx
371: shll $13,%ecx // left-justify tstep fractional part
372: movl %eax,advancetable // advance extra in t
373:
374: movl %ecx,tstep
375: addl %ecx,%edx // advance tfrac fractional part by tstep frac
376:
// Per-pixel step idiom used for the rest of this routine:
//   addl tstep,%edx   t fraction += step; carry set when it wraps
//   sbbl %ecx,%ecx    %ecx = -1 when it wrapped, otherwise 0
//   addl %ebp,%ebx    s fraction += step; carry set when it wraps
//   adcl advancetable+4(,%ecx,4),%esi
//                     source += advancetable[0] (one extra row) when t
//                     wrapped, else advancetable[1]; the carry from the
//                     s add contributes one more texel
377: sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
378: addl %ebp,%ebx // advance sfrac fractional part by sstep frac
379: adcl advancetable+4(,%ecx,4),%esi // point to next source texel
380:
381: addl tstep,%edx
382: sbbl %ecx,%ecx
383: movb (%esi),%al
384: addl %ebp,%ebx
385: movb %al,1(%edi)
386: adcl advancetable+4(,%ecx,4),%esi
387:
388: addl tstep,%edx
389: sbbl %ecx,%ecx
390: addl %ebp,%ebx
391: movb (%esi),%al
392: adcl advancetable+4(,%ecx,4),%esi
393:
394: addl tstep,%edx
395: sbbl %ecx,%ecx
396: movb %al,2(%edi)
397: addl %ebp,%ebx
398: movb (%esi),%al
399: adcl advancetable+4(,%ecx,4),%esi
400:
401: addl tstep,%edx
402: sbbl %ecx,%ecx
403: movb %al,3(%edi)
404: addl %ebp,%ebx
405: movb (%esi),%al
406: adcl advancetable+4(,%ecx,4),%esi
407:
408:
409: //
410: // start FDIV for end of next segment in flight, so it can overlap
411: //
// Same three-way split as at span start: more than 8 pixels remaining
// (advance by the 8x steps), exactly one (no divide), or 2..8 (advance by
// remaining-1 single steps). The stack comments omit 1/z | t/z | s/z.
412: movl counttemp,%ecx
413: cmpl $8,%ecx // more than one segment after this?
414: ja LSetupNotLast2 // yes
415:
416: decl %ecx
417: jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
418: movl %ecx,spancountminus1
419: fildl spancountminus1
420:
421: flds C(d_zistepu) // C(d_zistepu) | spancountminus1
422: fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
423: flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
424: fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
425: fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
426: faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
427: fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
428: fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
429: fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
430: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
431: flds fp_64k // 64k | C(d_sdivzstepu)*scm1
432: fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
433: faddp %st(0),%st(4) // 64k
434:
1.1.1.3 ! root 435: fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
1.1 root 436: // overlap
437: jmp LFDIVInFlight2
438:
439: .align 4
440: LSetupNotLast2:
441: fadds zi8stepu
442: fxch %st(2)
443: fadds sdivz8stepu
444: fxch %st(2)
445: flds tdivz8stepu
446: faddp %st(0),%st(2)
447: flds fp_64k
1.1.1.3 ! root 448: fdiv %st(1),%st(0) // z = 1/1/z
! 449: // this is what we've gone to all this trouble to
! 450: // overlap
1.1 root 451: LFDIVInFlight2:
// %ecx is the remaining pixel count when it is above 8; for a final short
// segment it has already been decremented to remaining-1. It is saved
// because %ecx is reused as the carry mask in the steps below.
452: movl %ecx,counttemp
453:
454: addl tstep,%edx
455: sbbl %ecx,%ecx
456: movb %al,4(%edi)
457: addl %ebp,%ebx
458: movb (%esi),%al
459: adcl advancetable+4(,%ecx,4),%esi
460:
461: addl tstep,%edx
462: sbbl %ecx,%ecx
463: movb %al,5(%edi)
464: addl %ebp,%ebx
465: movb (%esi),%al
466: adcl advancetable+4(,%ecx,4),%esi
467:
468: addl tstep,%edx
469: sbbl %ecx,%ecx
470: movb %al,6(%edi)
471: addl %ebp,%ebx
472: movb (%esi),%al
473: adcl advancetable+4(,%ecx,4),%esi
474:
// advance the destination, save the running fractions and make the
// segment-end s/t the start values for the next segment
475: addl $8,%edi
476: movl %edx,tfracf
477: movl snext,%edx
478: movl %ebx,sfracf
479: movl tnext,%ebx
480: movl %edx,s
481: movl %ebx,t
482:
483: movl counttemp,%ecx // retrieve count
484:
485: //
486: // determine whether last span or not
487: //
// the eighth pixel is stored between the compare and the branch; movb does
// not modify flags, and %edi has already been advanced by 8, hence -1
488: cmpl $8,%ecx // are there multiple segments remaining?
489: movb %al,-1(%edi)
490: ja LNotLastSegment // yes
491:
492: //
493: // last segment of scan
494: //
495: LLastSegment:
// On entry %ecx = number of steps left in the span (pixels remaining minus
// one, so 0 means a single pixel), %esi = source texel address, %edi =
// destination address. Unless %ecx is 0, the FP stack holds
// z*64k | 1/z | t/z | s/z for the last pixel of the span.
496:
497: //
498: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
499: // get there. The number of pixels left is variable, and we want to land on the
500: // last pixel, not step one past it, so we can't run into arithmetic problems
501: //
502: testl %ecx,%ecx
503: jz LNoSteps // just draw the last pixel and we're done
504:
505: // pick up after the FDIV that was left in flight previously
506:
507:
1.1.1.3 ! root 508: fld %st(0) // duplicate it
! 509: fmul %st(4),%st(0) // s = s/z * z
1.1 root 510: fxch %st(1)
1.1.1.3 ! root 511: fmul %st(3),%st(0) // t = t/z * z
1.1 root 512: fxch %st(1)
513: fistpl snext
514: fistpl tnext
515:
516: movb (%esi),%al // load first texel in segment
517: movl C(tadjust),%ebx
518: movb %al,(%edi) // store first pixel in segment
519: movl C(sadjust),%eax
520:
521: addl snext,%eax
522: addl tnext,%ebx
523:
524: movl C(bbextents),%ebp
525: movl C(bbextentt),%edx
526:
// clamp snext (%eax) to [2048, bbextents] and tnext (%ebx) to
// [2048, bbextentt]; the out-of-line stubs jump back to LClampReentry4/5
527: cmpl $2048,%eax
528: jl LClampLow4
529: cmpl %ebp,%eax
530: ja LClampHigh4
531: LClampReentry4:
532: movl %eax,snext
533:
534: cmpl $2048,%ebx
535: jl LClampLow5
536: cmpl %edx,%ebx
537: ja LClampHigh5
538: LClampReentry5:
539:
540: cmpl $1,%ecx // don't bother
541: je LOnlyOneStep // if two pixels in segment, there's only one step,
542: // of the segment length
543: subl s,%eax
544: subl t,%ebx
545:
546: addl %eax,%eax // convert to 15.17 format so multiply by 1.31
547: addl %ebx,%ebx // reciprocal yields 16.48
548:
// One-operand imull multiplies %eax by the memory operand and leaves the
// 64-bit product in %edx:%eax; only the high half in %edx is used.
// %ecx is at least 2 here, so the -8 displacement makes %ecx == 2 select
// the first entry of reciprocal_table.
549: imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
550: movl %edx,%ebp
551:
552: movl %ebx,%eax
553: imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
554:
// Entered with %ebp = sstep and %edx = tstep, both as integer.fraction
// values with 16 fractional bits, and %ecx = number of steps.
555: LSetEntryvec:
556: //
557: // set up advancetable
558: //
559: movl entryvec_table(,%ecx,4),%ebx
560: movl %edx,%eax
561: movl %ebx,jumptemp // entry point into code for RET later
562: movl %ebp,%ecx
563: sarl $16,%edx // tstep >>= 16;
564: movl C(cachewidth),%ebx
565: sarl $16,%ecx // sstep >>= 16;
1.1.1.3 ! root 566: imull %ebx,%edx
1.1 root 567:
568: addl %ecx,%edx // add in sstep
569: // (tstep >> 16) * cachewidth + (sstep >> 16);
570: movl tfracf,%ecx
571: movl %edx,advancetable+4 // advance base in t
572: addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
573: // (sstep >> 16);
574: shll $16,%ebp // left-justify sstep fractional part
575: movl sfracf,%ebx
576: shll $16,%eax // left-justify tstep fractional part
577: movl %edx,advancetable // advance extra in t
578:
// take the first step here; the handler reached through jumptemp continues
// with %eax = t step fraction (also stored in tstep), %ebp = s step
// fraction, %edx/%ebx = running t/s fractions and %esi = second texel
579: movl %eax,tstep
580: movl %ecx,%edx
581: addl %eax,%edx
582: sbbl %ecx,%ecx
583: addl %ebp,%ebx
584: adcl advancetable+4(,%ecx,4),%esi
585:
586: jmp *jumptemp // jump to the number-of-pixels handler
587:
588: //----------------------------------------
589:
// Single pixel left: load its texel and bias %edi by -7 because LEndSpan
// always stores the final pixel at 7(%edi).
590: LNoSteps:
591: movb (%esi),%al // load first texel in segment
592: subl $7,%edi // adjust for hardwired offset
593: jmp LEndSpan
594:
595:
// Exactly one step (two pixels): the step is the whole delta, so the
// reciprocal multiply is skipped. Leaves sstep in %ebp and tstep in %edx,
// which is what LSetEntryvec expects.
596: LOnlyOneStep:
597: subl s,%eax
598: subl t,%ebx
599: movl %eax,%ebp
600: movl %ebx,%edx
601: jmp LSetEntryvec
602:
603: //----------------------------------------
604:
// Entry points for the final segment of a span, reached through
// `jmp *jumptemp` in LSetEntryvec (presumably EntryN_8 is the
// entryvec_table target for a segment of N pixels; the table is defined
// outside this file). The first pixel has already been stored and the
// first step taken. Each entry biases %edi so that the hardwired
// 1(%edi)..7(%edi) offsets in the shared tail below land on this segment's
// pixels, performs one step, and joins the unrolled Entry8_8 code part way
// through. On arrival %eax = t step fraction, %ebp = s step fraction,
// %edx/%ebx = running t/s fractions, %esi = current source texel.
1.1.1.3 ! root 605: .globl Entry2_8
! 606: Entry2_8:
1.1 root 607: subl $6,%edi // adjust for hardwired offsets
608: movb (%esi),%al
609: jmp LLEntry2_8
610:
611: //----------------------------------------
612:
1.1.1.3 ! root 613: .globl Entry3_8
! 614: Entry3_8:
1.1 root 615: subl $5,%edi // adjust for hardwired offsets
616: addl %eax,%edx
617: movb (%esi),%al
618: sbbl %ecx,%ecx
619: addl %ebp,%ebx
620: adcl advancetable+4(,%ecx,4),%esi
621: jmp LLEntry3_8
622:
623: //----------------------------------------
624:
1.1.1.3 ! root 625: .globl Entry4_8
! 626: Entry4_8:
1.1 root 627: subl $4,%edi // adjust for hardwired offsets
628: addl %eax,%edx
629: movb (%esi),%al
630: sbbl %ecx,%ecx
631: addl %ebp,%ebx
632: adcl advancetable+4(,%ecx,4),%esi
// the trailing addl leaves the t carry in the flags for the sbbl that is
// the first instruction at the LLEntry label
633: addl tstep,%edx
634: jmp LLEntry4_8
635:
636: //----------------------------------------
637:
1.1.1.3 ! root 638: .globl Entry5_8
! 639: Entry5_8:
1.1 root 640: subl $3,%edi // adjust for hardwired offsets
641: addl %eax,%edx
642: movb (%esi),%al
643: sbbl %ecx,%ecx
644: addl %ebp,%ebx
645: adcl advancetable+4(,%ecx,4),%esi
646: addl tstep,%edx
647: jmp LLEntry5_8
648:
649: //----------------------------------------
650:
1.1.1.3 ! root 651: .globl Entry6_8
! 652: Entry6_8:
1.1 root 653: subl $2,%edi // adjust for hardwired offsets
654: addl %eax,%edx
655: movb (%esi),%al
656: sbbl %ecx,%ecx
657: addl %ebp,%ebx
658: adcl advancetable+4(,%ecx,4),%esi
659: addl tstep,%edx
660: jmp LLEntry6_8
661:
662: //----------------------------------------
663:
1.1.1.3 ! root 664: .globl Entry7_8
! 665: Entry7_8:
1.1 root 666: decl %edi // adjust for hardwired offsets
667: addl %eax,%edx
668: movb (%esi),%al
669: sbbl %ecx,%ecx
670: addl %ebp,%ebx
671: adcl advancetable+4(,%ecx,4),%esi
672: addl tstep,%edx
673: jmp LLEntry7_8
674:
675: //----------------------------------------
676:
// Full 8-pixel final segment: no %edi bias needed. The LLEntryN_8 labels
// below are the join points for the shorter entries above.
1.1.1.3 ! root 677: .globl Entry8_8
! 678: Entry8_8:
1.1 root 679: addl %eax,%edx
680: movb (%esi),%al
681: sbbl %ecx,%ecx
682: addl %ebp,%ebx
683: adcl advancetable+4(,%ecx,4),%esi
684:
685: addl tstep,%edx
686: sbbl %ecx,%ecx
687: movb %al,1(%edi)
688: addl %ebp,%ebx
689: movb (%esi),%al
690: adcl advancetable+4(,%ecx,4),%esi
691: addl tstep,%edx
692: LLEntry7_8:
693: sbbl %ecx,%ecx
694: movb %al,2(%edi)
695: addl %ebp,%ebx
696: movb (%esi),%al
697: adcl advancetable+4(,%ecx,4),%esi
698: addl tstep,%edx
699: LLEntry6_8:
700: sbbl %ecx,%ecx
701: movb %al,3(%edi)
702: addl %ebp,%ebx
703: movb (%esi),%al
704: adcl advancetable+4(,%ecx,4),%esi
705: addl tstep,%edx
706: LLEntry5_8:
707: sbbl %ecx,%ecx
708: movb %al,4(%edi)
709: addl %ebp,%ebx
710: movb (%esi),%al
711: adcl advancetable+4(,%ecx,4),%esi
712: addl tstep,%edx
713: LLEntry4_8:
714: sbbl %ecx,%ecx
715: movb %al,5(%edi)
716: addl %ebp,%ebx
717: movb (%esi),%al
718: adcl advancetable+4(,%ecx,4),%esi
719: LLEntry3_8:
720: movb %al,6(%edi)
721: movb (%esi),%al
// falls through to LEndSpan with the last texel in %al; it is stored
// there at 7(%edi)
722: LLEntry2_8:
723:
// End of one span: %al holds the span's last texel and %edi is biased so
// that 7(%edi) is the span's last destination pixel.
724: LEndSpan:
725:
726: //
727: // clear s/z, t/z, 1/z from FP stack
728: //
729: fstp %st(0)
730: fstp %st(0)
731: fstp %st(0)
732:
733: movl pspantemp,%ebx // restore spans pointer
734: movl espan_t_pnext(%ebx),%ebx // point to next span
735: testl %ebx,%ebx // any more spans?
// the final pixel is stored between the test and the branch; movb does not
// modify flags, so `jnz` still sees the result of the testl
736: movb %al,7(%edi)
737: jnz LSpanLoop // more spans
738:
739: popl %ebx // restore register variables
740: popl %esi
741: popl %edi
742: popl %ebp // restore the caller's stack frame
743: ret
744:
745: //----------------------------------------------------------------------
746: // 8-bpp horizontal span z drawing code for polygons, with no transparency.
747: //
748: // Assumes there is at least one span in pzspans, and that every span
749: // contains at least one pixel
750: //----------------------------------------------------------------------
751:
752: .text
753:
// z-clamp on a non-negative gradient span
//
// Reached from LFSpanLoop when the span's starting 1/z compares above 0.5.
// The span is then filled with the constant 0x40000000 (0.5 * 2**31,
// assuming Float2ToThe31nd is 2**31) by running the normal draw code with a
// zero step. Because %ebx is zeroed here, the span loop must reload izistep
// for every span that is not clamped.
LClamp:
	movl	$0x40000000,%edx	// izi = clamped value
	xorl	%ebx,%ebx		// step = 0 for this span only
	fstp	%st(0)			// discard the unclamped 1/z
	jmp	LZDraw

// z-clamp on a negative gradient span (same scheme, other draw loop)
LClampNeg:
	movl	$0x40000000,%edx	// izi = clamped value
	xorl	%ebx,%ebx		// step = 0 for this span only
	fstp	%st(0)			// discard the unclamped 1/z
	jmp	LZDrawNeg


// %esp-relative offset of the single stack argument once the four registers
// below have been pushed (16 bytes of saved registers plus, presumably, the
// 4-byte return address)
#define pzspans	4+16

//----------------------------------------------------------------------
// D_DrawZSpans
//
// Walks the linked list of span descriptors starting at the pzspans
// argument and, for each span, writes espan_t_count 16-bit z values into
// d_pzbuffer at row espan_t_v (row pitch d_zrowbytes), column espan_t_u.
// Each value is the top 16 bits of a 32-bit fixed-point 1/z that starts at
//   (d_ziorigin + v*d_zistepv + u*d_zistepu) * Float2ToThe31nd
// and is stepped per pixel by izistep. Spans whose starting 1/z is above
// 0.5 are filled with a constant instead (see LClamp).
//
// Pixels are written in pairs as aligned dwords where possible, with an
// optional single leading and trailing word.
//
// Preserves %ebp, %edi, %esi and %ebx; the x87 stack is left as on entry.
//----------------------------------------------------------------------
	.globl C(D_DrawZSpans)
C(D_DrawZSpans):
	pushl	%ebp			// preserve caller's stack frame
	pushl	%edi
	pushl	%esi			// preserve register variables
	pushl	%ebx

	flds	C(d_zistepu)
	movl	C(d_zistepu),%eax
	movl	pzspans(%esp),%esi
// NOTE: the branch is taken only when the 32-bit pattern of d_zistepu is
// all zero. Any non-zero value, negative ones included, uses the first
// loop, where a negative izistep is simply added.
	testl	%eax,%eax
	jz	LFNegSpan

	fmuls	Float2ToThe31nd
	fistpl	izistep		// note: we are relying on FP exceptions being turned
				// off here to avoid range problems
				// (%ebx is loaded from izistep per span, below)

LFSpanLoop:
// set up the initial 1/z value
	fildl	espan_t_v(%esi)
	fildl	espan_t_u(%esi)
	movl	espan_t_v(%esi),%ecx
	movl	C(d_pzbuffer),%edi
	fmuls	C(d_zistepu)
	fxch	%st(1)
	fmuls	C(d_zistepv)
	fxch	%st(1)
	fadds	C(d_ziorigin)
	imull	C(d_zrowbytes),%ecx
	faddp	%st(0),%st(1)

// clamp if z is nearer than 2 (1/z > 0.5)
	fcoms	float_point5
	addl	%ecx,%edi
	movl	espan_t_u(%esi),%edx
	addl	%edx,%edx		// word count
	movl	espan_t_count(%esi),%ecx
	addl	%edx,%edi		// pdest = &pdestspan[scans->u];
	pushl	%esi			// preserve spans pointer
	fnstsw	%ax
	testb	$0x45,%ah		// C3, C2 and C0 all clear means 1/z > 0.5
	jz	LClamp

	fmuls	Float2ToThe31nd
	fistpl	izi		// note: we are relying on FP exceptions being turned
				// off here to avoid problems when the span is closer
				// than 1/(2**31)
	movl	izi,%edx
	movl	izistep,%ebx	// reload each span: LClamp zeroes %ebx, and that
				// zero must not leak into later spans

// at this point:
// %ebx = izistep (0 when arriving from LClamp)
// %ecx = count
// %edx = izi
// %edi = pdest

LZDraw:

// do a single pixel up front, if necessary to dword align the destination
	testl	$2,%edi
	jz	LFMiddle
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	decl	%ecx
	movw	%ax,(%edi)
	addl	$2,%edi

// do middle a pair of aligned dwords at a time
LFMiddle:
	pushl	%ecx			// saved for the odd-pixel test in LFLast
	shrl	$1,%ecx			// count / 2
	jz	LFLast			// no aligned dwords to do
	shrl	$1,%ecx			// (count / 2) / 2
	jnc	LFMiddleLoop		// even number of aligned dwords to do

// odd number of dwords: write one dword (two pixels) before the loop
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax		// first pixel goes in the low word
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi	// second pixel goes in the high word
	orl	%esi,%eax
	movl	%eax,(%edi)
	addl	$4,%edi
	andl	%ecx,%ecx
	jz	LFLast

// two dwords (four pixels) per iteration; %esi and %ebp are scratch
LFMiddleLoop:
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%eax
	movl	%edx,%ebp
	movl	%eax,(%edi)
	addl	%ebx,%edx
	shrl	$16,%ebp
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%ebp
	movl	%ebp,4(%edi)		// FIXME: eliminate register contention
	addl	$8,%edi

	decl	%ecx
	jnz	LFMiddleLoop

LFLast:
	popl	%ecx			// retrieve count
	popl	%esi			// retrieve span pointer

// do the last, unaligned pixel, if there is one
	andl	$1,%ecx			// is there an odd pixel left to do?
	jz	LFSpanDone		// no
	shrl	$16,%edx
	movw	%dx,(%edi)		// do the final pixel's z

LFSpanDone:
	movl	espan_t_pnext(%esi),%esi
	testl	%esi,%esi
	jnz	LFSpanLoop

	jmp	LFDone

// Second copy of the loop: the step is negated when converted to fixed
// point and then subtracted per pixel instead of added.
LFNegSpan:
	fmuls	FloatMinus2ToThe31nd
	fistpl	izistep		// note: we are relying on FP exceptions being turned
				// off here to avoid range problems
				// (%ebx is loaded from izistep per span, below)

LFNegSpanLoop:
// set up the initial 1/z value
	fildl	espan_t_v(%esi)
	fildl	espan_t_u(%esi)
	movl	espan_t_v(%esi),%ecx
	movl	C(d_pzbuffer),%edi
	fmuls	C(d_zistepu)
	fxch	%st(1)
	fmuls	C(d_zistepv)
	fxch	%st(1)
	fadds	C(d_ziorigin)
	imull	C(d_zrowbytes),%ecx
	faddp	%st(0),%st(1)

// clamp if z is nearer than 2 (1/z > 0.5)
	fcoms	float_point5
	addl	%ecx,%edi
	movl	espan_t_u(%esi),%edx
	addl	%edx,%edx		// word count
	movl	espan_t_count(%esi),%ecx
	addl	%edx,%edi		// pdest = &pdestspan[scans->u];
	pushl	%esi			// preserve spans pointer
	fnstsw	%ax
	testb	$0x45,%ah		// C3, C2 and C0 all clear means 1/z > 0.5
	jz	LClampNeg

	fmuls	Float2ToThe31nd
	fistpl	izi		// note: we are relying on FP exceptions being turned
				// off here to avoid problems when the span is closer
				// than 1/(2**31)
	movl	izi,%edx
	movl	izistep,%ebx	// reload each span: LClampNeg zeroes %ebx, and
				// that zero must not leak into later spans

// at this point:
// %ebx = izistep (0 when arriving from LClampNeg)
// %ecx = count
// %edx = izi
// %edi = pdest

LZDrawNeg:

// do a single pixel up front, if necessary to dword align the destination
	testl	$2,%edi
	jz	LFNegMiddle
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	decl	%ecx
	movw	%ax,(%edi)
	addl	$2,%edi

// do middle a pair of aligned dwords at a time
LFNegMiddle:
	pushl	%ecx			// saved for the odd-pixel test in LFNegLast
	shrl	$1,%ecx			// count / 2
	jz	LFNegLast		// no aligned dwords to do
	shrl	$1,%ecx			// (count / 2) / 2
	jnc	LFNegMiddleLoop		// even number of aligned dwords to do

// odd number of dwords: write one dword (two pixels) before the loop
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax		// first pixel goes in the low word
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi	// second pixel goes in the high word
	orl	%esi,%eax
	movl	%eax,(%edi)
	addl	$4,%edi
	andl	%ecx,%ecx
	jz	LFNegLast

// two dwords (four pixels) per iteration; %esi and %ebp are scratch
LFNegMiddleLoop:
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%eax
	movl	%edx,%ebp
	movl	%eax,(%edi)
	subl	%ebx,%edx
	shrl	$16,%ebp
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%ebp
	movl	%ebp,4(%edi)		// FIXME: eliminate register contention
	addl	$8,%edi

	decl	%ecx
	jnz	LFNegMiddleLoop

LFNegLast:
	popl	%ecx			// retrieve count
	popl	%esi			// retrieve span pointer

// do the last, unaligned pixel, if there is one
	andl	$1,%ecx			// is there an odd pixel left to do?
	jz	LFNegSpanDone		// no
	shrl	$16,%edx
	movw	%dx,(%edi)		// do the final pixel's z

LFNegSpanDone:
	movl	espan_t_pnext(%esi),%esi
	testl	%esi,%esi
	jnz	LFNegSpanLoop

LFDone:
	popl	%ebx			// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp			// restore the caller's stack frame
	ret

#endif // id386
1017:
1018: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.