|
|
1.1 root 1: //
2: // d_draw16.s
3: // x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
4: // subdivision.
1.1.1.2 ! root 5: //
1.1 root 6:
7: #include "asm_i386.h"
8: #include "quakeasm.h"
9: #include "asm_draw.h"
10: #include "d_ifacea.h"
11:
1.1.1.2 ! root 12: #if id386
1.1 root 13:
14: //----------------------------------------------------------------------
15: // 8-bpp horizontal span drawing code for polygons, with no transparency and
16: // 16-pixel subdivision.
17: //
18: // Assumes there is at least one span in pspans, and that every span
19: // contains at least one pixel
20: //----------------------------------------------------------------------
21:
22: .data
23:
24: .text
25:
// Out-of-line clamp targets for the s/t range checks in D_DrawSpans16.
// Each stub loads the clamped value into the register that was just tested
// and jumps back to the matching LClampReentryN label.
//  - LClampHighOrLow0/1 are entered by a "ja" that follows a cmpl against
//    bbextents / bbextentt. The flags of that cmpl are still live on entry,
//    so "jg" separates too-high (clamp to the extent) from negative
//    (clamp to 0).
//  - Stubs 2-5 are entered separately for the low and the high case; low
//    values are clamped to 4096 rather than 0, high values to the extent.
26: // out-of-line, rarely-needed clamping code
27:
28: LClampHigh0:
29: movl C(bbextents),%esi
30: jmp LClampReentry0
31: LClampHighOrLow0:
32: jg LClampHigh0
33: xorl %esi,%esi
34: jmp LClampReentry0
35:
36: LClampHigh1:
37: movl C(bbextentt),%edx
38: jmp LClampReentry1
39: LClampHighOrLow1:
40: jg LClampHigh1
41: xorl %edx,%edx
42: jmp LClampReentry1
43:
// s at the end of a full 16-pixel segment (held in %ebp)
44: LClampLow2:
45: movl $4096,%ebp
46: jmp LClampReentry2
47: LClampHigh2:
48: movl C(bbextents),%ebp
49: jmp LClampReentry2
50:
// t at the end of a full 16-pixel segment (held in %ecx)
51: LClampLow3:
52: movl $4096,%ecx
53: jmp LClampReentry3
54: LClampHigh3:
55: movl C(bbextentt),%ecx
56: jmp LClampReentry3
57:
// s at the end of the last segment (held in %eax)
58: LClampLow4:
59: movl $4096,%eax
60: jmp LClampReentry4
61: LClampHigh4:
62: movl C(bbextents),%eax
63: jmp LClampReentry4
64:
// t at the end of the last segment (held in %ebx)
65: LClampLow5:
66: movl $4096,%ebx
67: jmp LClampReentry5
68: LClampHigh5:
69: movl C(bbextentt),%ebx
70: jmp LClampReentry5
71:
72:
// Byte offset from %esp to the pspans argument once the four registers below
// have been pushed: 4 for the return address + 16 for the saved registers.
73: #define pspans 4+16
74:
// D_DrawSpans16: entry point. Takes one stack argument (pspans, the first
// span descriptor) and walks the list through espan_t_pnext until it is 0.
// %ebp, %edi, %esi and %ebx are saved here and restored before the ret.
75: .align 4
76: .globl C(D_DrawSpans16)
77: C(D_DrawSpans16):
78: pushl %ebp // preserve caller's stack frame
79: pushl %edi
80: pushl %esi // preserve register variables
81: pushl %ebx
82:
83: //
84: // set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
85: // and span list pointers
86: //
// The three products are pushed in the order s/z, t/z, 1/z, so they are
// popped below in the reverse order (zi, tdivz, sdivz). The integer moves are
// interleaved with the FP instructions.
// NOTE(review): fp_16 is defined outside this view; presumably 16.0.
87: // TODO: any overlap from rearranging?
88: flds C(d_sdivzstepu)
89: fmuls fp_16
90: movl C(cacheblock),%edx
91: flds C(d_tdivzstepu)
92: fmuls fp_16
93: movl pspans(%esp),%ebx // point to the first span descriptor
94: flds C(d_zistepu)
95: fmuls fp_16
96: movl %edx,pbase // pbase = cacheblock
97: fstps zi16stepu
98: fstps tdivz16stepu
99: fstps sdivz16stepu
100:
// Top of the per-span loop.
// On entry: %ebx = current span descriptor, x87 stack empty.
// On exit from this section the x87 stack holds, top first:
//   1/z | z*64k | t/z | s/z
// all evaluated at the span's starting (u, v). The stack-layout comments on
// each line list st(0) first.
101: LSpanLoop:
102: //
103: // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
104: // initial s and t values
105: //
106: // FIXME: pipeline FILD?
107: fildl espan_t_v(%ebx)
108: fildl espan_t_u(%ebx)
109:
110: fld %st(1) // dv | du | dv
111: fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
112: fld %st(1) // du | dv*d_sdivzstepv | du | dv
113: fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
114: fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
115: fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
116: // dv*d_sdivzstepv | du | dv
117: fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
118: // dv*d_sdivzstepv | du | dv
119: faddp %st(0),%st(2) // du*d_tdivzstepu |
120: // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
121: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
122: // du*d_tdivzstepu | du | dv
123: fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
124: // du*d_tdivzstepu | du | dv
125: fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
126: // du*d_sdivzstepu + dv*d_sdivzstepv |
127: // du*d_tdivzstepu | du | dv
128: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
129: // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
130: fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
131: // du*d_sdivzstepu; stays in %st(2) at end
132: fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
133: // s/z
134: fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
135: // du*d_tdivzstepu | du | s/z
136: fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
137: // du*d_tdivzstepu | du | s/z
138: faddp %st(0),%st(2) // dv*d_zistepv |
139: // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
140: fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
141: // dv*d_zistepv | s/z
142: fmuls C(d_zistepu) // du*d_zistepu |
143: // dv*d_tdivzstepv + du*d_tdivzstepu |
144: // dv*d_zistepv | s/z
145: fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
146: // du*d_zistepu | dv*d_zistepv | s/z
147: fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
148: // du*d_tdivzstepu; stays in %st(1) at end
149: fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
150: faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
151:
152: flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
153: fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
154: fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
155: // du*d_zistepu; stays in %st(0) at end
156: // 1/z | fp_64k | t/z | s/z
157: //
158: // calculate and clamp s & t
159: //
// This divide is left in flight while the integer code that follows computes
// the destination pointer.
160: fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
161:
// Compute the destination pointer for the span, then finish the s/t
// calculation for the first pixel and start the divide for the end of the
// first segment. Three variants follow, selected by the pixel count:
//   count > 16      -> LSetupNotLast1 (advance by the precomputed 16x steps)
//   1 < count <= 16 -> fall through   (advance by count-1 single steps)
//   count == 1      -> LCleanup1      (no further divide needed)
// All three leave s and t stored and join at LFDIVInFlight1. Note that %ecx
// holds count on the first path but count-1 on the other two.
162: //
163: // point %edi to the first pixel in the span
164: //
165: movl C(d_viewbuffer),%ecx
166: movl espan_t_v(%ebx),%eax
167: movl %ebx,pspantemp // preserve spans pointer
168:
169: movl C(tadjust),%edx
170: movl C(sadjust),%esi
171: movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
172: addl %ecx,%edi
173: movl espan_t_u(%ebx),%ecx
174: addl %ecx,%edi // pdest = &pdestspan[scans->u];
175: movl espan_t_count(%ebx),%ecx
176:
177: //
178: // now start the FDIV for the end of the span
179: //
180: cmpl $16,%ecx
181: ja LSetupNotLast1
182:
183: decl %ecx
184: jz LCleanup1 // if only one pixel, no need to start an FDIV
185: movl %ecx,spancountminus1
186:
187: // finish up the s and t calcs
188: fxch %st(1) // z*64k | 1/z | t/z | s/z
189:
190: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root 191: fmul %st(4),%st // s | z*64k | 1/z | t/z | s/z
1.1 root 192: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root 193: fmul %st(3),%st // t | s | 1/z | t/z | s/z
1.1 root 194: fxch %st(1) // s | t | 1/z | t/z | s/z
195: fistpl s // t | 1/z | t/z | s/z
196: fistpl t // 1/z | t/z | s/z
197:
// Advance 1/z, t/z and s/z by (count-1) single-pixel steps. The layouts
// below show only the temporaries; 1/z | t/z | s/z sit beneath them, which
// is why the faddp targets are %st(3).
198: fildl spancountminus1
199:
200: flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
201: flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
202: fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
203: fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
204: fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
205: fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
206: fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
207: // C(d_tdivzstepu)*scm1
208: fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
209: // C(d_tdivzstepu)*scm1
210: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
211: fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
212: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
213: faddp %st(0),%st(3)
214:
215: flds fp_64k
1.1.1.2 ! root 216: fdiv %st(1),%st // this is what we've gone to all this trouble to
1.1 root 217: // overlap
218: jmp LFDIVInFlight1
219:
// Single-pixel span: store s and t only. The x87 stack is left holding just
// 1/z | t/z | s/z (no z*64k on top).
220: LCleanup1:
221: // finish up the s and t calcs
222: fxch %st(1) // z*64k | 1/z | t/z | s/z
223:
224: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root 225: fmul %st(4),%st // s | z*64k | 1/z | t/z | s/z
1.1 root 226: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root 227: fmul %st(3),%st // t | s | 1/z | t/z | s/z
1.1 root 228: fxch %st(1) // s | t | 1/z | t/z | s/z
229: fistpl s // t | 1/z | t/z | s/z
230: fistpl t // 1/z | t/z | s/z
231: jmp LFDIVInFlight1
232:
233: .align 4
// More than 16 pixels: advance by one full 16-pixel segment using the
// scaled steps stored at function entry.
234: LSetupNotLast1:
235: // finish up the s and t calcs
236: fxch %st(1) // z*64k | 1/z | t/z | s/z
237:
238: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root 239: fmul %st(4),%st // s | z*64k | 1/z | t/z | s/z
1.1 root 240: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root 241: fmul %st(3),%st // t | s | 1/z | t/z | s/z
1.1 root 242: fxch %st(1) // s | t | 1/z | t/z | s/z
243: fistpl s // t | 1/z | t/z | s/z
244: fistpl t // 1/z | t/z | s/z
245:
246: fadds zi16stepu
247: fxch %st(2)
248: fadds sdivz16stepu
249: fxch %st(2)
250: flds tdivz16stepu
251: faddp %st(0),%st(2)
252: flds fp_64k
1.1.1.2 ! root 253: fdiv %st(1),%st // z*64k = fp_64k / (1/z)
! 254: // this is what we've gone to all this trouble to
! 255: // overlap
// Integer work for the first pixel of the span.
// On entry: %esi = sadjust, %edx = tadjust, %edi = destination pointer,
// %ecx = pixel count (count-1 if the span has 16 or fewer pixels), and the
// memory variables s and t hold the values just stored by fistpl.
// Adds the adjusts, clamps s and t, splits each into an integer part and a
// left-justified fractional part (sfracf / tfracf), and points %esi at the
// first source texel.
1.1 root 256: LFDIVInFlight1:
257:
258: addl s,%esi
259: addl t,%edx
260: movl C(bbextents),%ebx
261: movl C(bbextentt),%ebp
// unsigned compare: "above" catches both negative and too-large values
262: cmpl %ebx,%esi
263: ja LClampHighOrLow0
264: LClampReentry0:
265: movl %esi,s
266: movl pbase,%ebx
267: shll $16,%esi
// the movl between this cmpl and the ja below does not modify the flags
268: cmpl %ebp,%edx
269: movl %esi,sfracf
270: ja LClampHighOrLow1
271: LClampReentry1:
272: movl %edx,t
273: movl s,%esi // reload the adjusted, clamped s
274: shll $16,%edx
275: movl t,%eax // reload the adjusted, clamped t
276: sarl $16,%esi
277: movl %edx,tfracf
278:
279: //
280: // calculate the texture starting address
281: //
282: sarl $16,%eax
283: movl C(cachewidth),%edx
1.1.1.2 ! root 284: imul %edx,%eax // (t >> 16) * cachewidth
1.1 root 285: addl %ebx,%esi
286: addl %eax,%esi // psource = pbase + (s >> 16) +
287: // ((t >> 16) * cachewidth);
288: //
289: // determine whether last span or not
290: //
291: cmpl $16,%ecx
292: jna LLastSegment
293:
// Full 16-pixel segment. Reached with the divide for the segment's end point
// complete or in flight, so the x87 stack is z*64k | 1/z | t/z | s/z.
// Computes and clamps snext/tnext, stores the first pixel, and derives the
// per-pixel steps. On exit (at LSetUp1): %eax = whole-texel t step (times
// cachewidth when non-zero), %edx = whole-texel s step, %ebx = cachewidth,
// %ebp / %ecx = raw s / t deltas over the 16 pixels.
294: //
295: // not the last segment; do full 16-wide segment
296: //
297: LNotLastSegment:
298:
299: //
300: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
301: // get there
302: //
303:
304: // pick up after the FDIV that was left in flight previously
305:
1.1.1.2 ! root 306: fld %st(0) // duplicate it
! 307: fmul %st(4),%st // s = s/z * z
1.1 root 308: fxch %st(1)
1.1.1.2 ! root 309: fmul %st(3),%st // t = t/z * z
1.1 root 310: fxch %st(1)
311: fistpl snext
312: fistpl tnext
313: movl snext,%eax
314: movl tnext,%edx
315:
316: movb (%esi),%bl // get first source texel
317: subl $16,%ecx // count off this segment's pixels
318: movl C(sadjust),%ebp
319: movl %ecx,counttemp // remember count of remaining pixels
320:
321: movl C(tadjust),%ecx
322: movb %bl,(%edi) // store first dest pixel
323:
324: addl %eax,%ebp
325: addl %edx,%ecx
326:
327: movl C(bbextents),%eax
328: movl C(bbextentt),%edx
329:
// clamp snext (%ebp) to [4096, bbextents]
330: cmpl $4096,%ebp
331: jl LClampLow2
332: cmpl %eax,%ebp
333: ja LClampHigh2
334: LClampReentry2:
335:
// clamp tnext (%ecx) to [4096, bbextentt]
336: cmpl $4096,%ecx
337: jl LClampLow3
338: cmpl %edx,%ecx
339: ja LClampHigh3
340: LClampReentry3:
341:
342: movl %ebp,snext
343: movl %ecx,tnext
344:
// deltas across the 16-pixel segment
345: subl s,%ebp
346: subl t,%ecx
347:
348: //
349: // set up advancetable
350: //
// The shift count is 20, not 16: 16 fraction bits plus 4 more to divide the
// 16-pixel delta down to a per-pixel step.
351: movl %ecx,%eax
352: movl %ebp,%edx
353: sarl $20,%eax // whole-texel t step = (tnext - t) >> (16+4)
354: jz LZero
355: sarl $20,%edx // whole-texel s step = (snext - s) >> (16+4)
356: movl C(cachewidth),%ebx
1.1.1.2 ! root 357: imul %ebx,%eax
1.1 root 358: jmp LSetUp1
359:
// whole-texel t step is zero, so the multiply by cachewidth is skipped
360: LZero:
361: sarl $20,%edx // whole-texel s step = (snext - s) >> (16+4)
362: movl C(cachewidth),%ebx
363:
// Build the two-entry advancetable and draw pixels 1-7 of the segment,
// leaving the texel for pixel 8 in %al.
//   advancetable+4 : whole-texel advance when the t fraction does not carry
//   advancetable   : the same plus one extra cachewidth, used on a t carry
// Per-pixel pattern: add the t fraction; "sbbl %ecx,%ecx" turns the carry
// into -1 or 0; add the s fraction; then "adcl advancetable+4(,%ecx,4),%esi"
// selects one of the two entries and also adds the s carry.
// Register roles: %edx = t fraction accumulator, %ebx = s fraction
// accumulator, %ebp = s fraction step, %esi = source, %edi = destination.
364: LSetUp1:
365:
366: addl %edx,%eax // add in sstep
367: // (tstep >> 16) * cachewidth + (sstep >> 16);
368: movl tfracf,%edx
369: movl %eax,advancetable+4 // advance base in t
370: addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
371: // (sstep >> 16);
372: shll $12,%ebp // left-justify sstep fractional part
373: movl sfracf,%ebx
374: shll $12,%ecx // left-justify tstep fractional part
375: movl %eax,advancetable // advance extra in t
376:
377: movl %ecx,tstep
378: addl %ecx,%edx // advance tfrac fractional part by tstep frac
379:
380: sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
381: addl %ebp,%ebx // advance sfrac fractional part by sstep frac
382: adcl advancetable+4(,%ecx,4),%esi // point to next source texel
383:
// pixel 1 is loaded and stored within the same step
384: addl tstep,%edx
385: sbbl %ecx,%ecx
386: movb (%esi),%al
387: addl %ebp,%ebx
388: movb %al,1(%edi)
389: adcl advancetable+4(,%ecx,4),%esi
390:
// from here on each texel is loaded in one step and stored in the next
391: addl tstep,%edx
392: sbbl %ecx,%ecx
393: addl %ebp,%ebx
394: movb (%esi),%al
395: adcl advancetable+4(,%ecx,4),%esi
396:
397: addl tstep,%edx
398: sbbl %ecx,%ecx
399: movb %al,2(%edi)
400: addl %ebp,%ebx
401: movb (%esi),%al
402: adcl advancetable+4(,%ecx,4),%esi
403:
404: addl tstep,%edx
405: sbbl %ecx,%ecx
406: movb %al,3(%edi)
407: addl %ebp,%ebx
408: movb (%esi),%al
409: adcl advancetable+4(,%ecx,4),%esi
410:
411: addl tstep,%edx
412: sbbl %ecx,%ecx
413: movb %al,4(%edi)
414: addl %ebp,%ebx
415: movb (%esi),%al
416: adcl advancetable+4(,%ecx,4),%esi
417:
418: addl tstep,%edx
419: sbbl %ecx,%ecx
420: movb %al,5(%edi)
421: addl %ebp,%ebx
422: movb (%esi),%al
423: adcl advancetable+4(,%ecx,4),%esi
424:
425: addl tstep,%edx
426: sbbl %ecx,%ecx
427: movb %al,6(%edi)
428: addl %ebp,%ebx
429: movb (%esi),%al
430: adcl advancetable+4(,%ecx,4),%esi
431:
432: addl tstep,%edx
433: sbbl %ecx,%ecx
434: movb %al,7(%edi)
435: addl %ebp,%ebx
436: movb (%esi),%al
437: adcl advancetable+4(,%ecx,4),%esi
438:
439:
// Halfway through the 16-pixel segment: start the divide for the end of the
// NEXT segment so that it overlaps pixels 8-15.
// The x87 stack is 1/z | t/z | s/z on entry. The only integer register used
// here is %ecx; %al still holds the texel for pixel 8, and %edx, %ebx, %ebp,
// %esi and %edi carry the inner-loop state.
// %ecx is left at the remaining count, or at count-1 when 16 or fewer pixels
// remain, and is saved to counttemp at LFDIVInFlight2.
440: //
441: // start FDIV for end of next segment in flight, so it can overlap
442: //
443: movl counttemp,%ecx
444: cmpl $16,%ecx // more than one segment after this?
445: ja LSetupNotLast2 // yes
446:
447: decl %ecx
448: jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
449: movl %ecx,spancountminus1
450: fildl spancountminus1
451:
// The layouts below show only the temporaries; 1/z | t/z | s/z sit beneath
// them and receive the three faddp results.
452: flds C(d_zistepu) // C(d_zistepu) | spancountminus1
453: fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
454: flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
455: fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
456: fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
457: faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
458: fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
459: fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
460: fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
461: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
462: flds fp_64k // 64k | C(d_sdivzstepu)*scm1
463: fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
464: faddp %st(0),%st(4) // 64k
465:
1.1.1.2 ! root 466: fdiv %st(1),%st // this is what we've gone to all this trouble to
1.1 root 467: // overlap
468: jmp LFDIVInFlight2
469:
470: .align 4
// more than 16 pixels remain: advance by one full 16-pixel segment
471: LSetupNotLast2:
472: fadds zi16stepu
473: fxch %st(2)
474: fadds sdivz16stepu
475: fxch %st(2)
476: flds tdivz16stepu
477: faddp %st(0),%st(2)
478: flds fp_64k
1.1.1.2 ! root 479: fdiv %st(1),%st // z*64k = fp_64k / (1/z)
! 480: // this is what we've gone to all this trouble to
! 481: // overlap
// Second half of the 16-pixel segment: store pixels 8-15 using the same
// carry-indexed advancetable stepping as the first half, then roll the
// segment end point (snext/tnext) into s/t and decide whether another full
// segment follows.
// On entry %al holds the texel for pixel 8 and %ecx the remaining-pixel
// count (already reduced by one when 16 or fewer pixels remain).
1.1 root 482: LFDIVInFlight2:
483: movl %ecx,counttemp
484:
485: addl tstep,%edx
486: sbbl %ecx,%ecx
487: movb %al,8(%edi)
488: addl %ebp,%ebx
489: movb (%esi),%al
490: adcl advancetable+4(,%ecx,4),%esi
491:
492: addl tstep,%edx
493: sbbl %ecx,%ecx
494: movb %al,9(%edi)
495: addl %ebp,%ebx
496: movb (%esi),%al
497: adcl advancetable+4(,%ecx,4),%esi
498:
499: addl tstep,%edx
500: sbbl %ecx,%ecx
501: movb %al,10(%edi)
502: addl %ebp,%ebx
503: movb (%esi),%al
504: adcl advancetable+4(,%ecx,4),%esi
505:
506: addl tstep,%edx
507: sbbl %ecx,%ecx
508: movb %al,11(%edi)
509: addl %ebp,%ebx
510: movb (%esi),%al
511: adcl advancetable+4(,%ecx,4),%esi
512:
513: addl tstep,%edx
514: sbbl %ecx,%ecx
515: movb %al,12(%edi)
516: addl %ebp,%ebx
517: movb (%esi),%al
518: adcl advancetable+4(,%ecx,4),%esi
519:
520: addl tstep,%edx
521: sbbl %ecx,%ecx
522: movb %al,13(%edi)
523: addl %ebp,%ebx
524: movb (%esi),%al
525: adcl advancetable+4(,%ecx,4),%esi
526:
527: addl tstep,%edx
528: sbbl %ecx,%ecx
529: movb %al,14(%edi)
530: addl %ebp,%ebx
531: movb (%esi),%al
532: adcl advancetable+4(,%ecx,4),%esi
533:
// advance the destination pointer and save the fraction accumulators; the
// end point of this segment becomes the start point of the next one
534: addl $16,%edi
535: movl %edx,tfracf
536: movl snext,%edx
537: movl %ebx,sfracf
538: movl tnext,%ebx
539: movl %edx,s
540: movl %ebx,t
541:
542: movl counttemp,%ecx // retrieve count
543:
544: //
545: // determine whether last span or not
546: //
// The movb between the cmpl and the ja stores pixel 15 (offset -1 because
// %edi has already been advanced by 16); it does not modify the flags.
547: cmpl $16,%ecx // are there multiple segments remaining?
548: movb %al,-1(%edi)
549: ja LNotLastSegment // yes
550:
// Final segment of the span. On entry %ecx = (pixels left) - 1, i.e. the
// number of steps still to take (0..15), %esi = source, %edi = destination.
// Computes and clamps the end point, derives per-pixel steps with the
// reciprocal table, takes the first step, and jumps into the unrolled code
// through entryvec_table_16.
// NOTE(review): reciprocal_table_16 and entryvec_table_16 are defined outside
// this view; the indexing implies one dword entry per step count.
551: //
552: // last segment of scan
553: //
554: LLastSegment:
555:
556: //
557: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
558: // get there. The number of pixels left is variable, and we want to land on the
559: // last pixel, not step one past it, so we can't run into arithmetic problems
560: //
561: testl %ecx,%ecx
562: jz LNoSteps // just draw the last pixel and we're done
563:
564: // pick up after the FDIV that was left in flight previously
565:
566:
1.1.1.2 ! root 567: fld %st(0) // duplicate it
! 568: fmul %st(4),%st // s = s/z * z
1.1 root 569: fxch %st(1)
1.1.1.2 ! root 570: fmul %st(3),%st // t = t/z * z
1.1 root 571: fxch %st(1)
572: fistpl snext
573: fistpl tnext
574:
575: movb (%esi),%al // load first texel in segment
576: movl C(tadjust),%ebx
577: movb %al,(%edi) // store first pixel in segment
578: movl C(sadjust),%eax
579:
580: addl snext,%eax
581: addl tnext,%ebx
582:
583: movl C(bbextents),%ebp
584: movl C(bbextentt),%edx
585:
// clamp snext (%eax) to [4096, bbextents]
586: cmpl $4096,%eax
587: jl LClampLow4
588: cmpl %ebp,%eax
589: ja LClampHigh4
590: LClampReentry4:
591: movl %eax,snext
592:
// clamp tnext (%ebx) to [4096, bbextentt]
593: cmpl $4096,%ebx
594: jl LClampLow5
595: cmpl %edx,%ebx
596: ja LClampHigh5
597: LClampReentry5:
598:
599: cmpl $1,%ecx // with a single step the step is the whole delta,
600: je LOnlyOneStep // so the reciprocal multiply is skipped
601: // (two pixels in the segment)
602: subl s,%eax
603: subl t,%ebx
604:
605: addl %eax,%eax // convert to 15.17 format so multiply by 1.31
606: addl %ebx,%ebx // reciprocal yields 16.48
607:
// One-operand imull multiplies %eax by the table entry and leaves the high
// 32 bits of the product in %edx, which is the per-pixel step. This
// overwrites the bbextentt value in %edx, which is no longer needed.
608: imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) /
609: // (spancount-1)
610: movl %edx,%ebp
611:
612: movl %ebx,%eax
613: imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) /
614: // (spancount-1)
// here %ebp = sstep, %edx = tstep, %ecx = number of steps
615: LSetEntryvec:
616: //
617: // set up advancetable
618: //
619: movl entryvec_table_16(,%ecx,4),%ebx
620: movl %edx,%eax
621: movl %ebx,jumptemp // entry point for the indirect jmp below
622: movl %ebp,%ecx
623: sarl $16,%edx // tstep >>= 16;
624: movl C(cachewidth),%ebx
625: sarl $16,%ecx // sstep >>= 16;
1.1.1.2 ! root 626: imul %ebx,%edx
1.1 root 627:
628: addl %ecx,%edx // add in sstep
629: // (tstep >> 16) * cachewidth + (sstep >> 16);
630: movl tfracf,%ecx
631: movl %edx,advancetable+4 // advance base in t
632: addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
633: // (sstep >> 16);
634: shll $16,%ebp // left-justify sstep fractional part
635: movl sfracf,%ebx
636: shll $16,%eax // left-justify tstep fractional part
637: movl %edx,advancetable // advance extra in t
638:
// take the first step here, so every entry point starts with %esi already
// pointing at the texel for the second pixel
639: movl %eax,tstep
640: movl %ecx,%edx
641: addl %eax,%edx
642: sbbl %ecx,%ecx
643: addl %ebp,%ebx
644: adcl advancetable+4(,%ecx,4),%esi
645:
646: jmp *jumptemp // jump to the number-of-pixels handler
647:
648: //----------------------------------------
649:
// Reached from LLastSegment when %ecx is 0: exactly one pixel is left.
// LEndSpan stores %al at 15(%edi), so %edi is biased by -15 to make that
// store land on the current pixel.
650: LNoSteps:
651: movb (%esi),%al // load first texel in segment
652: subl $15,%edi // adjust for hardwired offset
653: jmp LEndSpan
654:
655:
// Reached when exactly one step remains (two pixels): the step is the whole
// s/t delta, so no reciprocal multiply is needed. Leaves sstep in %ebp and
// tstep in %edx, as LSetEntryvec expects.
656: LOnlyOneStep:
657: subl s,%eax
658: subl t,%ebx
659: movl %eax,%ebp
660: movl %ebx,%edx
661: jmp LSetEntryvec
662:
663: //----------------------------------------
664:
// Entry points for a final segment of N pixels (EntryN_16), reached through
// "jmp *jumptemp". The first pixel has already been stored and %esi already
// points at the texel for the second pixel.
// The shared unrolled tail stores at fixed offsets that end at 15(%edi), so
// each entry first subtracts (16 - N) from %edi to make the last store land
// on the span's last pixel. Each entry then performs the part of the stepping
// pattern that precedes its LEntryN_16 join point: %eax = tstep fraction,
// %edx / %ebx = t / s fraction accumulators, %ebp = sstep fraction.
// NOTE(review): the labels are exported, presumably so entryvec_table_16
// (defined outside this view) can reference them.
665: .globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
666: .globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
667: .globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
668: .globl Entry14_16, Entry15_16, Entry16_16
669:
670: Entry2_16:
671: subl $14,%edi // adjust for hardwired offsets
672: movb (%esi),%al
673: jmp LEntry2_16
674:
675: //----------------------------------------
676:
677: Entry3_16:
678: subl $13,%edi // adjust for hardwired offsets
679: addl %eax,%edx
680: movb (%esi),%al
681: sbbl %ecx,%ecx
682: addl %ebp,%ebx
683: adcl advancetable+4(,%ecx,4),%esi
684: jmp LEntry3_16
685:
686: //----------------------------------------
687:
// From Entry4_16 upward each entry ends with "addl tstep,%edx"; the carry it
// produces is consumed by the sbbl at the LEntryN_16 join point.
688: Entry4_16:
689: subl $12,%edi // adjust for hardwired offsets
690: addl %eax,%edx
691: movb (%esi),%al
692: sbbl %ecx,%ecx
693: addl %ebp,%ebx
694: adcl advancetable+4(,%ecx,4),%esi
695: addl tstep,%edx
696: jmp LEntry4_16
697:
698: //----------------------------------------
699:
700: Entry5_16:
701: subl $11,%edi // adjust for hardwired offsets
702: addl %eax,%edx
703: movb (%esi),%al
704: sbbl %ecx,%ecx
705: addl %ebp,%ebx
706: adcl advancetable+4(,%ecx,4),%esi
707: addl tstep,%edx
708: jmp LEntry5_16
709:
710: //----------------------------------------
711:
712: Entry6_16:
713: subl $10,%edi // adjust for hardwired offsets
714: addl %eax,%edx
715: movb (%esi),%al
716: sbbl %ecx,%ecx
717: addl %ebp,%ebx
718: adcl advancetable+4(,%ecx,4),%esi
719: addl tstep,%edx
720: jmp LEntry6_16
721:
722: //----------------------------------------
723:
724: Entry7_16:
725: subl $9,%edi // adjust for hardwired offsets
726: addl %eax,%edx
727: movb (%esi),%al
728: sbbl %ecx,%ecx
729: addl %ebp,%ebx
730: adcl advancetable+4(,%ecx,4),%esi
731: addl tstep,%edx
732: jmp LEntry7_16
733:
734: //----------------------------------------
735:
736: Entry8_16:
737: subl $8,%edi // adjust for hardwired offsets
738: addl %eax,%edx
739: movb (%esi),%al
740: sbbl %ecx,%ecx
741: addl %ebp,%ebx
742: adcl advancetable+4(,%ecx,4),%esi
743: addl tstep,%edx
744: jmp LEntry8_16
745:
746: //----------------------------------------
747:
748: Entry9_16:
749: subl $7,%edi // adjust for hardwired offsets
750: addl %eax,%edx
751: movb (%esi),%al
752: sbbl %ecx,%ecx
753: addl %ebp,%ebx
754: adcl advancetable+4(,%ecx,4),%esi
755: addl tstep,%edx
756: jmp LEntry9_16
757:
758: //----------------------------------------
759:
760: Entry10_16:
761: subl $6,%edi // adjust for hardwired offsets
762: addl %eax,%edx
763: movb (%esi),%al
764: sbbl %ecx,%ecx
765: addl %ebp,%ebx
766: adcl advancetable+4(,%ecx,4),%esi
767: addl tstep,%edx
768: jmp LEntry10_16
769:
770: //----------------------------------------
771:
772: Entry11_16:
773: subl $5,%edi // adjust for hardwired offsets
774: addl %eax,%edx
775: movb (%esi),%al
776: sbbl %ecx,%ecx
777: addl %ebp,%ebx
778: adcl advancetable+4(,%ecx,4),%esi
779: addl tstep,%edx
780: jmp LEntry11_16
781:
782: //----------------------------------------
783:
784: Entry12_16:
785: subl $4,%edi // adjust for hardwired offsets
786: addl %eax,%edx
787: movb (%esi),%al
788: sbbl %ecx,%ecx
789: addl %ebp,%ebx
790: adcl advancetable+4(,%ecx,4),%esi
791: addl tstep,%edx
792: jmp LEntry12_16
793:
794: //----------------------------------------
795:
796: Entry13_16:
797: subl $3,%edi // adjust for hardwired offsets
798: addl %eax,%edx
799: movb (%esi),%al
800: sbbl %ecx,%ecx
801: addl %ebp,%ebx
802: adcl advancetable+4(,%ecx,4),%esi
803: addl tstep,%edx
804: jmp LEntry13_16
805:
806: //----------------------------------------
807:
808: Entry14_16:
809: subl $2,%edi // adjust for hardwired offsets
810: addl %eax,%edx
811: movb (%esi),%al
812: sbbl %ecx,%ecx
813: addl %ebp,%ebx
814: adcl advancetable+4(,%ecx,4),%esi
815: addl tstep,%edx
816: jmp LEntry14_16
817:
818: //----------------------------------------
819:
820: Entry15_16:
821: decl %edi // adjust for hardwired offsets
822: addl %eax,%edx
823: movb (%esi),%al
824: sbbl %ecx,%ecx
825: addl %ebp,%ebx
826: adcl advancetable+4(,%ecx,4),%esi
827: addl tstep,%edx
828: jmp LEntry15_16
829:
830: //----------------------------------------
831:
// Entry for a 16-pixel final segment; no %edi adjustment is needed. It falls
// through the whole unrolled chain below. Each LEntryN_16 label is the join
// point for the matching EntryN_16 entry and sits immediately after an
// "addl tstep,%edx", so the sbbl that follows consumes the t-fraction carry
// whether control arrived by fall-through or by jump.
// The chain stores pixels at offsets 1..14; the texel for offset 15 is left
// in %al and stored at LEndSpan.
832: Entry16_16:
833: addl %eax,%edx
834: movb (%esi),%al
835: sbbl %ecx,%ecx
836: addl %ebp,%ebx
837: adcl advancetable+4(,%ecx,4),%esi
838:
839: addl tstep,%edx
840: sbbl %ecx,%ecx
841: movb %al,1(%edi)
842: addl %ebp,%ebx
843: movb (%esi),%al
844: adcl advancetable+4(,%ecx,4),%esi
845: addl tstep,%edx
846: LEntry15_16:
847: sbbl %ecx,%ecx
848: movb %al,2(%edi)
849: addl %ebp,%ebx
850: movb (%esi),%al
851: adcl advancetable+4(,%ecx,4),%esi
852: addl tstep,%edx
853: LEntry14_16:
854: sbbl %ecx,%ecx
855: movb %al,3(%edi)
856: addl %ebp,%ebx
857: movb (%esi),%al
858: adcl advancetable+4(,%ecx,4),%esi
859: addl tstep,%edx
860: LEntry13_16:
861: sbbl %ecx,%ecx
862: movb %al,4(%edi)
863: addl %ebp,%ebx
864: movb (%esi),%al
865: adcl advancetable+4(,%ecx,4),%esi
866: addl tstep,%edx
867: LEntry12_16:
868: sbbl %ecx,%ecx
869: movb %al,5(%edi)
870: addl %ebp,%ebx
871: movb (%esi),%al
872: adcl advancetable+4(,%ecx,4),%esi
873: addl tstep,%edx
874: LEntry11_16:
875: sbbl %ecx,%ecx
876: movb %al,6(%edi)
877: addl %ebp,%ebx
878: movb (%esi),%al
879: adcl advancetable+4(,%ecx,4),%esi
880: addl tstep,%edx
881: LEntry10_16:
882: sbbl %ecx,%ecx
883: movb %al,7(%edi)
884: addl %ebp,%ebx
885: movb (%esi),%al
886: adcl advancetable+4(,%ecx,4),%esi
887: addl tstep,%edx
888: LEntry9_16:
889: sbbl %ecx,%ecx
890: movb %al,8(%edi)
891: addl %ebp,%ebx
892: movb (%esi),%al
893: adcl advancetable+4(,%ecx,4),%esi
894: addl tstep,%edx
895: LEntry8_16:
896: sbbl %ecx,%ecx
897: movb %al,9(%edi)
898: addl %ebp,%ebx
899: movb (%esi),%al
900: adcl advancetable+4(,%ecx,4),%esi
901: addl tstep,%edx
902: LEntry7_16:
903: sbbl %ecx,%ecx
904: movb %al,10(%edi)
905: addl %ebp,%ebx
906: movb (%esi),%al
907: adcl advancetable+4(,%ecx,4),%esi
908: addl tstep,%edx
909: LEntry6_16:
910: sbbl %ecx,%ecx
911: movb %al,11(%edi)
912: addl %ebp,%ebx
913: movb (%esi),%al
914: adcl advancetable+4(,%ecx,4),%esi
915: addl tstep,%edx
916: LEntry5_16:
917: sbbl %ecx,%ecx
918: movb %al,12(%edi)
919: addl %ebp,%ebx
920: movb (%esi),%al
921: adcl advancetable+4(,%ecx,4),%esi
922: addl tstep,%edx
923: LEntry4_16:
924: sbbl %ecx,%ecx
925: movb %al,13(%edi)
926: addl %ebp,%ebx
927: movb (%esi),%al
928: adcl advancetable+4(,%ecx,4),%esi
// the last two join points need no further stepping: store offset 14 and
// load the final texel
929: LEntry3_16:
930: movb %al,14(%edi)
931: movb (%esi),%al
932: LEntry2_16:
933:
// End of the span. On entry %al holds the span's last texel and %edi is
// biased so that 15(%edi) is the span's last pixel. The x87 stack holds
// exactly 1/z | t/z | s/z on every path that reaches this label.
934: LEndSpan:
935:
936: //
937: // clear s/z, t/z, 1/z from FP stack
938: //
939: fstp %st(0)
940: fstp %st(0)
941: fstp %st(0)
942:
943: movl pspantemp,%ebx // restore spans pointer
944: movl espan_t_pnext(%ebx),%ebx // point to next span
// the final pixel store sits between the testl and the jnz; movb does not
// modify the flags
945: testl %ebx,%ebx // any more spans?
946: movb %al,15(%edi)
947: jnz LSpanLoop // more spans
948:
// registers are popped in the reverse of the order they were pushed at entry
949: popl %ebx // restore register variables
950: popl %esi
951: popl %edi
952: popl %ebp // restore the caller's stack frame
953: ret
954:
955: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.