|
|
1.1 root 1: /*
2: Copyright (C) 1996-1997 Id Software, Inc.
3:
4: This program is free software; you can redistribute it and/or
5: modify it under the terms of the GNU General Public License
6: as published by the Free Software Foundation; either version 2
7: of the License, or (at your option) any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12:
13: See the GNU General Public License for more details.
14:
15: You should have received a copy of the GNU General Public License
16: along with this program; if not, write to the Free Software
17: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18:
19: */
20: //
21: // d_draw16.s
22: // x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
23: // subdivision.
24: //
25:
26: #include "asm_i386.h"
27: #include "quakeasm.h"
28: #include "asm_draw.h"
29: #include "d_ifacea.h"
30:
31: #if id386
32:
33: //----------------------------------------------------------------------
34: // 8-bpp horizontal span drawing code for polygons, with no transparency and
35: // 16-pixel subdivision.
36: //
37: // Assumes there is at least one span in pspans, and that every span
38: // contains at least one pixel
39: //----------------------------------------------------------------------
40:
41: .data
42:
43: .text
44:
45: // out-of-line, rarely-needed clamping code
46:
47: LClampHigh0:
48: movl C(bbextents),%esi
49: jmp LClampReentry0
50: LClampHighOrLow0:
51: jg LClampHigh0
52: xorl %esi,%esi
53: jmp LClampReentry0
54:
55: LClampHigh1:
56: movl C(bbextentt),%edx
57: jmp LClampReentry1
58: LClampHighOrLow1:
59: jg LClampHigh1
60: xorl %edx,%edx
61: jmp LClampReentry1
62:
63: LClampLow2:
64: movl $4096,%ebp
65: jmp LClampReentry2
66: LClampHigh2:
67: movl C(bbextents),%ebp
68: jmp LClampReentry2
69:
70: LClampLow3:
71: movl $4096,%ecx
72: jmp LClampReentry3
73: LClampHigh3:
74: movl C(bbextentt),%ecx
75: jmp LClampReentry3
76:
77: LClampLow4:
78: movl $4096,%eax
79: jmp LClampReentry4
80: LClampHigh4:
81: movl C(bbextents),%eax
82: jmp LClampReentry4
83:
84: LClampLow5:
85: movl $4096,%ebx
86: jmp LClampReentry5
87: LClampHigh5:
88: movl C(bbextentt),%ebx
89: jmp LClampReentry5
90:
91:
92: #define pspans 4+16
93:
94: .align 4
95: .globl C(D_DrawSpans16)
96: C(D_DrawSpans16):
97: pushl %ebp // preserve caller's stack frame
98: pushl %edi
99: pushl %esi // preserve register variables
100: pushl %ebx
101:
102: //
103: // set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
104: // and span list pointers
105: //
106: // TODO: any overlap from rearranging?
107: flds C(d_sdivzstepu)
108: fmuls fp_16
109: movl C(cacheblock),%edx
110: flds C(d_tdivzstepu)
111: fmuls fp_16
112: movl pspans(%esp),%ebx // point to the first span descriptor
113: flds C(d_zistepu)
114: fmuls fp_16
115: movl %edx,pbase // pbase = cacheblock
116: fstps zi16stepu
117: fstps tdivz16stepu
118: fstps sdivz16stepu
119:
120: LSpanLoop:
121: //
122: // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
123: // initial s and t values
124: //
125: // FIXME: pipeline FILD?
126: fildl espan_t_v(%ebx)
127: fildl espan_t_u(%ebx)
128:
129: fld %st(1) // dv | du | dv
130: fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
131: fld %st(1) // du | dv*d_sdivzstepv | du | dv
132: fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
133: fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
134: fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
135: // dv*d_sdivzstepv | du | dv
136: fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
137: // dv*d_sdivzstepv | du | dv
138: faddp %st(0),%st(2) // du*d_tdivzstepu |
139: // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
140: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
141: // du*d_tdivzstepu | du | dv
142: fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
143: // du*d_tdivzstepu | du | dv
144: fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
145: // du*d_sdivzstepu + dv*d_sdivzstepv |
146: // du*d_tdivzstepu | du | dv
147: fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
148: // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
149: fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
150: // du*d_sdivzstepu; stays in %st(2) at end
151: fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
152: // s/z
153: fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
154: // du*d_tdivzstepu | du | s/z
155: fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
156: // du*d_tdivzstepu | du | s/z
157: faddp %st(0),%st(2) // dv*d_zistepv |
158: // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
159: fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
160: // dv*d_zistepv | s/z
161: fmuls C(d_zistepu) // du*d_zistepu |
162: // dv*d_tdivzstepv + du*d_tdivzstepu |
163: // dv*d_zistepv | s/z
164: fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
165: // du*d_zistepu | dv*d_zistepv | s/z
166: fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
167: // du*d_tdivzstepu; stays in %st(1) at end
168: fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
169: faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
170:
171: flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
172: fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
173: fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
174: // du*d_zistepu; stays in %st(0) at end
175: // 1/z | fp_64k | t/z | s/z
176: //
177: // calculate and clamp s & t
178: //
179: fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
180:
181: //
182: // point %edi to the first pixel in the span
183: //
184: movl C(d_viewbuffer),%ecx
185: movl espan_t_v(%ebx),%eax
186: movl %ebx,pspantemp // preserve spans pointer
187:
188: movl C(tadjust),%edx
189: movl C(sadjust),%esi
190: movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
191: addl %ecx,%edi
192: movl espan_t_u(%ebx),%ecx
193: addl %ecx,%edi // pdest = &pdestspan[scans->u];
194: movl espan_t_count(%ebx),%ecx
195:
196: //
197: // now start the FDIV for the end of the span
198: //
199: cmpl $16,%ecx
200: ja LSetupNotLast1
201:
202: decl %ecx
203: jz LCleanup1 // if only one pixel, no need to start an FDIV
204: movl %ecx,spancountminus1
205:
206: // finish up the s and t calcs
207: fxch %st(1) // z*64k | 1/z | t/z | s/z
208:
209: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
210: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
211: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
212: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
213: fxch %st(1) // s | t | 1/z | t/z | s/z
214: fistpl s // 1/z | t | t/z | s/z
215: fistpl t // 1/z | t/z | s/z
216:
217: fildl spancountminus1
218:
219: flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
220: flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
221: fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
222: fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
223: fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
224: fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
225: fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
226: // C(d_tdivzstepu)*scm1
227: fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
228: // C(d_tdivzstepu)*scm1
229: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
230: fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
231: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
232: faddp %st(0),%st(3)
233:
234: flds fp_64k
235: fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
236: // overlap
237: jmp LFDIVInFlight1
238:
239: LCleanup1:
240: // finish up the s and t calcs
241: fxch %st(1) // z*64k | 1/z | t/z | s/z
242:
243: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
244: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
245: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
246: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
247: fxch %st(1) // s | t | 1/z | t/z | s/z
248: fistpl s // 1/z | t | t/z | s/z
249: fistpl t // 1/z | t/z | s/z
250: jmp LFDIVInFlight1
251:
252: .align 4
253: LSetupNotLast1:
254: // finish up the s and t calcs
255: fxch %st(1) // z*64k | 1/z | t/z | s/z
256:
257: fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
258: fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
259: fxch %st(1) // z*64k | s | 1/z | t/z | s/z
260: fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
261: fxch %st(1) // s | t | 1/z | t/z | s/z
262: fistpl s // 1/z | t | t/z | s/z
263: fistpl t // 1/z | t/z | s/z
264:
265: fadds zi16stepu
266: fxch %st(2)
267: fadds sdivz16stepu
268: fxch %st(2)
269: flds tdivz16stepu
270: faddp %st(0),%st(2)
271: flds fp_64k
272: fdiv %st(1),%st(0) // z = 1/1/z
273: // this is what we've gone to all this trouble to
274: // overlap
275: LFDIVInFlight1:
276:
277: addl s,%esi
278: addl t,%edx
279: movl C(bbextents),%ebx
280: movl C(bbextentt),%ebp
281: cmpl %ebx,%esi
282: ja LClampHighOrLow0
283: LClampReentry0:
284: movl %esi,s
285: movl pbase,%ebx
286: shll $16,%esi
287: cmpl %ebp,%edx
288: movl %esi,sfracf
289: ja LClampHighOrLow1
290: LClampReentry1:
291: movl %edx,t
292: movl s,%esi // sfrac = scans->sfrac;
293: shll $16,%edx
294: movl t,%eax // tfrac = scans->tfrac;
295: sarl $16,%esi
296: movl %edx,tfracf
297:
298: //
299: // calculate the texture starting address
300: //
301: sarl $16,%eax
302: movl C(cachewidth),%edx
303: imull %edx,%eax // (tfrac >> 16) * cachewidth
304: addl %ebx,%esi
305: addl %eax,%esi // psource = pbase + (sfrac >> 16) +
306: // ((tfrac >> 16) * cachewidth);
307: //
308: // determine whether last span or not
309: //
310: cmpl $16,%ecx
311: jna LLastSegment
312:
313: //
314: // not the last segment; do full 16-wide segment
315: //
316: LNotLastSegment:
317:
318: //
319: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
320: // get there
321: //
322:
323: // pick up after the FDIV that was left in flight previously
324:
325: fld %st(0) // duplicate it
326: fmul %st(4),%st(0) // s = s/z * z
327: fxch %st(1)
328: fmul %st(3),%st(0) // t = t/z * z
329: fxch %st(1)
330: fistpl snext
331: fistpl tnext
332: movl snext,%eax
333: movl tnext,%edx
334:
335: movb (%esi),%bl // get first source texel
336: subl $16,%ecx // count off this segments' pixels
337: movl C(sadjust),%ebp
338: movl %ecx,counttemp // remember count of remaining pixels
339:
340: movl C(tadjust),%ecx
341: movb %bl,(%edi) // store first dest pixel
342:
343: addl %eax,%ebp
344: addl %edx,%ecx
345:
346: movl C(bbextents),%eax
347: movl C(bbextentt),%edx
348:
349: cmpl $4096,%ebp
350: jl LClampLow2
351: cmpl %eax,%ebp
352: ja LClampHigh2
353: LClampReentry2:
354:
355: cmpl $4096,%ecx
356: jl LClampLow3
357: cmpl %edx,%ecx
358: ja LClampHigh3
359: LClampReentry3:
360:
361: movl %ebp,snext
362: movl %ecx,tnext
363:
364: subl s,%ebp
365: subl t,%ecx
366:
367: //
368: // set up advancetable
369: //
370: movl %ecx,%eax
371: movl %ebp,%edx
372: sarl $20,%eax // tstep >>= 16;
373: jz LZero
374: sarl $20,%edx // sstep >>= 16;
375: movl C(cachewidth),%ebx
376: imull %ebx,%eax
377: jmp LSetUp1
378:
379: LZero:
380: sarl $20,%edx // sstep >>= 16;
381: movl C(cachewidth),%ebx
382:
383: LSetUp1:
384:
385: addl %edx,%eax // add in sstep
386: // (tstep >> 16) * cachewidth + (sstep >> 16);
387: movl tfracf,%edx
388: movl %eax,advancetable+4 // advance base in t
389: addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
390: // (sstep >> 16);
391: shll $12,%ebp // left-justify sstep fractional part
392: movl sfracf,%ebx
393: shll $12,%ecx // left-justify tstep fractional part
394: movl %eax,advancetable // advance extra in t
395:
396: movl %ecx,tstep
397: addl %ecx,%edx // advance tfrac fractional part by tstep frac
398:
399: sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
400: addl %ebp,%ebx // advance sfrac fractional part by sstep frac
401: adcl advancetable+4(,%ecx,4),%esi // point to next source texel
402:
403: addl tstep,%edx
404: sbbl %ecx,%ecx
405: movb (%esi),%al
406: addl %ebp,%ebx
407: movb %al,1(%edi)
408: adcl advancetable+4(,%ecx,4),%esi
409:
410: addl tstep,%edx
411: sbbl %ecx,%ecx
412: addl %ebp,%ebx
413: movb (%esi),%al
414: adcl advancetable+4(,%ecx,4),%esi
415:
416: addl tstep,%edx
417: sbbl %ecx,%ecx
418: movb %al,2(%edi)
419: addl %ebp,%ebx
420: movb (%esi),%al
421: adcl advancetable+4(,%ecx,4),%esi
422:
423: addl tstep,%edx
424: sbbl %ecx,%ecx
425: movb %al,3(%edi)
426: addl %ebp,%ebx
427: movb (%esi),%al
428: adcl advancetable+4(,%ecx,4),%esi
429:
430: addl tstep,%edx
431: sbbl %ecx,%ecx
432: movb %al,4(%edi)
433: addl %ebp,%ebx
434: movb (%esi),%al
435: adcl advancetable+4(,%ecx,4),%esi
436:
437: addl tstep,%edx
438: sbbl %ecx,%ecx
439: movb %al,5(%edi)
440: addl %ebp,%ebx
441: movb (%esi),%al
442: adcl advancetable+4(,%ecx,4),%esi
443:
444: addl tstep,%edx
445: sbbl %ecx,%ecx
446: movb %al,6(%edi)
447: addl %ebp,%ebx
448: movb (%esi),%al
449: adcl advancetable+4(,%ecx,4),%esi
450:
451: addl tstep,%edx
452: sbbl %ecx,%ecx
453: movb %al,7(%edi)
454: addl %ebp,%ebx
455: movb (%esi),%al
456: adcl advancetable+4(,%ecx,4),%esi
457:
458:
459: //
460: // start FDIV for end of next segment in flight, so it can overlap
461: //
462: movl counttemp,%ecx
463: cmpl $16,%ecx // more than one segment after this?
464: ja LSetupNotLast2 // yes
465:
466: decl %ecx
467: jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
468: movl %ecx,spancountminus1
469: fildl spancountminus1
470:
471: flds C(d_zistepu) // C(d_zistepu) | spancountminus1
472: fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
473: flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
474: fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
475: fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
476: faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
477: fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
478: fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
479: fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
480: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
481: flds fp_64k // 64k | C(d_sdivzstepu)*scm1
482: fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
483: faddp %st(0),%st(4) // 64k
484:
485: fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
486: // overlap
487: jmp LFDIVInFlight2
488:
489: .align 4
490: LSetupNotLast2:
491: fadds zi16stepu
492: fxch %st(2)
493: fadds sdivz16stepu
494: fxch %st(2)
495: flds tdivz16stepu
496: faddp %st(0),%st(2)
497: flds fp_64k
498: fdiv %st(1),%st(0) // z = 1/1/z
499: // this is what we've gone to all this trouble to
500: // overlap
501: LFDIVInFlight2:
502: movl %ecx,counttemp
503:
504: addl tstep,%edx
505: sbbl %ecx,%ecx
506: movb %al,8(%edi)
507: addl %ebp,%ebx
508: movb (%esi),%al
509: adcl advancetable+4(,%ecx,4),%esi
510:
511: addl tstep,%edx
512: sbbl %ecx,%ecx
513: movb %al,9(%edi)
514: addl %ebp,%ebx
515: movb (%esi),%al
516: adcl advancetable+4(,%ecx,4),%esi
517:
518: addl tstep,%edx
519: sbbl %ecx,%ecx
520: movb %al,10(%edi)
521: addl %ebp,%ebx
522: movb (%esi),%al
523: adcl advancetable+4(,%ecx,4),%esi
524:
525: addl tstep,%edx
526: sbbl %ecx,%ecx
527: movb %al,11(%edi)
528: addl %ebp,%ebx
529: movb (%esi),%al
530: adcl advancetable+4(,%ecx,4),%esi
531:
532: addl tstep,%edx
533: sbbl %ecx,%ecx
534: movb %al,12(%edi)
535: addl %ebp,%ebx
536: movb (%esi),%al
537: adcl advancetable+4(,%ecx,4),%esi
538:
539: addl tstep,%edx
540: sbbl %ecx,%ecx
541: movb %al,13(%edi)
542: addl %ebp,%ebx
543: movb (%esi),%al
544: adcl advancetable+4(,%ecx,4),%esi
545:
546: addl tstep,%edx
547: sbbl %ecx,%ecx
548: movb %al,14(%edi)
549: addl %ebp,%ebx
550: movb (%esi),%al
551: adcl advancetable+4(,%ecx,4),%esi
552:
553: addl $16,%edi
554: movl %edx,tfracf
555: movl snext,%edx
556: movl %ebx,sfracf
557: movl tnext,%ebx
558: movl %edx,s
559: movl %ebx,t
560:
561: movl counttemp,%ecx // retrieve count
562:
563: //
564: // determine whether last span or not
565: //
566: cmpl $16,%ecx // are there multiple segments remaining?
567: movb %al,-1(%edi)
568: ja LNotLastSegment // yes
569:
570: //
571: // last segment of scan
572: //
573: LLastSegment:
574:
575: //
576: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
577: // get there. The number of pixels left is variable, and we want to land on the
578: // last pixel, not step one past it, so we can't run into arithmetic problems
579: //
580: testl %ecx,%ecx
581: jz LNoSteps // just draw the last pixel and we're done
582:
583: // pick up after the FDIV that was left in flight previously
584:
585:
586: fld %st(0) // duplicate it
587: fmul %st(4),%st(0) // s = s/z * z
588: fxch %st(1)
589: fmul %st(3),%st(0) // t = t/z * z
590: fxch %st(1)
591: fistpl snext
592: fistpl tnext
593:
594: movb (%esi),%al // load first texel in segment
595: movl C(tadjust),%ebx
596: movb %al,(%edi) // store first pixel in segment
597: movl C(sadjust),%eax
598:
599: addl snext,%eax
600: addl tnext,%ebx
601:
602: movl C(bbextents),%ebp
603: movl C(bbextentt),%edx
604:
605: cmpl $4096,%eax
606: jl LClampLow4
607: cmpl %ebp,%eax
608: ja LClampHigh4
609: LClampReentry4:
610: movl %eax,snext
611:
612: cmpl $4096,%ebx
613: jl LClampLow5
614: cmpl %edx,%ebx
615: ja LClampHigh5
616: LClampReentry5:
617:
618: cmpl $1,%ecx // don't bother
619: je LOnlyOneStep // if two pixels in segment, there's only one step,
620: // of the segment length
621: subl s,%eax
622: subl t,%ebx
623:
624: addl %eax,%eax // convert to 15.17 format so multiply by 1.31
625: addl %ebx,%ebx // reciprocal yields 16.48
626:
627: imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) /
628: // (spancount-1)
629: movl %edx,%ebp
630:
631: movl %ebx,%eax
632: imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) /
633: // (spancount-1)
634: LSetEntryvec:
635: //
636: // set up advancetable
637: //
638: movl entryvec_table_16(,%ecx,4),%ebx
639: movl %edx,%eax
640: movl %ebx,jumptemp // entry point into code for RET later
641: movl %ebp,%ecx
642: sarl $16,%edx // tstep >>= 16;
643: movl C(cachewidth),%ebx
644: sarl $16,%ecx // sstep >>= 16;
645: imull %ebx,%edx
646:
647: addl %ecx,%edx // add in sstep
648: // (tstep >> 16) * cachewidth + (sstep >> 16);
649: movl tfracf,%ecx
650: movl %edx,advancetable+4 // advance base in t
651: addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
652: // (sstep >> 16);
653: shll $16,%ebp // left-justify sstep fractional part
654: movl sfracf,%ebx
655: shll $16,%eax // left-justify tstep fractional part
656: movl %edx,advancetable // advance extra in t
657:
658: movl %eax,tstep
659: movl %ecx,%edx
660: addl %eax,%edx
661: sbbl %ecx,%ecx
662: addl %ebp,%ebx
663: adcl advancetable+4(,%ecx,4),%esi
664:
665: jmp *jumptemp // jump to the number-of-pixels handler
666:
667: //----------------------------------------
668:
669: LNoSteps:
670: movb (%esi),%al // load first texel in segment
671: subl $15,%edi // adjust for hardwired offset
672: jmp LEndSpan
673:
674:
675: LOnlyOneStep:
676: subl s,%eax
677: subl t,%ebx
678: movl %eax,%ebp
679: movl %ebx,%edx
680: jmp LSetEntryvec
681:
682: //----------------------------------------
683:
684: .globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
685: .globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
686: .globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
687: .globl Entry14_16, Entry15_16, Entry16_16
688:
689: Entry2_16:
690: subl $14,%edi // adjust for hardwired offsets
691: movb (%esi),%al
692: jmp LEntry2_16
693:
694: //----------------------------------------
695:
696: Entry3_16:
697: subl $13,%edi // adjust for hardwired offsets
698: addl %eax,%edx
699: movb (%esi),%al
700: sbbl %ecx,%ecx
701: addl %ebp,%ebx
702: adcl advancetable+4(,%ecx,4),%esi
703: jmp LEntry3_16
704:
705: //----------------------------------------
706:
707: Entry4_16:
708: subl $12,%edi // adjust for hardwired offsets
709: addl %eax,%edx
710: movb (%esi),%al
711: sbbl %ecx,%ecx
712: addl %ebp,%ebx
713: adcl advancetable+4(,%ecx,4),%esi
714: addl tstep,%edx
715: jmp LEntry4_16
716:
717: //----------------------------------------
718:
719: Entry5_16:
720: subl $11,%edi // adjust for hardwired offsets
721: addl %eax,%edx
722: movb (%esi),%al
723: sbbl %ecx,%ecx
724: addl %ebp,%ebx
725: adcl advancetable+4(,%ecx,4),%esi
726: addl tstep,%edx
727: jmp LEntry5_16
728:
729: //----------------------------------------
730:
731: Entry6_16:
732: subl $10,%edi // adjust for hardwired offsets
733: addl %eax,%edx
734: movb (%esi),%al
735: sbbl %ecx,%ecx
736: addl %ebp,%ebx
737: adcl advancetable+4(,%ecx,4),%esi
738: addl tstep,%edx
739: jmp LEntry6_16
740:
741: //----------------------------------------
742:
743: Entry7_16:
744: subl $9,%edi // adjust for hardwired offsets
745: addl %eax,%edx
746: movb (%esi),%al
747: sbbl %ecx,%ecx
748: addl %ebp,%ebx
749: adcl advancetable+4(,%ecx,4),%esi
750: addl tstep,%edx
751: jmp LEntry7_16
752:
753: //----------------------------------------
754:
755: Entry8_16:
756: subl $8,%edi // adjust for hardwired offsets
757: addl %eax,%edx
758: movb (%esi),%al
759: sbbl %ecx,%ecx
760: addl %ebp,%ebx
761: adcl advancetable+4(,%ecx,4),%esi
762: addl tstep,%edx
763: jmp LEntry8_16
764:
765: //----------------------------------------
766:
767: Entry9_16:
768: subl $7,%edi // adjust for hardwired offsets
769: addl %eax,%edx
770: movb (%esi),%al
771: sbbl %ecx,%ecx
772: addl %ebp,%ebx
773: adcl advancetable+4(,%ecx,4),%esi
774: addl tstep,%edx
775: jmp LEntry9_16
776:
777: //----------------------------------------
778:
779: Entry10_16:
780: subl $6,%edi // adjust for hardwired offsets
781: addl %eax,%edx
782: movb (%esi),%al
783: sbbl %ecx,%ecx
784: addl %ebp,%ebx
785: adcl advancetable+4(,%ecx,4),%esi
786: addl tstep,%edx
787: jmp LEntry10_16
788:
789: //----------------------------------------
790:
791: Entry11_16:
792: subl $5,%edi // adjust for hardwired offsets
793: addl %eax,%edx
794: movb (%esi),%al
795: sbbl %ecx,%ecx
796: addl %ebp,%ebx
797: adcl advancetable+4(,%ecx,4),%esi
798: addl tstep,%edx
799: jmp LEntry11_16
800:
801: //----------------------------------------
802:
803: Entry12_16:
804: subl $4,%edi // adjust for hardwired offsets
805: addl %eax,%edx
806: movb (%esi),%al
807: sbbl %ecx,%ecx
808: addl %ebp,%ebx
809: adcl advancetable+4(,%ecx,4),%esi
810: addl tstep,%edx
811: jmp LEntry12_16
812:
813: //----------------------------------------
814:
815: Entry13_16:
816: subl $3,%edi // adjust for hardwired offsets
817: addl %eax,%edx
818: movb (%esi),%al
819: sbbl %ecx,%ecx
820: addl %ebp,%ebx
821: adcl advancetable+4(,%ecx,4),%esi
822: addl tstep,%edx
823: jmp LEntry13_16
824:
825: //----------------------------------------
826:
827: Entry14_16:
828: subl $2,%edi // adjust for hardwired offsets
829: addl %eax,%edx
830: movb (%esi),%al
831: sbbl %ecx,%ecx
832: addl %ebp,%ebx
833: adcl advancetable+4(,%ecx,4),%esi
834: addl tstep,%edx
835: jmp LEntry14_16
836:
837: //----------------------------------------
838:
839: Entry15_16:
840: decl %edi // adjust for hardwired offsets
841: addl %eax,%edx
842: movb (%esi),%al
843: sbbl %ecx,%ecx
844: addl %ebp,%ebx
845: adcl advancetable+4(,%ecx,4),%esi
846: addl tstep,%edx
847: jmp LEntry15_16
848:
849: //----------------------------------------
850:
851: Entry16_16:
852: addl %eax,%edx
853: movb (%esi),%al
854: sbbl %ecx,%ecx
855: addl %ebp,%ebx
856: adcl advancetable+4(,%ecx,4),%esi
857:
858: addl tstep,%edx
859: sbbl %ecx,%ecx
860: movb %al,1(%edi)
861: addl %ebp,%ebx
862: movb (%esi),%al
863: adcl advancetable+4(,%ecx,4),%esi
864: addl tstep,%edx
865: LEntry15_16:
866: sbbl %ecx,%ecx
867: movb %al,2(%edi)
868: addl %ebp,%ebx
869: movb (%esi),%al
870: adcl advancetable+4(,%ecx,4),%esi
871: addl tstep,%edx
872: LEntry14_16:
873: sbbl %ecx,%ecx
874: movb %al,3(%edi)
875: addl %ebp,%ebx
876: movb (%esi),%al
877: adcl advancetable+4(,%ecx,4),%esi
878: addl tstep,%edx
879: LEntry13_16:
880: sbbl %ecx,%ecx
881: movb %al,4(%edi)
882: addl %ebp,%ebx
883: movb (%esi),%al
884: adcl advancetable+4(,%ecx,4),%esi
885: addl tstep,%edx
886: LEntry12_16:
887: sbbl %ecx,%ecx
888: movb %al,5(%edi)
889: addl %ebp,%ebx
890: movb (%esi),%al
891: adcl advancetable+4(,%ecx,4),%esi
892: addl tstep,%edx
893: LEntry11_16:
894: sbbl %ecx,%ecx
895: movb %al,6(%edi)
896: addl %ebp,%ebx
897: movb (%esi),%al
898: adcl advancetable+4(,%ecx,4),%esi
899: addl tstep,%edx
900: LEntry10_16:
901: sbbl %ecx,%ecx
902: movb %al,7(%edi)
903: addl %ebp,%ebx
904: movb (%esi),%al
905: adcl advancetable+4(,%ecx,4),%esi
906: addl tstep,%edx
907: LEntry9_16:
908: sbbl %ecx,%ecx
909: movb %al,8(%edi)
910: addl %ebp,%ebx
911: movb (%esi),%al
912: adcl advancetable+4(,%ecx,4),%esi
913: addl tstep,%edx
914: LEntry8_16:
915: sbbl %ecx,%ecx
916: movb %al,9(%edi)
917: addl %ebp,%ebx
918: movb (%esi),%al
919: adcl advancetable+4(,%ecx,4),%esi
920: addl tstep,%edx
921: LEntry7_16:
922: sbbl %ecx,%ecx
923: movb %al,10(%edi)
924: addl %ebp,%ebx
925: movb (%esi),%al
926: adcl advancetable+4(,%ecx,4),%esi
927: addl tstep,%edx
928: LEntry6_16:
929: sbbl %ecx,%ecx
930: movb %al,11(%edi)
931: addl %ebp,%ebx
932: movb (%esi),%al
933: adcl advancetable+4(,%ecx,4),%esi
934: addl tstep,%edx
935: LEntry5_16:
936: sbbl %ecx,%ecx
937: movb %al,12(%edi)
938: addl %ebp,%ebx
939: movb (%esi),%al
940: adcl advancetable+4(,%ecx,4),%esi
941: addl tstep,%edx
942: LEntry4_16:
943: sbbl %ecx,%ecx
944: movb %al,13(%edi)
945: addl %ebp,%ebx
946: movb (%esi),%al
947: adcl advancetable+4(,%ecx,4),%esi
948: LEntry3_16:
949: movb %al,14(%edi)
950: movb (%esi),%al
951: LEntry2_16:
952:
953: LEndSpan:
954:
955: //
956: // clear s/z, t/z, 1/z from FP stack
957: //
958: fstp %st(0)
959: fstp %st(0)
960: fstp %st(0)
961:
962: movl pspantemp,%ebx // restore spans pointer
963: movl espan_t_pnext(%ebx),%ebx // point to next span
964: testl %ebx,%ebx // any more spans?
965: movb %al,15(%edi)
966: jnz LSpanLoop // more spans
967:
968: popl %ebx // restore register variables
969: popl %esi
970: popl %edi
971: popl %ebp // restore the caller's stack frame
972: ret
973:
974: #endif // id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.