|
|
/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// d_draw16.s
// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
// subdivision.
//
// Syntax: GNU as, AT&T operand order (source, destination), run through the
// C preprocessor.  32-bit x86 with x87 floating point.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with no transparency and
// 16-pixel subdivision.
//
// Assumes there is at least one span in pspans, and that every span
// contains at least one pixel
//----------------------------------------------------------------------

	.data

	.text

// out-of-line, rarely-needed clamping code
//
// Each stub loads the clamped value into the register being tested and jumps
// straight back to the matching LClampReentryN label in the main code.
//
// Stubs 0 and 1 are reached with "ja" after an unsigned compare against the
// extent; the "jg" here re-tests the same flags as a signed compare, so a
// value that is above-unsigned but not greater-signed (i.e. negative) is
// clamped to 0, and anything else is clamped to the extent.

LClampHigh0:
	movl	C(bbextents),%esi
	jmp	LClampReentry0
LClampHighOrLow0:
	jg	LClampHigh0
	xorl	%esi,%esi
	jmp	LClampReentry0

LClampHigh1:
	movl	C(bbextentt),%edx
	jmp	LClampReentry1
LClampHighOrLow1:
	jg	LClampHigh1
	xorl	%edx,%edx
	jmp	LClampReentry1

// Stubs 2-5 clamp the end-of-segment s/t values to the range
// [4096, bbextents/bbextentt].

LClampLow2:
	movl	$4096,%ebp
	jmp	LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp	LClampReentry2

LClampLow3:
	movl	$4096,%ecx
	jmp	LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp	LClampReentry3

LClampLow4:
	movl	$4096,%eax
	jmp	LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp	LClampReentry4

LClampLow5:
	movl	$4096,%ebx
	jmp	LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp	LClampReentry5


//----------------------------------------------------------------------
// D_DrawSpans16
//
// Presumably callable from C as  void D_DrawSpans16 (espan_t *pspans);
// the prototype is not visible in this file -- confirm against the C side.
//
// In:     one stack argument, the pointer to the first span descriptor.
//         It is read at pspans(%esp) once the four registers below have
//         been pushed (4 bytes of return address + 16 bytes of pushes).
//         The list is walked through espan_t_pnext until a zero link.
// Saved:  %ebp, %edi, %esi, %ebx are pushed on entry and popped before ret.
// Scratch: %eax, %ecx, %edx, flags.
// FPU:    the x87 stack holds 1/z | t/z | s/z (plus a divide result that is
//         still in flight) while a span is drawn; the three values are
//         popped at LEndSpan before the next span or the return.
//
// Inside the unrolled pixel code the registers are used as follows:
//   %esi = pointer to the current source texel
//   %edi = pointer to the destination pixels of the current segment
//   %edx = fractional part of t, %ebx = fractional part of s
//   %ebp = fractional part of the s step (the t step fraction is in tstep)
//   %ecx = 0 or -1, the carry out of the t fraction (see advancetable use)
//   %al  = texel waiting to be stored
//----------------------------------------------------------------------

#define pspans	4+16

	.align 4
.globl C(D_DrawSpans16)
C(D_DrawSpans16):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

//
// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
// and span list pointers
//
// TODO: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_16
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_16
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_16
	movl	%edx,pbase			// pbase = cacheblock
	fstps	zi16stepu
	fstps	tdivz16stepu
	fstps	sdivz16stepu

LSpanLoop:
//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	espan_t_v(%ebx)
	fildl	espan_t_u(%ebx)

	fld	%st(1)			// dv | du | dv
	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
	fld	%st(1)			// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld	%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
						//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
						//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
						//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
						//  du*d_tdivzstepu | du | dv
	fld	%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
						//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
						//  du*d_sdivzstepu + dv*d_sdivzstepv |
						//  du*d_tdivzstepu | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
						//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
						//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
						//  s/z
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
						//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)			// dv*d_tdivzstepv | dv*d_zistepv |
						//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)	// dv*d_zistepv |
						//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
						//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)		// du*d_zistepu |
						//  dv*d_tdivzstepv + du*d_tdivzstepu |
						//  dv*d_zistepv | s/z
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
						//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
						//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
						//  du*d_zistepu; stays in %st(0) at end
						// 1/z | fp_64k | t/z | s/z
//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z

//
// point %edi to the first pixel in the span
// (integer work from here on overlaps the divide started above)
//
	movl	C(d_viewbuffer),%ecx
	movl	espan_t_v(%ebx),%eax
	movl	%ebx,pspantemp	// preserve spans pointer

	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ecx,%edi
	movl	espan_t_u(%ebx),%ecx
	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
	movl	espan_t_count(%ebx),%ecx

//
// now start the FDIV for the end of the span
//
	cmpl	$16,%ecx
	ja	LSetupNotLast1

	decl	%ecx
	jz	LCleanup1		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

	fildl	spancountminus1

	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
						//  C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
						//  C(d_tdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)

	flds	fp_64k
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
						//  overlap
	jmp	LFDIVInFlight1

LCleanup1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z
	jmp	LFDIVInFlight1

	.align	4
LSetupNotLast1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

// advance 1/z, s/z and t/z by a full 16-pixel step
	fadds	zi16stepu
	fxch	%st(2)
	fadds	sdivz16stepu
	fxch	%st(2)
	flds	tdivz16stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
						// this is what we've gone to all this trouble to
						//  overlap
LFDIVInFlight1:

// add the adjustments to the starting s and t and clamp them; a value
// above the extent (unsigned compare) goes to the out-of-line stubs
	addl	s,%esi
	addl	t,%edx
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja	LClampHighOrLow0
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf		// (movl does not disturb the flags from cmpl)
	ja	LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi					// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax					// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf

//
// calculate the texture starting address
//
	sarl	$16,%eax
	movl	C(cachewidth),%edx
	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
	addl	%ebx,%esi
	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
								//           ((tfrac >> 16) * cachewidth);
//
// determine whether last segment or not
//
	cmpl	$16,%ecx
	jna	LLastSegment

//
// not the last segment; do full 16-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld	%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext
	movl	snext,%eax
	movl	tnext,%edx

	movb	(%esi),%bl		// get first source texel
	subl	$16,%ecx		// count off this segment's pixels
	movl	C(sadjust),%ebp
	movl	%ecx,counttemp	// remember count of remaining pixels

	movl	C(tadjust),%ecx
	movb	%bl,(%edi)		// store first dest pixel

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$4096,%ebp
	jl	LClampLow2
	cmpl	%eax,%ebp
	ja	LClampHigh2
LClampReentry2:

	cmpl	$4096,%ecx
	jl	LClampLow3
	cmpl	%edx,%ecx
	ja	LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp
	subl	t,%ecx

//
// set up advancetable
//
// %ebp / %ecx hold the s / t change across the whole 16-pixel segment, so
// the per-pixel integer step is the value shifted right by 20 (16 bits to
// drop the fraction and 4 more to divide by 16), and shifting left by 12
// left-justifies the per-pixel fraction.
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$20,%eax			// tstep >>= 16 (and / 16);
	jz	LZero				// integer t step is 0: skip the multiply
	sarl	$20,%edx			// sstep >>= 16 (and / 16);
	movl	C(cachewidth),%ebx
	imull	%ebx,%eax
	jmp	LSetUp1

LZero:
	sarl	$20,%edx			// sstep >>= 16 (and / 16);
	movl	C(cachewidth),%ebx

LSetUp1:

	addl	%edx,%eax			// add in sstep
							// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t
	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
							//  (sstep >> 16);
	shll	$12,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$12,%ecx			// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t

//
// Per-pixel step, repeated below for every pixel:
//   addl tstep,%edx   adds the t fraction; carry = t crossed a texel row
//   sbbl %ecx,%ecx    turns that carry into -1 (or 0)
//   addl %ebp,%ebx    adds the s fraction; carry = s crossed a texel
//   adcl advancetable+4(,%ecx,4),%esi
//                     adds advancetable[0] (extra row) when %ecx is -1, or
//                     advancetable[1] (base) when it is 0, plus the s carry
//
	movl	%ecx,tstep
	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac

	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	(%esi),%al
	addl	%ebp,%ebx
	movb	%al,1(%edi)
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,7(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi


//
// start FDIV for end of next segment in flight, so it can overlap
//
	movl	counttemp,%ecx
	cmpl	$16,%ecx			// more than one segment after this?
	ja	LSetupNotLast2		// yes

	decl	%ecx
	jz	LFDIVInFlight2		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
	faddp	%st(0),%st(4)	// 64k

	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
						//  overlap
	jmp	LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi16stepu
	fxch	%st(2)
	fadds	sdivz16stepu
	fxch	%st(2)
	flds	tdivz16stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
						// this is what we've gone to all this trouble to
						//  overlap
LFDIVInFlight2:
	movl	%ecx,counttemp		// on the last-segment paths this is count-1

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,8(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,9(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,10(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,11(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,12(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,13(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,14(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	$16,%edi
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	counttemp,%ecx		// retrieve count

//
// determine whether last segment or not
//
	cmpl	$16,%ecx			// are there multiple segments remaining?
	movb	%al,-1(%edi)		// 16th pixel; %edi was already advanced
	ja	LNotLastSegment		// yes

//
// last segment of scan
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
// %ecx holds (pixels in this segment - 1) here.
//
	testl	%ecx,%ecx
	jz	LNoSteps		// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld	%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext

	movb	(%esi),%al		// load first texel in segment
	movl	C(tadjust),%ebx
	movb	%al,(%edi)		// store first pixel in segment
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$4096,%eax
	jl	LClampLow4
	cmpl	%ebp,%eax
	ja	LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$4096,%ebx
	jl	LClampLow5
	cmpl	%edx,%ebx
	ja	LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx			// don't bother
	je	LOnlyOneStep	// if two pixels in segment, there's only one step,
						//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx		//  reciprocal yields 16.48

// one-operand imull: %edx:%eax = %eax * table entry; the step is the high
// half in %edx
	imull	reciprocal_table_16-8(,%ecx,4)	// sstep = (snext - s) /
										//  (spancount-1)
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table_16-8(,%ecx,4)	// tstep = (tnext - t) /
										//  (spancount-1)
LSetEntryvec:
//
// set up advancetable
//
// here %ebp = sstep and %edx = tstep
//
	movl	entryvec_table_16(,%ecx,4),%ebx
	movl	%edx,%eax
	movl	%ebx,jumptemp		// entry point into code for the indirect
							//  JMP below
	movl	%ebp,%ecx
	sarl	$16,%edx			// tstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%ecx			// sstep >>= 16;
	imull	%ebx,%edx

	addl	%ecx,%edx			// add in sstep
							// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
							//  (sstep >> 16);
	shll	$16,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax			// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ecx,%edx
	addl	%eax,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	jmp	*jumptemp			// jump to the number-of-pixels handler

//----------------------------------------

LNoSteps:
	movb	(%esi),%al		// load first texel in segment
	subl	$15,%edi		// adjust for hardwired offset
	jmp	LEndSpan


LOnlyOneStep:
	subl	s,%eax
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp	LSetEntryvec

//----------------------------------------
//
// Entry points for a final segment of N pixels (EntryN_16).  Each one backs
// %edi up so that the hardwired store offsets in the shared tail below end
// on the segment's last pixel at 15(%edi), performs the next texel step and
// then jumps into the tail at the matching LEntryN_16 label.
//

.globl	Entry2_16, Entry3_16, Entry4_16, Entry5_16
.globl	Entry6_16, Entry7_16, Entry8_16, Entry9_16
.globl	Entry10_16, Entry11_16, Entry12_16, Entry13_16
.globl	Entry14_16, Entry15_16, Entry16_16

Entry2_16:
	subl	$14,%edi		// adjust for hardwired offsets
	movb	(%esi),%al
	jmp	LEntry2_16

//----------------------------------------

Entry3_16:
	subl	$13,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	jmp	LEntry3_16

//----------------------------------------

Entry4_16:
	subl	$12,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry4_16

//----------------------------------------

Entry5_16:
	subl	$11,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry5_16

//----------------------------------------

Entry6_16:
	subl	$10,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry6_16

//----------------------------------------

Entry7_16:
	subl	$9,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry7_16

//----------------------------------------

Entry8_16:
	subl	$8,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry8_16

//----------------------------------------

Entry9_16:
	subl	$7,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry9_16

//----------------------------------------

Entry10_16:
	subl	$6,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry10_16

//----------------------------------------

Entry11_16:
	subl	$5,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry11_16

//----------------------------------------

Entry12_16:
	subl	$4,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry12_16

//----------------------------------------

Entry13_16:
	subl	$3,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry13_16

//----------------------------------------

Entry14_16:
	subl	$2,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry14_16

//----------------------------------------

Entry15_16:
	decl	%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LEntry15_16

//----------------------------------------

Entry16_16:
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,1(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
//
// shared tail: every LEntryN_16 label is entered with the carry flag from
// the preceding "addl tstep,%edx" still live, so nothing may be inserted
// between that add and the sbbl that follows the label
//
LEntry15_16:
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry14_16:
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry13_16:
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry12_16:
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry11_16:
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry10_16:
	sbbl	%ecx,%ecx
	movb	%al,7(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry9_16:
	sbbl	%ecx,%ecx
	movb	%al,8(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry8_16:
	sbbl	%ecx,%ecx
	movb	%al,9(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry7_16:
	sbbl	%ecx,%ecx
	movb	%al,10(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry6_16:
	sbbl	%ecx,%ecx
	movb	%al,11(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry5_16:
	sbbl	%ecx,%ecx
	movb	%al,12(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry4_16:
	sbbl	%ecx,%ecx
	movb	%al,13(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
LEntry3_16:
	movb	%al,14(%edi)
	movb	(%esi),%al
LEntry2_16:

LEndSpan:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

	movl	pspantemp,%ebx				// restore spans pointer
	movl	espan_t_pnext(%ebx),%ebx	// point to next span
	testl	%ebx,%ebx			// any more spans?
	movb	%al,15(%edi)		// store the span's last pixel (movb leaves
							//  the flags from testl intact)
	jnz	LSpanLoop			// more spans

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

#endif	// id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.