|
|
//
// d_spr8.s
// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
//
// Syntax: AT&T / GAS, run through the C preprocessor.
// Target: 32-bit x86 with x87 FPU; the single argument is read from the stack.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"

#if id386

//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with transparency.
//----------------------------------------------------------------------

	.text

// out-of-line, rarely-needed clamping code
//
// Stubs 0/1 clamp the span's first s/t (in %esi/%edx) to [0, bbextent];
// they are entered after an unsigned "above" compare, so the signed jg
// separates "too high" from "negative".
// Stubs 2-5 clamp the segment-end s/t to [2048, bbextent].

LClampHigh0:
	movl	C(bbextents),%esi
	jmp	LClampReentry0
LClampHighOrLow0:
	jg	LClampHigh0
	xorl	%esi,%esi
	jmp	LClampReentry0

LClampHigh1:
	movl	C(bbextentt),%edx
	jmp	LClampReentry1
LClampHighOrLow1:
	jg	LClampHigh1
	xorl	%edx,%edx
	jmp	LClampReentry1

LClampLow2:
	movl	$2048,%ebp
	jmp	LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp	LClampReentry2

LClampLow3:
	movl	$2048,%ecx
	jmp	LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp	LClampReentry3

LClampLow4:
	movl	$2048,%eax
	jmp	LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp	LClampReentry4

LClampLow5:
	movl	$2048,%ebx
	jmp	LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp	LClampReentry5


// offset of the span-list argument: return address (4) + four pushed
// registers (16)
#define pspans	4+16

//----------------------------------------------------------------------
// D_SpriteDrawSpans
//
// In:    one stack argument, a pointer to the first span descriptor
//        (fields sspan_t_u, sspan_t_v, sspan_t_count; stride sspan_t_size).
//        NOTE(review): presumably "void D_SpriteDrawSpans (sspan_t *pspan)"
//        on the C side -- confirm against the C prototype.
// Out:   nothing in registers; writes pixels through d_viewbuffer and
//        depths through d_pzbuffer.
// Saved: %ebp, %edi, %esi, %ebx are pushed on entry and restored on exit.
// Clobb: %eax, %ecx, %edx, flags; the x87 stack is used and left balanced.
//
// The span list is walked until a descriptor with a negative count is
// found; descriptors with a zero count are skipped.
//
// Inner-loop register roles (segments of up to 8 pixels):
//   %esi = source texel pointer      %edi = destination pixel pointer
//   %ecx = z-buffer pointer          %ebp = 1/z, rotated so that the
//   %ebx = s fraction (left-just.)          upper 16 bits sit in %bp
//   %edx = t fraction (left-just.)   %eax = scratch / t-carry mask
//----------------------------------------------------------------------
	.align 4
.globl C(D_SpriteDrawSpans)
C(D_SpriteDrawSpans):
	pushl	%ebp			// preserve caller's stack frame
	pushl	%edi
	pushl	%esi			// preserve register variables
	pushl	%ebx

//
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
// and span list pointers, and 1/z step in 0.32 fixed-point
//
// FIXME: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_8
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_8
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_8
	movl	%edx,pbase		// pbase = cacheblock
	flds	C(d_zistepu)
	fmuls	fp_64kx64k
	fxch	%st(3)
	fstps	sdivz8stepu
	fstps	zi8stepu
	fstps	tdivz8stepu
	fistpl	izistep
	movl	izistep,%eax
	rorl	$16,%eax		// put upper 16 bits in low word
	movl	sspan_t_count(%ebx),%ecx
	movl	%eax,izistep

	cmpl	$0,%ecx
	jle	LNextSpan

LSpanLoop:

//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	sspan_t_v(%ebx)
	fildl	sspan_t_u(%ebx)

	fld	%st(1)			// dv | du | dv
	fmuls	C(d_sdivzstepv)		// dv*d_sdivzstepv | du | dv
	fld	%st(1)			// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)		// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld	%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)		// du*d_tdivzstepu | du*d_sdivzstepu |
					//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
					//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)		// du*d_tdivzstepu |
					//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
					//  du*d_tdivzstepu | du | dv
	fld	%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
					//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)		// dv*d_tdivzstepv |
					//  du*d_sdivzstepu + dv*d_sdivzstepv |
					//  du*d_tdivzstepu | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
					//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
					//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
					//  s/z
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
					//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)			// dv*d_tdivzstepv | dv*d_zistepv |
					//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)		// dv*d_zistepv |
					//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
					//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)		// du*d_zistepu |
					//  dv*d_tdivzstepv + du*d_tdivzstepu |
					//  dv*d_zistepv | s/z
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
					//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
					//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)		// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
					//  du*d_zistepu; stays in %st(0) at end
					// 1/z | fp_64k | t/z | s/z

	fld	%st(0)			// FIXME: get rid of stall on FMUL?
	fmuls	fp_64kx64k
	fxch	%st(1)

//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(2)		// 1/z | z*64k | t/z | s/z
	fxch	%st(1)

	fistpl	izi			// 0.32 fixed-point 1/z
	movl	izi,%ebp

//
// set pz to point to the first z-buffer pixel in the span
//
	rorl	$16,%ebp		// put upper 16 bits in low word
	movl	sspan_t_v(%ebx),%eax
	movl	%ebp,izi
	movl	sspan_t_u(%ebx),%ebp
	imull	C(d_zrowbytes)		// %eax = v * d_zrowbytes (also writes %edx)
	shll	$1,%ebp			// a word per pixel
	addl	C(d_pzbuffer),%eax
	addl	%ebp,%eax
	movl	%eax,pz

//
// point %edi to the first pixel in the span
//
	movl	C(d_viewbuffer),%ebp
	movl	sspan_t_v(%ebx),%eax
	pushl	%ebx			// preserve spans pointer
	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ebp,%edi
	movl	sspan_t_u(%ebx),%ebp
	addl	%ebp,%edi		// pdest = &pdestspan[scans->u];

//
// now start the FDIV for the end of the span
//
	cmpl	$8,%ecx
	ja	LSetupNotLast1

	decl	%ecx
	jz	LCleanup1		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)		// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)		// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s			// t | 1/z | t/z | s/z
	fistpl	t			// 1/z | t/z | s/z

	fildl	spancountminus1

	flds	C(d_tdivzstepu)		// _d_tdivzstepu | spancountminus1
	flds	C(d_zistepu)		// _d_zistepu | _d_tdivzstepu | spancountminus1
	fmul	%st(2),%st(0)		// _d_zistepu*scm1 | _d_tdivzstepu | scm1
	fxch	%st(1)			// _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	%st(2),%st(0)		// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	%st(2)			// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
	fmuls	C(d_sdivzstepu)		// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
					//  _d_tdivzstepu*scm1
	fxch	%st(1)			// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
					//  _d_tdivzstepu*scm1
	faddp	%st(0),%st(3)		// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)		// _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)

	flds	fp_64k
	fdiv	%st(1),%st(0)		// this is what we've gone to all this trouble to
					//  overlap
	jmp	LFDIVInFlight1

LCleanup1:
// finish up the s and t calcs (single-pixel span: no FDIV is started)
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)		// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)		// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s			// t | 1/z | t/z | s/z
	fistpl	t			// 1/z | t/z | s/z
	jmp	LFDIVInFlight1

	.align	4
LSetupNotLast1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)		// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)		// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s			// t | 1/z | t/z | s/z
	fistpl	t			// 1/z | t/z | s/z

	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)		// z = 1/1/z
					// this is what we've gone to all this trouble to
					//  overlap
LFDIVInFlight1:

	addl	s,%esi
	addl	t,%edx
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp	// NOTE: %ebp holds bbextentt, not 1/z, from here on
	cmpl	%ebx,%esi
	ja	LClampHighOrLow0
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf
	ja	LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi			// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax			// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf

//
// calculate the texture starting address
//
	sarl	$16,%eax
	addl	%ebx,%esi
	imull	C(cachewidth),%eax	// (tfrac >> 16) * cachewidth
	addl	%eax,%esi		// psource = pbase + (sfrac >> 16) +
					//           ((tfrac >> 16) * cachewidth);

//
// determine whether last span or not
//
	cmpl	$8,%ecx
	jna	LLastSegment

//
// not the last segment; do full 8-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld	%st(0)			// duplicate it
	fmul	%st(4),%st(0)		// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)		// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext
	movl	snext,%eax
	movl	tnext,%edx

	subl	$8,%ecx			// count off this segments' pixels
	movl	C(sadjust),%ebp
	pushl	%ecx			// remember count of remaining pixels
	movl	C(tadjust),%ecx

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$2048,%ebp
	jl	LClampLow2
	cmpl	%eax,%ebp
	ja	LClampHigh2
LClampReentry2:

	cmpl	$2048,%ecx
	jl	LClampLow3
	cmpl	%edx,%ecx
	ja	LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp
	subl	t,%ecx

//
// set up advancetable
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$19,%edx		// sstep >>= 16; (plus >> 3 for the 8-pixel segment)
	movl	C(cachewidth),%ebx
	sarl	$19,%eax		// tstep >>= 16; (plus >> 3 for the 8-pixel segment)
	jz	LIsZero
	imull	%ebx,%eax		// (tstep >> 16) * cachewidth;
LIsZero:
	addl	%edx,%eax		// add in sstep
					// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t
	addl	%ebx,%eax		// ((tstep >> 16) + 1) * cachewidth +
					//  (sstep >> 16);
	shll	$13,%ebp		// left-justify sstep fractional part
	movl	%ebp,sstep
	movl	sfracf,%ebx
	shll	$13,%ecx		// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t
	movl	%ecx,tstep

	movl	pz,%ecx
	movl	izi,%ebp

// pixel 0: draw only if 1/z >= the z-buffer value and the texel is not
// the transparent colour
	cmpw	(%ecx),%bp
	jl	Lp1
	movb	(%esi),%al		// get first source texel
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp1
	movw	%bp,(%ecx)
	movb	%al,(%edi)		// store first dest pixel
Lp1:
	addl	izistep,%ebp
	adcl	$0,%ebp			// wrap the carry round into the low word (1/z is kept rotated)
	addl	tstep,%edx		// advance tfrac fractional part by tstep frac

	sbbl	%eax,%eax		// turn tstep carry into -1 (0 if none)
	addl	sstep,%ebx		// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel

// pixel 1
	cmpw	2(%ecx),%bp
	jl	Lp2
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp2
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
Lp2:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// pixel 2
	cmpw	4(%ecx),%bp
	jl	Lp3
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp3
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
Lp3:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// pixel 3
	cmpw	6(%ecx),%bp
	jl	Lp4
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp4
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
Lp4:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// pixel 4
	cmpw	8(%ecx),%bp
	jl	Lp5
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp5
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
Lp5:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

//
// start FDIV for end of next segment in flight, so it can overlap
//
	popl	%eax
	cmpl	$8,%eax			// more than one segment after this?
	ja	LSetupNotLast2		// yes

	decl	%eax
	jz	LFDIVInFlight2		// if only one pixel, no need to start an FDIV
	movl	%eax,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
	fmul	%st(1),%st(0)		// _d_zistepu*scm1 | scm1
	flds	C(d_tdivzstepu)		// _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	%st(2),%st(0)		// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	%st(1)			// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
	faddp	%st(0),%st(3)		// _d_tdivzstepu*scm1 | scm1
	fxch	%st(1)			// scm1 | _d_tdivzstepu*scm1
	fmuls	C(d_sdivzstepu)		// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)		// _d_sdivzstepu*scm1
	flds	fp_64k			// 64k | _d_sdivzstepu*scm1
	fxch	%st(1)			// _d_sdivzstepu*scm1 | 64k
	faddp	%st(0),%st(4)		// 64k

	fdiv	%st(1),%st(0)		// this is what we've gone to all this trouble to
					//  overlap
	jmp	LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)		// z = 1/1/z
					// this is what we've gone to all this trouble to
					//  overlap
LFDIVInFlight2:
	pushl	%eax			// remaining count (already minus 1 if the next segment is the last)

// pixel 5
	cmpw	10(%ecx),%bp
	jl	Lp6
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp6
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
Lp6:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// pixel 6
	cmpw	12(%ecx),%bp
	jl	Lp7
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp7
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
Lp7:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// pixel 7
	cmpw	14(%ecx),%bp
	jl	Lp8
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp8
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
Lp8:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	addl	$8,%edi
	addl	$16,%ecx
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	%ecx,pz
	movl	%ebp,izi

	popl	%ecx			// retrieve count

//
// determine whether last span or not
//
	cmpl	$8,%ecx			// are there multiple segments remaining?
	ja	LNotLastSegment		// yes

//
// last segment of scan
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
// %ecx here is the number of pixels in the segment minus one
//
	testl	%ecx,%ecx
	jz	LNoSteps		// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld	%st(0)			// duplicate it
	fmul	%st(4),%st(0)		// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)		// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext

	movl	C(tadjust),%ebx
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$2048,%eax
	jl	LClampLow4
	cmpl	%ebp,%eax
	ja	LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$2048,%ebx
	jl	LClampLow5
	cmpl	%edx,%ebx
	ja	LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx			// don't bother
	je	LOnlyOneStep		// if two pixels in segment, there's only one step,
					//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx		//  reciprocal yields 16.48
	imull	reciprocal_table-8(,%ecx,4)	// sstep = (snext - s) / (spancount-1)
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table-8(,%ecx,4)	// tstep = (tnext - t) / (spancount-1)

LSetEntryvec:
//
// set up advancetable
//
// on entry: %ebp = sstep, %edx = tstep, %ecx = pixels in segment minus one
//
	movl	spr8entryvec_table(,%ecx,4),%ebx
	movl	%edx,%eax
	pushl	%ebx			// entry point into code for RET later
	movl	%ebp,%ecx
	sarl	$16,%ecx		// sstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%edx		// tstep >>= 16;
	jz	LIsZeroLast
	imull	%ebx,%edx		// (tstep >> 16) * cachewidth;
LIsZeroLast:
	addl	%ecx,%edx		// add in sstep
					// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx		// ((tstep >> 16) + 1) * cachewidth +
					//  (sstep >> 16);
	shll	$16,%ebp		// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax		// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ebp,sstep
	movl	%ecx,%edx

	movl	pz,%ecx
	movl	izi,%ebp

	ret				// jump to the number-of-pixels handler

//----------------------------------------

LNoSteps:
	movl	pz,%ecx
	movl	izi,%ebp		// reload 1/z for LEndSpan's depth test: when the
					//  whole span is a single pixel we arrive here
					//  with %ebp still holding bbextentt from the
					//  clamp setup at LFDIVInFlight1
	subl	$7,%edi			// adjust for hardwired offset
	subl	$14,%ecx
	jmp	LEndSpan


LOnlyOneStep:
	subl	s,%eax
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp	LSetEntryvec

//----------------------------------------
//
// Entry points for a final segment of 2..8 pixels, reached through the RET
// in LSetEntryvec. Each one biases %edi/%ecx so that the shared tail below
// can use its fixed offsets and always finishes on offset 7 / 14.
//

.globl	Spr8Entry2_8
Spr8Entry2_8:
	subl	$6,%edi			// adjust for hardwired offsets
	subl	$12,%ecx
	movb	(%esi),%al
	jmp	LLEntry2_8

//----------------------------------------

.globl	Spr8Entry3_8
Spr8Entry3_8:
	subl	$5,%edi			// adjust for hardwired offsets
	subl	$10,%ecx
	jmp	LLEntry3_8

//----------------------------------------

.globl	Spr8Entry4_8
Spr8Entry4_8:
	subl	$4,%edi			// adjust for hardwired offsets
	subl	$8,%ecx
	jmp	LLEntry4_8

//----------------------------------------

.globl	Spr8Entry5_8
Spr8Entry5_8:
	subl	$3,%edi			// adjust for hardwired offsets
	subl	$6,%ecx
	jmp	LLEntry5_8

//----------------------------------------

.globl	Spr8Entry6_8
Spr8Entry6_8:
	subl	$2,%edi			// adjust for hardwired offsets
	subl	$4,%ecx
	jmp	LLEntry6_8

//----------------------------------------

.globl	Spr8Entry7_8
Spr8Entry7_8:
	decl	%edi			// adjust for hardwired offsets
	subl	$2,%ecx
	jmp	LLEntry7_8

//----------------------------------------

.globl	Spr8Entry8_8
Spr8Entry8_8:
	cmpw	(%ecx),%bp
	jl	Lp9
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp9
	movw	%bp,(%ecx)
	movb	%al,(%edi)
Lp9:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry7_8:
	cmpw	2(%ecx),%bp
	jl	Lp10
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp10
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
Lp10:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry6_8:
	cmpw	4(%ecx),%bp
	jl	Lp11
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp11
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
Lp11:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry5_8:
	cmpw	6(%ecx),%bp
	jl	Lp12
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp12
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
Lp12:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry4_8:
	cmpw	8(%ecx),%bp
	jl	Lp13
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp13
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
Lp13:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry3_8:
	cmpw	10(%ecx),%bp
	jl	Lp14
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp14
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
Lp14:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry2_8:
	cmpw	12(%ecx),%bp
	jl	Lp15
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp15
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
Lp15:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// final pixel of the span; expects %ebp = 1/z (rotated), %ecx/%edi biased so
// that offsets 14 / 7 address it
LEndSpan:
	cmpw	14(%ecx),%bp
	jl	Lp16
	movb	(%esi),%al		// load first texel in segment
	cmpb	$(TRANSPARENT_COLOR),%al
	jz	Lp16
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
Lp16:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

	popl	%ebx			// restore spans pointer
LNextSpan:
	addl	$(sspan_t_size),%ebx	// point to next span
	movl	sspan_t_count(%ebx),%ecx
	cmpl	$0,%ecx			// any more spans?
	jg	LSpanLoop		// yes
	jz	LNextSpan		// yes, but this one's empty

	popl	%ebx			// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp			// restore the caller's stack frame
	ret

#endif	// id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.