|
|
//
// d_draw.s
// x86 assembly-language horizontal 8-bpp span-drawing code.
//
// Syntax: AT&T operand order (source, destination), assembled after a pass
// through the C preprocessor (#include, #if, #define are used below).
// C(name) wraps every externally visible symbol; its definition is not in
// this file.
//
// Both entry points take a single stack argument (the head of a linked list
// of span descriptors) and preserve %ebp, %edi, %esi and %ebx.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with no transparency.
//
// Assumes there is at least one span in pspans, and that every span
// contains at least one pixel
//
// Each span is drawn in segments of up to 8 pixels. s and t are computed
// exactly (via a divide by 1/z) only at segment ends and stepped linearly
// in between; the FDIV for the next segment end is started before the
// current segment's pixels are written so the two overlap.
//----------------------------------------------------------------------

	.text

// out-of-line, rarely-needed clamping code

// s at span start, clamp to [0, bbextents]. LClampHighOrLow0 is entered by
// an unsigned "ja" after "cmpl bbextents,s"; the flags from that compare are
// still live, so the signed "jg" separates "above the extent" from
// "negative".
LClampHigh0:
	movl	C(bbextents),%esi
	jmp		LClampReentry0
LClampHighOrLow0:
	jg		LClampHigh0
	xorl	%esi,%esi
	jmp		LClampReentry0

// t at span start, clamp to [0, bbextentt]; same scheme as above
LClampHigh1:
	movl	C(bbextentt),%edx
	jmp		LClampReentry1
LClampHighOrLow1:
	jg		LClampHigh1
	xorl	%edx,%edx
	jmp		LClampReentry1

// snext / tnext at the end of a full 8-pixel segment, clamp to
// [2048, bbextent]
LClampLow2:
	movl	$2048,%ebp
	jmp		LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp		LClampReentry2

LClampLow3:
	movl	$2048,%ecx
	jmp		LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp		LClampReentry3

// snext / tnext at the end of the last (partial) segment, clamp to
// [2048, bbextent]
LClampLow4:
	movl	$2048,%eax
	jmp		LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp		LClampReentry4

LClampLow5:
	movl	$2048,%ebx
	jmp		LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp		LClampReentry5


// stack offset of the span-list argument once the prologue has run:
// 4 bytes of return address + 16 bytes for the four pushed registers
#define pspans	4+16

	.align 4
.globl C(D_DrawSpans8)
C(D_DrawSpans8):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

//
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
// and span list pointers
//
// TODO: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_8
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_8
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_8
	movl	%edx,pbase			// pbase = cacheblock
	fstps	zi8stepu
	fstps	tdivz8stepu
	fstps	sdivz8stepu

LSpanLoop:
//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	espan_t_v(%ebx)
	fildl	espan_t_u(%ebx)

	fld		%st(1)			// dv | du | dv
	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
							//  du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
							//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
							//  s/z
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
							//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
							//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)	// dv*d_zistepv |
							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)		// du*d_zistepu |
							//  dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
							//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
							//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
							//  du*d_zistepu; stays in %st(0) at end
							// 1/z | fp_64k | t/z | s/z
//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z

//
// point %edi to the first pixel in the span
//
	movl	C(d_viewbuffer),%ecx
	movl	espan_t_v(%ebx),%eax
	movl	%ebx,pspantemp	// preserve spans pointer

	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ecx,%edi
	movl	espan_t_u(%ebx),%ecx
	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
	movl	espan_t_count(%ebx),%ecx

//
// now start the FDIV for the end of the span
//
	cmpl	$8,%ecx
	ja		LSetupNotLast1

// 8 or fewer pixels: from here on %ecx holds count-1, the number of steps
	decl	%ecx
	jz		LCleanup1		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

// advance 1/z, t/z and s/z by (count-1) unscaled steps to the last pixel
	fildl	spancountminus1

	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)

	flds	fp_64k
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight1

LCleanup1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z
	jmp		LFDIVInFlight1

	.align	4
LSetupNotLast1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

// advance 1/z, s/z and t/z by one full 8-pixel segment
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z*64k = 64k / (1/z)
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight1:

// apply sadjust/tadjust (already in %esi/%edx) and clamp the start s and t
	addl	s,%esi
	addl	t,%edx
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja		LClampHighOrLow0
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf
	ja		LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi					// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax					// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf

//
// calculate the texture starting address
//
	sarl	$16,%eax
	movl	C(cachewidth),%edx
	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
	addl	%ebx,%esi
	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
									//           ((tfrac >> 16) * cachewidth);

//
// determine whether last span or not
//
	cmpl	$8,%ecx
	jna		LLastSegment

//
// not the last segment; do full 8-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext
	movl	snext,%eax
	movl	tnext,%edx

	movb	(%esi),%bl	// get first source texel
	subl	$8,%ecx		// count off this segments' pixels
	movl	C(sadjust),%ebp
	movl	%ecx,counttemp	// remember count of remaining pixels

	movl	C(tadjust),%ecx
	movb	%bl,(%edi)	// store first dest pixel

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$2048,%ebp
	jl		LClampLow2
	cmpl	%eax,%ebp
	ja		LClampHigh2
LClampReentry2:

	cmpl	$2048,%ecx
	jl		LClampLow3
	cmpl	%edx,%ecx
	ja		LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp			// %ebp = s delta across the 8-pixel segment
	subl	t,%ecx			// %ecx = t delta across the 8-pixel segment

//
// set up advancetable
//
// The deltas cover 8 pixels, so the per-pixel integer part is
// delta >> (16+3) and the left-justified per-pixel fraction is
// delta << (16-3).
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$19,%eax			// whole-texel tstep per pixel
	jz		LZero				// zero: skip the multiply by cachewidth
	sarl	$19,%edx			// whole-texel sstep per pixel
	movl	C(cachewidth),%ebx
	imull	%ebx,%eax
	jmp		LSetUp1

LZero:
	sarl	$19,%edx			// whole-texel sstep per pixel
	movl	C(cachewidth),%ebx

LSetUp1:

	addl	%edx,%eax			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t
	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$13,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$13,%ecx			// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t

	movl	%ecx,tstep
	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac

// Per-pixel step: sbbl makes %ecx -1 when tfrac carried (else 0), which
// selects advancetable[0] (extra row) or advancetable[1] (base); adcl also
// folds in the carry out of the sfrac add.
	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	(%esi),%al
	addl	%ebp,%ebx
	movb	%al,1(%edi)
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi


//
// start FDIV for end of next segment in flight, so it can overlap
//
	movl	counttemp,%ecx
	cmpl	$8,%ecx			// more than one segment after this?
	ja		LSetupNotLast2	// yes

	decl	%ecx
	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
	faddp	%st(0),%st(4)	// 64k

	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z*64k = 64k / (1/z)
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight2:
	movl	%ecx,counttemp	// %ecx is about to be reused for the carry mask

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	$8,%edi
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	counttemp,%ecx		// retrieve count

//
// determine whether last span or not
//
	cmpl	$8,%ecx				// are there multiple segments remaining?
	movb	%al,-1(%edi)		// eighth pixel of the segment just finished
	ja		LNotLastSegment		// yes

//
// last segment of scan
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
// %ecx here is the number of steps left (pixels remaining minus one)
//
	testl	%ecx,%ecx
	jz		LNoSteps		// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext

	movb	(%esi),%al		// load first texel in segment
	movl	C(tadjust),%ebx
	movb	%al,(%edi)		// store first pixel in segment
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$2048,%eax
	jl		LClampLow4
	cmpl	%ebp,%eax
	ja		LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$2048,%ebx
	jl		LClampLow5
	cmpl	%edx,%ebx
	ja		LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx			// don't bother
	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
							//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx		//  reciprocal yields 16.48

// one-operand imull: %edx:%eax = %eax * table entry; the step is taken from
// the high dword in %edx
	imull	reciprocal_table-8(,%ecx,4)	// sstep = (snext - s) / (spancount-1)
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table-8(,%ecx,4)	// tstep = (tnext - t) / (spancount-1)

LSetEntryvec:
//
// set up advancetable
//
// here %ebp = per-pixel sstep and %edx = per-pixel tstep, both 16.16
//
	movl	entryvec_table(,%ecx,4),%ebx
	movl	%edx,%eax
	movl	%ebx,jumptemp		// entry point into code for RET later
	movl	%ebp,%ecx
	sarl	$16,%edx			// tstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%ecx			// sstep >>= 16;
	imull	%ebx,%edx

	addl	%ecx,%edx			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$16,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax			// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ecx,%edx
	addl	%eax,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	jmp		*jumptemp			// jump to the number-of-pixels handler

//----------------------------------------

// Every path below ends at LEndSpan, which stores %al at 7(%edi); each
// handler therefore biases %edi so that the span's final pixel lands there.

LNoSteps:
	movb	(%esi),%al		// load first texel in segment
	subl	$7,%edi			// adjust for hardwired offset
	jmp		LEndSpan


LOnlyOneStep:
	subl	s,%eax			// single step: the step is the whole delta
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp		LSetEntryvec

//----------------------------------------

.globl	Entry2_8
Entry2_8:
	subl	$6,%edi		// adjust for hardwired offsets
	movb	(%esi),%al
	jmp		LLEntry2_8

//----------------------------------------

.globl	Entry3_8
Entry3_8:
	subl	$5,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	jmp		LLEntry3_8

//----------------------------------------

.globl	Entry4_8
Entry4_8:
	subl	$4,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LLEntry4_8

//----------------------------------------

.globl	Entry5_8
Entry5_8:
	subl	$3,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LLEntry5_8

//----------------------------------------

.globl	Entry6_8
Entry6_8:
	subl	$2,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LLEntry6_8

//----------------------------------------

.globl	Entry7_8
Entry7_8:
	decl	%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LLEntry7_8

//----------------------------------------

.globl	Entry8_8
Entry8_8:
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,1(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry7_8:
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry6_8:
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry5_8:
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry4_8:
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
LLEntry3_8:
	movb	%al,6(%edi)
	movb	(%esi),%al
LLEntry2_8:

LEndSpan:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp %st(0)
	fstp %st(0)
	fstp %st(0)

	movl	pspantemp,%ebx				// restore spans pointer
	movl	espan_t_pnext(%ebx),%ebx	// point to next span
	testl	%ebx,%ebx			// any more spans?
	movb	%al,7(%edi)			// final pixel of the span (movb keeps flags)
	jnz		LSpanLoop			// more spans

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

//----------------------------------------------------------------------
// 8-bpp horizontal span z drawing code for polygons, with no transparency.
//
// Assumes there is at least one span in pzspans, and that every span
// contains at least one pixel
//
// For each span, the starting 1/z is converted to a 32-bit fixed-point
// value (izi), stepped by izistep per pixel, and the high 16 bits of each
// value are written to the z-buffer.
//----------------------------------------------------------------------

	.text

// Clamp stubs: reached when the span's starting 1/z is above 0.5. They load
// a fixed izi of 0x40000000 (0.5 scaled by 2^31, assuming Float2ToThe31nd is
// 2^31), zero the per-pixel step in %ebx so the whole span is written with
// that value, and drop the unconverted 1/z from the FP stack. The span loops
// reload %ebx from izistep once the span is finished.

// z-clamp on a non-negative gradient span
LClamp:
	movl	$0x40000000,%edx
	xorl	%ebx,%ebx
	fstp	%st(0)
	jmp		LZDraw

// z-clamp on a negative gradient span
LClampNeg:
	movl	$0x40000000,%edx
	xorl	%ebx,%ebx
	fstp	%st(0)
	jmp		LZDrawNeg


// stack offset of the span-list argument once the prologue has run:
// 4 bytes of return address + 16 bytes for the four pushed registers
#define pzspans	4+16

.globl C(D_DrawZSpans)
C(D_DrawZSpans):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

	flds	C(d_zistepu)
	movl	C(d_zistepu),%eax	// raw bits of the float, for the test below
	movl	pzspans(%esp),%esi
	testl	%eax,%eax
// NOTE(review): jz is taken only when d_zistepu's bit pattern is all zero
// (+0.0); it does not test the sign, although the target path is labelled
// "negative". Both paths yield the same z values (the Neg path negates the
// step and then subtracts it), so this is left as found - confirm intent.
	jz		LFNegSpan

	fmuls	Float2ToThe31nd
	fistpl	izistep		// note: we are relying on FP exceptions being turned
						// off here to avoid range problems
	movl	izistep,%ebx	// per-pixel step; reloaded at the end of each span

LFSpanLoop:
// set up the initial 1/z value
	fildl	espan_t_v(%esi)
	fildl	espan_t_u(%esi)
	movl	espan_t_v(%esi),%ecx
	movl	C(d_pzbuffer),%edi
	fmuls	C(d_zistepu)
	fxch	%st(1)
	fmuls	C(d_zistepv)
	fxch	%st(1)
	fadds	C(d_ziorigin)
	imull	C(d_zrowbytes),%ecx
	faddp	%st(0),%st(1)

// clamp if z is nearer than 2 (1/z > 0.5)
	fcoms	float_point5
	addl	%ecx,%edi
	movl	espan_t_u(%esi),%edx
	addl	%edx,%edx				// word count
	movl	espan_t_count(%esi),%ecx
	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
	pushl	%esi		// preserve spans pointer
	fnstsw	%ax
	testb	$0x45,%ah	// C3, C2, C0 of the FPU status word
	jz		LClamp		// all clear: 1/z compared greater than 0.5

	fmuls	Float2ToThe31nd
	fistpl	izi		// note: we are relying on FP exceptions being turned
					// off here to avoid problems when the span is closer
					// than 1/(2**31)
	movl	izi,%edx

// at this point:
// %ebx = izistep
// %ecx = count
// %edx = izi
// %edi = pdest

LZDraw:

// do a single pixel up front, if necessary to dword align the destination
	testl	$2,%edi
	jz		LFMiddle
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	decl	%ecx
	movw	%ax,(%edi)
	addl	$2,%edi

// do middle a pair of aligned dwords at a time
LFMiddle:
	pushl	%ecx
	shrl	$1,%ecx				// count / 2
	jz		LFLast				// no aligned dwords to do
	shrl	$1,%ecx				// (count / 2) / 2
	jnc		LFMiddleLoop		// even number of aligned dwords to do

// odd number of dwords: write one (two z values packed low/high) first
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl		%esi,%eax
	movl	%eax,(%edi)
	addl	$4,%edi
	andl	%ecx,%ecx			// sets ZF if no dword pairs remain
	jz		LFLast

LFMiddleLoop:
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl		%esi,%eax
	movl	%edx,%ebp
	movl	%eax,(%edi)
	addl	%ebx,%edx
	shrl	$16,%ebp
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl		%esi,%ebp
	movl	%ebp,4(%edi)		// FIXME: eliminate register contention
	addl	$8,%edi

	decl	%ecx
	jnz		LFMiddleLoop

LFLast:
	popl	%ecx			// retrieve count
	popl	%esi			// retrieve span pointer

// do the last, unaligned pixel, if there is one
	andl	$1,%ecx			// is there an odd pixel left to do?
	jz		LFSpanDone		// no
	shrl	$16,%edx
	movw	%dx,(%edi)		// do the final pixel's z

LFSpanDone:
	movl	izistep,%ebx	// LClamp zeroes %ebx for a clamped span; restore
							// the real step so later spans are not drawn flat
	movl	espan_t_pnext(%esi),%esi
	testl	%esi,%esi
	jnz		LFSpanLoop

	jmp		LFDone

LFNegSpan:
	fmuls	FloatMinus2ToThe31nd
	fistpl	izistep		// note: we are relying on FP exceptions being turned
						// off here to avoid range problems
	movl	izistep,%ebx	// negated per-pixel step (subtracted below);
							// reloaded at the end of each span

LFNegSpanLoop:
// set up the initial 1/z value
	fildl	espan_t_v(%esi)
	fildl	espan_t_u(%esi)
	movl	espan_t_v(%esi),%ecx
	movl	C(d_pzbuffer),%edi
	fmuls	C(d_zistepu)
	fxch	%st(1)
	fmuls	C(d_zistepv)
	fxch	%st(1)
	fadds	C(d_ziorigin)
	imull	C(d_zrowbytes),%ecx
	faddp	%st(0),%st(1)

// clamp if z is nearer than 2 (1/z > 0.5)
	fcoms	float_point5
	addl	%ecx,%edi
	movl	espan_t_u(%esi),%edx
	addl	%edx,%edx				// word count
	movl	espan_t_count(%esi),%ecx
	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
	pushl	%esi		// preserve spans pointer
	fnstsw	%ax
	testb	$0x45,%ah	// C3, C2, C0 of the FPU status word
	jz		LClampNeg	// all clear: 1/z compared greater than 0.5

	fmuls	Float2ToThe31nd
	fistpl	izi		// note: we are relying on FP exceptions being turned
					// off here to avoid problems when the span is closer
					// than 1/(2**31)
	movl	izi,%edx

// at this point:
// %ebx = izistep
// %ecx = count
// %edx = izi
// %edi = pdest

LZDrawNeg:

// do a single pixel up front, if necessary to dword align the destination
	testl	$2,%edi
	jz		LFNegMiddle
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	decl	%ecx
	movw	%ax,(%edi)
	addl	$2,%edi

// do middle a pair of aligned dwords at a time
LFNegMiddle:
	pushl	%ecx
	shrl	$1,%ecx				// count / 2
	jz		LFNegLast			// no aligned dwords to do
	shrl	$1,%ecx				// (count / 2) / 2
	jnc		LFNegMiddleLoop		// even number of aligned dwords to do

// odd number of dwords: write one (two z values packed low/high) first
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl		%esi,%eax
	movl	%eax,(%edi)
	addl	$4,%edi
	andl	%ecx,%ecx			// sets ZF if no dword pairs remain
	jz		LFNegLast

LFNegMiddleLoop:
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl		%esi,%eax
	movl	%edx,%ebp
	movl	%eax,(%edi)
	subl	%ebx,%edx
	shrl	$16,%ebp
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl		%esi,%ebp
	movl	%ebp,4(%edi)		// FIXME: eliminate register contention
	addl	$8,%edi

	decl	%ecx
	jnz		LFNegMiddleLoop

LFNegLast:
	popl	%ecx			// retrieve count
	popl	%esi			// retrieve span pointer

// do the last, unaligned pixel, if there is one
	andl	$1,%ecx			// is there an odd pixel left to do?
	jz		LFNegSpanDone	// no
	shrl	$16,%edx
	movw	%dx,(%edi)		// do the final pixel's z

LFNegSpanDone:
	movl	izistep,%ebx	// LClampNeg zeroes %ebx for a clamped span;
							// restore the real step for later spans
	movl	espan_t_pnext(%esi),%esi
	testl	%esi,%esi
	jnz		LFNegSpanLoop

LFDone:
	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

#endif	// id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.