|
|
/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// d_parta.s
// x86 assembly-language 8-bpp particle-drawing code.
//
// Syntax: AT&T (source, destination), 32-bit x86 with x87 floating point.
// The file is run through the C preprocessor first (#include / #if / C()).
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "d_ifacea.h"
#include "asm_draw.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp particle drawing code.
//----------------------------------------------------------------------

//FIXME: comments, full optimization

	.text

// Offset from %esp to the first stack argument once the prologue has run:
// 12 bytes for the three registers pushed below + 4 for the return address.
#define P	12+4

//----------------------------------------------------------------------
// D_DrawParticle
//
// Transforms one particle into view space, projects it to screen
// coordinates, rejects it if it is nearer than float_particle_z_clip or
// outside the d_vrect* rectangle, and otherwise draws a z-buffered block
// of pixels whose size is derived from 1/z.
//
// In:	P(%esp) = pointer to the particle; only the fields at pt_org
//	(three floats) and pt_color (one float) are read here.
// Out:	nothing is returned in a register.
//
// Preserved: %ebp, %edi, %ebx (pushed/popped here); %esi (pushed/popped
//	only by the 2x2, 3x3 and 4x4 paths, which are the only users).
// Clobbered: %eax, %ecx, %edx, flags, x87 status word.
// FPU stack: every exit path leaves the stack at the depth it had on entry.
//
// Globals read: r_origin, r_ppn, r_pup, r_pright, xcenter, ycenter,
//	d_vrectx, d_vrecty, d_vrectright_particle, d_vrectbottom_particle,
//	d_viewbuffer, d_scantable, d_pzbuffer, d_zrowbytes, screenwidth,
//	d_pix_shift, d_pix_min, d_pix_max, d_y_aspect_shift,
//	float_1, float_point5, float_particle_z_clip, DP_32768, DP_EntryTable.
// Scratch memory written: DP_u, DP_v, DP_Color, DP_Pix, izi.
//
// Register roles once the destination pointers have been set up:
//	%edi = address of the top-left destination pixel (1 byte per pixel)
//	%edx = address of the matching z-buffer entry (2 bytes per pixel)
//	%bp  = low 16 bits of izi, the value compared against and stored
//	       into the z-buffer
//	%eax = pix, the particle size in pixels
//	%ch  = colour byte to store
//
// NOTE(review): the jump-table dispatch (index pix-1) and the do-while
// shaped loops in LDefault both assume pix >= 1, i.e. d_pix_min >= 1.
// That is not established anywhere in this file - confirm against the
// code that sets d_pix_min.
//----------------------------------------------------------------------

	.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi				// preserve register variables
	pushl	%ebx

	movl	P(%esp),%edi		// %edi = particle pointer

// FIXME: better FP overlap in general here

// transform point
//	VectorSubtract (p->org, r_origin, local);
	flds	C(r_origin)
	fsubrs	pt_org(%edi)		// local[0] = org[0] - r_origin[0]
	flds	pt_org+4(%edi)
	fsubs	C(r_origin)+4		// local[1] = org[1] - r_origin[1]
	flds	pt_org+8(%edi)
	fsubs	C(r_origin)+8		// local[2] = org[2] - r_origin[2]
	fxch	%st(2)			// local[0] | local[1] | local[2]

//	transformed[2] = DotProduct(local, r_ppn);
	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+4		// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+8		// r_ppn[2] | dot1 | dot0 | local[0] |
						//  local[1] | local[2]
	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
	fxch	%st(2)		// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
						  //  local[2]
	faddp	%st(0),%st(1) // z | local[0] | local[1] | local[2]
	fld		%st(0)		// z | z | local[0] | local[1] |
						//  local[2]
	fdivrs	float_1		// 1/z | z | local[0] | local[1] | local[2]
	fxch	%st(1)		// z | 1/z | local[0] | local[1] | local[2]

//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;
// The compare is issued here, but its result is only read by the fnstsw
// a few instructions later; the loads and multiply in between belong to
// the next dot product.
	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z

	flds	C(r_pup)	// r_pup[0] | local[2] | local[0] | local[1] | 1/z
	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+4	// r_pup[1] | dot0 | local[2] | local[0] |
						//  local[1] | 1/z

	fnstsw	%ax
	testb	$1,%ah			// C0 (bit 8 of the status word) => z < clip
	jnz		LPop6AndDone	// six values are on the FPU stack here

//	transformed[1] = DotProduct(local, r_pup);
	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+8	// r_pup[2] | dot1 | dot0 | local[2] |
						//  local[0] | local[1] | 1/z
	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
						//  local[1] | 1/z
	fxch	%st(2)		// dot0 | dot1 | dot2 | local[2] | local[0] |
						//  local[1] | 1/z
	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
						  //  local[1] | 1/z
	faddp	%st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
	fxch	%st(3)		// local[1] | local[2] | local[0] | y | 1/z

//	transformed[0] = DotProduct(local, r_pright);
	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
	fxch	%st(2)		// local[0] | local[2] | dot1 | y | 1/z
	fmuls	C(r_pright)	// dot0 | local[2] | dot1 | y | 1/z
	fxch	%st(1)		// local[2] | dot0 | dot1 | y | 1/z
	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
	fxch	%st(2)		// dot1 | dot0 | dot2 | y | 1/z
	faddp	%st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z

	faddp	%st(0),%st(1) // x | y | 1/z
	fxch	%st(1)		// y | x | 1/z

// project the point
	fmul	%st(2),%st(0)	// y/z | x | 1/z
	fxch	%st(1)		// x | y/z | 1/z
	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
	fxch	%st(1)		// y/z | x/z | 1/z
	fsubrs	C(ycenter)	// v | x/z | 1/z
	fxch	%st(1)		// x/z | v | 1/z
	fadds	C(xcenter)	// u | v | 1/z
// FIXME: preadjust xcenter and ycenter
	fxch	%st(1)		// v | u | 1/z
	fadds	float_point5	// v | u | 1/z
	fxch	%st(1)		// u | v | 1/z
	fadds	float_point5	// u | v | 1/z
	fxch	%st(2)		// 1/z | v | u
	fmuls	DP_32768	// 1/z * 0x8000 | v | u
	fxch	%st(2)		// u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
	fistpl	DP_u		// v | 1/z * 0x8000
	fistpl	DP_v		// 1/z * 0x8000

	movl	DP_u,%eax	// %eax = u
	movl	DP_v,%edx	// %edx = v

// if ((v > d_vrectbottom_particle) ||
// 	(u > d_vrectright_particle) ||
// 	(v < d_vrecty) ||
// 	(u < d_vrectx))
// {
// 	continue;
// }
// One value (1/z * 0x8000) is still on the FPU stack on every reject path.

	movl	C(d_vrectbottom_particle),%ebx
	movl	C(d_vrectright_particle),%ecx
	cmpl	%ebx,%edx
	jg		LPop1AndDone
	cmpl	%ecx,%eax
	jg		LPop1AndDone
	movl	C(d_vrecty),%ebx
	movl	C(d_vrectx),%ecx
	cmpl	%ebx,%edx
	jl		LPop1AndDone

	cmpl	%ecx,%eax
	jl		LPop1AndDone

// The colour is held as a float in the particle; convert it to an integer
// in DP_Color (only its low byte is used, see the movb into %ch below).
	flds	pt_color(%edi)	// color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
	fistpl	DP_Color		// 1/z * 0x8000

	movl	C(d_viewbuffer),%ebx

	addl	%eax,%ebx		// %ebx = d_viewbuffer + u
	movl	C(d_scantable)(,%edx,4),%edi	// point to the pixel

	imull	C(d_zrowbytes),%edx		// point to the z pixel

	leal	(%edx,%eax,2),%edx	// %edx = v*d_zrowbytes + u*2
	movl	C(d_pzbuffer),%eax

	fistpl	izi				// izi = (int)(1/z * 0x8000); FPU stack now balanced

	addl	%ebx,%edi		// %edi = d_viewbuffer + d_scantable[v] + u
	addl	%eax,%edx		// %edx = d_pzbuffer + v*d_zrowbytes + u*2

// pix = izi >> d_pix_shift;

	movl	izi,%eax
	movl	C(d_pix_shift),%ecx
	shrl	%cl,%eax
	movl	izi,%ebp		// %bp is the depth value used from here on

// if (pix < d_pix_min)
// 		pix = d_pix_min;
// else if (pix > d_pix_max)
//  	pix = d_pix_max;

	movl	C(d_pix_min),%ebx
	movl	C(d_pix_max),%ecx
	cmpl	%ebx,%eax
	jnl		LTestPixMax
	movl	%ebx,%eax
	jmp		LTestDone

LTestPixMax:
	cmpl	%ecx,%eax
	jng		LTestDone
	movl	%ecx,%eax
LTestDone:

	movb	DP_Color,%ch	// %ch = colour byte (low byte of DP_Color)

// The unrolled fixed-size routines step one screen row per particle row,
// so they are only used when d_y_aspect_shift is zero and pix is 1..4.
	movl	C(d_y_aspect_shift),%ebx
	testl	%ebx,%ebx
	jnz		LDefault

	cmpl	$4,%eax
	ja		LDefault

// Indirect jump through table entry pix-1.
// NOTE(review): GNU as documents a '*' prefix for absolute indirect
// jump operands; the operand is left exactly as originally written
// because other tools that may consume this file are not visible
// from here - confirm before changing it.
	jmp		DP_EntryTable-4(,%eax,4)

// 1x1
.globl	DP_1x1
DP_1x1:
	cmpw	%bp,(%edx)		// just one pixel to do
	jg		LDone			// existing z entry is greater than izi: skip
	movw	%bp,(%edx)
	movb	%ch,(%edi)
	jmp		LDone

// 2x2
// In the 2x2, 3x3 and 4x4 routines: %ebx = screenwidth (bytes per screen
// row), %esi = d_zrowbytes (bytes per z-buffer row). Each pixel is written
// only if the stored z entry is not greater than izi (signed 16-bit compare).
.globl	DP_2x2
DP_2x2:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

	cmpw	%bp,(%edx)
	jg		L2x2_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L2x2_1:
	cmpw	%bp,2(%edx)
	jg		L2x2_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L2x2_2:
	cmpw	%bp,(%edx,%esi,1)
	jg		L2x2_3
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L2x2_3:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L2x2_4
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L2x2_4:

	popl	%esi
	jmp		LDone

// 3x3
.globl	DP_3x3
DP_3x3:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

// row 0
	cmpw	%bp,(%edx)
	jg		L3x3_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L3x3_1:
	cmpw	%bp,2(%edx)
	jg		L3x3_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L3x3_2:
	cmpw	%bp,4(%edx)
	jg		L3x3_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L3x3_3:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L3x3_4
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L3x3_4:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L3x3_5
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L3x3_5:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L3x3_6
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L3x3_6:

// row 2
	cmpw	%bp,(%edx,%esi,2)
	jg		L3x3_7
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
L3x3_7:
	cmpw	%bp,2(%edx,%esi,2)
	jg		L3x3_8
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
L3x3_8:
	cmpw	%bp,4(%edx,%esi,2)
	jg		L3x3_9
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
L3x3_9:

	popl	%esi
	jmp		LDone


// 4x4
.globl	DP_4x4
DP_4x4:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

// row 0
	cmpw	%bp,(%edx)
	jg		L4x4_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_1:
	cmpw	%bp,2(%edx)
	jg		L4x4_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_2:
	cmpw	%bp,4(%edx)
	jg		L4x4_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_3:
	cmpw	%bp,6(%edx)
	jg		L4x4_4
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_4:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_5
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_5:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_6
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_6:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_7
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_7:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_8
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_8:

// advance both pointers two rows, then repeat the same pattern for rows 2-3
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

// row 2
	cmpw	%bp,(%edx)
	jg		L4x4_9
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_9:
	cmpw	%bp,2(%edx)
	jg		L4x4_10
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_10:
	cmpw	%bp,4(%edx)
	jg		L4x4_11
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_11:
	cmpw	%bp,6(%edx)
	jg		L4x4_12
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_12:

// row 3
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_13
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_13:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_14
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_14:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_15
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_15:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_16
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_16:

	popl	%esi
	jmp		LDone

// default case, handling any size particle
LDefault:

// count = pix << d_y_aspect_shift;

	movl	%eax,%ebx
	movl	%eax,DP_Pix		// saved so each row can reload its column count
	movb	C(d_y_aspect_shift),%cl
	shll	%cl,%ebx		// %ebx = count (rows to draw)

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
// 	for (i=0 ; i<pix ; i++)
// 	{
// 		if (pz[i] <= izi)
// 		{
// 			pz[i] = izi;
// 			pdest[i] = color;
// 		}
// 	}
// }
// Unlike the C above, both loops here test their counter at the bottom,
// and the columns are walked from pix-1 down to 0.

LGenRowLoop:
	movl	DP_Pix,%eax

LGenColLoop:
	cmpw	%bp,-2(%edx,%eax,2)
	jg		LGSkip
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax,1)
LGSkip:
	decl	%eax			// --pix
	jnz		LGenColLoop

	addl	C(d_zrowbytes),%edx
	addl	C(screenwidth),%edi

	decl	%ebx			// --count
	jnz		LGenRowLoop

LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

// Early-out paths: discard what is still on the FPU stack (six values for
// the z-clip reject, one for the screen-rectangle reject), then return.
LPop6AndDone:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone

#endif	// id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.