|
|
//
// d_parta.s
// x86 assembly-language 8-bpp particle-drawing code.
//
// Syntax: GNU as, AT&T operand order (src, dst), run through the C
// preprocessor first (hence the #include/#define lines and // comments).
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "d_ifacea.h"
#include "asm_draw.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp particle drawing code.
//----------------------------------------------------------------------

//FIXME: comments, full optimization

//----------------------------------------------------------------------
// 8-bpp particle queueing code.
//----------------------------------------------------------------------

	.text

// Offset from %esp to the single stack argument once the three register
// saves below have been pushed: 3 pushes * 4 bytes + 4-byte return address.
#define P	12+4

//----------------------------------------------------------------------
// D_DrawParticle
//
// Transforms one particle into view space, projects it to screen
// coordinates, rejects it if it is nearer than float_particle_z_clip or
// falls outside the d_vrect*_particle rectangle, and otherwise draws a
// z-buffered square of pixels whose size is derived from 1/z.
//
// In:    P(%esp) = pointer to the particle; only the pt_org (3 floats)
//                  and pt_color (float) fields are read.
// Out:   nothing is returned in a register; pixels are written through
//        d_viewbuffer/d_scantable and depths through d_pzbuffer.
// Saved: %ebp, %edi, %ebx (pushed/popped here); %esi (pushed/popped by
//        the 2x2, 3x3 and 4x4 cases, the only places that use it).
// Clobb: %eax, %ecx, %edx, flags.
// FPU:   every value pushed on the x87 stack is popped again on every
//        exit path (see LPop6AndDone / LPop1AndDone).
// Temps: DP_u, DP_v, DP_Color, DP_Pix and izi are memory temporaries
//        defined outside this file.
//
// Register roles once the pixel address has been computed:
//        %edi = destination pixel, %edx = destination z-buffer entry,
//        %bp  = low 16 bits of izi (depth written to the z-buffer),
//        %ch  = colour byte, %eax = pix (edge length in pixels).
//----------------------------------------------------------------------

	.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi				// preserve register variables
	pushl	%ebx

	movl	P(%esp),%edi		// %edi = particle pointer

// FIXME: better FP overlap in general here

// transform point
//	VectorSubtract (p->org, r_origin, local);
	flds	C(r_origin)
	fsubrs	pt_org(%edi)		// reversed subtract: org[0] - r_origin[0]
	flds	pt_org+4(%edi)
	fsubs	C(r_origin)+4
	flds	pt_org+8(%edi)
	fsubs	C(r_origin)+8
	fxch	%st(2)			// local[0] | local[1] | local[2]

//	transformed[2] = DotProduct(local, r_ppn);
	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+4		// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+8		// r_ppn[2] | dot1 | dot0 | local[0] |
							//  local[1] | local[2]
	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
	fxch	%st(2)			// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
	faddp	%st(0),%st(1)	// dot0 + dot1 | dot2 | local[0] | local[1] |
							//  local[2]
	faddp	%st(0),%st(1)	// z | local[0] | local[1] | local[2]
	fld		%st(0)			// z | z | local[0] | local[1] |
							//  local[2]
	fdivrs	float_1			// 1/z | z | local[0] | local[1] | local[2]
	fxch	%st(1)			// z | 1/z | local[0] | local[1] | local[2]

//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;
	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z

// The start of the r_pup dot product is issued before the compare result
// is examined, so the early-out below has six values to discard.
	flds	C(r_pup)	// r_pup[0] | local[2] | local[0] | local[1] | 1/z
	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+4	// r_pup[1] | dot0 | local[2] | local[0] |
						//  local[1] | 1/z

// NOTE(review): the status word is read three FP instructions after the
// fcomps; this relies on the intervening flds/fmul leaving C0 as the
// compare set it -- confirm against the target CPUs' documentation.
	fnstsw	%ax
	testb	$1,%ah			// C0 (bit 8 of the status word) set => z < clip
	jnz		LPop6AndDone

//	transformed[1] = DotProduct(local, r_pup);
	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+8	// r_pup[2] | dot1 | dot0 | local[2] |
						//  local[0] | local[1] | 1/z
	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
						//  local[1] | 1/z
	fxch	%st(2)		// dot0 | dot1 | dot2 | local[2] | local[0] |
						//  local[1] | 1/z
	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
						  //  local[1] | 1/z
	faddp	%st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
	fxch	%st(3)		// local[1] | local[2] | local[0] | y | 1/z

//	transformed[0] = DotProduct(local, r_pright);
	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
	fxch	%st(2)		// local[0] | local[2] | dot1 | y | 1/z
	fmuls	C(r_pright)	// dot0 | local[2] | dot1 | y | 1/z
	fxch	%st(1)		// local[2] | dot0 | dot1 | y | 1/z
	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
	fxch	%st(2)		// dot1 | dot0 | dot2 | y | 1/z
	faddp	%st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z

	faddp	%st(0),%st(1) // x | y | 1/z
	fxch	%st(1)		// y | x | 1/z

// project the point
	fmul	%st(2),%st(0)	// y/z | x | 1/z
	fxch	%st(1)			// x | y/z | 1/z
	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
	fxch	%st(1)			// y/z | x/z | 1/z
	fsubrs	C(ycenter)		// v | x/z | 1/z
	fxch	%st(1)			// x/z | v | 1/z
	fadds	C(xcenter)		// u | v | 1/z
// FIXME: preadjust xcenter and ycenter
	fxch	%st(1)			// v | u | 1/z
	fadds	float_point5	// v | u | 1/z
	fxch	%st(1)			// u | v | 1/z
	fadds	float_point5	// u | v | 1/z
	fxch	%st(2)			// 1/z | v | u
	fmuls	DP_32768		// 1/z * 0x8000 | v | u
	fxch	%st(2)			// u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
	fistpl	DP_u			// v | 1/z * 0x8000
	fistpl	DP_v			// 1/z * 0x8000

	movl	DP_u,%eax		// %eax = u
	movl	DP_v,%edx		// %edx = v

// This routine handles a single particle, so rejection means returning
// (with the one remaining FP value popped):
//	if ((v > d_vrectbottom_particle) ||
//		(u > d_vrectright_particle) ||
//		(v < d_vrecty) ||
//		(u < d_vrectx))
//	{
//		return;
//	}

	movl	C(d_vrectbottom_particle),%ebx
	movl	C(d_vrectright_particle),%ecx
	cmpl	%ebx,%edx
	jg		LPop1AndDone
	cmpl	%ecx,%eax
	jg		LPop1AndDone
	movl	C(d_vrecty),%ebx
	movl	C(d_vrectx),%ecx
	cmpl	%ebx,%edx
	jl		LPop1AndDone

	cmpl	%ecx,%eax
	jl		LPop1AndDone

	flds	pt_color(%edi)	// color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
	fistpl	DP_Color		// 1/z * 0x8000

	movl	C(d_viewbuffer),%ebx

	addl	%eax,%ebx		// d_viewbuffer + u
	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel

	imull	C(d_zrowbytes),%edx		// point to the z pixel

	leal	(%edx,%eax,2),%edx	// v * d_zrowbytes + u * 2 (16-bit z entries)
	movl	C(d_pzbuffer),%eax

	fistpl	izi				// FP stack is empty from here on

	addl	%ebx,%edi		// %edi = d_viewbuffer + d_scantable[v] + u
	addl	%eax,%edx		// %edx = address of the z-buffer entry

// pix = izi >> d_pix_shift;

	movl	izi,%eax
	movl	C(d_pix_shift),%ecx
	shrl	%cl,%eax
	movl	izi,%ebp		// %bp = depth value stored into the z-buffer

// if (pix < d_pix_min)
// 		pix = d_pix_min;
// else if (pix > d_pix_max)
//  	pix = d_pix_max;

	movl	C(d_pix_min),%ebx
	movl	C(d_pix_max),%ecx
	cmpl	%ebx,%eax
	jnl		LTestPixMax
	movl	%ebx,%eax
	jmp		LTestDone

LTestPixMax:
	cmpl	%ecx,%eax
	jng		LTestDone
	movl	%ecx,%eax
LTestDone:

// A pix of zero means there is nothing to draw (the C loop quoted at
// LDefault executes zero times).  Without this guard a zero would index
// one slot before DP_EntryTable below, and the generic loops, which
// decrement before testing, would wrap around.
	testl	%eax,%eax
	jz		LDone

	movb	DP_Color,%ch	// low byte of the converted colour

// The unrolled cases are only used when there is no vertical aspect
// shift and the particle is at most 4 pixels across.
	movl	C(d_y_aspect_shift),%ebx
	testl	%ebx,%ebx
	jnz		LDefault

	cmpl	$4,%eax
	ja		LDefault

// Indirect jump through the table; entry (pix - 1) handles a pix-by-pix
// particle.  The '*' marks the operand as a memory-indirect target, as
// AT&T syntax requires.
	jmp		*DP_EntryTable-4(,%eax,4)

// The DP_NxN labels are exported; presumably DP_EntryTable (defined
// outside this file) refers to them -- confirm.

// 1x1
.globl	DP_1x1
DP_1x1:
	cmpw	%bp,(%edx)		// just one pixel to do
	jg		LDone			// existing z is greater than izi: leave it
	movw	%bp,(%edx)
	movb	%ch,(%edi)
	jmp		LDone

// 2x2
// In the NxN cases %ebx = screenwidth (pixel row stride) and
// %esi = d_zrowbytes (z-buffer row stride).
.globl	DP_2x2
DP_2x2:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

	cmpw	%bp,(%edx)
	jg		L2x2_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L2x2_1:
	cmpw	%bp,2(%edx)
	jg		L2x2_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L2x2_2:
	cmpw	%bp,(%edx,%esi,1)
	jg		L2x2_3
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L2x2_3:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L2x2_4
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L2x2_4:

	popl	%esi
	jmp		LDone

// 3x3
.globl	DP_3x3
DP_3x3:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

	cmpw	%bp,(%edx)
	jg		L3x3_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L3x3_1:
	cmpw	%bp,2(%edx)
	jg		L3x3_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L3x3_2:
	cmpw	%bp,4(%edx)
	jg		L3x3_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L3x3_3:

	cmpw	%bp,(%edx,%esi,1)
	jg		L3x3_4
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L3x3_4:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L3x3_5
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L3x3_5:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L3x3_6
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L3x3_6:

	cmpw	%bp,(%edx,%esi,2)
	jg		L3x3_7
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
L3x3_7:
	cmpw	%bp,2(%edx,%esi,2)
	jg		L3x3_8
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
L3x3_8:
	cmpw	%bp,4(%edx,%esi,2)
	jg		L3x3_9
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
L3x3_9:

	popl	%esi
	jmp		LDone


// 4x4
.globl	DP_4x4
DP_4x4:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

	cmpw	%bp,(%edx)
	jg		L4x4_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_1:
	cmpw	%bp,2(%edx)
	jg		L4x4_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_2:
	cmpw	%bp,4(%edx)
	jg		L4x4_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_3:
	cmpw	%bp,6(%edx)
	jg		L4x4_4
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_4:

	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_5
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_5:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_6
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_6:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_7
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_7:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_8
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_8:

// advance both pointers two rows; rows 2 and 3 then reuse the same
// addressing forms as rows 0 and 1
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

	cmpw	%bp,(%edx)
	jg		L4x4_9
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_9:
	cmpw	%bp,2(%edx)
	jg		L4x4_10
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_10:
	cmpw	%bp,4(%edx)
	jg		L4x4_11
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_11:
	cmpw	%bp,6(%edx)
	jg		L4x4_12
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_12:

	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_13
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_13:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_14
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_14:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_15
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_15:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_16
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_16:

	popl	%esi
	jmp		LDone

// default case, handling any size particle
LDefault:

// count = pix << d_y_aspect_shift;

	movl	%eax,%ebx
	movl	%eax,DP_Pix		// saved so each row can reload its column count
	movb	C(d_y_aspect_shift),%cl	// only %cl is replaced; %ch keeps the colour
	shll	%cl,%ebx

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
// 	for (i=0 ; i<pix ; i++)
// 	{
// 		if (pz[i] <= izi)
// 		{
// 			pz[i] = izi;
// 			pdest[i] = color;
// 		}
// 	}
// }
//
// The column loop below walks %eax from pix down to 1 and addresses
// element (%eax - 1), i.e. each row is filled right to left.

LGenRowLoop:
	movl	DP_Pix,%eax

LGenColLoop:
	cmpw	%bp,-2(%edx,%eax,2)
	jg		LGSkip
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax,1)
LGSkip:
	decl	%eax			// --pix
	jnz		LGenColLoop

	addl	C(d_zrowbytes),%edx
	addl	C(screenwidth),%edi

	decl	%ebx			// --count
	jnz		LGenRowLoop

LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

// Early-out for the z-clip test: six values are on the FP stack at that
// point.  Five are discarded here, the sixth by falling through.
LPop6AndDone:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
// Early-out for the screen-rectangle test: only 1/z * 0x8000 remains.
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone

#endif	// id386
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.