quake1/d_draw16.s - annotate

Return to d_draw16.s CVS log
Up to [Quake] / quake1
Annotation of quake1/d_draw16.s, revision 1.1.1.2

1.1       root        1: //
                      2: // d_draw16.s
                      3: // x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
                      4: // subdivision.
1.1.1.2 ! root        5: //
1.1       root        6: 
                      7: #include "asm_i386.h"
                      8: #include "quakeasm.h"
                      9: #include "asm_draw.h"
                     10: #include "d_ifacea.h"
                     11: 
1.1.1.2 ! root       12: #if id386
1.1       root       13: 
                     14: //----------------------------------------------------------------------
                     15: // 8-bpp horizontal span drawing code for polygons, with no transparency and
                     16: // 16-pixel subdivision.
                     17: //
                     18: // Assumes there is at least one span in pspans, and that every span
                     19: // contains at least one pixel
                     20: //----------------------------------------------------------------------
                     21: 
                     22:        .data
                     23: 
                     24:        .text
                     25: 
                     26: // out-of-line, rarely-needed clamping code
                     27: // Each stub forces one out-of-range coordinate register to a bound, then jumps to its LClampReentryN label.
                     28: LClampHigh0:
                     29:        movl    C(bbextents),%esi       // too high: clamp %esi to bbextents
                     30:        jmp             LClampReentry0
                     31: LClampHighOrLow0:                      // entered via "ja" after %esi was compared with bbextents
                     32:        jg              LClampHigh0     // flags from that compare are still live: signed-greater = too high
                     33:        xorl    %esi,%esi               // otherwise %esi was negative (unsigned-above only): clamp to 0
                     34:        jmp             LClampReentry0
                     35: // Same scheme as stub 0, applied to %edx against bbextentt.
                     36: LClampHigh1:
                     37:        movl    C(bbextentt),%edx       // too high: clamp %edx to bbextentt
                     38:        jmp             LClampReentry1
                     39: LClampHighOrLow1:                      // entered via "ja" after %edx was compared with bbextentt
                     40:        jg              LClampHigh1     // signed-greater = too high
                     41:        xorl    %edx,%edx               // otherwise %edx was negative: clamp to 0
                     42:        jmp             LClampReentry1
                     43: // Stubs 2 and 3 clamp the segment-end s (%ebp) and t (%ecx); the low bound here is 4096, not 0.
                     44: LClampLow2:
                     45:        movl    $4096,%ebp              // %ebp was below 4096 (signed "jl" at the jump site)
                     46:        jmp             LClampReentry2
                     47: LClampHigh2:
                     48:        movl    C(bbextents),%ebp       // %ebp was above bbextents (unsigned "ja" at the jump site)
                     49:        jmp             LClampReentry2
                     50: 
                     51: LClampLow3:
                     52:        movl    $4096,%ecx              // %ecx was below 4096
                     53:        jmp             LClampReentry3
                     54: LClampHigh3:
                     55:        movl    C(bbextentt),%ecx       // %ecx was above bbextentt
                     56:        jmp             LClampReentry3
                     57: // Stubs 4 and 5: same 4096/bbextent clamps for %eax and %ebx. NOTE(review): their jump sites and reentry labels are not in this part of the listing (presumably the last-segment path) - confirm.
                     58: LClampLow4:
                     59:        movl    $4096,%eax              // low clamp for %eax
                     60:        jmp             LClampReentry4
                     61: LClampHigh4:
                     62:        movl    C(bbextents),%eax       // high clamp for %eax
                     63:        jmp             LClampReentry4
                     64: 
                     65: LClampLow5:
                     66:        movl    $4096,%ebx              // low clamp for %ebx
                     67:        jmp             LClampReentry5
                     68: LClampHigh5:
                     69:        movl    C(bbextentt),%ebx       // high clamp for %ebx
                     70:        jmp             LClampReentry5
                     71: 
                     72: 
                     73: #define pspans 4+16
                     74: 
                     75:        .align 4
                     76: .globl C(D_DrawSpans16)
                     77: C(D_DrawSpans16):
                     78:        pushl   %ebp                            // preserve caller's stack frame
                     79:        pushl   %edi
                     80:        pushl   %esi                            // preserve register variables
                     81:        pushl   %ebx
                     82: 
                     83: //
                     84: // set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
                     85: // and span list pointers
                     86: //
                     87: // TODO: any overlap from rearranging?
                     88:        flds    C(d_sdivzstepu)
                     89:        fmuls   fp_16
                     90:        movl    C(cacheblock),%edx
                     91:        flds    C(d_tdivzstepu)
                     92:        fmuls   fp_16
                     93:        movl    pspans(%esp),%ebx       // point to the first span descriptor
                     94:        flds    C(d_zistepu)
                     95:        fmuls   fp_16
                     96:        movl    %edx,pbase                      // pbase = cacheblock
                     97:        fstps   zi16stepu
                     98:        fstps   tdivz16stepu
                     99:        fstps   sdivz16stepu
                    100: 
                    101: LSpanLoop:
                    102: //
                    103: // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
                    104: // initial s and t values
                    105: //
                    106: // FIXME: pipeline FILD?
                    107:        fildl   espan_t_v(%ebx)
                    108:        fildl   espan_t_u(%ebx)
                    109: 
                    110:        fld             %st(1)                  // dv | du | dv
                    111:        fmuls   C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
                    112:        fld             %st(1)                  // du | dv*d_sdivzstepv | du | dv
                    113:        fmuls   C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
                    114:        fld             %st(2)                  // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
                    115:        fmuls   C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
                    116:                                                        //  dv*d_sdivzstepv | du | dv
                    117:        fxch    %st(1)                  // du*d_sdivzstepu | du*d_tdivzstepu |
                    118:                                                        //  dv*d_sdivzstepv | du | dv
                    119:        faddp   %st(0),%st(2)   // du*d_tdivzstepu |
                    120:                                                        //  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
                    121:        fxch    %st(1)                  // du*d_sdivzstepu + dv*d_sdivzstepv |
                    122:                                                        //  du*d_tdivzstepu | du | dv
                    123:        fld             %st(3)                  // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
                    124:                                                        //  du*d_tdivzstepu | du | dv
                    125:        fmuls   C(d_tdivzstepv) // dv*d_tdivzstepv |
                    126:                                                        //  du*d_sdivzstepu + dv*d_sdivzstepv |
                    127:                                                        //  du*d_tdivzstepu | du | dv
                    128:        fxch    %st(1)                  // du*d_sdivzstepu + dv*d_sdivzstepv |
                    129:                                                        //  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
                    130:        fadds   C(d_sdivzorigin)        // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
                    131:                                                        //  du*d_sdivzstepu; stays in %st(2) at end
                    132:        fxch    %st(4)                  // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
                    133:                                                        //  s/z
                    134:        fmuls   C(d_zistepv)            // dv*d_zistepv | dv*d_tdivzstepv |
                    135:                                                        //  du*d_tdivzstepu | du | s/z
                    136:        fxch    %st(1)                  // dv*d_tdivzstepv |  dv*d_zistepv |
                    137:                                                        //  du*d_tdivzstepu | du | s/z
                    138:        faddp   %st(0),%st(2)   // dv*d_zistepv |
                    139:                                                        //  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
                    140:        fxch    %st(2)                  // du | dv*d_tdivzstepv + du*d_tdivzstepu |
                    141:                                                        //  dv*d_zistepv | s/z
                    142:        fmuls   C(d_zistepu)            // du*d_zistepu |
                    143:                                                        //  dv*d_tdivzstepv + du*d_tdivzstepu |
                    144:                                                        //  dv*d_zistepv | s/z
                    145:        fxch    %st(1)                  // dv*d_tdivzstepv + du*d_tdivzstepu |
                    146:                                                        //  du*d_zistepu | dv*d_zistepv | s/z
                    147:        fadds   C(d_tdivzorigin)        // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
                    148:                                                        //  du*d_tdivzstepu; stays in %st(1) at end
                    149:        fxch    %st(2)                  // dv*d_zistepv | du*d_zistepu | t/z | s/z
                    150:        faddp   %st(0),%st(1)   // dv*d_zistepv + du*d_zistepu | t/z | s/z
                    151: 
                    152:        flds    fp_64k                  // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
                    153:        fxch    %st(1)                  // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
                    154:        fadds   C(d_ziorigin)           // zi = d_ziorigin + dv*d_zistepv +
                    155:                                                        //  du*d_zistepu; stays in %st(0) at end
                    156:                                                        // 1/z | fp_64k | t/z | s/z
                    157: //
                    158: // calculate and clamp s & t
                    159: //
                    160:        fdivr   %st(0),%st(1)   // 1/z | z*64k | t/z | s/z
                    161: 
                    162: //
                    163: // point %edi to the first pixel in the span
                    164: //
                    165:        movl    C(d_viewbuffer),%ecx
                    166:        movl    espan_t_v(%ebx),%eax
                    167:        movl    %ebx,pspantemp  // preserve spans pointer
                    168: 
                    169:        movl    C(tadjust),%edx
                    170:        movl    C(sadjust),%esi
                    171:        movl    C(d_scantable)(,%eax,4),%edi    // v * screenwidth
                    172:        addl    %ecx,%edi
                    173:        movl    espan_t_u(%ebx),%ecx
                    174:        addl    %ecx,%edi                               // pdest = &pdestspan[scans->u];
                    175:        movl    espan_t_count(%ebx),%ecx
                    176: 
                    177: //
                    178: // now start the FDIV for the end of the span
                    179: //
                    180:        cmpl    $16,%ecx
                    181:        ja              LSetupNotLast1
                    182: 
                    183:        decl    %ecx
                    184:        jz              LCleanup1               // if only one pixel, no need to start an FDIV
                    185:        movl    %ecx,spancountminus1
                    186: 
                    187: // finish up the s and t calcs
                    188:        fxch    %st(1)                  // z*64k | 1/z | t/z | s/z
                    189: 
                    190:        fld             %st(0)                  // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root      191:        fmul    %st(4),%st              // s | z*64k | 1/z | t/z | s/z
1.1       root      192:        fxch    %st(1)                  // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root      193:        fmul    %st(3),%st              // t | s | 1/z | t/z | s/z
1.1       root      194:        fxch    %st(1)                  // s | t | 1/z | t/z | s/z
                    195:        fistpl  s                               // t | 1/z | t/z | s/z
                    196:        fistpl  t                               // 1/z | t/z | s/z
                    197: 
                    198:        fildl   spancountminus1
                    199: 
                    200:        flds    C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
                    201:        flds    C(d_zistepu)            // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
                    202:        fmul    %st(2),%st(0)   // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
                    203:        fxch    %st(1)                  // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
                    204:        fmul    %st(2),%st(0)   // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
                    205:        fxch    %st(2)                  // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
                    206:        fmuls   C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
                    207:                                                        //  C(d_tdivzstepu)*scm1
                    208:        fxch    %st(1)                  // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
                    209:                                                        //  C(d_tdivzstepu)*scm1
                    210:        faddp   %st(0),%st(3)   // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
                    211:        fxch    %st(1)                  // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
                    212:        faddp   %st(0),%st(3)   // C(d_sdivzstepu)*scm1
                    213:        faddp   %st(0),%st(3)
                    214: 
                    215:        flds    fp_64k
1.1.1.2 ! root      216:        fdiv    %st(1),%st              // this is what we've gone to all this trouble to
1.1       root      217:                                                        //  overlap
                    218:        jmp             LFDIVInFlight1
                    219: 
                    220: LCleanup1:
                    221: // finish up the s and t calcs
                    222:        fxch    %st(1)                  // z*64k | 1/z | t/z | s/z
                    223: 
                    224:        fld             %st(0)                  // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root      225:        fmul    %st(4),%st              // s | z*64k | 1/z | t/z | s/z
1.1       root      226:        fxch    %st(1)                  // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root      227:        fmul    %st(3),%st              // t | s | 1/z | t/z | s/z
1.1       root      228:        fxch    %st(1)                  // s | t | 1/z | t/z | s/z
                    229:        fistpl  s                               // t | 1/z | t/z | s/z
                    230:        fistpl  t                               // 1/z | t/z | s/z
                    231:        jmp             LFDIVInFlight1
                    232: 
                    233:        .align  4
                    234: LSetupNotLast1:
                    235: // finish up the s and t calcs
                    236:        fxch    %st(1)                  // z*64k | 1/z | t/z | s/z
                    237: 
                    238:        fld             %st(0)                  // z*64k | z*64k | 1/z | t/z | s/z
1.1.1.2 ! root      239:        fmul    %st(4),%st              // s | z*64k | 1/z | t/z | s/z
1.1       root      240:        fxch    %st(1)                  // z*64k | s | 1/z | t/z | s/z
1.1.1.2 ! root      241:        fmul    %st(3),%st              // t | s | 1/z | t/z | s/z
1.1       root      242:        fxch    %st(1)                  // s | t | 1/z | t/z | s/z
                    243:        fistpl  s                               // t | 1/z | t/z | s/z
                    244:        fistpl  t                               // 1/z | t/z | s/z
                    245: 
                    246:        fadds   zi16stepu
                    247:        fxch    %st(2)
                    248:        fadds   sdivz16stepu
                    249:        fxch    %st(2)
                    250:        flds    tdivz16stepu
                    251:        faddp   %st(0),%st(2)
                    252:        flds    fp_64k
1.1.1.2 ! root      253:        fdiv    %st(1),%st      // z = 1/1/z
        !           254:                                                // this is what we've gone to all this trouble to
        !           255:                                                //  overlap
1.1       root      256: LFDIVInFlight1:
                    257: 
                    258:        addl    s,%esi
                    259:        addl    t,%edx
                    260:        movl    C(bbextents),%ebx
                    261:        movl    C(bbextentt),%ebp
                    262:        cmpl    %ebx,%esi
                    263:        ja              LClampHighOrLow0
                    264: LClampReentry0:
                    265:        movl    %esi,s
                    266:        movl    pbase,%ebx
                    267:        shll    $16,%esi
                    268:        cmpl    %ebp,%edx
                    269:        movl    %esi,sfracf
                    270:        ja              LClampHighOrLow1
                    271: LClampReentry1:
                    272:        movl    %edx,t
                    273:        movl    s,%esi                                  // sfrac = scans->sfrac;
                    274:        shll    $16,%edx
                    275:        movl    t,%eax                                  // tfrac = scans->tfrac;
                    276:        sarl    $16,%esi
                    277:        movl    %edx,tfracf
                    278: 
                    279: //
                    280: // calculate the texture starting address
                    281: //
                    282:        sarl    $16,%eax
                    283:        movl    C(cachewidth),%edx
1.1.1.2 ! root      284:        imul    %edx,%eax                               // (tfrac >> 16) * cachewidth
1.1       root      285:        addl    %ebx,%esi
                    286:        addl    %eax,%esi                               // psource = pbase + (sfrac >> 16) +
                    287:                                                                        //           ((tfrac >> 16) * cachewidth);
                    288: //
                    289: // determine whether this is the last segment of the span
                    290: //
                    291:        cmpl    $16,%ecx
                    292:        jna             LLastSegment
                    293: 
                    294: //
                    295: // not the last segment; do full 16-wide segment
                    296: //
                    297: LNotLastSegment:
                    298: 
                    299: //
                    300: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
                    301: // get there
                    302: //
                    303: 
                    304: // pick up after the FDIV that was left in flight previously
                    305: 
1.1.1.2 ! root      306:        fld             %st(0)          // duplicate it
        !           307:        fmul    %st(4),%st      // s = s/z * z
1.1       root      308:        fxch    %st(1)
1.1.1.2 ! root      309:        fmul    %st(3),%st      // t = t/z * z
1.1       root      310:        fxch    %st(1)
                    311:        fistpl  snext
                    312:        fistpl  tnext
                    313:        movl    snext,%eax
                    314:        movl    tnext,%edx
                    315: 
                    316:        movb    (%esi),%bl      // get first source texel
                    317:        subl    $16,%ecx                // count off this segment's pixels
                    318:        movl    C(sadjust),%ebp
                    319:        movl    %ecx,counttemp  // remember count of remaining pixels
                    320: 
                    321:        movl    C(tadjust),%ecx
                    322:        movb    %bl,(%edi)      // store first dest pixel
                    323: 
                    324:        addl    %eax,%ebp
                    325:        addl    %edx,%ecx
                    326: 
                    327:        movl    C(bbextents),%eax
                    328:        movl    C(bbextentt),%edx
                    329: 
                    330:        cmpl    $4096,%ebp
                    331:        jl              LClampLow2
                    332:        cmpl    %eax,%ebp
                    333:        ja              LClampHigh2
                    334: LClampReentry2:
                    335: 
                    336:        cmpl    $4096,%ecx
                    337:        jl              LClampLow3
                    338:        cmpl    %edx,%ecx
                    339:        ja              LClampHigh3
                    340: LClampReentry3:
                    341: 
                    342:        movl    %ebp,snext
                    343:        movl    %ecx,tnext
                    344: 
                    345:        subl    s,%ebp
                    346:        subl    t,%ecx
                    347:        
                    348: //
                    349: // set up advancetable
                    350: //
                    351:        movl    %ecx,%eax
                    352:        movl    %ebp,%edx
                    353:        sarl    $20,%eax                        // (t delta over 16 pixels) >> 20 = per-pixel tstep >> 16
                    354:        jz              LZero
                    355:        sarl    $20,%edx                        // (s delta over 16 pixels) >> 20 = per-pixel sstep >> 16
                    356:        movl    C(cachewidth),%ebx
1.1.1.2 ! root      357:        imul    %ebx,%eax
1.1       root      358:        jmp             LSetUp1
                    359: 
                    360: LZero:
                    361:        sarl    $20,%edx                        // (s delta over 16 pixels) >> 20 = per-pixel sstep >> 16
                    362:        movl    C(cachewidth),%ebx
                    363: 
                    364: LSetUp1:
                    365: 
                    366:        addl    %edx,%eax                       // add in sstep
                    367:                                                                // (tstep >> 16) * cachewidth + (sstep >> 16);
                    368:        movl    tfracf,%edx
                    369:        movl    %eax,advancetable+4     // advance base in t
                    370:        addl    %ebx,%eax                       // ((tstep >> 16) + 1) * cachewidth +
                    371:                                                                //  (sstep >> 16);
                    372:        shll    $12,%ebp                        // left-justify sstep fractional part
                    373:        movl    sfracf,%ebx
                    374:        shll    $12,%ecx                        // left-justify tstep fractional part
                    375:        movl    %eax,advancetable       // advance extra in t
                    376: 
                    377:        movl    %ecx,tstep
                    378:        addl    %ecx,%edx                       // advance tfrac fractional part by tstep frac
                    379: 
                    380:        sbbl    %ecx,%ecx                       // turn tstep carry into -1 (0 if none)
                    381:        addl    %ebp,%ebx                       // advance sfrac fractional part by sstep frac
                    382:        adcl    advancetable+4(,%ecx,4),%esi    // point to next source texel
                    383: 
                    384:        addl    tstep,%edx
                    385:        sbbl    %ecx,%ecx
                    386:        movb    (%esi),%al
                    387:        addl    %ebp,%ebx
                    388:        movb    %al,1(%edi)
                    389:        adcl    advancetable+4(,%ecx,4),%esi
                    390: 
                    391:        addl    tstep,%edx
                    392:        sbbl    %ecx,%ecx
                    393:        addl    %ebp,%ebx
                    394:        movb    (%esi),%al
                    395:        adcl    advancetable+4(,%ecx,4),%esi
                    396: 
                    397:        addl    tstep,%edx
                    398:        sbbl    %ecx,%ecx
                    399:        movb    %al,2(%edi)
                    400:        addl    %ebp,%ebx
                    401:        movb    (%esi),%al
                    402:        adcl    advancetable+4(,%ecx,4),%esi
                    403: 
                    404:        addl    tstep,%edx
                    405:        sbbl    %ecx,%ecx
                    406:        movb    %al,3(%edi)
                    407:        addl    %ebp,%ebx
                    408:        movb    (%esi),%al
                    409:        adcl    advancetable+4(,%ecx,4),%esi
                    410: 
                    411:        addl    tstep,%edx
                    412:        sbbl    %ecx,%ecx
                    413:        movb    %al,4(%edi)
                    414:        addl    %ebp,%ebx
                    415:        movb    (%esi),%al
                    416:        adcl    advancetable+4(,%ecx,4),%esi
                    417: 
                    418:        addl    tstep,%edx
                    419:        sbbl    %ecx,%ecx
                    420:        movb    %al,5(%edi)
                    421:        addl    %ebp,%ebx
                    422:        movb    (%esi),%al
                    423:        adcl    advancetable+4(,%ecx,4),%esi
                    424: 
                    425:        addl    tstep,%edx
                    426:        sbbl    %ecx,%ecx
                    427:        movb    %al,6(%edi)
                    428:        addl    %ebp,%ebx
                    429:        movb    (%esi),%al
                    430:        adcl    advancetable+4(,%ecx,4),%esi
                    431: 
                    432:        addl    tstep,%edx
                    433:        sbbl    %ecx,%ecx
                    434:        movb    %al,7(%edi)
                    435:        addl    %ebp,%ebx
                    436:        movb    (%esi),%al
                    437:        adcl    advancetable+4(,%ecx,4),%esi
                    438: 
                    439: 
                    440: //
                    441: // start FDIV for end of next segment in flight, so it can overlap
                    442: //
                    443:        movl    counttemp,%ecx
                    444:        cmpl    $16,%ecx                        // more than one segment after this?
                    445:        ja              LSetupNotLast2  // yes
                    446: 
                    447:        decl    %ecx
                    448:        jz              LFDIVInFlight2  // if only one pixel, no need to start an FDIV
                    449:        movl    %ecx,spancountminus1
                    450:        fildl   spancountminus1
                    451: 
                    452:        flds    C(d_zistepu)            // C(d_zistepu) | spancountminus1
                    453:        fmul    %st(1),%st(0)   // C(d_zistepu)*scm1 | scm1
                    454:        flds    C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
                    455:        fmul    %st(2),%st(0)   // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
                    456:        fxch    %st(1)                  // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
                    457:        faddp   %st(0),%st(3)   // C(d_tdivzstepu)*scm1 | scm1
                    458:        fxch    %st(1)                  // scm1 | C(d_tdivzstepu)*scm1
                    459:        fmuls   C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
                    460:        fxch    %st(1)                  // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
                    461:        faddp   %st(0),%st(3)   // C(d_sdivzstepu)*scm1
                    462:        flds    fp_64k                  // 64k | C(d_sdivzstepu)*scm1
                    463:        fxch    %st(1)                  // C(d_sdivzstepu)*scm1 | 64k
                    464:        faddp   %st(0),%st(4)   // 64k
                    465: 
1.1.1.2 ! root      466:        fdiv    %st(1),%st              // this is what we've gone to all this trouble to
1.1       root      467:                                                        //  overlap
                    468:        jmp             LFDIVInFlight2
                    469: 
                    470:        .align  4
                    471: LSetupNotLast2:
                    472:        fadds   zi16stepu
                    473:        fxch    %st(2)
                    474:        fadds   sdivz16stepu
                    475:        fxch    %st(2)
                    476:        flds    tdivz16stepu
                    477:        faddp   %st(0),%st(2)
                    478:        flds    fp_64k
1.1.1.2 ! root      479:        fdiv    %st(1),%st      // z = 1/1/z
        !           480:                                                // this is what we've gone to all this trouble to
        !           481:                                                //  overlap
1.1       root      482: LFDIVInFlight2:        // draws pixels 8..15 of the current 16-pixel segment after the fdiv above has been issued
                    483:        movl    %ecx,counttemp  // park the count (reloaded below); %ecx becomes a 0/-1 carry mask
                    484: 
                    485:        addl    tstep,%edx      // t fraction += tstep; CF set on wrap
                    486:        sbbl    %ecx,%ecx       // %ecx = -1 if the t fraction wrapped, else 0
                    487:        movb    %al,8(%edi)     // store the texel already in %al (movb leaves flags alone)
                    488:        addl    %ebp,%ebx       // s fraction (%ebx, saved to sfracf below) += %ebp; CF feeds the adcl
                    489:        movb    (%esi),%al      // fetch the texel for the next store
                    490:        adcl    advancetable+4(,%ecx,4),%esi    // %esi += advancetable[%ecx+1] + s carry
                    491: 
                    492:        addl    tstep,%edx      // same step pattern, pixel 9
                    493:        sbbl    %ecx,%ecx
                    494:        movb    %al,9(%edi)
                    495:        addl    %ebp,%ebx
                    496:        movb    (%esi),%al
                    497:        adcl    advancetable+4(,%ecx,4),%esi
                    498: 
                    499:        addl    tstep,%edx      // pixel 10
                    500:        sbbl    %ecx,%ecx
                    501:        movb    %al,10(%edi)
                    502:        addl    %ebp,%ebx
                    503:        movb    (%esi),%al
                    504:        adcl    advancetable+4(,%ecx,4),%esi
                    505: 
                    506:        addl    tstep,%edx      // pixel 11
                    507:        sbbl    %ecx,%ecx
                    508:        movb    %al,11(%edi)
                    509:        addl    %ebp,%ebx
                    510:        movb    (%esi),%al
                    511:        adcl    advancetable+4(,%ecx,4),%esi
                    512: 
                    513:        addl    tstep,%edx      // pixel 12
                    514:        sbbl    %ecx,%ecx
                    515:        movb    %al,12(%edi)
                    516:        addl    %ebp,%ebx
                    517:        movb    (%esi),%al
                    518:        adcl    advancetable+4(,%ecx,4),%esi
                    519: 
                    520:        addl    tstep,%edx      // pixel 13
                    521:        sbbl    %ecx,%ecx
                    522:        movb    %al,13(%edi)
                    523:        addl    %ebp,%ebx
                    524:        movb    (%esi),%al
                    525:        adcl    advancetable+4(,%ecx,4),%esi
                    526: 
                    527:        addl    tstep,%edx      // pixel 14; the texel fetched here is stored as pixel 15 below
                    528:        sbbl    %ecx,%ecx
                    529:        movb    %al,14(%edi)
                    530:        addl    %ebp,%ebx
                    531:        movb    (%esi),%al
                    532:        adcl    advancetable+4(,%ecx,4),%esi
                    533: 
                    534:        addl    $16,%edi        // move the destination pointer past this segment
                    535:        movl    %edx,tfracf     // save the t fraction
                    536:        movl    snext,%edx
                    537:        movl    %ebx,sfracf     // save the s fraction
                    538:        movl    tnext,%ebx
                    539:        movl    %edx,s          // s = snext
                    540:        movl    %ebx,t          // t = tnext
                    541: 
                    542:        movl    counttemp,%ecx          // retrieve count
                    543: 
                    544: //
                    545: // determine whether last span or not
                    546: //
                    547:        cmpl    $16,%ecx                                // are there multiple segments remaining?
                    548:        movb    %al,-1(%edi)    // pixel 15 (offset -1 after the addl $16); movb keeps the cmpl flags for ja
                    549:        ja              LNotLastSegment         // yes
                    550: 
                    551: //
                    552: // last segment of scan
                    553: //
                    554: LLastSegment:  // final (1..16 pixel) segment: compute per-pixel steps, then dispatch via jumptemp
                    555: 
                    556: //
                    557: // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
                    558: // get there. The number of pixels left is variable, and we want to land on the
                    559: // last pixel, not step one past it, so we can't run into arithmetic problems
                    560: //
                    561:        testl   %ecx,%ecx       // %ecx = steps still to take (pixels in segment - 1)
                    562:        jz              LNoSteps                // just draw the last pixel and we're done
                    563: 
                    564: // pick up after the FDIV that was left in flight previously
                    565: 
                    566: 
1.1.1.2 ! root      567:        fld             %st(0)          // duplicate it
        !           568:        fmul    %st(4),%st      // s = s/z * z
1.1       root      569:        fxch    %st(1)
1.1.1.2 ! root      570:        fmul    %st(3),%st      // t = t/z * z
1.1       root      571:        fxch    %st(1)  // st(0) = s, st(1) = t
                    572:        fistpl  snext   // snext = (int)s, pop
                    573:        fistpl  tnext   // tnext = (int)t, pop
                    574: 
                    575:        movb    (%esi),%al              // load first texel in segment
                    576:        movl    C(tadjust),%ebx
                    577:        movb    %al,(%edi)              // store first pixel in segment
                    578:        movl    C(sadjust),%eax // %al has been stored, so %eax is free again
                    579: 
                    580:        addl    snext,%eax      // %eax = snext + sadjust
                    581:        addl    tnext,%ebx      // %ebx = tnext + tadjust
                    582: 
                    583:        movl    C(bbextents),%ebp       // upper bound tested against s below
                    584:        movl    C(bbextentt),%edx       // upper bound tested against t below
                    585: 
                    586:        cmpl    $4096,%eax
                    587:        jl              LClampLow4      // s < 4096 (signed): out-of-line clamp, rejoins at LClampReentry4
                    588:        cmpl    %ebp,%eax
                    589:        ja              LClampHigh4     // s > bbextents (unsigned): out-of-line clamp
                    590: LClampReentry4:
                    591:        movl    %eax,snext
                    592: 
                    593:        cmpl    $4096,%ebx
                    594:        jl              LClampLow5      // t < 4096 (signed): out-of-line clamp, rejoins at LClampReentry5
                    595:        cmpl    %edx,%ebx
                    596:        ja              LClampHigh5     // t > bbextentt (unsigned): out-of-line clamp
                    597: LClampReentry5:
                    598: 
                    599:        cmpl    $1,%ecx                 // exactly one step (two pixels in segment)?
                    600:        je              LOnlyOneStep    // yes: the step is the whole segment length,
                    601:                                                        //  so skip the reciprocal multiply
                    602:        subl    s,%eax  // %eax = snext - s
                    603:        subl    t,%ebx  // %ebx = tnext - t
                    604: 
                    605:        addl    %eax,%eax               // convert to 15.17 format so multiply by 1.31
                    606:        addl    %ebx,%ebx               //  reciprocal yields 16.48
                    607: 
                    608:        imull   reciprocal_table_16-8(,%ecx,4)  // sstep = (snext - s) /
                    609:                                                                                        //  (spancount-1)
                    610:        movl    %edx,%ebp       // keep the high 32 bits of %edx:%eax as sstep
                    611: 
                    612:        movl    %ebx,%eax
                    613:        imull   reciprocal_table_16-8(,%ecx,4)  // tstep = (tnext - t) /
                    614:                                                                                        //  (spancount-1)
                    615: LSetEntryvec:  // expects %ebp = sstep, %edx = tstep, %ecx = step count
                    616: //
                    617: // set up advancetable
                    618: //
                    619:        movl    entryvec_table_16(,%ecx,4),%ebx // handler address selected by step count
                    620:        movl    %edx,%eax       // %eax = tstep (fraction is extracted below)
                    621:        movl    %ebx,jumptemp           // entry point, reached by the jmp *jumptemp below
                    622:        movl    %ebp,%ecx       // %ecx = sstep (integer part is extracted below)
                    623:        sarl    $16,%edx                        // tstep >>= 16;
                    624:        movl    C(cachewidth),%ebx
                    625:        sarl    $16,%ecx                        // sstep >>= 16;
1.1.1.2 ! root      626:        imul    %ebx,%edx       // (tstep >> 16) * cachewidth
1.1       root      627: 
                    628:        addl    %ecx,%edx                       // add in sstep
                    629:                                                                // (tstep >> 16) * cachewidth + (sstep >> 16);
                    630:        movl    tfracf,%ecx
                    631:        movl    %edx,advancetable+4     // advance base in t
                    632:        addl    %ebx,%edx                       // ((tstep >> 16) + 1) * cachewidth +
                    633:                                                                //  (sstep >> 16);
                    634:        shll    $16,%ebp                        // left-justify sstep fractional part
                    635:        movl    sfracf,%ebx
                    636:        shll    $16,%eax                        // left-justify tstep fractional part
                    637:        movl    %edx,advancetable       // advance extra in t
                    638: 
                    639:        movl    %eax,tstep
                    640:        movl    %ecx,%edx       // %edx = tfracf
                    641:        addl    %eax,%edx       // take the first step here: t fraction += tstep
                    642:        sbbl    %ecx,%ecx       // -1 if the t fraction wrapped, else 0
                    643:        addl    %ebp,%ebx       // s fraction += sstep fraction
                    644:        adcl    advancetable+4(,%ecx,4),%esi    // %esi += advancetable[%ecx+1] + s carry
                    645: 
                    646:        jmp             *jumptemp                       // jump to the number-of-pixels handler
                    647: 
                    648: //----------------------------------------
                    649: 
                    650: LNoSteps:      // taken from LLastSegment when %ecx == 0: one pixel left, no stepping
                    651:        movb    (%esi),%al              // load first texel in segment
                    652:        subl    $15,%edi                        // adjust for hardwired offset: LEndSpan stores %al at 15(%edi)
                    653:        jmp             LEndSpan
                    654: 
                    655: 
                    656: LOnlyOneStep:  // taken when %ecx == 1: the single step is the full delta, so no reciprocal multiply
                    657:        subl    s,%eax  // %eax = snext - s
                    658:        subl    t,%ebx  // %ebx = tnext - t
                    659:        movl    %eax,%ebp       // LSetEntryvec reads sstep from %ebp
                    660:        movl    %ebx,%edx       // LSetEntryvec reads tstep from %edx
                    661:        jmp             LSetEntryvec
                    662: 
                    663: //----------------------------------------
                    664: 
                    665: .globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
                    666: .globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
                    667: .globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
                    668: .globl Entry14_16, Entry15_16, Entry16_16
                    669: 
                    670: Entry2_16:     // one pixel left to store; when reached via LSetEntryvec the first step was already taken
                    671:        subl    $14,%edi                // adjust for hardwired offsets
                    672:        movb    (%esi),%al      // texel for the final pixel
                    673:        jmp             LEntry2_16      // LEndSpan stores %al at 15(%edi), i.e. one past the old %edi
                    674: 
                    675: //----------------------------------------
                    676: 
                    677: Entry3_16:     // two pixels left to store after the one LLastSegment wrote at (%edi)
                    678:        subl    $13,%edi                // adjust for hardwired offsets
                    679:        addl    %eax,%edx       // t fraction += tstep (%eax still holds it when coming from LSetEntryvec)
                    680:        movb    (%esi),%al      // fetch texel; overwrites the low byte of %eax
                    681:        sbbl    %ecx,%ecx       // -1 if the t fraction wrapped, else 0
                    682:        addl    %ebp,%ebx       // s fraction += %ebp; CF feeds the adcl
                    683:        adcl    advancetable+4(,%ecx,4),%esi    // %esi += advancetable[%ecx+1] + s carry
                    684:        jmp             LEntry3_16      // shared tail stores at 14(%edi), then 15(%edi)
                    685: 
                    686: //----------------------------------------
                    687: 
                    688: Entry4_16:     // three pixels left to store after the one already at (%edi)
                    689:        subl    $12,%edi                // adjust for hardwired offsets
                    690:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    691:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    692:        sbbl    %ecx,%ecx       // carry mask for the t step
                    693:        addl    %ebp,%ebx       // s fraction step
                    694:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    695:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry4_16
                    696:        jmp             LEntry4_16      // shared tail starts storing at 13(%edi)
                    697: 
                    698: //----------------------------------------
                    699: 
                    700: Entry5_16:     // four pixels left to store after the one already at (%edi)
                    701:        subl    $11,%edi                // adjust for hardwired offsets
                    702:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    703:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    704:        sbbl    %ecx,%ecx       // carry mask for the t step
                    705:        addl    %ebp,%ebx       // s fraction step
                    706:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    707:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry5_16
                    708:        jmp             LEntry5_16      // shared tail starts storing at 12(%edi)
                    709: 
                    710: //----------------------------------------
                    711: 
                    712: Entry6_16:     // five pixels left to store after the one already at (%edi)
                    713:        subl    $10,%edi                // adjust for hardwired offsets
                    714:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    715:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    716:        sbbl    %ecx,%ecx       // carry mask for the t step
                    717:        addl    %ebp,%ebx       // s fraction step
                    718:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    719:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry6_16
                    720:        jmp             LEntry6_16      // shared tail starts storing at 11(%edi)
                    721: 
                    722: //----------------------------------------
                    723: 
                    724: Entry7_16:     // six pixels left to store after the one already at (%edi)
                    725:        subl    $9,%edi         // adjust for hardwired offsets
                    726:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    727:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    728:        sbbl    %ecx,%ecx       // carry mask for the t step
                    729:        addl    %ebp,%ebx       // s fraction step
                    730:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    731:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry7_16
                    732:        jmp             LEntry7_16      // shared tail starts storing at 10(%edi)
                    733: 
                    734: //----------------------------------------
                    735: 
                    736: Entry8_16:     // seven pixels left to store after the one already at (%edi)
                    737:        subl    $8,%edi         // adjust for hardwired offsets
                    738:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    739:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    740:        sbbl    %ecx,%ecx       // carry mask for the t step
                    741:        addl    %ebp,%ebx       // s fraction step
                    742:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    743:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry8_16
                    744:        jmp             LEntry8_16      // shared tail starts storing at 9(%edi)
                    745: 
                    746: //----------------------------------------
                    747: 
                    748: Entry9_16:     // eight pixels left to store after the one already at (%edi)
                    749:        subl    $7,%edi         // adjust for hardwired offsets
                    750:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    751:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    752:        sbbl    %ecx,%ecx       // carry mask for the t step
                    753:        addl    %ebp,%ebx       // s fraction step
                    754:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    755:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry9_16
                    756:        jmp             LEntry9_16      // shared tail starts storing at 8(%edi)
                    757: 
                    758: //----------------------------------------
                    759: 
                    760: Entry10_16:    // nine pixels left to store after the one already at (%edi)
                    761:        subl    $6,%edi         // adjust for hardwired offsets
                    762:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    763:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    764:        sbbl    %ecx,%ecx       // carry mask for the t step
                    765:        addl    %ebp,%ebx       // s fraction step
                    766:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    767:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry10_16
                    768:        jmp             LEntry10_16     // shared tail starts storing at 7(%edi)
                    769: 
                    770: //----------------------------------------
                    771: 
                    772: Entry11_16:    // ten pixels left to store after the one already at (%edi)
                    773:        subl    $5,%edi         // adjust for hardwired offsets
                    774:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    775:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    776:        sbbl    %ecx,%ecx       // carry mask for the t step
                    777:        addl    %ebp,%ebx       // s fraction step
                    778:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    779:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry11_16
                    780:        jmp             LEntry11_16     // shared tail starts storing at 6(%edi)
                    781: 
                    782: //----------------------------------------
                    783: 
                    784: Entry12_16:    // eleven pixels left to store after the one already at (%edi)
                    785:        subl    $4,%edi         // adjust for hardwired offsets
                    786:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    787:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    788:        sbbl    %ecx,%ecx       // carry mask for the t step
                    789:        addl    %ebp,%ebx       // s fraction step
                    790:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    791:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry12_16
                    792:        jmp             LEntry12_16     // shared tail starts storing at 5(%edi)
                    793: 
                    794: //----------------------------------------
                    795: 
                    796: Entry13_16:    // twelve pixels left to store after the one already at (%edi)
                    797:        subl    $3,%edi         // adjust for hardwired offsets
                    798:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    799:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    800:        sbbl    %ecx,%ecx       // carry mask for the t step
                    801:        addl    %ebp,%ebx       // s fraction step
                    802:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    803:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry13_16
                    804:        jmp             LEntry13_16     // shared tail starts storing at 4(%edi)
                    805: 
                    806: //----------------------------------------
                    807: 
                    808: Entry14_16:    // thirteen pixels left to store after the one already at (%edi)
                    809:        subl    $2,%edi         // adjust for hardwired offsets
                    810:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    811:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    812:        sbbl    %ecx,%ecx       // carry mask for the t step
                    813:        addl    %ebp,%ebx       // s fraction step
                    814:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    815:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry14_16
                    816:        jmp             LEntry14_16     // shared tail starts storing at 3(%edi)
                    817: 
                    818: //----------------------------------------
                    819: 
                    820: Entry15_16:    // fourteen pixels left to store after the one already at (%edi)
                    821:        decl    %edi            // adjust for hardwired offsets
                    822:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    823:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    824:        sbbl    %ecx,%ecx       // carry mask for the t step
                    825:        addl    %ebp,%ebx       // s fraction step
                    826:        adcl    advancetable+4(,%ecx,4),%esi    // advance the source pointer
                    827:        addl    tstep,%edx      // next t step; its CF is consumed by the sbbl at LEntry15_16
                    828:        jmp             LEntry15_16     // shared tail starts storing at 2(%edi)
                    829: 
                    830: //----------------------------------------
                    831: 
                    832: Entry16_16:    // fifteen pixels left to store; no %edi bias, falls through the whole ladder below
                    833:        addl    %eax,%edx       // t fraction += tstep held in %eax
                    834:        movb    (%esi),%al      // fetch texel; %eax no longer holds tstep after this
                    835:        sbbl    %ecx,%ecx       // -1 if the t fraction wrapped, else 0
                    836:        addl    %ebp,%ebx       // s fraction step; CF feeds the adcl
                    837:        adcl    advancetable+4(,%ecx,4),%esi    // %esi += advancetable[%ecx+1] + s carry
                    838: 
                    839:        addl    tstep,%edx      // from here on tstep is read from memory
                    840:        sbbl    %ecx,%ecx
                    841:        movb    %al,1(%edi)
                    842:        addl    %ebp,%ebx
                    843:        movb    (%esi),%al
                    844:        adcl    advancetable+4(,%ecx,4),%esi
                    845:        addl    tstep,%edx      // CF from this add is consumed by the sbbl just after the label
                    846: LEntry15_16:   // each LEntryN_16 is entered with CF holding the carry of the latest t step
                    847:        sbbl    %ecx,%ecx
                    848:        movb    %al,2(%edi)
                    849:        addl    %ebp,%ebx
                    850:        movb    (%esi),%al
                    851:        adcl    advancetable+4(,%ecx,4),%esi
                    852:        addl    tstep,%edx
                    853: LEntry14_16:
                    854:        sbbl    %ecx,%ecx
                    855:        movb    %al,3(%edi)
                    856:        addl    %ebp,%ebx
                    857:        movb    (%esi),%al
                    858:        adcl    advancetable+4(,%ecx,4),%esi
                    859:        addl    tstep,%edx
                    860: LEntry13_16:
                    861:        sbbl    %ecx,%ecx
                    862:        movb    %al,4(%edi)
                    863:        addl    %ebp,%ebx
                    864:        movb    (%esi),%al
                    865:        adcl    advancetable+4(,%ecx,4),%esi
                    866:        addl    tstep,%edx
                    867: LEntry12_16:
                    868:        sbbl    %ecx,%ecx
                    869:        movb    %al,5(%edi)
                    870:        addl    %ebp,%ebx
                    871:        movb    (%esi),%al
                    872:        adcl    advancetable+4(,%ecx,4),%esi
                    873:        addl    tstep,%edx
                    874: LEntry11_16:
                    875:        sbbl    %ecx,%ecx
                    876:        movb    %al,6(%edi)
                    877:        addl    %ebp,%ebx
                    878:        movb    (%esi),%al
                    879:        adcl    advancetable+4(,%ecx,4),%esi
                    880:        addl    tstep,%edx
                    881: LEntry10_16:
                    882:        sbbl    %ecx,%ecx
                    883:        movb    %al,7(%edi)
                    884:        addl    %ebp,%ebx
                    885:        movb    (%esi),%al
                    886:        adcl    advancetable+4(,%ecx,4),%esi
                    887:        addl    tstep,%edx
                    888: LEntry9_16:
                    889:        sbbl    %ecx,%ecx
                    890:        movb    %al,8(%edi)
                    891:        addl    %ebp,%ebx
                    892:        movb    (%esi),%al
                    893:        adcl    advancetable+4(,%ecx,4),%esi
                    894:        addl    tstep,%edx
                    895: LEntry8_16:
                    896:        sbbl    %ecx,%ecx
                    897:        movb    %al,9(%edi)
                    898:        addl    %ebp,%ebx
                    899:        movb    (%esi),%al
                    900:        adcl    advancetable+4(,%ecx,4),%esi
                    901:        addl    tstep,%edx
                    902: LEntry7_16:
                    903:        sbbl    %ecx,%ecx
                    904:        movb    %al,10(%edi)
                    905:        addl    %ebp,%ebx
                    906:        movb    (%esi),%al
                    907:        adcl    advancetable+4(,%ecx,4),%esi
                    908:        addl    tstep,%edx
                    909: LEntry6_16:
                    910:        sbbl    %ecx,%ecx
                    911:        movb    %al,11(%edi)
                    912:        addl    %ebp,%ebx
                    913:        movb    (%esi),%al
                    914:        adcl    advancetable+4(,%ecx,4),%esi
                    915:        addl    tstep,%edx
                    916: LEntry5_16:
                    917:        sbbl    %ecx,%ecx
                    918:        movb    %al,12(%edi)
                    919:        addl    %ebp,%ebx
                    920:        movb    (%esi),%al
                    921:        adcl    advancetable+4(,%ecx,4),%esi
                    922:        addl    tstep,%edx
                    923: LEntry4_16:
                    924:        sbbl    %ecx,%ecx
                    925:        movb    %al,13(%edi)
                    926:        addl    %ebp,%ebx
                    927:        movb    (%esi),%al
                    928:        adcl    advancetable+4(,%ecx,4),%esi    // last pointer advance; no further t step follows
                    929: LEntry3_16:
                    930:        movb    %al,14(%edi)    // second-to-last pixel
                    931:        movb    (%esi),%al      // texel for the last pixel, stored at 15(%edi) below
                    932: LEntry2_16:
                    933: 
                    934: LEndSpan:      // %al holds the span's last pixel; %edi is biased so 15(%edi) is its address
                    935: 
                    936: //
                    937: // clear s/z, t/z, 1/z from FP stack
                    938: //
                    939:        fstp %st(0)
                    940:        fstp %st(0)
                    941:        fstp %st(0)
                    942: 
                    943:        movl    pspantemp,%ebx                          // restore spans pointer
                    944:        movl    espan_t_pnext(%ebx),%ebx        // point to next span
                    945:        testl   %ebx,%ebx                       // any more spans?
                    946:        movb    %al,15(%edi)    // store the last pixel; movb keeps the testl flags for jnz
                    947:        jnz             LSpanLoop                       // more spans
                    948: 
                    949:        popl    %ebx                            // restore register variables
                    950:        popl    %esi
                    951:        popl    %edi
                    952:        popl    %ebp                            // restore the caller's stack frame
                    953:        ret
                    954: 
                    955: #endif // id386
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.