Annotation of ntddk/src/video/displays/vga256/i386/vgablts.asm, revision 1.1.1.1

1.1       root        1: ;---------------------------Module-Header------------------------------;
                      2: ; Module Name: vgablts.asm
                      3: ;
                      4: ; Copyright (c) 1992-1993 Microsoft Corporation
                      5: ;-----------------------------------------------------------------------;
                      6: ;-----------------------------------------------------------------------;
                      7: ; VOID vTrgBlt(PDEV * ppdev, ULONG culRcl, RECTL * prcl, MIX ulMix,
                      8: ;              ULONG ulColor, POINTL * pptlBrush)
                      9: ; Input:
                     10: ;  ppdev     - pointer to PDEV for surface to which to draw
                     11: ;  culRcl    - # of rectangles to fill
                     12: ;  prcl      - pointer to list of rectangles to fill
                     13: ;  ulMix     - mix rop with which to fill (only the low byte is used)
                     14: ;  ulColor   - color with which to fill
                     15: ;  pptlBrush - not used
                     16: ;
                     17: ; Performs accelerated solid area fills for all mixes.
                     18: ;
                     19: ;-----------------------------------------------------------------------;
                     20: ;
                     21: ; Note: Assumes all rectangles have positive heights and widths. Will not
                     22: ; work properly if this is not the case.
                     23: ;
                     24: ;-----------------------------------------------------------------------;
                     25: ;
                     26: ; Note: Cases where the width of the whole bytes fill is equal to the
                     27: ; width of the bitmap could be sped up by using a single REP MOVS or REP
                     28: ; STOS, but how often does WIN32 do a fill that's the width of the screen?
                     29: ; Not very.
                     30: ;
                     31: ;-----------------------------------------------------------------------;
                     32: 
                     33:         comment $
                     34: 
                     35: The overall approach of this module is to accept a list of rectangles to
                     36: fill, set up the VGA hardware for the desired fill, and then fill the
                     37: rectangles one at a time. Each rectangle fill is set up for everything
                     38: but vertical parameters, and then decomposed into the sections that
                     39: intersect each VGA bank; each section is drawn in turn. The drawing code
                     40: is heavily unrolled for performance, and vectors are set up so that the
                     41: drawing code appropriate for the desired fill is essentially threaded
                     42: together.
                     43: 
                     44:         commend $
                     45: 
                     46: ;-----------------------------------------------------------------------;
                     47: 
                     48: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
                     49: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
                     50: ; times unrolling. This is the only thing you need to change to control
                     51: ; unrolling.
                     52: 
                     53: LOOP_UNROLL_SHIFT equ 2                 ;2**2 = 4 times unrolling
                     54: 
                     55: ;-----------------------------------------------------------------------;
                     56: 
                     57:                 .386
                     58: 
                     59: ifndef  DOS_PLATFORM
                     60:         .model  small,c
                     61: else
                     62: ifdef   STD_CALL
                     63:         .model  small,c
                     64: else
                     65:         .model  small,pascal
                     66: endif;  STD_CALL
                     67: endif;  DOS_PLATFORM
                     68: 
                     69:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
                     70:         assume fs:nothing,gs:nothing
                     71: 
                     72:         .xlist
                     73:         include stdcall.inc             ;calling convention cmacros
                     74:         include i386\strucs.inc
                     75:         include i386\driver.inc
                     76:         include i386\egavga.inc
                     77:         include i386\unroll.inc
                     78:         include i386\ropdefs.inc
                     79: 
                     80:         .list
                     81: 
                     82: ;-----------------------------------------------------------------------;
                     83: 
                     84:         .data
                     85: 
                     86: ;-----------------------------------------------------------------------;
                     87: ; Left edge clip masks, indexed by intrabyte start address (xLeft AND 3);
                     88: ; bit n set = position n is filled. Whole byte cases are flagged as 0ffh.
                     89:         public jLeftMask
                     90: jLeftMask       label   byte
                     91:         db      0ffh,0eh,0ch,08h        ;start 0 (whole byte), 1, 2, 3
                     92: 
                     93: ;-----------------------------------------------------------------------;
                     94: ; Right edge clip masks, indexed by intrabyte end address (xRight AND 3,
                     95: ; non-inclusive); bit n set = position n is filled. Whole byte = 0ffh.
                     96:         public jRightMask
                     97: jRightMask      label   byte
                     98:         db      0ffh,01h,03h,07h        ;end 0 (whole byte), 1, 2, 3
                     99: 
                    100: ;-----------------------------------------------------------------------;
                    101: ; Tables used to set up for the desired raster op. Note that entries for raster
                    102: ; ops that aren't handled here are generally correct, except that they ignore
                    103: ; need for inversion of the destination, which those rops require.
                    104: ; Each table has 17 byte entries and is indexed by the low byte of ulMix.
                    105: ; Table used to force off the drawing color for R2_BLACK (0); each entry is
                    106: ; ANDed with the color. The first entry is ignored; there is no mix 0.
                    107:         public jForceOffTable
                    108: jForceOffTable  db         0
                    109:                 db         000h,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 1-8
                    110:                 db         0ffh,0ffh,000h,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 9-16
                    111: 
                    112: ;-----------------------------------------------------------------------;
                    113: ; Table used to force on the drawing color for R2_NOT (Dn) and R2_WHITE (1);
                    114: ; entries are ORed into the color (0ffh at 6 and 16). Entry 0 is ignored.
                    115:         public  jForceOnTable
                    116: jForceOnTable   db      0, 0,0,0,0,0,0ffh,0,0,0,0,0,0,0,0,0,0ffh
                    117: 
                    118: ;-----------------------------------------------------------------------;
                    119: ; Table used to invert the passed-in drawing color for Pn mixes; entries are
                    120: ; XORed with the color (0ffh at 2,3,4,8,10,12). Entry 0 is ignored; no mix 0.
                    121:         public  jNotTable
                    122: jNotTable       db      0, 0,0ffh,0ffh,0ffh,0,0,0,0ffh,0,0ffh,0,0ffh,0,0,0,0
                    123: 
                    124: ;-----------------------------------------------------------------------;
                    125: ; Table of VGA ALU logical functions corresponding to mixes. Note that Dn is
                    126: ; handled as a separate preceding inversion pass when part of a more complex
                    127: ; mix. Indexed by the low byte of ulMix.
                    128: ; The first entry is ignored; there is no mix 0.
                    129:         public jALUFuncTable
                    130: jALUFuncTable   db      0
                    131:                 db      DR_SET,DR_AND,DR_AND,DR_SET     ;mixes 1-4
                    132:                 db      DR_AND,DR_XOR,DR_XOR,DR_OR      ;mixes 5-8
                    133:                 db      DR_AND,DR_XOR,     0,DR_OR      ;mixes 9-12 (11 is literal 0)
                    134:                 db      DR_SET,DR_OR ,DR_OR ,DR_SET     ;mixes 13-16
                    135: 
                    136: ;-----------------------------------------------------------------------;
                    137: ; 1 entries mark rops that require two passes, one to invert the destination
                    138: ; and then another to finish the rop (mixes 2, 5, 8, and 14).
                    139: ; The first entry is ignored; there is no mix 0.
                    140:         public  jInvertDest
                    141: jInvertDest     db      0, 0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0
                    142: 
                    143: ;-----------------------------------------------------------------------;
                    143: ; Table of routines to be called to draw edges, according to which edges are
                    144: ; partial and which are whole bytes. Index bit 0=1: left partial; bit 1=1: right whole.
                    145:         align   4
                    146: pfnEdgeDrawing  label   dword
                    147:         dd      do_right_edge_bytes     ;0: left whole,   right not whole
                    148:         dd      do_both_edge_bytes      ;1: left partial, right not whole
                    149:         dd      check_next_bank         ;2: left whole,   right whole
                    150:         dd      do_left_edge_bytes      ;3: left partial, right whole
                    152: 
                    153: ;-----------------------------------------------------------------------;
                    154: ; Table of pointers to tables used to find entry points in unrolled wide
                    155: ; whole byte code.
                    156: 
                    157:         align   4
                    158: pfnWideWholeRep label   dword
                    159:         dd      pfnDrawWideW00Entry
                    160:         dd      pfnDrawWideW01Entry
                    161:         dd      pfnDrawWideW02Entry
                    162:         dd      pfnDrawWideW03Entry
                    163:         dd      pfnDrawWideW10Entry
                    164:         dd      pfnDrawWideW11Entry
                    165:         dd      pfnDrawWideW12Entry
                    166:         dd      pfnDrawWideW13Entry
                    167:         dd      pfnDrawWideW20Entry
                    168:         dd      pfnDrawWideW21Entry
                    169:         dd      pfnDrawWideW22Entry
                    170:         dd      pfnDrawWideW23Entry
                    171:         dd      pfnDrawWideW30Entry
                    172:         dd      pfnDrawWideW31Entry
                    173:         dd      pfnDrawWideW32Entry
                    174:         dd      pfnDrawWideW33Entry
                    175: 
                    176: ;-----------------------------------------------------------------------;
                    177: ; Table of pointers to tables used to find entry points in narrow,
                    178: ; special-cased unrolled non-replace whole byte code.
                    179: ; Indexed by the number of whole bytes to fill (1 through 4).
                    180: ; Note: The breakpoint where one should switch from special-casing to
                    181: ;  REP MOVSB is purely a guess on my part. 5 seemed reasonable.
                    182: 
                    183:         align   4
                    184: pfnWholeBytesNonReplaceEntries  label   dword
                    185:         dd      0                       ;we never get a 0-wide case
                    186:         dd      pfnDraw1WideRWEntry
                    187:         dd      pfnDraw2WideRWEntry
                    188:         dd      pfnDraw3WideRWEntry
                    189:         dd      pfnDraw4WideRWEntry
                    190: MAX_NON_REPLACE_SPECIAL equ     ($-pfnWholeBytesNonReplaceEntries)/4 ;= 5
                    191: 
                    192: ;-----------------------------------------------------------------------;
                    193: ; Table of pointers to tables used to find entry points in narrow, special-
                    194: ; cased unrolled replace whole byte code.
                    195: 
                    196: ; Note: The breakpoint where one should switch from special-casing to
                    197: ;  REP STOS is purely a guess on my part. 8 seemed reasonable.
                    198: 
                    199: ; Start address MOD 4 (address AND 3) is 0.
                    200:         align   4
                    201: pfnWholeBytesMod0ReplaceEntries  label   dword
                    202:         dd      0                       ;we never get a 0-wide case
                    203:         dd      pfnDraw1WideWEntry
                    204:         dd      pfnDraw2WideWEntry
                    205:         dd      pfnDraw3WideWEvenEntry
                    206:         dd      pfnDraw4WideWEntry
                    207:         dd      pfnDraw5WideWEvenEntry
                    208:         dd      pfnDraw6WideWMod3_0Entry
                    209:         dd      pfnDraw7WideWMod3_0Entry
                    210:         dd      pfnDraw8WideWMod3_0Entry
                    211: MAX_REPLACE_SPECIAL equ     ($-pfnWholeBytesMod0ReplaceEntries)/4 ;= 9 (entries 0-8)
                    212: 
                    213: ; Start address MOD 4 (address AND 3) is 1.
                    214:         align   4
                    215: pfnWholeBytesMod1ReplaceEntries  label   dword
                    216:         dd      0                       ;we never get a 0-wide case
                    217:         dd      pfnDraw1WideWEntry
                    218:         dd      pfnDraw2WideWEntry
                    219:         dd      pfnDraw3WideWOddEntry
                    220:         dd      pfnDraw4WideWEntry
                    221:         dd      pfnDraw5WideWOddEntry
                    222:         dd      pfnDraw6WideWMod3_1Entry
                    223:         dd      pfnDraw7WideWMod3_1Entry
                    224:         dd      pfnDraw8WideWMod3_1Entry
                    225: 
                    226: ; Start address MOD 4 (address AND 3) is 2.
                    227:         align   4
                    228: pfnWholeBytesMod2ReplaceEntries  label   dword
                    229:         dd      0                       ;we never get a 0-wide case
                    230:         dd      pfnDraw1WideWEntry
                    231:         dd      pfnDraw2WideWEntry
                    232:         dd      pfnDraw3WideWEvenEntry
                    233:         dd      pfnDraw4WideWEntry
                    234:         dd      pfnDraw5WideWEvenEntry
                    235:         dd      pfnDraw6WideWMod3_2Entry
                    236:         dd      pfnDraw7WideWMod3_2Entry
                    237:         dd      pfnDraw8WideWMod3_2Entry
                    238: 
                    239: ; Start address MOD 4 (address AND 3) is 3.
                    240:         align   4
                    241: pfnWholeBytesMod3ReplaceEntries  label   dword
                    242:         dd      0                       ;we never get a 0-wide case
                    243:         dd      pfnDraw1WideWEntry
                    244:         dd      pfnDraw2WideWEntry
                    245:         dd      pfnDraw3WideWOddEntry
                    246:         dd      pfnDraw4WideWEntry
                    247:         dd      pfnDraw5WideWOddEntry
                    248:         dd      pfnDraw6WideWMod3_3Entry ;alignment-3 variant, as for 7 and 8
                    249:         dd      pfnDraw7WideWMod3_3Entry
                    250:         dd      pfnDraw8WideWMod3_3Entry
                    251: 
                    252: ; Master MOD 4 (address AND 3) alignment look-up table for entry tables for
                    253: ; four possible alignments for narrow, special-cased replace whole byte code.
                    254:         align   4
                    255: pfnWholeBytesReplaceMaster      label   dword
                    256:         dd      pfnWholeBytesMod0ReplaceEntries
                    257:         dd      pfnWholeBytesMod1ReplaceEntries
                    258:         dd      pfnWholeBytesMod2ReplaceEntries
                    259:         dd      pfnWholeBytesMod3ReplaceEntries
                    260: 
                    261: ;-----------------------------------------------------------------------;
                    262: 
                    263:                 .code
                    264: 
                    265: ;-----------------------------------------------------------------------;
                    266: 
                    267: cProc   vTrgBlt,24,<         \
                    268:         uses    esi edi ebx, \
                    269:         ppdev:    ptr,       \
                    270:         culRcl:   dword,     \
                    271:         prcl:     ptr RECTL, \
                    272:         ulMix:    dword,     \
                    273:         ulColor:  dword,     \
                    274:         pptlBrsuh:ptr POINTL >
                    275: 
                    276:         local   ulRowOffset :dword      ;Offset from start of scan line of
                    277:                                         ; first byte to fill
                    278:         local   ulWholeBytes :dword     ;# of whole bytes to fill
                    279:         local   ulWholeDwords :dword    ;# of whole dwords to fill
                    280:         local   pfnWholeFn  :dword      ;pointer to routine used to draw
                    281:                                         ; whole bytes
                    282:         local   ulScanWidth :dword      ;offset from start of one scan to start
                    283:                                         ; of next
                    284:         local   ulNextScan  :dword      ;offset from end of one scan line's
                    285:                                         ; fill to start of next
                    286:         local   ulCurrentTopScan :dword ;top scan line to fill in current bank
                    287:         local   ulMasks     :dword      ;low byte (AL) = left mask, next byte
                    288:                                         ; (AH) = right mask
                    289:         local   ulBottomScan :dword     ;bottom scan line of fill rectangle
                    290:         local   pfnDraw1WideVector :dword ;address at which to enter unrolled
                    291:                                           ; edge loop
                    292:         local   jALUFunc   :dword       ;VGA ALU logical operation (SET, AND,
                    293:                                         ; OR, or XOR)
                    294:         local   pfnStartDrawing :dword  ;pointer to function to call to start
                    295:                                         ; drawing
                    296:         local   pfnContinueDrawing :dword ;pointer to function to call to
                    297:                                         ; continue drawing after doing whole
                    298:                                         ; bytes
                    299:         local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
                    300:                                         ; address past the left edge when the
                    301:                                         ; left edge is partial
                    302:         local   pfnWholeBytes :dword    ;pointer to table of entry points
                    303:                                         ; into unrolled loops for whole byte
                    304:                                         ; filling
                    305:         local   jInvertDestFirst :dword ;1 if the rop requires a pass to invert
                    306:                                         ; the destination before the normal
                    307:                                         ; pass
                    308:         local   ulDrawingColor :dword   ;color byte with which to fill,
                    309:                                         ; replicated to a dword
                    310:         local   ppfnDrawEdgeTable :dword ;points to table to be used to look up
                    311:                                          ; unrolled entry points for edge
                    312:                                          ; bytes (pfnDraw1WideRWEntry or
                    313:                                          ; pfnDraw1WideWEntry)
                    314: 
                    315: ;-----------------------------------------------------------------------;
                    316: ; CLD is assumed on entry.
                    317: ;-----------------------------------------------------------------------;
                    318: 
                    319: ;-----------------------------------------------------------------------;
                    320: ; Make sure there's something to draw; clip enumerations can be empty.
                    321: ;-----------------------------------------------------------------------;
                    322: 
                    323:         cmp     culRcl,0                ;any rects to fill?
                    324:         jz      vTrgBlts_done           ;no, we're done
                    325: 
                    326: 
                    327: ;-----------------------------------------------------------------------;
                    328: ; Set up variables that are constant for the entire time we're in this
                    329: ; module.
                    330: ;-----------------------------------------------------------------------;
                    331: 
                    332: ;-----------------------------------------------------------------------;
                    333: ; Set up for the desired raster op.
                    334: ;-----------------------------------------------------------------------;
                    335: 
                    336:         sub     ebx,ebx                 ;ignore any background mix; we're only
                    337:         mov     bl,byte ptr ulMix       ; concerned with the foreground in this
                    338:                                         ; module (EBX = mix, indexes tables)
                    339:         cmp     ebx,R2_NOP              ;is this NOP?
                    340:         jz      vTrgBlts_done           ;yes, we're done
                    341:         mov     al,jInvertDest[ebx]          ;remember whether we need to
                    342:         mov     byte ptr jInvertDestFirst,al ; invert the destination before
                    343:                                              ; finishing the rop
                    344:         mov     ah,byte ptr ulColor     ;get the drawing color
                    345:         and     ah,jForceOffTable[ebx]  ;force color to 0 if necessary
                    346:                                         ; (R2_BLACK)
                    347:         or      ah,jForceOnTable[ebx]   ;force color to 0ffh if necessary
                    348:                                         ; (R2_WHITE, R2_NOT)
                    349:         xor     ah,jNotTable[ebx]       ;invert color if necessary (any Pn mix)
                    350:                                         ;at this point, AH has the color we
                    351:                                         ; want to draw with; set up the VGA
                    352:                                         ; hardware to draw with that color
                    353:         mov     al,ah                   ;replicate the drawing color to a dword
                    354:         mov     edx,eax                 ;DX = color in both bytes
                    355:         shl     eax,16                  ;move that word to the high word
                    356:         mov     ax,dx                   ;EAX = color in all four bytes
                    357:         mov     ulDrawingColor,eax      ;remember drawing color
                    358: 
                    359:         mov     ppfnDrawEdgeTable,offset pfnDraw1WideWEntry
                    360:                                         ;assume replace-type rop, so we can
                    361:                                         ; draw edge bytes with the write-
                    362:                                         ; without-read code pointed to by this
                    363:                                         ; table
                    364:         mov     ah,jALUFuncTable[ebx]   ;get the ALU logical function
                    365:         and     ah,ah                   ;is the logical function DR_SET?
                    366:         .errnz  DR_SET                  ;the zero test above requires DR_SET=0
                    367:         jz      short skip_ALU_set      ;yes, don't have to set because that's
                    368:                                         ; the VGA's default state
                    369:         mov     edx,VGA_BASE + GRAF_ADDR
                    370:         mov     al,GRAF_DATA_ROT
                    371:         out     dx,ax                   ;set the ALU logical function (AL =
                    372:         mov     ppfnDrawEdgeTable,offset pfnDraw1WideRWEntry
                    373:                                         ;draw edge bytes with the code pointed
                    374:                                         ; to by this table (read/write)
                    375: skip_ALU_set:
                    376:         mov     byte ptr jALUFunc,ah    ;remember the ALU logical function
                    377: 
                    378: ;-----------------------------------------------------------------------;
                    379: ; Fill the current rectangle with the specified raster op and color.
                    380: ;-----------------------------------------------------------------------;
                    381: 
                    382: fill_rect_loop:
                    383: 
                    384: ;-----------------------------------------------------------------------;
                    385: ; Set up variables that are constant from bank to bank during a single
                    386: ; fill.
                    387: ;-----------------------------------------------------------------------;
                    388: 
                    389: ;-----------------------------------------------------------------------;
                    390: ; Set up masks and widths.
                    391: ;-----------------------------------------------------------------------;
                    392: 
                    393:         mov     edi,prcl                ;point to rectangle to fill
                    394:         mov     eax,[edi].yBottom
                    395:         mov     ulBottomScan,eax        ;remember the bottom scan line of fill
                    396: 
                    397:         mov     ebx,[edi].xRight        ;right edge of fill (non-inclusive)
                    398:         mov     ecx,ebx
                    399:         and     ecx,011b                ;intrabyte address of right edge
                    400:         mov     ah,jRightMask[ecx]      ;right edge mask
                    401: 
                    402:         mov     esi,[edi].xLeft         ;left edge of fill (inclusive)
                    403:         mov     ecx,esi
                    404:         shr     ecx,2                   ;/4 for start offset from left edge
                    405:                                         ; of scan line
                    406:         mov     ulRowOffset,ecx         ;remember offset from start of scan
                    407:                                         ; line
                    408:         sub     ebx,esi                 ;width in pixels of fill
                    409: 
                    410:         and     esi,011b                ;intrabyte address of left edge
                    411:         mov     al,jLeftMask[esi]       ;left edge mask
                    412: 
                    413:         dec     ebx                     ;make inclusive on right
                    414:         add     ebx,esi                 ;inclusive width, starting counting at
                    415:                                         ; the beginning of the left edge byte
                    416:         shr     ebx,2                   ;width of fill in bytes touched - 1
                    417:         jnz     short more_than_1_byte  ;more than 1 byte is involved
                    418: 
                    419: ; Only one byte will be affected. Combine first/last masks.
                    420: 
                    421:         and     al,ah                   ;we'll use first byte mask only
                    422:         xor     ah,ah                   ;want last byte mask to be 0
                    423:         inc     ebx                     ;so there's one count to subtract below
                    424:                                         ; if this isn't a whole edge byte
                    425: more_than_1_byte:
                    426: 
                    427: ; If all pixels in the left edge are altered, combine the first byte into the
                    428: ; whole byte count and clear the first byte mask, because we can handle solid
                    429: ; edge bytes faster as part of the whole bytes. Ditto for the right edge.
                    430: 
                    431:         sub     ecx,ecx                 ;edge whole-status accumulator
                    432:         cmp     al,-1                   ;is left edge a whole byte or partial?
                    433:         adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
                    434:         sub     ebx,ecx                 ;if left edge partial, deduct it from
                    435:                                         ; the whole bytes count
                    436:         mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
                    437:                                         ; it's partial when pointing to the
                    438:                                         ; whole bytes
                    439:         and     ah,ah                   ;is right edge mask 0, meaning this
                    440:                                         ; fill is only 1 byte wide?
                    441:         jz      short save_masks        ;yes, no need to do anything
                    442:         cmp     ah,-1                   ;is right edge a whole byte or partial?
                    443:         jnz     short save_masks        ;partial
                    444:         add     ecx,2                   ;bit 1 of ECX=0 if right edge partial,
                    445:                                         ; 1 if whole;
                    446:                                         ;bit 0=1 if left edge partial, 0 whole
                    447:         inc     ebx                     ;if right edge whole, include it in the
                    448:                                         ; whole bytes count
                    449: save_masks:
                    450:         mov     ulMasks,eax             ;save left (AL) and right (AH) masks
                    451:         mov     ulWholeBytes,ebx        ;save # of whole bytes
                    452: 
                    453:         mov     ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw
                    454:         mov     pfnContinueDrawing,ecx    ; all partial (non-whole) edges
                    455: 
                    456:         and     ebx,ebx                 ;any whole bytes?
                    457:         jz      short start_vec_set     ;no (ECX = edge drawing routine)
                    458:                                         ;yes, so draw the whole bytes before
                    459:                                         ; the edge bytes
                    460: 
                    461: ; The whole bytes loop depends on the type of operation being done. If the
                    462: ; operation is one which uses DR_SET, then we can use a STOS-type operation,
                    463: ; else we have to use a MOVSB-type operation (to load the latches with the
                    464: ; existing contents of display memory to allow the ALUs to work).
                    465: 
                    466:         cmp     byte ptr jALUFunc,DR_SET ;is it a replace-type rop?
                    467:         jz      short is_replace_type   ;yes
                    468:                                         ;no, set up for non-replace whole bytes
                    469:         mov     ecx,offset whole_bytes_non_replace_wide
                    470:                                         ;assume too wide to special-case
                    471:         cmp     ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case?
                    472:         jnb     short start_vec_set     ;yes
                    473:         mov     ecx,pfnWholeBytesNonReplaceEntries[ebx*4] ;no, point to entry
                    474:         mov     pfnWholeBytes,ecx       ; table for width
                    475:         mov     ecx,offset whole_bytes_special
                    476:                                         ;set up to call special routine to fill
                    477:                                         ; whole bytes
                    478:         jmp     short start_vec_set
                    479: 
                    480:         align   4
                    481: is_replace_type:                        ;set up for replace-type rop
                    482:         cmp     ebx,MAX_REPLACE_SPECIAL ;too wide to special case?
                    483:         jnb     short is_wide_replace   ;yes
                    484:                                         ;narrow enough to special case. Look up
                    485:                                         ; the entry table for the special case
                    486:                                         ; based on the start alignment
                    487:         mov     ecx,ulRowOffset         ;offset of the fill's left edge byte
                    488:         add     ecx,ulLeftEdgeAdjust    ;skip the left edge byte if it's partial
                    489:         and     ecx,011b                ;alignment of the first whole byte, i.e.
                    490:                                         ; its start offset MOD 4
                    491:         mov     ecx,pfnWholeBytesReplaceMaster[ecx*4] ;look up table of entry
                    492:                                                       ; tables for alignment
                    493:         mov     ecx,[ecx+ebx*4]         ;look up entry table for width
                    494:         mov     pfnWholeBytes,ecx       ; and save it for whole_bytes_special
                    495:         mov     ecx,offset whole_bytes_special
                    496:                                         ;set up to call special routine to fill
                    497:                                         ; whole bytes
                    498:         jmp     short start_vec_set     ;ECX = routine that starts the drawing
                    499: 
                    500:         align   4
                    501: is_wide_replace:                        ;set up for wide replace-type op
                    502:                                         ;Note: assumes there is at least one
                    503:                                         ; full dword involved!
                    504:         mov     ecx,ulRowOffset         ;offset of the fill's left edge byte
                    505:         add     ecx,ulLeftEdgeAdjust    ;offset of the first whole byte
                    506:         neg     ecx                     ;ECX = # of odd leading bytes (0-3)
                    507:         and     ecx,011b                ; until the offset is a multiple of 4
                    508:         mov     edx,ebx                 ;EBX = # of whole bytes across
                    509:         sub     edx,ecx                 ;ignore odd leading bytes
                    510:         mov     eax,edx                 ;keep a copy to get the trailing count
                    511:         shr     edx,2                   ;# of whole dwords across (not counting
                    512:                                         ; odd leading & trailing bytes)
                    513:         mov     ulWholeDwords,edx
                    514:         and     eax,011b                ;# of odd (fractional) trailing bytes
                    515:         shl     ecx,2
                    516:         or      ecx,eax                 ;look-up index = (# of leading bytes
                    517:                                         ; * 4) + # of trailing bytes
                    518:         mov     ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/
                    519:         mov     pfnWholeBytes,ecx          ; back alignment
                    520:         mov     ecx,offset whole_bytes_rep_wide
                    521:                                         ;set up to call routine to perform wide
                    522:                                         ; whole bytes fill
                    523: start_vec_set:                          ;ECX = whole bytes routine, or the edge routine if no whole bytes
                    524:         mov     pfnStartDrawing,ecx     ;routine draw_banks jumps to for each bank
                    525: 
                    526:         mov     ecx,ppdev
                    527:         mov     eax,[ecx].pdev_lPlanarNextScan
                    528:         mov     ulScanWidth,eax         ;local copy of scan line width
                    529:         sub     eax,ebx                 ;EAX = scan width - # of whole bytes
                    530:         mov     ulNextScan,eax          ;offset from end of one scan's whole bytes to start of next
                    531: 
                    532: 
                    533: ;-----------------------------------------------------------------------;
                    534: ; Fill this rectangle.
                    535: ;-----------------------------------------------------------------------;
                    536: 
                    537:         cmp     byte ptr jInvertDestFirst,1
                    538:                                         ;is this an invert-dest-plus-something-
                    539:                                         ; else rop that requires two passes?
                    540:         jz      short do_invert_dest_rop ;yes, special case with two passes
                    541: 
                    542: do_single_pass:                         ;also the second pass of a two-pass rop
                    543:         call    draw_banks              ;fill the current rectangle, bank by bank
                    544: 
                    545: 
                    546: ;-----------------------------------------------------------------------;
                    547: ; See if there are any more rectangles to fill.
                    548: ;-----------------------------------------------------------------------;
                    549: 
                    550:         add     prcl,(size RECTL) ;point to the next rectangle, if there is one
                    551:         dec     culRcl            ;count down the rectangles to fill
                    552:         jnz     fill_rect_loop    ;more rectangles remain, go set up the next
                    553: 
                    554: 
                    555: ;-----------------------------------------------------------------------;
                    556: ; We have filled all rectangles.  Restore the VGA to its default state.
                    557: ;-----------------------------------------------------------------------;
                    558: ; NOTE(review): spelled jALUfunc here but jALUFunc elsewhere; relies on case-insensitive symbols - confirm.
                    559:         cmp     byte ptr jALUfunc,DR_SET ;is the logical function already SET?
                    560:         jnz     short @F                 ;no, need to reset it
                    561:         cRet    vTrgBlt                  ;yes, no need to reset it
                    562: 
                    563:         align   4
                    564: @@:
                    565:         mov     edx,VGA_BASE + GRAF_ADDR
                    566:         mov     eax,(DR_SET shl 8) + GRAF_DATA_ROT ;set the logical function to
                    567:         out     dx,ax                              ; SET (AL = index, AH = data)
                    568: vTrgBlts_done:
                    569:         cRet    vTrgBlt
                    570: 
                    571: 
                    572: ;-----------------------------------------------------------------------;
                    573: ; Handles rops that require two passes, the first being a destination
                    574: ; inversion pass.
                    575: ;-----------------------------------------------------------------------;
                    576: 
                    577:         align   4
                    578: do_invert_dest_rop:
                    579: 
                    580: ; First pass: set up the VGA's hardware to XOR the destination with all 1s.
                    581: 
                    582:         mov     eax,ulDrawingColor      ;remember the normal drawing color
                    583:         push    eax                     ; on the stack across the first pass
                    584:         mov     ulDrawingColor,-1       ;with XOR, this flips all bits
                    585: 
                    586:         mov     edx,VGA_BASE + GRAF_ADDR
                    587:         mov     eax,(DR_XOR shl 8) + GRAF_DATA_ROT
                    588:         out     dx,ax                   ;logical function = XOR to invert
                    589: 
                    590: ; Invert the destination (same rectangle, same drawing vectors).
                    591: 
                    592:         call    draw_banks
                    593: 
                    594: ; Restore the VGA's hardware to the state required for the second pass.
                    595: 
                    596:         mov     edx,VGA_BASE + GRAF_ADDR
                    597:         mov     ah,byte ptr jALUFunc    ;AH = the rop's own ALU function
                    598:         mov     al,GRAF_DATA_ROT        ;AL = register index
                    599:         out     dx,ax                   ;set the ALU logical function back to
                    600:                                         ; proper state for the rest of the rop
                    601: 
                    602:         pop     eax
                    603:         mov     ulDrawingColor,eax      ;restore the normal drawing color
                    604: 
                    605: ; Perform the second pass to finish the rop.
                    606: 
                    607:         jmp     do_single_pass
                    608: 
                    609: 
                    610: ;-----------------------------------------------------------------------;
                    611: ; Fills all banks in the current fill rectangle. Called once per fill
                    612: ; rectangle, except for destination-inversion-plus-something-else rops.
                    613: ;-----------------------------------------------------------------------;
                    614: 
                    615:         align   4
                    616: draw_banks:
                    617: 
                    618: ;-----------------------------------------------------------------------;
                    619: ; Map in the bank containing the top scan to fill, if it's not mapped in
                    620: ; already.
                    621: ;-----------------------------------------------------------------------;
                    622: 
                    623:         mov     edi,prcl                ;point to rectangle to fill
                    624:         mov     ecx,ppdev               ;point to PDEV
                    625:         mov     eax,[edi].yTop          ;top scan line of fill
                    626:         mov     ulCurrentTopScan,eax    ;this will be the fill top in 1st bank
                    627: 
                    628:         cmp     eax,[ecx].pdev_rcl1PlanarClip.yTop ;is fill top above the top
                    629:                                                    ; of the mapped bank?
                    630:         jl      short map_init_bank             ;yes, map in proper bank
                    631:         cmp     eax,[ecx].pdev_rcl1PlanarClip.yBottom ;is fill top at or past
                    632:                                                       ; the mapped bank's bottom?
                    633:         jl      short init_bank_mapped          ;no, proper bank already mapped
                    634: map_init_bank:
                    635: 
                    636: ; Map in the bank containing the top scan line of the fill.
                    637: 
                    638:         ptrCall <dword ptr [ecx].pdev_pfnPlanarControl>,<ecx,eax,JustifyTop>
                    639: 
                    640: init_bank_mapped:
                    641: 
                    642: ;-----------------------------------------------------------------------;
                    643: ; Main loop for processing fill in each bank.
                    644: ;-----------------------------------------------------------------------;
                    645: 
                    646: ; Compute the starting address and scan line count for the initial bank.
                    647: 
                    648:         mov     eax,ppdev               ;point to PDEV
                    649:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
                    650:         cmp     ebx,[eax].pdev_rcl1PlanarClip.yBottom
                    651:                                         ;which comes first, the bottom of the
                    652:                                         ; dest rect or the bottom of the
                    653:                                         ; current bank?
                    654:         jl      short BottomScanSet     ;fill bottom comes first, so draw to
                    655:                                         ; that; this is the last bank in fill
                    656:         mov     ebx,[eax].pdev_rcl1PlanarClip.yBottom
                    657:                                         ;bank bottom comes first; draw to
                    658:                                         ; bottom of bank
                    659: BottomScanSet:
                    660:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
                    661:         sub     ebx,edi                 ;# of scans to fill in bank
                    662:         imul    edi,ulScanWidth         ;offset of starting scan line
                    663: 
                    664: ; Note that the start of the bitmap will change each time through the
                    665: ; bank loop, because the start of the bitmap is varied to map the
                    666: ; desired scan line to the banking window.
                    667: 
                    668:         add     edi,[eax].pdev_pvBitmapStart ;start of scan in bitmap
                    669:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
                    670: 
                    671: ; We have computed the starting address (EDI) and scan count (EBX). Time to
                    672: ; start drawing in the initial bank, with whole bytes or edges as set up.
                    673: 
                    674:         jmp     pfnStartDrawing
                    675: 
                    676: 
                    677: ;-----------------------------------------------------------------------;
                    678: ; Whole byte fills.
                    679: ;-----------------------------------------------------------------------;
                    680: 
                    681: ;-----------------------------------------------------------------------;
                    682: ; Handles replace-type whole byte fills wider than the maximum special
                    683: ; case width.
                    684: ;
                    685: ; The destination is not involved, so a STOS (or equivalent) can be used
                    686: ; (no read needed before write).
                    687: ;-----------------------------------------------------------------------;
                    688: 
                    689:         align   4
                    690: whole_bytes_rep_wide:                   ;entry: EBX = scan count, EDI --> left edge
                    691:         push    ebx                     ;save scan count
                    692:         push    edi                     ;save starting address
                    693: 
                    694:         mov     eax,pfnWholeBytes       ;point to entry table for unrolled
                    695:                                         ; loop for front/back alignment
                    696:         SET_UP_UNROLL_VARS ebx,ecx, ebx,[eax], LOOP_UNROLL_SHIFT
                    697:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
                    698:         mov     esi,ulWholeDwords       ;whole dwords width
                    699:         mov     edx,ulNextScan          ;offset from end of one scan line to
                    700:                                         ; start of next
                    701:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
                    702: 
                    703:         call    ecx                     ;draw the wide whole bytes
                    704: 
                    705:         pop     edi                     ;restore screen pointer
                    706:         pop     ebx                     ;restore fill scan count
                    707:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
                    708: 
                    709: 
                    710: ;-----------------------------------------------------------------------;
                    711: ; Handles both replace and non-replace whole byte fills narrow enough to
                    712: ; special case.
                    713: ;-----------------------------------------------------------------------;
                    714: 
                    715:         align   4
                    716: whole_bytes_special:                    ;entry: EBX = scan count, EDI --> left edge
                    717:         push    ebx                     ;save scan count
                    718:         push    edi                     ;save starting address
                    719: 
                    720:         mov     eax,pfnWholeBytes       ;point to entry table for unrolled
                    721:                                         ; loop for whole byte width
                    722:         SET_UP_UNROLL_VARS ebx,edx, ebx,[eax], LOOP_UNROLL_SHIFT
                    723:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
                    724:         mov     ecx,ulScanWidth         ;offset to next scan line
                    725:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
                    726: 
                    727:         call    edx                     ;draw the whole bytes
                    728: 
                    729:         pop     edi                     ;restore screen pointer
                    730:         pop     ebx                     ;restore fill scan count
                    731:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
                    732: 
                    733: 
                    734: ;-----------------------------------------------------------------------;
                    735: ; Handles non-replace whole byte fills wider than the maximum special case
                    736: ; width.
                    737: ;
                    738: ; The destination is involved, so a MOVSB (or equivalent) must be
                    739: ; performed in order to do a read before write to give the ALUs something
                    740: ; to work with.
                    741: ;-----------------------------------------------------------------------;
                    742: 
                    743:         align   4
                    744: whole_bytes_non_replace_wide:           ;entry: EBX = scan count, EDI --> left edge
                    745:         push    ebx                     ;save scan count
                    746:         push    edi                     ;save starting address
                    747: 
                    748:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
                    749:         mov     esi,ulWholeBytes        ;whole bytes width
                    750:         mov     edx,ulNextScan          ;offset from end of one scan line to
                    751:                                         ; start of next
                    752:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
                    753: 
                    754: ;-----------------------------------------------------------------------;
                    755: ; 5-or-wider read before write loop.
                    756: ;
                    757: ; Entry:
                    758: ;       AL  = drawing color byte to write (AH is used as scratch)
                    759: ;       ESI = # of bytes to fill across each scan line
                    760: ;       EBX = # of scan lines to fill
                    761: ;       EDX = offset from end of one scan line's fill to the start of the next
                    762: ;       EDI = start offset
                    763: ;
                    764: ; AH, EBX, ECX, EDI modified. All other registers preserved.
                    765: 
                    766: ; 5-or-wider read/write.
                    767: 
                    768: draw_wide_rw_loop:
                    769:         mov     ecx,esi         ;ECX = bytes left to fill on this scan line
                    770: @@:
                    771:         mov     ah,[edi]        ;latch the target address. The data read
                    772:                                 ; doesn't matter
                    773:         mov     [edi],al        ;merge the drawing color with the latched
                    774:                                 ; target address according to the selected ALU
                    775:                                 ; function, and write the result to display
                    776:                                 ; memory
                    777:         inc     edi             ;point to the next byte
                    778:         dec     ecx             ;count down bytes on this scan line
                    779:         jnz     @B
                    780:         add     edi,edx         ;point to the first whole byte on the next scan
                    781:         dec     ebx             ;count down scan lines
                    782:         jnz     draw_wide_rw_loop
                    783: 
                    784:         pop     edi                     ;restore screen pointer
                    785:         pop     ebx                     ;restore fill scan count
                    786:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
                    787: 
                    788: 
                    789: ;-----------------------------------------------------------------------;
                    790: ; Process any left/right columns that have to be done.
                    791: ;
                    792: ;   Currently:
                    793: ;       EBX =   height to fill, in scans
                    794: ;       EDI --> first byte of left edge
                    795: ;-----------------------------------------------------------------------;
                    796: 
                    797: ;-----------------------------------------------------------------------;
                    798: ; Handle case where both edges are partial (non-whole) bytes. We don't
                    799: ; have to read before write because we're using the Map Mask, not the
                    800: ; Bit Mask.
                    801: ;-----------------------------------------------------------------------;
                    802:         align   4
                    803:         public do_both_edge_bytes
                    804: do_both_edge_bytes:                     ;entry: EBX = scan count, EDI --> left edge
                    805: 
                    806: ; Set up variables for entering unrolled loop.
                    807: 
                    808:         mov     al,byte ptr ulMasks     ;this will become the Map Mask for the
                    809:                                         ; left edge
                    810:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
                    811:         out     dx,al                   ;set Map Mask for left edge
                    812: 
                    813:         mov     ecx,ppfnDrawEdgeTable
                    814:         SET_UP_UNROLL_VARS ebx,edx, ebx,[ecx], LOOP_UNROLL_SHIFT
                    815:         mov     pfnDraw1WideVector,edx  ;remember the entry point to reuse it
                    816:                                         ; for the right edge
                    817:         mov     ecx,ulScanWidth         ;offset from one scan to next
                    818: 
                    819:         mov     esi,ulWholeBytes        ;ESI = # of whole bytes
                    820:         lea     esi,[esi+edi+1]         ;--> start for right edge
                    821:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
                    822: 
                    823:         push    ebx                     ;preserve scan line count
                    824:         call    edx                     ;jump into the unrolled loop to draw
                    825:         pop     ebx                     ;restore scan line count
                    826: 
                    827:         mov     edi,esi                 ;point to first right edge byte
                    828:         mov     al,byte ptr ulMasks+1   ;this will become the Map Mask for the
                    829:                                         ; right edge
                    830:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
                    831:         out     dx,al                   ;set Map Mask for right edge
                    832: 
                    833:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
                    834: 
                    835:         push    offset edges_done       ;return here
                    836:         jmp     pfnDraw1WideVector      ;jump into the unrolled loop to draw
                    837: 
                    838: ;-----------------------------------------------------------------------;
                    839: ; Handle case where only the left edge is partial (non-whole).
                    840: ;-----------------------------------------------------------------------;
                    841:         align   4
                    842: do_left_edge_bytes:                     ;entry: EBX = scan count, EDI --> left edge
                    843: 
                    844: ; Set up variables for entering unrolled loop.
                    845: 
                    846:         mov     ecx,ppfnDrawEdgeTable
                    847:         SET_UP_UNROLL_VARS ebx,esi, ebx,[ecx], LOOP_UNROLL_SHIFT
                    848: 
                    849:         mov     ecx,ulScanWidth         ;offset from one scan to next
                    850:         mov     al,byte ptr ulMasks     ;this will become the Map Mask for the
                    851:                                         ; left edge
                    852:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
                    853:         out     dx,al                   ;set Map Mask for left edge
                    854: 
                    855:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
                    856: 
                    857:         push    offset edges_done       ;return here
                    858:         jmp     esi                     ;jump into the unrolled loop to draw
                    859: 
                    860: ;-----------------------------------------------------------------------;
                    861: ; Handle case where only the right edge is partial (non-whole).
                    862: ;-----------------------------------------------------------------------;
                    863:         align   4
                    864: do_right_edge_bytes:                    ;entry: EBX = scan count, EDI --> left edge
                    865: 
                    866: ; Set up variables for entering unrolled loop.
                    867: 
                    868:         mov     ecx,ppfnDrawEdgeTable
                    869:         SET_UP_UNROLL_VARS ebx,esi, ebx,[ecx], LOOP_UNROLL_SHIFT
                    870: 
                    871:         mov     ecx,ulScanWidth         ;offset from one scan to next
                    872:         add     edi,ulWholeBytes        ;--> start for right edge (remember,
                    873:                                         ; left edge is whole, so the left edge
                    874:                                         ; byte is included in the whole byte
                    875:                                         ; count)
                    876:         mov     al,byte ptr ulMasks+1   ;this will become the Map Mask for the
                    877:                                         ; right edge
                    878:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
                    879:         out     dx,al                   ;set Map Mask for right edge
                    880: 
                    881:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
                    882: 
                    883:         call    esi                     ;draw the edge, then fall through to edges_done
                    884: 
                    885: ;-----------------------------------------------------------------------;
                    886: ; We have done all partial edges.
                    887: ;-----------------------------------------------------------------------;
                    888: 
                    889: edges_done:                             ;the edge routines return or fall through to here
                    890: 
                    891:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
                    892:         mov     al,MM_ALL               ;restore the default Map Mask of all
                    893:         out     dx,al                   ; planes enabled
                    894: 
                    895: ;-----------------------------------------------------------------------;
                    896: ; See if there are any more banks to process.
                    897: ;-----------------------------------------------------------------------;
                    898: 
                    899: check_next_bank:
                    900: 
                    901:         mov     edi,ppdev
                    902:         mov     eax,[edi].pdev_rcl1PlanarClip.yBottom ;does the fill end at or
                    903:         cmp     ulBottomScan,eax                      ; before the bank bottom?
                    904:         jle     short banks_done        ;yes, so we're done
                    905:                                         ;no, map in the next bank and fill it
                    906:         mov     ulCurrentTopScan,eax    ;remember where the top of the bank
                    907:                                         ; we're about to map in is (same as
                    908:                                         ; bottom of bank we just did)
                    909: 
                    910:         ptrCall <dword ptr [edi].pdev_pfnPlanarControl>,<edi,eax,JustifyTop>
                    911:                                         ;map in the bank
                    912: 
                    913: ; Compute the starting address and scan line count in this bank.
                    914: 
                    915:         mov     eax,ppdev               ;EAX->target surface
                    916:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
                    917:         cmp     ebx,[eax].pdev_rcl1PlanarClip.yBottom
                    918:                                         ;which comes first, the bottom of the
                    919:                                         ; dest rect or the bottom of the
                    920:                                         ; current bank?
                    921:         jl      short BottomScanSet2    ;fill bottom comes first, so draw to
                    922:                                         ; that; this is the last bank in fill
                    923:         mov     ebx,[eax].pdev_rcl1PlanarClip.yBottom
                    924:                                         ;bank bottom comes first; draw to
                    925:                                         ; bottom of bank
                    926: BottomScanSet2:
                    927:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
                    928:         sub     ebx,edi                 ;# of scans to fill in bank
                    929:         imul    edi,ulScanWidth         ;offset of starting scan line
                    930: 
                    931: ; Note that the start of the bitmap will change each time through the
                    932: ; bank loop, because the start of the bitmap is varied to map the
                    933: ; desired scan line to the banking window.
                    934: 
                    935:         add     edi,[eax].pdev_pvBitmapStart ;start of scan in bitmap
                    936:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
                    937: 
                    938: ; Draw in the new bank, starting with the same routine as the first bank.
                    939: 
                    940:         jmp     pfnStartDrawing         ;EBX = scan count, EDI = fill start
                    941: 
                    942: 
                    943: ;-----------------------------------------------------------------------;
                    944: ; Done with all banks in this fill.
                    945: 
                    946: banks_done:
                    947:         retn
                    948: 
                    949: endProc vTrgBlt
                    950: 
                    951: 
                    952: ;-----------------------------------------------------------------------;
                    953: ; Unrolled loops.
                    954: ; There are two kinds of unrolled loops: read-before-write (to load the
                    955: ;  latches), and write-only (for replace-type rops).
                    956: ;-----------------------------------------------------------------------;
                    957: 
                    958: 
                    959: ;-----------------------------------------------------------------------;
                    960: ; Unrolled drawing stuff for cases where read before write is required,
                    961: ; to load the latches.
                    962: ;-----------------------------------------------------------------------;
                    963: 
                    964: ; Tables of entry points into the unrolled 1-, 2-, 3-, and 4-wide read before
                    965: ; write loops below; no table for 5-or-wider fills is defined in this group.
                    966: ; Arguments: table name, entry label prefix, LOOP_UNROLL_COUNT.
                    967:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideRWEntry,RW1,LOOP_UNROLL_COUNT
                    968:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideRWEntry,RW2,LOOP_UNROLL_COUNT
                    969:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideRWEntry,RW3,LOOP_UNROLL_COUNT
                    970:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideRWEntry,RW4,LOOP_UNROLL_COUNT
                    971: 
                    972: ;-----------------------------------------------------------------------;
                    973: ; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
                    974: ;
                    975: ; Entry:
                    976: ;       AL  = drawing color
                    977: ;       EBX = unrolled loop count
                    978: ;       ECX = scan line width in bytes
                    979: ;       EDI = start offset
                    980: ;
                    981: ; AH, EBX, EDI modified (AH receives the latch reads). All other registers preserved.
                    982: 
                    983: ;-----------------------------------------------------------------------;
                    984: ; Macro to draw one read before write byte, then advance to next scan line.
                    985: ; Defines an entry label named ENTRY_LABEL followed by ENTRY_INDEX; AH is scratch.
                    986: DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                    987: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                    988:         mov     ah,[edi]        ;latch the target address. The data read
                    989:                                 ; doesn't matter
                    990:         mov     [edi],al        ;merge the drawing color with the latched
                    991:                                 ; target address according to the selected ALU
                    992:                                 ; function, and write the result to display
                    993:                                 ; memory
                    994:         add     edi,ecx         ;point to the next scan line
                    995:         endm    ;-----------------------------------;
                    996: 
                    997: ; 1-wide read/write loop: the body is DRAW_1_WIDE_RW expanded by UNROLL_LOOP,
                    998: ; with entry labels prefixed RW1 (the prefix given to pfnDraw1WideRWEntry).
                    999:         align   4
                   1000: draw_1_wide_rw_loop     proc    near
                   1001:         UNROLL_LOOP     DRAW_1_WIDE_RW,RW1,LOOP_UNROLL_COUNT
                   1002:         dec     ebx                     ;one more pass through the unrolled body done
                   1003:         jnz     draw_1_wide_rw_loop     ;EBX not zero: go around again
                   1004: 
                   1005:         ret                             ;unrolled loop count exhausted
                   1006: 
                   1007: draw_1_wide_rw_loop     endp
                   1008: 
                   1009: ;-----------------------------------------------------------------------;
                   1010: ; Macro to draw two read before write bytes, then advance to next scan line.
                   1011: ; Each byte is read into scratch AH (to load the latches) and then written.
                   1012: DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                   1013: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1014:         mov     ah,[edi]                ;see 1-wide RW case for comments
                   1015:         mov     [edi],al                ;write first byte
                   1016:         mov     ah,[edi+1]              ;latch second byte
                   1017:         mov     [edi+1],al              ;write second byte
                   1018:         add     edi,ecx                 ;point to the next scan line
                   1019:         endm    ;-----------------------------------;
                   1020: 
                   1021: ; 2-wide read/write loop: the body is DRAW_2_WIDE_RW expanded by UNROLL_LOOP,
                   1022: ; with entry labels prefixed RW2 (the prefix given to pfnDraw2WideRWEntry).
                   1023:         align   4
                   1024: draw_2_wide_rw_loop     proc    near
                   1025:         UNROLL_LOOP     DRAW_2_WIDE_RW,RW2,LOOP_UNROLL_COUNT
                   1026:         dec     ebx                     ;one more pass through the unrolled body done
                   1027:         jnz     draw_2_wide_rw_loop     ;EBX not zero: go around again
                   1028: 
                   1029:         ret                             ;unrolled loop count exhausted
                   1030: 
                   1031: draw_2_wide_rw_loop     endp
                   1032: 
                   1033: ;-----------------------------------------------------------------------;
                   1034: ; Macro to draw three read before write bytes, then advance to next scan line.
                   1035: ; Each byte is read into scratch AH (to load the latches) and then written.
                   1036: DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                   1037: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1038:         mov     ah,[edi]                ;see 1-wide RW case for comments
                   1039:         mov     [edi],al                ;write first byte
                   1040:         mov     ah,[edi+1]              ;latch second byte
                   1041:         mov     [edi+1],al              ;write second byte
                   1042:         mov     ah,[edi+2]              ;latch third byte
                   1043:         mov     [edi+2],al              ;write third byte
                   1044:         add     edi,ecx                 ;point to the next scan line
                   1045:         endm    ;-----------------------------------;
                   1046: 
                   1047: ; 3-wide read/write loop: the body is DRAW_3_WIDE_RW expanded by UNROLL_LOOP,
                   1048: ; with entry labels prefixed RW3 (the prefix given to pfnDraw3WideRWEntry).
                   1049:         align   4
                   1050: draw_3_wide_rw_loop     proc    near
                   1051:         UNROLL_LOOP     DRAW_3_WIDE_RW,RW3,LOOP_UNROLL_COUNT
                   1052:         dec     ebx                     ;one more pass through the unrolled body done
                   1053:         jnz     draw_3_wide_rw_loop     ;EBX not zero: go around again
                   1054: 
                   1055:         ret                             ;unrolled loop count exhausted
                   1056: 
                   1057: draw_3_wide_rw_loop     endp
                   1058: 
                   1059: ;-----------------------------------------------------------------------;
                   1060: ; Macro to draw four read before write bytes, then advance to next scan line.
                   1061: ; Each byte is read into scratch AH (to load the latches) and then written.
                   1062: DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                   1063: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1064:         mov     ah,[edi]                ;see 1-wide RW case for comments
                   1065:         mov     [edi],al                ;write first byte
                   1066:         mov     ah,[edi+1]              ;latch second byte
                   1067:         mov     [edi+1],al              ;write second byte
                   1068:         mov     ah,[edi+2]              ;latch third byte
                   1069:         mov     [edi+2],al              ;write third byte
                   1070:         mov     ah,[edi+3]              ;latch fourth byte
                   1071:         mov     [edi+3],al              ;write fourth byte
                   1072:         add     edi,ecx                 ;point to the next scan line
                   1073:         endm    ;-----------------------------------;
                   1074: 
                   1075: ; 4-wide read/write loop: the body is DRAW_4_WIDE_RW expanded by UNROLL_LOOP,
                   1076: ; with entry labels prefixed RW4 (the prefix given to pfnDraw4WideRWEntry).
                   1077:         align   4
                   1078: draw_4_wide_rw_loop     proc    near
                   1079:         UNROLL_LOOP     DRAW_4_WIDE_RW,RW4,LOOP_UNROLL_COUNT
                   1080:         dec     ebx                     ;one more pass through the unrolled body done
                   1081:         jnz     draw_4_wide_rw_loop     ;EBX not zero: go around again
                   1082: 
                   1083:         ret                             ;unrolled loop count exhausted
                   1084: 
                   1085: draw_4_wide_rw_loop     endp
                   1086: 
                   1087: ;-----------------------------------------------------------------------;
                   1088: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up),
                   1089: ; for cases where read before write is NOT required.
                   1090: ;-----------------------------------------------------------------------;
                   1091: 
                   1092: ; Tables of entry points into unrolled 1- through 8-wide write-only loops.
                   1093: ; Note that there may be separate entry tables for various alignments of a
                   1094: ; specific width, in cases where performance can be improved by using different
                   1095: ; code for different alignments.
                   1096: ; Arguments: table name, entry label prefix, LOOP_UNROLL_COUNT.
                   1097:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideWEntry,W1,LOOP_UNROLL_COUNT
                   1098:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideWEntry,W2,LOOP_UNROLL_COUNT
                   1099:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT
                   1100:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWOddEntry,W3_ODD,LOOP_UNROLL_COUNT
                   1101:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideWEntry,W4,LOOP_UNROLL_COUNT
                   1102:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT
                   1103:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWOddEntry,W5_ODD,LOOP_UNROLL_COUNT
                   1104:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_0Entry,W6_MOD3_0,LOOP_UNROLL_COUNT
                   1105:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_1Entry,W6_MOD3_1,LOOP_UNROLL_COUNT
                   1106:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_2Entry,W6_MOD3_2,LOOP_UNROLL_COUNT
                   1107:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_0Entry,W7_MOD3_0,LOOP_UNROLL_COUNT
                   1108:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_1Entry,W7_MOD3_1,LOOP_UNROLL_COUNT
                   1109:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_2Entry,W7_MOD3_2,LOOP_UNROLL_COUNT
                   1110:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_3Entry,W7_MOD3_3,LOOP_UNROLL_COUNT
                   1111:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_0Entry,W8_MOD3_0,LOOP_UNROLL_COUNT
                   1112:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_1Entry,W8_MOD3_1,LOOP_UNROLL_COUNT
                   1113:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_2Entry,W8_MOD3_2,LOOP_UNROLL_COUNT
                   1114:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_3Entry,W8_MOD3_3,LOOP_UNROLL_COUNT
                   1115: 
                   1116: 
                   1117: ;-----------------------------------------------------------------------;
                   1118: ; Unrolled 1- through 8-wide write-only edge-drawing loops.
                   1119: ;
                   1120: ; Entry:
                   1121: ;       EAX = fill color, replicated four times
                   1122: ;       EBX = unrolled loop count
                   1123: ;       ECX = scan line width in bytes
                   1124: ;       EDI = start offset
                   1125: ;
                   1126: ; EBX, EDI modified. All other registers preserved.
                   1127: 
                   1128: ;-----------------------------------------------------------------------;
                   1129: ; Macro to draw one write-only byte, then advance to next scan line.
                   1130: ; Defines an entry label named ENTRY_LABEL followed by ENTRY_INDEX.
                   1131: DRAW_1_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
                   1132: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1133:         mov     [edi],al                ;draw the pixel
                   1134:         add     edi,ecx                 ;point to the next scan line
                   1135:         endm    ;-----------------------------------;
                   1136: 
                   1137: ; 1-wide write-only loop: the body is DRAW_1_WIDE_W expanded by UNROLL_LOOP,
                   1138: ; with entry labels prefixed W1 (the prefix given to pfnDraw1WideWEntry).
                   1139:         align   4
                   1140: draw_1_wide_w_loop     proc    near
                   1141:         UNROLL_LOOP     DRAW_1_WIDE_W,W1,LOOP_UNROLL_COUNT
                   1142:         dec     ebx                     ;one more pass through the unrolled body done
                   1143:         jnz     draw_1_wide_w_loop      ;EBX not zero: go around again
                   1144: 
                   1145:         ret                             ;unrolled loop count exhausted
                   1146: 
                   1147: draw_1_wide_w_loop     endp
                   1148: 
                   1149: ;-----------------------------------------------------------------------;
                   1150: ; Macro to draw two write-only bytes, then advance to next scan line.
                   1151: ; Relies on the fill color being replicated in AL and AH (see Entry above).
                   1152: DRAW_2_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
                   1153: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1154:         mov     [edi],ax                ;both bytes with a single word store
                   1155:         add     edi,ecx                 ;point to the next scan line
                   1156:         endm    ;-----------------------------------;
                   1157: 
                   1158: ; 2-wide write-only loop: the body is DRAW_2_WIDE_W expanded by UNROLL_LOOP,
                   1159: ; with entry labels prefixed W2 (the prefix given to pfnDraw2WideWEntry).
                   1160:         align   4
                   1161: draw_2_wide_w_loop     proc    near
                   1162:         UNROLL_LOOP     DRAW_2_WIDE_W,W2,LOOP_UNROLL_COUNT
                   1163:         dec     ebx                     ;one more pass through the unrolled body done
                   1164:         jnz     draw_2_wide_w_loop      ;EBX not zero: go around again
                   1165: 
                   1166:         ret                             ;unrolled loop count exhausted
                   1167: 
                   1168: draw_2_wide_w_loop     endp
                   1169: 
                   1170: ;-----------------------------------------------------------------------;
                   1171: ; Macro to draw three write-only bytes, then advance to next scan line.
                   1172: ; Optimized for even start address: the word store comes first, so it is
                   1173: ; word-aligned when EDI is even.
                   1174: DRAW_3_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
                   1175: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1176:         mov     [edi],ax                ;bytes 0-1 as one word
                   1177:         mov     [edi+2],al              ;trailing byte 2
                   1178:         add     edi,ecx                 ;point to the next scan line
                   1179:         endm    ;-----------------------------------;
                   1180: 
                   1181: ; 3-wide write-only loop for an even start address: DRAW_3_WIDE_W_EVEN expanded
                   1182: ; by UNROLL_LOOP, entry labels prefixed W3_EVEN (as in pfnDraw3WideWEvenEntry).
                   1183:         align   4
                   1184: draw_3_wide_w_even_loop     proc    near
                   1185:         UNROLL_LOOP     DRAW_3_WIDE_W_EVEN,W3_EVEN,LOOP_UNROLL_COUNT
                   1186:         dec     ebx                     ;one more pass through the unrolled body done
                   1187:         jnz     draw_3_wide_w_even_loop ;EBX not zero: go around again
                   1188: 
                   1189:         ret                             ;unrolled loop count exhausted
                   1190: 
                   1191: draw_3_wide_w_even_loop     endp
                   1192: 
                   1193: ;-----------------------------------------------------------------------;
                   1194: ; Macro to draw three write-only bytes, then advance to next scan line.
                   1195: ; Optimized for odd start address: the leading byte goes first, so the word
                   1196: ; store at EDI+1 is word-aligned when EDI is odd.
                   1197: DRAW_3_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
                   1198: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1199:         mov     [edi],al                ;leading byte 0
                   1200:         mov     [edi+1],ax              ;bytes 1-2 as one word
                   1201:         add     edi,ecx                 ;point to the next scan line
                   1202:         endm    ;-----------------------------------;
                   1203: 
                   1204: ; 3-wide write-only loop for an odd start address: DRAW_3_WIDE_W_ODD expanded
                   1205: ; by UNROLL_LOOP, entry labels prefixed W3_ODD (as in pfnDraw3WideWOddEntry).
                   1206:         align   4
                   1207: draw_3_wide_w_odd_loop     proc    near
                   1208:         UNROLL_LOOP     DRAW_3_WIDE_W_ODD,W3_ODD,LOOP_UNROLL_COUNT
                   1209:         dec     ebx                     ;one more pass through the unrolled body done
                   1210:         jnz     draw_3_wide_w_odd_loop  ;EBX not zero: go around again
                   1211: 
                   1212:         ret                             ;unrolled loop count exhausted
                   1213: 
                   1214: draw_3_wide_w_odd_loop     endp
                   1215: 
                   1216: 
                   1217: ;-----------------------------------------------------------------------;
                   1218: ; Macro to draw four write-only bytes, then advance to next scan line.
                   1219: ; Relies on the fill color being replicated in all four bytes of EAX.
                   1220: DRAW_4_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
                   1221: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1222:         mov     [edi],eax               ;all four bytes with a single dword store
                   1223:         add     edi,ecx                 ;point to the next scan line
                   1224:         endm    ;-----------------------------------;
                   1225: 
                   1226: ; 4-wide write-only loop: the body is DRAW_4_WIDE_W expanded by UNROLL_LOOP,
                   1227: ; with entry labels prefixed W4 (the prefix given to pfnDraw4WideWEntry).
                   1228:         align   4
                   1229: draw_4_wide_w_loop     proc    near
                   1230:         UNROLL_LOOP     DRAW_4_WIDE_W,W4,LOOP_UNROLL_COUNT
                   1231:         dec     ebx                     ;one more pass through the unrolled body done
                   1232:         jnz     draw_4_wide_w_loop      ;EBX not zero: go around again
                   1233: 
                   1234:         ret                             ;unrolled loop count exhausted
                   1235: 
                   1236: draw_4_wide_w_loop     endp
                   1237: 
                   1238: 
                   1239: ;-----------------------------------------------------------------------;
                   1240: ; Macro to draw five write-only bytes, then advance to next scan line.
                   1241: ; Optimized for even start address: dword store first, single byte last.
                   1242: 
                   1243: DRAW_5_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
                   1244: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1245:         mov     [edi],eax               ;bytes 0-3 as one dword
                   1246:         mov     [edi+4],al              ;trailing byte 4
                   1247:         add     edi,ecx                 ;point to the next scan line
                   1248:         endm    ;-----------------------------------;
                   1249: 
                   1250: ; 5-wide write-only loop for an even start address: DRAW_5_WIDE_W_EVEN expanded
                   1251: ; by UNROLL_LOOP, entry labels prefixed W5_EVEN (as in pfnDraw5WideWEvenEntry).
                   1252:         align   4
                   1253: draw_5_wide_w_even_loop     proc    near
                   1254:         UNROLL_LOOP     DRAW_5_WIDE_W_EVEN,W5_EVEN,LOOP_UNROLL_COUNT
                   1255:         dec     ebx                     ;one more pass through the unrolled body done
                   1256:         jnz     draw_5_wide_w_even_loop ;EBX not zero: go around again
                   1257: 
                   1258:         ret                             ;unrolled loop count exhausted
                   1259: 
                   1260: draw_5_wide_w_even_loop     endp
                   1261: 
                   1262: 
                   1263: ;-----------------------------------------------------------------------;
                   1264: ; Macro to draw five write-only bytes, then advance to next scan line.
                   1265: ; Optimized for odd start address: single byte first, dword store at EDI+1.
                   1266: 
                   1267: DRAW_5_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
                   1268: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1269:         mov     [edi],al                ;leading byte 0
                   1270:         mov     [edi+1],eax             ;bytes 1-4 as one dword
                   1271:         add     edi,ecx                 ;point to the next scan line
                   1272:         endm    ;-----------------------------------;
                   1273: 
                   1274: ; 5-wide write-only loop for an odd start address: DRAW_5_WIDE_W_ODD expanded
                   1275: ; by UNROLL_LOOP, entry labels prefixed W5_ODD (as in pfnDraw5WideWOddEntry).
                   1276:         align   4
                   1277: draw_5_wide_w_odd_loop     proc    near
                   1278:         UNROLL_LOOP     DRAW_5_WIDE_W_ODD,W5_ODD,LOOP_UNROLL_COUNT
                   1279:         dec     ebx                     ;one more pass through the unrolled body done
                   1280:         jnz     draw_5_wide_w_odd_loop  ;EBX not zero: go around again
                   1281: 
                   1282:         ret                             ;unrolled loop count exhausted
                   1283: 
                   1284: draw_5_wide_w_odd_loop     endp
                   1285: 
                   1286: 
                   1287: ;-----------------------------------------------------------------------;
                   1288: ; Macro to draw six write-only bytes, then advance to next scan line.
                   1289: ; Optimized for start address AND 3 == 0 (the MOD3_0 case): both stores are
                   1290: ; then naturally aligned.
                   1291: DRAW_6_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
                   1292: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1293:         mov     [edi],eax               ;bytes 0-3 as one dword
                   1294:         mov     [edi+4],ax              ;bytes 4-5 as one word
                   1295:         add     edi,ecx                 ;point to the next scan line
                   1296:         endm    ;-----------------------------------;
                   1297: 
                   1298: ; 6-wide write-only loop, start address AND 3 == 0: DRAW_6_WIDE_W_MOD3_0 expanded
                   1299: ; by UNROLL_LOOP, entry labels prefixed W6_MOD3_0 (as in pfnDraw6WideWMod3_0Entry).
                   1300:         align   4
                   1301: draw_6_wide_w_mod3_0_loop     proc    near
                   1302:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_0,W6_MOD3_0,LOOP_UNROLL_COUNT
                   1303:         dec     ebx                       ;one more pass through the unrolled body done
                   1304:         jnz     draw_6_wide_w_mod3_0_loop ;EBX not zero: go around again
                   1305: 
                   1306:         ret                               ;unrolled loop count exhausted
                   1307: 
                   1308: draw_6_wide_w_mod3_0_loop     endp
                   1309: 
                   1310: 
                   1311: ;-----------------------------------------------------------------------;
                   1312: ; Macro to draw six write-only bytes, then advance to next scan line.
                   1313: ; Optimized for start address AND 3 == 1 or 3 (odd addresses). The dword at
                   1314: ; EDI+1 is dword-aligned when AND 3 == 3 and word-aligned when AND 3 == 1.
                   1315: DRAW_6_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
                   1316: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1317:         mov     [edi],al                ;leading byte 0
                   1318:         mov     [edi+1],eax             ;bytes 1-4 as one dword
                   1319:         mov     [edi+5],al              ;trailing byte 5
                   1320:         add     edi,ecx                 ;point to the next scan line
                   1321:         endm    ;-----------------------------------;
                   1322: 
                   1323: ; 6-wide write-only loop, start address AND 3 == 1 or 3: DRAW_6_WIDE_W_MOD3_1 expanded
                   1324: ; by UNROLL_LOOP, entry labels prefixed W6_MOD3_1 (as in pfnDraw6WideWMod3_1Entry).
                   1325:         align   4
                   1326: draw_6_wide_w_mod3_1_loop     proc    near
                   1327:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_1,W6_MOD3_1,LOOP_UNROLL_COUNT
                   1328:         dec     ebx                       ;one more pass through the unrolled body done
                   1329:         jnz     draw_6_wide_w_mod3_1_loop ;EBX not zero: go around again
                   1330: 
                   1331:         ret                               ;unrolled loop count exhausted
                   1332: 
                   1333: draw_6_wide_w_mod3_1_loop     endp
                   1334: 
                   1335: 
                   1336: ;-----------------------------------------------------------------------;
                   1337: ; Macro to draw six write-only bytes, then advance to next scan line.
                   1338: ; Optimized for start address AND 3 == 2 (the MOD3_2 case): the dword at
                   1339: ; EDI+2 is then dword-aligned.
                   1340: DRAW_6_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
                   1341: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1342:         mov     [edi],ax                ;bytes 0-1 as one word
                   1343:         mov     [edi+2],eax             ;bytes 2-5 as one dword
                   1344:         add     edi,ecx                 ;point to the next scan line
                   1345:         endm    ;-----------------------------------;
                   1346: 
                   1347: ; 6-wide write-only loop, start address AND 3 == 2: DRAW_6_WIDE_W_MOD3_2 expanded
                   1348: ; by UNROLL_LOOP, entry labels prefixed W6_MOD3_2 (as in pfnDraw6WideWMod3_2Entry).
                   1349:         align   4
                   1350: draw_6_wide_w_mod3_2_loop     proc    near
                   1351:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_2,W6_MOD3_2,LOOP_UNROLL_COUNT
                   1352:         dec     ebx                       ;one more pass through the unrolled body done
                   1353:         jnz     draw_6_wide_w_mod3_2_loop ;EBX not zero: go around again
                   1354: 
                   1355:         ret                               ;unrolled loop count exhausted
                   1356: 
                   1357: draw_6_wide_w_mod3_2_loop     endp
                   1358: 
                   1359: 
                   1360: ;-----------------------------------------------------------------------;
                   1361: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1362: ; Optimized for start address AND 3 == 0 (the MOD3_0 case): dword, word and
                   1363: ; byte stores are then all naturally aligned.
                   1364: DRAW_7_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
                   1365: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1366:         mov     [edi],eax               ;bytes 0-3 as one dword
                   1367:         mov     [edi+4],ax              ;bytes 4-5 as one word
                   1368:         mov     [edi+6],al              ;trailing byte 6
                   1369:         add     edi,ecx                 ;point to the next scan line
                   1370:         endm    ;-----------------------------------;
                   1371: 
                   1372: ; 7-wide write-only loop, start address AND 3 == 0: DRAW_7_WIDE_W_MOD3_0 expanded
                   1373: ; by UNROLL_LOOP, entry labels prefixed W7_MOD3_0 (as in pfnDraw7WideWMod3_0Entry).
                   1374:         align   4
                   1375: draw_7_wide_w_mod3_0_loop     proc    near
                   1376:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_0,W7_MOD3_0,LOOP_UNROLL_COUNT
                   1377:         dec     ebx                       ;one more pass through the unrolled body done
                   1378:         jnz     draw_7_wide_w_mod3_0_loop ;EBX not zero: go around again
                   1379: 
                   1380:         ret                               ;unrolled loop count exhausted
                   1381: 
                   1382: draw_7_wide_w_mod3_0_loop     endp
                   1383: 
                   1384: 
                   1385: ;-----------------------------------------------------------------------;
                   1386: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1387: ; Optimized for start address AND 3 == 1 (the MOD3_1 case): the word at EDI+1
                   1388: ; and the dword at EDI+3 are then naturally aligned.
                   1389: DRAW_7_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
                   1390: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1391:         mov     [edi],al                ;leading byte 0
                   1392:         mov     [edi+1],ax              ;bytes 1-2 as one word
                   1393:         mov     [edi+3],eax             ;bytes 3-6 as one dword
                   1394:         add     edi,ecx                 ;point to the next scan line
                   1395:         endm    ;-----------------------------------;
                   1396: 
                   1397: ; 7-wide write-only loop, start address AND 3 == 1: DRAW_7_WIDE_W_MOD3_1 expanded
                   1398: ; by UNROLL_LOOP, entry labels prefixed W7_MOD3_1 (as in pfnDraw7WideWMod3_1Entry).
                   1399:         align   4
                   1400: draw_7_wide_w_mod3_1_loop     proc    near
                   1401:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_1,W7_MOD3_1,LOOP_UNROLL_COUNT
                   1402:         dec     ebx                       ;one more pass through the unrolled body done
                   1403:         jnz     draw_7_wide_w_mod3_1_loop ;EBX not zero: go around again
                   1404: 
                   1405:         ret                               ;unrolled loop count exhausted
                   1406: 
                   1407: draw_7_wide_w_mod3_1_loop     endp
                   1408: 
                   1409: 
                   1410: ;-----------------------------------------------------------------------;
                   1411: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1412: ; Optimized for start address AND 3 == 2 (the MOD3_2 case): the word at EDI
                   1413: ; and the dword at EDI+2 are then naturally aligned.
                   1414: DRAW_7_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
                   1415: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1416:         mov     [edi],ax                ;bytes 0-1 as one word
                   1417:         mov     [edi+2],eax             ;bytes 2-5 as one dword
                   1418:         mov     [edi+6],al              ;trailing byte 6
                   1419:         add     edi,ecx                 ;point to the next scan line
                   1420:         endm    ;-----------------------------------;
                   1421: 
                   1422: ; 7-wide write-only loop, start address AND 3 == 2: DRAW_7_WIDE_W_MOD3_2 expanded
                   1423: ; by UNROLL_LOOP, entry labels prefixed W7_MOD3_2 (as in pfnDraw7WideWMod3_2Entry).
                   1424:         align   4
                   1425: draw_7_wide_w_mod3_2_loop     proc    near
                   1426:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_2,W7_MOD3_2,LOOP_UNROLL_COUNT
                   1427:         dec     ebx                       ;one more pass through the unrolled body done
                   1428:         jnz     draw_7_wide_w_mod3_2_loop ;EBX not zero: go around again
                   1429: 
                   1430:         ret                               ;unrolled loop count exhausted
                   1431: 
                   1432: draw_7_wide_w_mod3_2_loop     endp
                   1433: 
                   1434: 
                   1435: ;-----------------------------------------------------------------------;
                   1436: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1437: ; Optimized for start address AND 3 == 3 (the MOD3_3 case): the dword at EDI+1
                   1438: ; and the word at EDI+5 are then naturally aligned.
                   1439: DRAW_7_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
                   1440: &ENTRY_LABEL&ENTRY_INDEX&:              ;entry point for this copy of the body
                   1441:         mov     [edi],al                ;leading byte 0
                   1442:         mov     [edi+1],eax             ;bytes 1-4 as one dword
                   1443:         mov     [edi+5],ax              ;bytes 5-6 as one word
                   1444:         add     edi,ecx                 ;point to the next scan line
                   1445:         endm    ;-----------------------------------;
                   1446: 
                   1447: ; 7-wide write-only loop, start address AND 3 == 3: DRAW_7_WIDE_W_MOD3_3 expanded
                   1448: ; by UNROLL_LOOP, entry labels prefixed W7_MOD3_3 (as in pfnDraw7WideWMod3_3Entry).
                   1449:         align   4
                   1450: draw_7_wide_w_mod3_3_loop     proc    near
                   1451:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_3,W7_MOD3_3,LOOP_UNROLL_COUNT
                   1452:         dec     ebx                       ;one more pass through the unrolled body done
                   1453:         jnz     draw_7_wide_w_mod3_3_loop ;EBX not zero: go around again
                   1454: 
                   1455:         ret                               ;unrolled loop count exhausted
                   1456: 
                   1457: draw_7_wide_w_mod3_3_loop     endp
                   1458: 
                   1459: 
                   1460: ;-----------------------------------------------------------------------;
                   1461: ; Macro to draw eight write-only bytes at [EDI] as two dword stores of EAX, then add ECX to EDI to advance to next scan line.
                   1462: ; Optimized for start address MOD 3 == 0 (low two address bits zero), so both dword stores are aligned.
                   1463: 
                   1464: DRAW_8_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
                   1465: &ENTRY_LABEL&ENTRY_INDEX&:
                   1466:         mov     [edi],eax               ;bytes 0-3
                   1467:         mov     [edi+4],eax             ;bytes 4-7
                   1468:         add     edi,ecx                 ;point to the next scan line
                   1469:         endm    ;-----------------------------------;
                   1470: 
                   1471: ; 8-wide write-only fill loop, starting at MOD 3 == 0.
                   1472: ;  As used by DRAW_8_WIDE_W_MOD3_0 and this loop: EAX = value stored, EBX = loop count, ECX = offset to next scan line, EDI = target address.
                   1473:         align   4
                   1474: draw_8_wide_w_mod3_0_loop     proc    near
                   1475:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_0,W8_MOD3_0,LOOP_UNROLL_COUNT
                   1476:         dec     ebx                     ;one pass through the unrolled body is done
                   1477:         jnz     draw_8_wide_w_mod3_0_loop ;loop back to the top until EBX reaches zero
                   1478: 
                   1479:         ret                             ;near return to the caller
                   1480: 
                   1481: draw_8_wide_w_mod3_0_loop     endp
                   1482: 
                   1483: 
                   1484: ;-----------------------------------------------------------------------;
                   1485: ; Macro to draw eight write-only bytes at [EDI] from AL/AX/EAX/AL, then add ECX to EDI to advance to next scan line.
                   1486: ; Optimized for start address MOD 3 == 1 (low two address bits): byte, word, dword, byte keeps the word and dword stores aligned.
                   1487: 
                   1488: DRAW_8_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
                   1489: &ENTRY_LABEL&ENTRY_INDEX&:
                   1490:         mov     [edi],al                ;byte 0, the unaligned lead-in byte
                   1491:         mov     [edi+1],ax              ;bytes 1-2, word aligned when EDI AND 3 is 1
                   1492:         mov     [edi+3],eax             ;bytes 3-6, dword aligned when EDI AND 3 is 1
                   1493:         mov     [edi+7],al              ;byte 7, the final byte
                   1494:         add     edi,ecx                 ;point to the next scan line
                   1495:         endm    ;-----------------------------------;
                   1496: 
                   1497: ; 8-wide write-only fill loop, starting at MOD 3 == 1.
                   1498: ;  As used by DRAW_8_WIDE_W_MOD3_1 and this loop: EAX = value stored, EBX = loop count, ECX = offset to next scan line, EDI = target address.
                   1499:         align   4
                   1500: draw_8_wide_w_mod3_1_loop     proc    near
                   1501:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_1,W8_MOD3_1,LOOP_UNROLL_COUNT
                   1502:         dec     ebx                     ;one pass through the unrolled body is done
                   1503:         jnz     draw_8_wide_w_mod3_1_loop ;loop back to the top until EBX reaches zero
                   1504: 
                   1505:         ret                             ;near return to the caller
                   1506: 
                   1507: draw_8_wide_w_mod3_1_loop     endp
                   1508: 
                   1509: 
                   1510: ;-----------------------------------------------------------------------;
                   1511: ; Macro to draw eight write-only bytes at [EDI] from AX/EAX/AX, then add ECX to EDI to advance to next scan line.
                   1512: ; Optimized for start address MOD 3 == 2 (low two address bits): word, dword, word keeps every store aligned.
                   1513: 
                   1514: DRAW_8_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
                   1515: &ENTRY_LABEL&ENTRY_INDEX&:
                   1516:         mov     [edi],ax                ;bytes 0-1
                   1517:         mov     [edi+2],eax             ;bytes 2-5, dword aligned when EDI AND 3 is 2
                   1518:         mov     [edi+6],ax              ;bytes 6-7
                   1519:         add     edi,ecx                 ;point to the next scan line
                   1520:         endm    ;-----------------------------------;
                   1521: 
                   1522: ; 8-wide write-only fill loop, starting at MOD 3 == 2.
                   1523: ;  As used by DRAW_8_WIDE_W_MOD3_2 and this loop: EAX = value stored, EBX = loop count, ECX = offset to next scan line, EDI = target address.
                   1524:         align   4
                   1525: draw_8_wide_w_mod3_2_loop     proc    near
                   1526:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_2,W8_MOD3_2,LOOP_UNROLL_COUNT
                   1527:         dec     ebx                     ;one pass through the unrolled body is done
                   1528:         jnz     draw_8_wide_w_mod3_2_loop ;loop back to the top until EBX reaches zero
                   1529: 
                   1530:         ret                             ;near return to the caller
                   1531: 
                   1532: draw_8_wide_w_mod3_2_loop     endp
                   1533: 
                   1534: 
                   1535: ;-----------------------------------------------------------------------;
                   1536: ; Macro to draw eight write-only bytes at [EDI] from AL/EAX/AX/AL, then add ECX to EDI to advance to next scan line.
                   1537: ; Optimized for start address MOD 3 == 3 (low two address bits): byte, dword, word, byte keeps the dword and word stores aligned.
                   1538: 
                   1539: DRAW_8_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
                   1540: &ENTRY_LABEL&ENTRY_INDEX&:
                   1541:         mov     [edi],al                ;byte 0, the unaligned lead-in byte
                   1542:         mov     [edi+1],eax             ;bytes 1-4, dword aligned when EDI AND 3 is 3
                   1543:         mov     [edi+5],ax              ;bytes 5-6
                   1544:         mov     [edi+7],al              ;byte 7, the final byte
                   1545:         add     edi,ecx                 ;point to the next scan line
                   1546:         endm    ;-----------------------------------;
                   1547: 
                   1548: ; 8-wide write-only fill loop, starting at MOD 3 == 3.
                   1549: ;  As used by DRAW_8_WIDE_W_MOD3_3 and this loop: EAX = value stored, EBX = loop count, ECX = offset to next scan line, EDI = target address.
                   1550:         align   4
                   1551: draw_8_wide_w_mod3_3_loop     proc    near
                   1552:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_3,W8_MOD3_3,LOOP_UNROLL_COUNT
                   1553:         dec     ebx                     ;one pass through the unrolled body is done
                   1554:         jnz     draw_8_wide_w_mod3_3_loop ;loop back to the top until EBX reaches zero
                   1555: 
                   1556:         ret                             ;near return to the caller
                   1557: 
                   1558: draw_8_wide_w_mod3_3_loop     endp
                   1559: 
                   1560: ;-----------------------------------------------------------------------;
                   1561: ; Unrolled loop stuff for wide replace-type rops (arbitrary width).
                   1562: ;-----------------------------------------------------------------------;
                   1563: ; One table per (leading bytes, trailing bytes) pair: pfnDrawWideW<lead><trail>Entry goes with the W_<lead><trail> labels of the matching draw_wide_w_<lead><trail>_loop.
                   1564: ; Tables of entry points into unrolled wide write-only loops. UNROLL_LOOP_ENTRY_TABLE is defined outside this excerpt; presumably it emits one label address per unrolled copy - confirm.
                   1565:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW00Entry,W_00,LOOP_UNROLL_COUNT
                   1566:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW01Entry,W_01,LOOP_UNROLL_COUNT
                   1567:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW02Entry,W_02,LOOP_UNROLL_COUNT
                   1568:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW03Entry,W_03,LOOP_UNROLL_COUNT
                   1569:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW10Entry,W_10,LOOP_UNROLL_COUNT
                   1570:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW11Entry,W_11,LOOP_UNROLL_COUNT
                   1571:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW12Entry,W_12,LOOP_UNROLL_COUNT
                   1572:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW13Entry,W_13,LOOP_UNROLL_COUNT
                   1573:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW20Entry,W_20,LOOP_UNROLL_COUNT
                   1574:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW21Entry,W_21,LOOP_UNROLL_COUNT
                   1575:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW22Entry,W_22,LOOP_UNROLL_COUNT
                   1576:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW23Entry,W_23,LOOP_UNROLL_COUNT
                   1577:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW30Entry,W_30,LOOP_UNROLL_COUNT
                   1578:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW31Entry,W_31,LOOP_UNROLL_COUNT
                   1579:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW32Entry,W_32,LOOP_UNROLL_COUNT
                   1580:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW33Entry,W_33,LOOP_UNROLL_COUNT
                   1581: 
                   1582: ;-----------------------------------------------------------------------;
                   1583: ; Macro to draw n write-only bytes, 0 leading bytes, 0 trailing bytes,
                   1584: ; then advance to next scan line. The whole scan is ESI dwords of EAX.
                   1585: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1586: DRAW_WIDE_W_00 macro ENTRY_LABEL,ENTRY_INDEX
                   1587: &ENTRY_LABEL&ENTRY_INDEX&:
                   1588:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1589:         rep     stosd           ;fill all whole bytes as dwords; EDI ends just past the fill
                   1590:         add     edi,edx         ;point to the next scan line
                   1591:         endm    ;-----------------------------------;
                   1592: 
                   1593: ; N-wide write-only, 0 leading bytes, 0 trailing bytes.
                   1594: ;  EAX = dword value that REP STOSD stores (this line originally read 0ffffh - NOTE(review): confirm what the caller loads)
                   1595: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1596: ;  EDX = offset from end of one scan's fill to start of next
                   1597: ;  ESI = # of dwords to fill
                   1598: ;  EDI = target address to fill
                   1599: ;  ECX = scratch; DRAW_WIDE_W_00 reloads it from ESI as the REP count
                   1600:         align   4
                   1601: draw_wide_w_00_loop     proc    near
                   1602:         UNROLL_LOOP     DRAW_WIDE_W_00,W_00,LOOP_UNROLL_COUNT
                   1603:         dec     ebx             ;one pass through the unrolled body is done
                   1604:         jnz     draw_wide_w_00_loop ;loop back to the top until EBX reaches zero
                   1605: 
                   1606:         ret                     ;near return to the caller
                   1607: 
                   1608: draw_wide_w_00_loop     endp
                   1609: 
                   1610: 
                   1611: ;-----------------------------------------------------------------------;
                   1612: ; Macro to draw n write-only bytes, 0 leading bytes, 1 trailing byte,
                   1613: ; then advance to next scan line. Stores ESI dwords of EAX, then AL.
                   1614: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1615: DRAW_WIDE_W_01 macro ENTRY_LABEL,ENTRY_INDEX
                   1616: &ENTRY_LABEL&ENTRY_INDEX&:
                   1617:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1618:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1619:         mov     [edi],al        ;fill the trailing byte
                   1620:         inc     edi             ;step past the trailing byte
                   1621:         add     edi,edx         ;point to the next scan line
                   1622:         endm    ;-----------------------------------;
                   1623: 
                   1624: ; N-wide write-only, 0 leading bytes, 1 trailing byte.
                   1625: ;  EAX = value to store (REP STOSD writes EAX, AL fills the trailing byte); the dword count is in ESI, not EAX
                   1626: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1627: ;  EDX = offset from end of one scan's fill to start of next
                   1628: ;  ESI = # of dwords to fill
                   1629: ;  EDI = target address to fill
                   1630: ;  ECX = scratch; DRAW_WIDE_W_01 reloads it from ESI as the REP count
                   1631:         align   4
                   1632: draw_wide_w_01_loop     proc    near
                   1633:         UNROLL_LOOP     DRAW_WIDE_W_01,W_01,LOOP_UNROLL_COUNT
                   1634:         dec     ebx             ;one pass through the unrolled body is done
                   1635:         jnz     draw_wide_w_01_loop ;loop back to the top until EBX reaches zero
                   1636: 
                   1637:         ret                     ;near return to the caller
                   1638: 
                   1639: draw_wide_w_01_loop     endp
                   1640: 
                   1641: 
                   1642: ;-----------------------------------------------------------------------;
                   1643: ; Macro to draw n write-only bytes, 0 leading bytes, 2 trailing bytes,
                   1644: ; then advance to next scan line. Stores ESI dwords of EAX, then AX.
                   1645: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1646: DRAW_WIDE_W_02 macro ENTRY_LABEL,ENTRY_INDEX
                   1647: &ENTRY_LABEL&ENTRY_INDEX&:
                   1648:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1649:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1650:         mov     [edi],ax        ;fill the trailing word
                   1651:         add     edi,2           ;step past the trailing word
                   1652:         add     edi,edx         ;point to the next scan line
                   1653:         endm    ;-----------------------------------;
                   1654: 
                   1655: ; N-wide write-only, 0 leading bytes, 2 trailing bytes.
                   1656: ;  EAX = value to store (REP STOSD writes EAX, AX fills the trailing word); the dword count is in ESI, not EAX
                   1657: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1658: ;  EDX = offset from end of one scan's fill to start of next
                   1659: ;  ESI = # of dwords to fill
                   1660: ;  EDI = target address to fill
                   1661: ;  ECX = scratch; DRAW_WIDE_W_02 reloads it from ESI as the REP count
                   1662:         align   4
                   1663: draw_wide_w_02_loop     proc    near
                   1664:         UNROLL_LOOP     DRAW_WIDE_W_02,W_02,LOOP_UNROLL_COUNT
                   1665:         dec     ebx             ;one pass through the unrolled body is done
                   1666:         jnz     draw_wide_w_02_loop ;loop back to the top until EBX reaches zero
                   1667: 
                   1668:         ret                     ;near return to the caller
                   1669: 
                   1670: draw_wide_w_02_loop     endp
                   1671: 
                   1672: 
                   1673: ;-----------------------------------------------------------------------;
                   1674: ; Macro to draw n write-only bytes, 0 leading bytes, 3 trailing bytes,
                   1675: ; then advance to next scan line. Stores ESI dwords of EAX, then AX, then AL.
                   1676: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1677: DRAW_WIDE_W_03 macro ENTRY_LABEL,ENTRY_INDEX
                   1678: &ENTRY_LABEL&ENTRY_INDEX&:
                   1679:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1680:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1681:         mov     [edi],ax        ;fill the trailing word (first 2 of the 3 trailing bytes)
                   1682:         mov     [edi+2],al      ;fill the trailing byte
                   1683:         add     edi,3           ;step past the 3 trailing bytes
                   1684:         add     edi,edx         ;point to the next scan line
                   1685:         endm    ;-----------------------------------;
                   1686: 
                   1687: ; N-wide write-only, 0 leading bytes, 3 trailing bytes.
                   1688: ;  EAX = value to store (REP STOSD writes EAX, AX and AL fill the 3 trailing bytes); the dword count is in ESI, not EAX
                   1689: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1690: ;  EDX = offset from end of one scan's fill to start of next
                   1691: ;  ESI = # of dwords to fill
                   1692: ;  EDI = target address to fill
                   1693: ;  ECX = scratch; DRAW_WIDE_W_03 reloads it from ESI as the REP count
                   1694:         align   4
                   1695: draw_wide_w_03_loop     proc    near
                   1696:         UNROLL_LOOP     DRAW_WIDE_W_03,W_03,LOOP_UNROLL_COUNT
                   1697:         dec     ebx             ;one pass through the unrolled body is done
                   1698:         jnz     draw_wide_w_03_loop ;loop back to the top until EBX reaches zero
                   1699: 
                   1700:         ret                     ;near return to the caller
                   1701: 
                   1702: draw_wide_w_03_loop     endp
                   1703: 
                   1704: 
                   1705: ;-----------------------------------------------------------------------;
                   1706: ; Macro to draw n write-only bytes, 1 leading byte, 0 trailing bytes,
                   1707: ; then advance to next scan line. Stores AL, then ESI dwords of EAX.
                   1708: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1709: DRAW_WIDE_W_10 macro ENTRY_LABEL,ENTRY_INDEX
                   1710: &ENTRY_LABEL&ENTRY_INDEX&:
                   1711:         mov     [edi],al        ;fill the leading byte
                   1712:         inc     edi             ;step to the start of the dword run
                   1713:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1714:         rep     stosd           ;fill all whole bytes as dwords; EDI ends just past the fill
                   1715:         add     edi,edx         ;point to the next scan line
                   1716:         endm    ;-----------------------------------;
                   1717: 
                   1718: ; N-wide write-only, 1 leading byte, 0 trailing bytes.
                   1719: ;  EAX = value to store (AL fills the leading byte, REP STOSD writes EAX); the dword count is in ESI, not EAX
                   1720: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1721: ;  EDX = offset from end of one scan's fill to start of next
                   1722: ;  ESI = # of dwords to fill
                   1723: ;  EDI = target address to fill
                   1724: ;  ECX = scratch; DRAW_WIDE_W_10 reloads it from ESI as the REP count
                   1725:         align   4
                   1726: draw_wide_w_10_loop     proc    near
                   1727:         UNROLL_LOOP     DRAW_WIDE_W_10,W_10,LOOP_UNROLL_COUNT
                   1728:         dec     ebx             ;one pass through the unrolled body is done
                   1729:         jnz     draw_wide_w_10_loop ;loop back to the top until EBX reaches zero
                   1730: 
                   1731:         ret                     ;near return to the caller
                   1732: 
                   1733: draw_wide_w_10_loop     endp
                   1734: 
                   1735: 
                   1736: ;-----------------------------------------------------------------------;
                   1737: ; Macro to draw n write-only bytes, 1 leading byte, 1 trailing byte,
                   1738: ; then advance to next scan line. Stores AL, ESI dwords of EAX, then AL.
                   1739: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1740: DRAW_WIDE_W_11 macro ENTRY_LABEL,ENTRY_INDEX
                   1741: &ENTRY_LABEL&ENTRY_INDEX&:
                   1742:         mov     [edi],al        ;fill the leading byte
                   1743:         inc     edi             ;step to the start of the dword run
                   1744:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1745:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1746:         mov     [edi],al        ;fill the trailing byte
                   1747:         inc     edi             ;step past the trailing byte
                   1748:         add     edi,edx         ;point to the next scan line
                   1749:         endm    ;-----------------------------------;
                   1750: 
                   1751: ; N-wide write-only, 1 leading byte, 1 trailing byte.
                   1752: ;  EAX = value to store (AL fills the leading and trailing bytes, REP STOSD writes EAX); the dword count is in ESI, not EAX
                   1753: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1754: ;  EDX = offset from end of one scan's fill to start of next
                   1755: ;  ESI = # of dwords to fill
                   1756: ;  EDI = target address to fill
                   1757: ;  ECX = scratch; DRAW_WIDE_W_11 reloads it from ESI as the REP count
                   1758:         align   4
                   1759: draw_wide_w_11_loop     proc    near
                   1760:         UNROLL_LOOP     DRAW_WIDE_W_11,W_11,LOOP_UNROLL_COUNT
                   1761:         dec     ebx             ;one pass through the unrolled body is done
                   1762:         jnz     draw_wide_w_11_loop ;loop back to the top until EBX reaches zero
                   1763: 
                   1764:         ret                     ;near return to the caller
                   1765: 
                   1766: draw_wide_w_11_loop     endp
                   1767: 
                   1768: 
                   1769: ;-----------------------------------------------------------------------;
                   1770: ; Macro to draw n write-only bytes, 1 leading byte, 2 trailing bytes,
                   1771: ; then advance to next scan line. Stores AL, ESI dwords of EAX, then AX.
                   1772: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1773: DRAW_WIDE_W_12 macro ENTRY_LABEL,ENTRY_INDEX
                   1774: &ENTRY_LABEL&ENTRY_INDEX&:
                   1775:         mov     [edi],al        ;fill the leading byte
                   1776:         inc     edi             ;step to the start of the dword run
                   1777:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1778:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1779:         mov     [edi],ax        ;fill the trailing word
                   1780:         add     edi,2           ;step past the trailing word
                   1781:         add     edi,edx         ;point to the next scan line
                   1782:         endm    ;-----------------------------------;
                   1783: 
                   1784: ; N-wide write-only, 1 leading byte, 2 trailing bytes.
                   1785: ;  EAX = value to store (AL fills the leading byte, REP STOSD writes EAX, AX fills the trailing word); the dword count is in ESI, not EAX
                   1786: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1787: ;  EDX = offset from end of one scan's fill to start of next
                   1788: ;  ESI = # of dwords to fill
                   1789: ;  EDI = target address to fill
                   1790: ;  ECX = scratch; DRAW_WIDE_W_12 reloads it from ESI as the REP count
                   1791:         align   4
                   1792: draw_wide_w_12_loop     proc    near
                   1793:         UNROLL_LOOP     DRAW_WIDE_W_12,W_12,LOOP_UNROLL_COUNT
                   1794:         dec     ebx             ;one pass through the unrolled body is done
                   1795:         jnz     draw_wide_w_12_loop ;loop back to the top until EBX reaches zero
                   1796: 
                   1797:         ret                     ;near return to the caller
                   1798: 
                   1799: draw_wide_w_12_loop     endp
                   1800: 
                   1801: 
                   1802: ;-----------------------------------------------------------------------;
                   1803: ; Macro to draw n write-only bytes, 1 leading byte, 3 trailing bytes,
                   1804: ; then advance to next scan line. Stores AL, ESI dwords of EAX, then AX and AL.
                   1805: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1806: DRAW_WIDE_W_13 macro ENTRY_LABEL,ENTRY_INDEX
                   1807: &ENTRY_LABEL&ENTRY_INDEX&:
                   1808:         mov     [edi],al        ;fill the leading byte
                   1809:         inc     edi             ;step to the start of the dword run
                   1810:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1811:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1812:         mov     [edi],ax        ;fill the trailing word
                   1813:         mov     [edi+2],al      ;fill the trailing byte
                   1814:         add     edi,3           ;step past the 3 trailing bytes
                   1815:         add     edi,edx         ;point to the next scan line
                   1816:         endm    ;-----------------------------------;
                   1817: 
                   1818: ; N-wide write-only, 1 leading byte, 3 trailing bytes.
                   1819: ;  EAX = value to store (AL fills the leading byte, REP STOSD writes EAX, AX and AL fill the 3 trailing bytes); the dword count is in ESI, not EAX
                   1820: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1821: ;  EDX = offset from end of one scan's fill to start of next
                   1822: ;  ESI = # of dwords to fill
                   1823: ;  EDI = target address to fill
                   1824: ;  ECX = scratch; DRAW_WIDE_W_13 reloads it from ESI as the REP count
                   1825:         align   4
                   1826: draw_wide_w_13_loop     proc    near
                   1827:         UNROLL_LOOP     DRAW_WIDE_W_13,W_13,LOOP_UNROLL_COUNT
                   1828:         dec     ebx             ;one pass through the unrolled body is done
                   1829:         jnz     draw_wide_w_13_loop ;loop back to the top until EBX reaches zero
                   1830: 
                   1831:         ret                     ;near return to the caller
                   1832: 
                   1833: draw_wide_w_13_loop     endp
                   1834: 
                   1835: 
                   1836: ;-----------------------------------------------------------------------;
                   1837: ; Macro to draw n write-only bytes, 2 leading bytes, 0 trailing bytes,
                   1838: ; then advance to next scan line. Stores AX, then ESI dwords of EAX.
                   1839: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1840: DRAW_WIDE_W_20 macro ENTRY_LABEL,ENTRY_INDEX
                   1841: &ENTRY_LABEL&ENTRY_INDEX&:
                   1842:         mov     [edi],ax        ;fill the leading word
                   1843:         add     edi,2           ;step to the start of the dword run
                   1844:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1845:         rep     stosd           ;fill all whole bytes as dwords; EDI ends just past the fill
                   1846:         add     edi,edx         ;point to the next scan line
                   1847:         endm    ;-----------------------------------;
                   1848: 
                   1849: ; N-wide write-only, 2 leading bytes, 0 trailing bytes.
                   1850: ;  EAX = value to store (AX fills the leading word, REP STOSD writes EAX); the dword count is in ESI, not EAX
                   1851: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1852: ;  EDX = offset from end of one scan's fill to start of next
                   1853: ;  ESI = # of dwords to fill
                   1854: ;  EDI = target address to fill
                   1855: ;  ECX = scratch; DRAW_WIDE_W_20 reloads it from ESI as the REP count
                   1856:         align   4
                   1857: draw_wide_w_20_loop     proc    near
                   1858:         UNROLL_LOOP     DRAW_WIDE_W_20,W_20,LOOP_UNROLL_COUNT
                   1859:         dec     ebx             ;one pass through the unrolled body is done
                   1860:         jnz     draw_wide_w_20_loop ;loop back to the top until EBX reaches zero
                   1861: 
                   1862:         ret                     ;near return to the caller
                   1863: 
                   1864: draw_wide_w_20_loop     endp
                   1865: 
                   1866: 
                   1867: ;-----------------------------------------------------------------------;
                   1868: ; Macro to draw n write-only bytes, 2 leading bytes, 1 trailing byte,
                   1869: ; then advance to next scan line. Stores AX, ESI dwords of EAX, then AL.
                   1870: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1871: DRAW_WIDE_W_21 macro ENTRY_LABEL,ENTRY_INDEX
                   1872: &ENTRY_LABEL&ENTRY_INDEX&:
                   1873:         mov     [edi],ax        ;fill the leading word
                   1874:         add     edi,2           ;step to the start of the dword run
                   1875:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1876:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1877:         mov     [edi],al        ;fill the trailing byte
                   1878:         inc     edi             ;step past the trailing byte
                   1879:         add     edi,edx         ;point to the next scan line
                   1880:         endm    ;-----------------------------------;
                   1881: 
                   1882: ; N-wide write-only, 2 leading bytes, 1 trailing byte.
                   1883: ;  EAX = value to store (AX fills the leading word, REP STOSD writes EAX, AL fills the trailing byte); the dword count is in ESI, not EAX
                   1884: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1885: ;  EDX = offset from end of one scan's fill to start of next
                   1886: ;  ESI = # of dwords to fill
                   1887: ;  EDI = target address to fill
                   1888: ;  ECX = scratch; DRAW_WIDE_W_21 reloads it from ESI as the REP count
                   1889:         align   4
                   1890: draw_wide_w_21_loop     proc    near
                   1891:         UNROLL_LOOP     DRAW_WIDE_W_21,W_21,LOOP_UNROLL_COUNT
                   1892:         dec     ebx             ;one pass through the unrolled body is done
                   1893:         jnz     draw_wide_w_21_loop ;loop back to the top until EBX reaches zero
                   1894: 
                   1895:         ret                     ;near return to the caller
                   1896: 
                   1897: draw_wide_w_21_loop     endp
                   1898: 
                   1899: 
                   1900: ;-----------------------------------------------------------------------;
                   1901: ; Macro to draw n write-only bytes, 2 leading bytes, 2 trailing bytes,
                   1902: ; then advance to next scan line. Stores AX, ESI dwords of EAX, then AX.
                   1903: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1904: DRAW_WIDE_W_22 macro ENTRY_LABEL,ENTRY_INDEX
                   1905: &ENTRY_LABEL&ENTRY_INDEX&:
                   1906:         mov     [edi],ax        ;fill the leading word
                   1907:         add     edi,2           ;step to the start of the dword run
                   1908:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1909:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1910:         mov     [edi],ax        ;fill the trailing word
                   1911:         add     edi,2           ;step past the trailing word
                   1912:         add     edi,edx         ;point to the next scan line
                   1913:         endm    ;-----------------------------------;
                   1914: 
                   1915: ; N-wide write-only, 2 leading bytes, 2 trailing bytes.
                   1916: ;  EAX = value to store (AX fills the leading and trailing words, REP STOSD writes EAX); the dword count is in ESI, not EAX
                   1917: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1918: ;  EDX = offset from end of one scan's fill to start of next
                   1919: ;  ESI = # of dwords to fill
                   1920: ;  EDI = target address to fill
                   1921: ;  ECX = scratch; DRAW_WIDE_W_22 reloads it from ESI as the REP count
                   1922:         align   4
                   1923: draw_wide_w_22_loop     proc    near
                   1924:         UNROLL_LOOP     DRAW_WIDE_W_22,W_22,LOOP_UNROLL_COUNT
                   1925:         dec     ebx             ;one pass through the unrolled body is done
                   1926:         jnz     draw_wide_w_22_loop ;loop back to the top until EBX reaches zero
                   1927: 
                   1928:         ret                     ;near return to the caller
                   1929: 
                   1930: draw_wide_w_22_loop     endp
                   1931: 
                   1932: 
                   1933: ;-----------------------------------------------------------------------;
                   1934: ; Macro to draw n write-only bytes, 2 leading bytes, 3 trailing bytes,
                   1935: ; then advance to next scan line. Stores AX, ESI dwords of EAX, then AX and AL.
                   1936: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1937: DRAW_WIDE_W_23 macro ENTRY_LABEL,ENTRY_INDEX
                   1938: &ENTRY_LABEL&ENTRY_INDEX&:
                   1939:         mov     [edi],ax        ;fill the leading word
                   1940:         add     edi,2           ;step to the start of the dword run
                   1941:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1942:         rep     stosd           ;fill whole bytes as dwords; EDI ends just past them
                   1943:         mov     [edi],ax        ;fill the trailing word
                   1944:         mov     [edi+2],al      ;fill the trailing byte
                   1945:         add     edi,3           ;step past the 3 trailing bytes
                   1946:         add     edi,edx         ;point to the next scan line
                   1947:         endm    ;-----------------------------------;
                   1948: 
                   1949: ; N-wide write-only, 2 leading bytes, 3 trailing bytes.
                   1950: ;  EAX = value to store (AX fills the leading word, REP STOSD writes EAX, AX and AL fill the 3 trailing bytes); the dword count is in ESI, not EAX
                   1951: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1952: ;  EDX = offset from end of one scan's fill to start of next
                   1953: ;  ESI = # of dwords to fill
                   1954: ;  EDI = target address to fill
                   1955: ;  ECX = scratch; DRAW_WIDE_W_23 reloads it from ESI as the REP count
                   1956:         align   4
                   1957: draw_wide_w_23_loop     proc    near
                   1958:         UNROLL_LOOP     DRAW_WIDE_W_23,W_23,LOOP_UNROLL_COUNT
                   1959:         dec     ebx             ;one pass through the unrolled body is done
                   1960:         jnz     draw_wide_w_23_loop ;loop back to the top until EBX reaches zero
                   1961: 
                   1962:         ret                     ;near return to the caller
                   1963: 
                   1964: draw_wide_w_23_loop     endp
                   1965: 
                   1966: 
                   1967: ;-----------------------------------------------------------------------;
                   1968: ; Macro to draw n write-only bytes, 3 leading bytes, 0 trailing bytes,
                   1969: ; then advance to next scan line. Stores AL and AX, then ESI dwords of EAX.
                   1970: ; Clobbers ECX (REP count); assumes the direction flag is clear so STOSD moves EDI forward.
                   1971: DRAW_WIDE_W_30 macro ENTRY_LABEL,ENTRY_INDEX
                   1972: &ENTRY_LABEL&ENTRY_INDEX&:
                   1973:         mov     [edi],al        ;fill the leading byte
                   1974:         mov     [edi+1],ax      ;fill the leading word; word aligned if the dword run at EDI+3 is dword aligned
                   1975:         add     edi,3           ;step to the start of the dword run
                   1976:         mov     ecx,esi         ;# of whole dwords, reloaded for every scan
                   1977:         rep     stosd           ;fill all whole bytes as dwords; EDI ends just past the fill
                   1978:         add     edi,edx         ;point to the next scan line
                   1979:         endm    ;-----------------------------------;
                   1980: 
                   1981: ; N-wide write-only, 3 leading bytes, 0 trailing bytes.
                   1982: ;  EAX = value to store (AL and AX fill the 3 leading bytes, REP STOSD writes EAX); the dword count is in ESI, not EAX
                   1983: ;  EBX = loop count, decremented once per pass through the unrolled body (presumably derived from the count of scans to fill - confirm)
                   1984: ;  EDX = offset from end of one scan's fill to start of next
                   1985: ;  ESI = # of dwords to fill
                   1986: ;  EDI = target address to fill
                   1987: ;  ECX = scratch; DRAW_WIDE_W_30 reloads it from ESI as the REP count
                   1988:         align   4
                   1989: draw_wide_w_30_loop     proc    near
                   1990:         UNROLL_LOOP     DRAW_WIDE_W_30,W_30,LOOP_UNROLL_COUNT
                   1991:         dec     ebx             ;one pass through the unrolled body is done
                   1992:         jnz     draw_wide_w_30_loop ;loop back to the top until EBX reaches zero
                   1993: 
                   1994:         ret                     ;near return to the caller
                   1995: 
                   1996: draw_wide_w_30_loop     endp
                   1997: 
                   1998: 
                   1999: ;-----------------------------------------------------------------------;
                   2000: ; Macro to draw n write-only bytes, 3 leading bytes, 1 trailing byte,
                   2001: ; then advance to next scan line.
                   2002: 
                   2003: DRAW_WIDE_W_31 macro ENTRY_LABEL,ENTRY_INDEX
                   2004: &ENTRY_LABEL&ENTRY_INDEX&:      ;label pasted together from the two macro arguments
                   2005:         mov     [edi],al        ;store the 1st leading byte from AL
                   2006:         mov     [edi+1],ax      ;store the 2nd and 3rd leading bytes from AX
                   2007:         add     edi,3           ;step EDI past the 3 leading bytes
                   2008:         mov     ecx,esi         ;ECX = # of whole dwords (REP count, reloaded per scan)
                   2009:         rep     stosd           ;store EAX as ECX dwords at [EDI] (assumes DF=0 so EDI advances - confirm)
                   2010:         mov     [edi],al        ;store the single trailing byte from AL
                   2011:         inc     edi             ;step EDI past the trailing byte
                   2012:         add     edi,edx         ;EDI += EDX: point to the next scan line
                   2013:         endm    ;-----------------------------------;
                   2014: 
                   2015: ; N-wide write-only, 3 leading bytes, 1 trailing byte.
                   2016: ;  EAX = value to store (written as a byte from AL, a word from AX, dwords from EAX)
                   2017: ;  EBX = count of scans to fill
                   2018: ;  EDX = offset from end of one scan's fill to start of next
                   2019: ;  ESI = # of dwords to fill
                   2020: ;  EDI = target address to fill
                   2021: 
                   2022:         align   4               ;align the loop start on a 4-byte boundary
                   2023: draw_wide_w_31_loop     proc    near    ;fill loop: 3 leading bytes, whole dwords, 1 trailing byte
                   2024:         UNROLL_LOOP     DRAW_WIDE_W_31,W_31,LOOP_UNROLL_COUNT ;expand DRAW_WIDE_W_31 (UNROLL_LOOP is defined outside this section)
                   2025:         dec     ebx             ;one pass through the expanded body is done (NOTE(review): scans vs. unrolled passes - confirm against caller)
                   2026:         jnz     draw_wide_w_31_loop ;back to the top until EBX reaches zero
                   2027: 
                   2028:         ret                     ;EBX is zero: return to the caller
                   2029: 
                   2030: draw_wide_w_31_loop     endp
                   2031: 
                   2032: 
                   2033: ;-----------------------------------------------------------------------;
                   2034: ; Macro to draw n write-only bytes, 3 leading bytes, 2 trailing bytes,
                   2035: ; then advance to next scan line.
                   2036: 
                   2037: DRAW_WIDE_W_32 macro ENTRY_LABEL,ENTRY_INDEX
                   2038: &ENTRY_LABEL&ENTRY_INDEX&:      ;label pasted together from the two macro arguments
                   2039:         mov     [edi],al        ;store the 1st leading byte from AL
                   2040:         mov     [edi+1],ax      ;store the 2nd and 3rd leading bytes from AX
                   2041:         add     edi,3           ;step EDI past the 3 leading bytes
                   2042:         mov     ecx,esi         ;ECX = # of whole dwords (REP count, reloaded per scan)
                   2043:         rep     stosd           ;store EAX as ECX dwords at [EDI] (assumes DF=0 so EDI advances - confirm)
                   2044:         mov     [edi],ax        ;store the 2 trailing bytes as one word from AX
                   2045:         add     edi,2           ;step EDI past the 2 trailing bytes
                   2046:         add     edi,edx         ;EDI += EDX: point to the next scan line
                   2047:         endm    ;-----------------------------------;
                   2048: 
                   2049: ; N-wide write-only, 3 leading bytes, 2 trailing bytes.
                   2050: ;  EAX = value to store (written as a byte from AL, a word from AX, dwords from EAX)
                   2051: ;  EBX = count of scans to fill
                   2052: ;  EDX = offset from end of one scan's fill to start of next
                   2053: ;  ESI = # of dwords to fill
                   2054: ;  EDI = target address to fill
                   2055: 
                   2056:         align   4               ;align the loop start on a 4-byte boundary
                   2057: draw_wide_w_32_loop     proc    near    ;fill loop: 3 leading bytes, whole dwords, 2 trailing bytes
                   2058:         UNROLL_LOOP     DRAW_WIDE_W_32,W_32,LOOP_UNROLL_COUNT ;expand DRAW_WIDE_W_32 (UNROLL_LOOP is defined outside this section)
                   2059:         dec     ebx             ;one pass through the expanded body is done (NOTE(review): scans vs. unrolled passes - confirm against caller)
                   2060:         jnz     draw_wide_w_32_loop ;back to the top until EBX reaches zero
                   2061: 
                   2062:         ret                     ;EBX is zero: return to the caller
                   2063: 
                   2064: draw_wide_w_32_loop     endp
                   2065: 
                   2066: 
                   2067: ;-----------------------------------------------------------------------;
                   2068: ; Macro to draw n write-only bytes, 3 leading bytes, 3 trailing bytes,
                   2069: ; then advance to next scan line.
                   2070: 
                   2071: DRAW_WIDE_W_33 macro ENTRY_LABEL,ENTRY_INDEX
                   2072: &ENTRY_LABEL&ENTRY_INDEX&:      ;label pasted together from the two macro arguments
                   2073:         mov     [edi],al        ;store the 1st leading byte from AL
                   2074:         mov     [edi+1],ax      ;store the 2nd and 3rd leading bytes from AX
                   2075:         add     edi,3           ;step EDI past the 3 leading bytes
                   2076:         mov     ecx,esi         ;ECX = # of whole dwords (REP count, reloaded per scan)
                   2077:         rep     stosd           ;store EAX as ECX dwords at [EDI] (assumes DF=0 so EDI advances - confirm)
                   2078:         mov     [edi],ax        ;store the 1st and 2nd trailing bytes as one word from AX
                   2079:         mov     [edi+2],al      ;store the 3rd trailing byte from AL
                   2080:         add     edi,3           ;step EDI past the 3 trailing bytes
                   2081:         add     edi,edx         ;EDI += EDX: point to the next scan line
                   2082:         endm    ;-----------------------------------;
                   2083: 
                   2084: ; N-wide write-only, 3 leading bytes, 3 trailing bytes.
                   2085: ;  EAX = value to store (written as a byte from AL, a word from AX, dwords from EAX)
                   2086: ;  EBX = count of scans to fill
                   2087: ;  EDX = offset from end of one scan's fill to start of next
                   2088: ;  ESI = # of dwords to fill
                   2089: ;  EDI = target address to fill
                   2090: 
                   2091:         align   4               ;align the loop start on a 4-byte boundary
                   2092: draw_wide_w_33_loop     proc    near    ;fill loop: 3 leading bytes, whole dwords, 3 trailing bytes
                   2093:         UNROLL_LOOP     DRAW_WIDE_W_33,W_33,LOOP_UNROLL_COUNT ;expand DRAW_WIDE_W_33 (UNROLL_LOOP is defined outside this section)
                   2094:         dec     ebx             ;one pass through the expanded body is done (NOTE(review): scans vs. unrolled passes - confirm against caller)
                   2095:         jnz     draw_wide_w_33_loop ;back to the top until EBX reaches zero
                   2096: 
                   2097:         ret                     ;EBX is zero: return to the caller
                   2098: 
                   2099: draw_wide_w_33_loop     endp
                   2100: 
                   2101:         end
                   2102: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.