Annotation of ntddk/src/video/displays/vga/i386/vgablts.asm, revision 1.1.1.1

1.1       root        1: ;---------------------------Module-Header------------------------------;
                      2: ; Module Name: vgablts.asm
                      3: ;
                      4: ; Copyright (c) 1992 Microsoft Corporation
                      5: ;-----------------------------------------------------------------------;
                      6: ;-----------------------------------------------------------------------;
                      7: ; VOID vTrgBlt(PDEVSURF pdsurf, ULONG culRcl, RECTL * prcl, MIX ulMix,
                      8: ;              ULONG ulClr);
                      9: ; Input:
                     10: ;  pdsurf - surface to which to draw
                     11: ;  culRcl - # of rectangles to fill
                     12: ;  prcl   - pointer to list of rectangles to fill
                     13: ;  ulMix  - mix rop with which to fill
                     14: ;  ulClr  - color with which to fill
                     15: ;
                     16: ; Performs accelerated solid area fills for all mixes.
                     17: ;
                     18: ;-----------------------------------------------------------------------;
                     19: ;
                     20: ; Note: Assumes all rectangles have positive heights and widths. Will not
                     21: ; work properly if this is not the case.
                     22: ;
                     23: ;-----------------------------------------------------------------------;
                     24: ;
                     25: ; Note: Cases where the width of the whole bytes fill is equal to the
                     26: ; width of the bitmap could be sped up by using a single REP MOVS or REP
                     27: ; STOS, but how often does WIN32 do a fill that's the width of the screen?
                     28: ; Not very.
                     29: ;
                     30: ;-----------------------------------------------------------------------;
                     31: 
                     32:         comment $
                     33: 
                     34: The overall approach of this module is to accept a list of rectangles to
                     35: fill, set up the VGA hardware for the desired fill, and then fill the
                     36: rectangles one at a time. Each rectangle fill is set up for everything
                     37: but vertical parameters, and then decomposed into the sections that
                     38: intersect each VGA bank; each section is drawn in turn. The drawing code
                     39: is heavily unrolled for performance, and vectors are set up so that the
                     40: drawing code appropriate for the desired fill is essentially threaded
                     41: together.
                     42: 
                     43:         comment $
                     44: 
                     45: ;-----------------------------------------------------------------------;
                     46: 
                     47: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
                     48: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
                     49: ; times unrolling. This is the only thing you need to change to control
                     50: ; unrolling.
                     51: 
                     52: LOOP_UNROLL_SHIFT equ 2
                     53: 
                     54: ;-----------------------------------------------------------------------;
                     55: 
                     56:                 .386
                     57: 
                     58: ifndef  DOS_PLATFORM
                     59:         .model  small,c
                     60: else
                     61: ifdef   STD_CALL
                     62:         .model  small,c
                     63: else
                     64:         .model  small,pascal
                     65: endif;  STD_CALL
                     66: endif;  DOS_PLATFORM
                     67: 
                     68:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
                     69:         assume fs:nothing,gs:nothing
                     70: 
                     71:         .xlist
                     72:         include stdcall.inc             ;calling convention cmacros
                     73:         include i386\egavga.inc
                     74:         include i386\strucs.inc
                     75:         include i386\unroll.inc
                     76:         include i386\ropdefs.inc
                     77: 
                     78:         .list
                     79: 
                     80: ;-----------------------------------------------------------------------;
                     81: 
                     82:         .data
                     83: 
                     84: ;-----------------------------------------------------------------------;
                     85: ; Left edge clip masks for intrabyte start addresses 0 through 7.
                     86: ; Whole byte cases are flagged as 0ffh.
                     87:        public jLeftMask
                     88: jLeftMask       label   byte
                     89:         db      0ffh,07fh,03fh,01fh,00fh,007h,003h,001h
                     90: 
                     91: ;-----------------------------------------------------------------------;
                     92: ; Right edge clip masks for intrabyte end addresses (non-inclusive)
                     93: ; 0 through 7. Whole byte cases are flagged as 0ffh.
                     94:        public jRightMask
                     95: jRightMask      label   byte
                     96:         db      0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh
                     97: 
                     98: ;-----------------------------------------------------------------------;
                     99: ; Tables used to set up for the desired raster op. Note that entries for raster
                    100: ; ops that aren't handled here are generally correct, except that they ignore
                    101: ; need for inversion of the destination, which those rops require.
                    102: 
                    103: ; Table used to force off the drawing color for R2_BLACK (0).
                    104: ; The first entry is ignored; there is no mix 0.
                    105:        public jForceOffTable
                    106: jForceOffTable  db         0
                    107:                 db         0,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh
                    108:                 db      0ffh,0ffh,   0,0ffh,0ffh,0ffh,0ffh,0ffh
                    109: 
                    110: ;-----------------------------------------------------------------------;
                    111: ; Table used to force on the drawing color for R2_NOT (Dn) and R2_WHITE (1).
                    112: ; The first entry is ignored; there is no mix 0.
                    113:        public  jForceOnTable
                    114: jForceOnTable   db      0, 0,0,0,0,0,0ffh,0,0,0,0,0,0,0,0,0,0ffh
                    115: 
                    116: ;-----------------------------------------------------------------------;
                    117: ; Table used to invert the passed-in drawing color for Pn mixes.
                    118: ; The first entry is ignored; there is no mix 0.
                    119:        public  jNotTable
                    120: jNotTable       db      0, 0,0ffh,0ffh,0ffh,0,0,0,0ffh,0,0ffh,0,0ffh,0,0,0,0
                    121: 
                    122: ;-----------------------------------------------------------------------;
                    123: ; Table of VGA ALU logical functions corresponding to mixes. Note that Dn is
                    124: ; handled as a separate preceding inversion pass when part of a more complex
                    125: ; mix.
                    126: ; The first entry is ignored; there is no mix 0.
                    127:        public jALUFuncTable    
                    128: jALUFuncTable   db      0
                    129:                 db      DR_SET,DR_AND,DR_AND,DR_SET
                    130:                 db      DR_AND,DR_XOR,DR_XOR,DR_OR
                    131:                 db      DR_AND,DR_XOR,     0,DR_OR
                    132:                 db      DR_SET,DR_OR ,DR_OR ,DR_SET
                    133: 
                    134: ;-----------------------------------------------------------------------;
                    135: ; 1 entries mark rops that require two passes, one to invert the destination
                    136: ; and then another to finish the rop.
                    137: ; The first entry is ignored; there is no mix 0.
                    138:        public  jInvertDest
                    139: jInvertDest     db      0, 0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0
                    140: 
                    141: ;-----------------------------------------------------------------------;
                    142: ; Table of routines to be called to draw edges, indexed by edge status:
                    143: ; bit 0 = 1 if left edge is partial, bit 1 = 1 if right edge is a whole byte.
                    144:         align   4
                    145: pfnEdgeDrawing  label   dword
                    146:         dd      do_right_edge_bytes     ;0: left whole,   right not whole
                    147:         dd      do_both_edge_bytes      ;1: left partial, right not whole
                    148:         dd      check_next_bank         ;2: left whole,   right whole
                    149:         dd      do_left_edge_bytes      ;3: left partial, right whole
                    150: 
                    151: ;-----------------------------------------------------------------------;
                    152: ; Table of pointers to tables used to find entry points in unrolled wide
                    153: ; whole byte code.
                    154: 
                    155:         align   4
                    156: pfnWideWholeRep label   dword
                    157:         dd      pfnDrawWideW00Entry
                    158:         dd      pfnDrawWideW01Entry
                    159:         dd      pfnDrawWideW02Entry
                    160:         dd      pfnDrawWideW03Entry
                    161:         dd      pfnDrawWideW10Entry
                    162:         dd      pfnDrawWideW11Entry
                    163:         dd      pfnDrawWideW12Entry
                    164:         dd      pfnDrawWideW13Entry
                    165:         dd      pfnDrawWideW20Entry
                    166:         dd      pfnDrawWideW21Entry
                    167:         dd      pfnDrawWideW22Entry
                    168:         dd      pfnDrawWideW23Entry
                    169:         dd      pfnDrawWideW30Entry
                    170:         dd      pfnDrawWideW31Entry
                    171:         dd      pfnDrawWideW32Entry
                    172:         dd      pfnDrawWideW33Entry
                    173: 
                    174: ;-----------------------------------------------------------------------;
                    175: ; Table of pointers to tables used to find entry points in narrow, special-
                    176: ; cased unrolled non-replace whole byte code.
                    177: 
                    178: ; Note: The breakpoint where one should switch from special-casing to
                    179: ;  REP MOVSB is purely a guess on my part. 5 seemed reasonable.
                    180: 
                    181:         align   4
                    182: pfnWholeBytesNonReplaceEntries  label   dword
                    183:         dd      0                       ;we never get a 0-wide case
                    184:         dd      pfnDraw1WideRWEntry
                    185:         dd      pfnDraw2WideRWEntry
                    186:         dd      pfnDraw3WideRWEntry
                    187:         dd      pfnDraw4WideRWEntry
                    188: MAX_NON_REPLACE_SPECIAL equ     ($-pfnWholeBytesNonReplaceEntries)/4
                    189: 
                    190: ;-----------------------------------------------------------------------;
                    191: ; Table of pointers to tables used to find entry points in narrow, special-
                    192: ; cased unrolled replace whole byte code.
                    193: 
                    194: ; Note: The breakpoint where one should switch from special-casing to
                    195: ;  REP STOS is purely a guess on my part. 8 seemed reasonable.
                    196: 
                    197: ; Start address MOD 4 (i.e. address AND 3) is 0.
                    198:         align   4
                    199: pfnWholeBytesMod0ReplaceEntries  label   dword
                    200:         dd      0                       ;we never get a 0-wide case
                    201:         dd      pfnDraw1WideWEntry
                    202:         dd      pfnDraw2WideWEntry
                    203:         dd      pfnDraw3WideWEvenEntry
                    204:         dd      pfnDraw4WideWEntry
                    205:         dd      pfnDraw5WideWEvenEntry
                    206:         dd      pfnDraw6WideWMod3_0Entry
                    207:         dd      pfnDraw7WideWMod3_0Entry
                    208:         dd      pfnDraw8WideWMod3_0Entry
                    209: MAX_REPLACE_SPECIAL equ     ($-pfnWholeBytesMod0ReplaceEntries)/4
                    210: 
                    211: ; Start address MOD 4 (i.e. address AND 3) is 1.
                    212:         align   4
                    213: pfnWholeBytesMod1ReplaceEntries  label   dword
                    214:         dd      0                       ;we never get a 0-wide case
                    215:         dd      pfnDraw1WideWEntry
                    216:         dd      pfnDraw2WideWEntry
                    217:         dd      pfnDraw3WideWOddEntry
                    218:         dd      pfnDraw4WideWEntry
                    219:         dd      pfnDraw5WideWOddEntry
                    220:         dd      pfnDraw6WideWMod3_1Entry
                    221:         dd      pfnDraw7WideWMod3_1Entry
                    222:         dd      pfnDraw8WideWMod3_1Entry
                    223: 
                    224: ; Start address MOD 4 (i.e. address AND 3) is 2.
                    225:         align   4
                    226: pfnWholeBytesMod2ReplaceEntries  label   dword
                    227:         dd      0                       ;we never get a 0-wide case
                    228:         dd      pfnDraw1WideWEntry
                    229:         dd      pfnDraw2WideWEntry
                    230:         dd      pfnDraw3WideWEvenEntry
                    231:         dd      pfnDraw4WideWEntry
                    232:         dd      pfnDraw5WideWEvenEntry
                    233:         dd      pfnDraw6WideWMod3_2Entry
                    234:         dd      pfnDraw7WideWMod3_2Entry
                    235:         dd      pfnDraw8WideWMod3_2Entry
                    236: 
                    237: ; Start address MOD 4 (i.e. address AND 3) is 3.
                    238:         align   4
                    239: pfnWholeBytesMod3ReplaceEntries  label   dword
                    240:         dd      0                       ;we never get a 0-wide case
                    241:         dd      pfnDraw1WideWEntry
                    242:         dd      pfnDraw2WideWEntry
                    243:         dd      pfnDraw3WideWOddEntry
                    244:         dd      pfnDraw4WideWEntry
                    245:         dd      pfnDraw5WideWOddEntry
                    246:         dd      pfnDraw6WideWMod3_3Entry ;alignment-3 variant, as for 7/8-wide
                    247:         dd      pfnDraw7WideWMod3_3Entry
                    248:         dd      pfnDraw8WideWMod3_3Entry
                    249: 
                    250: ; Master MOD 4 alignment look-up table for entry tables for four possible
                    251: ; alignments for narrow, special-cased unrolled replace whole byte code.
                    252:         align   4
                    253: pfnWholeBytesReplaceMaster      label   dword
                    254:         dd      pfnWholeBytesMod0ReplaceEntries
                    255:         dd      pfnWholeBytesMod1ReplaceEntries
                    256:         dd      pfnWholeBytesMod2ReplaceEntries
                    257:         dd      pfnWholeBytesMod3ReplaceEntries
                    258: 
                    259: ;-----------------------------------------------------------------------;
                    260: 
                    261:                 .code
                    262: 
                    263: _TEXT$01   SEGMENT DWORD USE32 PUBLIC 'CODE'
                    264:            ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
                    265: 
                    266: ;-----------------------------------------------------------------------;
                    267: 
                    268: cProc   vTrgBlt,20,<         \
                    269:         uses    esi edi ebx, \
                    270:         pdsurf: ptr DEVSURF, \
                    271:         culRcl: dword,       \
                    272:         prcl:   ptr RECTL,   \
                    273:         ulMix:  dword,       \
                    274:         ulColor:dword        >
                    275: 
                    276:         local   ulRowOffset :dword      ;Offset from start of scan line of
                    277:                                         ; first byte to fill
                    278:         local   ulWholeBytes :dword     ;# of whole bytes to fill
                    279:         local   ulWholeDwords :dword    ;# of whole dwords to fill
                    280:         local   pfnWholeFn  :dword      ;pointer to routine used to draw
                    281:                                         ; whole bytes
                    282:         local   ulScanWidth :dword      ;offset from start of one scan to start
                    283:                                         ; of next
                    284:         local   ulNextScan  :dword      ;offset from end of one scan line's
                    285:                                         ; fill to start of next
                    286:         local   ulCurrentTopScan :dword ;top scan line to fill in current bank
                    287:         local   ulMasks     :dword      ;low byte = right mask, high byte =
                    288:                                         ; left mask
                    289:         local   ulBottomScan :dword     ;bottom scan line of fill rectangle
                    290:         local   pfnDraw1WideVector :dword ;address at which to enter unrolled
                    291:                                           ; edge loop
                    292:         local   jALUFunc   :dword       ;VGA ALU logical operation (SET, AND,
                    293:                                         ; OR, or XOR)
                    294:         local   pfnStartDrawing :dword  ;pointer to function to call to start
                    295:                                         ; drawing
                    296:         local   pfnContinueDrawing :dword ;pointer to function to call to
                    297:                                         ; continue drawing after doing whole
                    298:                                         ; bytes
                    299:         local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
                    300:                                         ; address past the left edge when the
                    301:                                         ; left edge is partial
                    302:         local   pfnWholeBytes :dword    ;pointer to table of entry points
                    303:                                         ; into unrolled loops for whole byte
                    304:                                         ; filling
                    305:         local   jInvertDestFirst :dword ;1 if the rop requires a pass to invert
                    306:                                         ; the destination before the normal
                    307:                                         ; pass
                    308:         local   jDrawingColor :dword    ;color with which we're drawing
                    309:                                         ; dword to finish out fill
                    310: 
                    311: ;-----------------------------------------------------------------------;
                    312: 
                    313:         cld
                    314: 
                    315: ;-----------------------------------------------------------------------;
                    316: ; Make sure there's something to draw; clip enumerations can be empty.
                    317: ;-----------------------------------------------------------------------;
                    318: 
                    319:         cmp     culRcl,0                ;any rects to fill?
                    320:         jz      vTrgBlts_done           ;no, we're done
                    321: 
                    322: 
                    323: ;-----------------------------------------------------------------------;
                    324: ; Set up variables that are constant for the entire time we're in this
                    325: ; module.
                    326: ;-----------------------------------------------------------------------;
                    327: 
                    328: ;-----------------------------------------------------------------------;
                    329: ; Set up for the desired raster op.
                    330: ;-----------------------------------------------------------------------;
                    331: 
                    332:         sub     ebx,ebx                 ;ignore any background mix; we're only
                    333:         mov     bl,byte ptr ulMix       ; concerned with the foreground in this
                    334:                                         ; module
                    335:         cmp     ebx,R2_NOP              ;is this NOP?
                    336:         jz      vTrgBlts_done           ;yes, we're done
                    337:         mov     al,jInvertDest[ebx]          ;remember whether we need to
                    338:         mov     byte ptr jInvertDestFirst,al ; invert the destination before
                    339:                                              ; finishing the rop
                    340:         mov     ah,byte ptr ulColor     ;get the drawing color
                    341:         and     ah,jForceOffTable[ebx]  ;force color to 0 if necessary
                    342:                                         ; (R2_BLACK)
                    343:         or      ah,jForceOnTable[ebx]   ;force color to 0ffh if necessary
                    344:                                         ; (R2_WHITE, R2_NOT)
                    345:         xor     ah,jNotTable[ebx]       ;invert color if necessary (any Pn mix)
                    346:                                         ;at this point, AH has the color we
                    347:                                         ; want to draw with; set up the VGA
                    348:                                         ; hardware to draw with that color
                    349:         mov     byte ptr jDrawingColor,ah ;remember drawing color for restoring
                    350:                                           ; after inversion
                    351:         mov     edx,VGA_BASE + GRAF_ADDR
                    352:         mov     al,GRAF_SET_RESET       ;set/reset = color to write
                    353:         out     dx,ax
                    354:         mov     eax,0F00h + GRAF_ENAB_SR ;enable set/reset for all planes, so
                    355:         out     dx,ax                   ; set/reset color we just set becomes
                    356:                                         ; the drawing color, regardless of the
                    357:                                         ; value written by the CPU
                    358: 
                    359:         mov     ah,jALUFuncTable[ebx]   ;get the ALU logical function
                    360:         and     ah,ah                   ;is the logical function DR_SET?
                    361:         .errnz  DR_SET
                    362:         jz      short skip_ALU_set      ;yes, don't have to set because that's
                    363:                                         ; the VGA's default state
                    364:         mov     al,GRAF_DATA_ROT
                    365:         out     dx,ax                   ;set the ALU logical function
                    366: skip_ALU_set:
                    367:         mov     byte ptr jALUFunc,ah    ;remember the ALU logical function
                    368: 
                    369:        mov     eax,GRAF_MODE + ((M_AND_WRITE + M_COLOR_READ) SHL 8)
                    370:        out     dx,ax                   ;write mode 3 so we can do the masking
                    371:                                        ; without OUTs, read mode 1 so we can
                    372:                                        ; read 0xFF from memory always, for
                    373:                                        ; ANDing (because Color Don't Care is
                    374:                                        ; all zeros)
                    375: 
                    376: ;-----------------------------------------------------------------------;
                    377: ; Fill the current rectangle with the specified raster op and color.
                    378: ;-----------------------------------------------------------------------;
                    379: 
                    380: fill_rect_loop:
                    381: 
                    382: ;-----------------------------------------------------------------------;
                    383: ; Set up variables that are constant from bank to bank during a single
                    384: ; fill.
                    385: ;-----------------------------------------------------------------------;
                    386: 
                    387: ;-----------------------------------------------------------------------;
                    388: ; Set up masks and widths.
                    389: ;-----------------------------------------------------------------------;
                    390: 
                    391:         mov     edi,prcl                ;point to rectangle to fill
                    392:         mov     eax,[edi].yBottom
                    393:         mov     ulBottomScan,eax        ;remember the bottom scan line of fill
                    394: 
                    395:         mov     ebx,[edi].xRight        ;right edge of fill (non-inclusive)
                    396:         mov     ecx,ebx
                    397:         and     ecx,0111b               ;intrabyte address of right edge
                    398:         mov     ah,jRightMask[ecx]      ;right edge mask
                    399: 
                    400:         mov     esi,[edi].xLeft         ;left edge of fill (inclusive)
                    401:         mov     ecx,esi
                    402:         shr     ecx,3                   ;/8 for start offset from left edge
                    403:                                         ; of scan line
                    404:         mov     ulRowOffset,ecx         ;remember offset from start of scan
                    405:                                         ; line
                    406:         sub     ebx,esi                 ;width in pixels of fill
                    407: 
                    408:         and     esi,0111b               ;intrabyte address of left edge
                    409:         mov     al,jLeftMask[esi]       ;left edge mask
                    410: 
                    411:         dec     ebx                     ;make inclusive on right
                    412:         add     ebx,esi                 ;inclusive width, starting counting at
                    413:                                         ; the beginning of the left edge byte
                    414:         shr     ebx,3                   ;width of fill in bytes touched - 1
                    415:         jnz     short more_than_1_byte  ;more than 1 byte is involved
                    416: 
                    417: ; Only one byte will be affected. Combine first/last masks.
                    418: 
                    419:         and     al,ah                   ;we'll use first byte mask only
                    420:         xor     ah,ah                   ;want last byte mask to be 0
                    421:         inc     ebx                     ;so there's one count to subtract below
                    422:                                         ; if this isn't a whole edge byte
                    423: more_than_1_byte:
                    424: 
                    425: ; If all pixels in the left edge are altered, combine the first byte into the
                    426: ; whole byte count and clear the first byte mask, because we can handle solid
                    427: ; edge bytes faster as part of the whole bytes. Ditto for the right edge.
                    428: 
                    429:         sub     ecx,ecx                 ;edge whole-status accumulator
                    430:         cmp     al,-1                   ;is left edge a whole byte or partial?
                    431:         adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
                    432:         sub     ebx,ecx                 ;if left edge partial, deduct it from
                    433:                                         ; the whole bytes count
                    434:         mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
                    435:                                         ; it's partial when pointing to the
                    436:                                         ; whole bytes
                    437:         and     ah,ah                   ;is right edge mask 0, meaning this
                    438:                                         ; fill is only 1 byte wide?
                    439:         jz      short save_masks        ;yes, no need to do anything
                    440:         cmp     ah,-1                   ;is right edge a whole byte or partial?
                    441:         jnz     short save_masks        ;partial
                    442:         add     ecx,2                   ;bit 1 of ECX=0 if right edge partial,
                    443:                                         ; 1 if whole;
                    444:                                         ;bit 0=1 if left edge partial, 0 whole
                    445:         inc     ebx                     ;if right edge whole, include it in the
                    446:                                         ; whole bytes count
                    447: save_masks:
                    448:         mov     ulMasks,eax             ;save left and right clip masks
                    449:         mov     ulWholeBytes,ebx        ;save # of whole bytes
                    450: 
                    451:         mov     ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw
                    452:         mov     pfnContinueDrawing,ecx    ; all partial (non-whole) edges
                    453: 
                    454:         and     ebx,ebx                 ;any whole bytes?
                    455:         jz      short start_vec_set     ;no
                    456:                                         ;yes, so draw the whole bytes before
                    457:                                         ; the edge bytes
                    458: 
                    459: ; The whole bytes loop depends on the type of operation being done. If the
                    460: ; operation is one which uses DR_SET, then we can use a STOS-type operation,
                    461: ; else we have to use a MOVSB-type operation (to load the latches with the
                    462: ; existing contents of display memory to allow the ALUs to work).
                    463: 
                    464:         cmp     byte ptr jALUFunc,DR_SET ;is it a replace-type rop?
                    465:         jz      short is_replace_type   ;yes
                    466:                                         ;no, set up for non-replace whole bytes
                    467:         mov     ecx,offset whole_bytes_non_replace_wide
                    468:                                         ;assume too wide to special-case
                    469:         cmp     ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case?
                    470:         jnb     short start_vec_set     ;yes
                    471:         mov     ecx,pfnWholeBytesNonReplaceEntries[ebx*4] ;no, point to entry
                    472:         mov     pfnWholeBytes,ecx       ; table for width
                    473:         mov     ecx,offset whole_bytes_special
                    474:                                         ;set up to call special routine to fill
                    475:                                         ; whole bytes
                    476:         jmp     short start_vec_set
                    477: 
                    478:         align   4
                    479: is_replace_type:                        ;set up for replace-type rop
                    480:         cmp     ebx,MAX_REPLACE_SPECIAL ;too wide to special case?
                    481:         jnb     short is_wide_replace   ;yes
                    482:                                         ;narrow enough to special case. Look up
                    483:                                         ; the entry table for the special case
                    484:                                         ; based on the start alignment
                    485:         mov     ecx,ulRowOffset
                    486:         add     ecx,ulLeftEdgeAdjust    ;left edge whole bytes start offset
                    487:         and     ecx,011b                ;left edge whole bytes start alignment
                    488:                                         ; MOD 4
                    489:         mov     ecx,pfnWholeBytesReplaceMaster[ecx*4] ;look up table of entry
                    490:                                                       ; tables for alignment
                    491:         mov     ecx,[ecx+ebx*4]         ;look up entry table for width
                    492:         mov     pfnWholeBytes,ecx       ; table for width
                    493:         mov     ecx,offset whole_bytes_special
                    494:                                         ;set up to call special routine to fill
                    495:                                         ; whole bytes
                    496:         jmp     short start_vec_set
                    497: 
                    498:         align   4
                    499: is_wide_replace:                        ;set up for wide replace-type op
                    500:                                         ;Note: assumes there is at least one
                    501:                                         ; full dword involved!
                    502:         mov     ecx,ulRowOffset
                    503:         add     ecx,ulLeftEdgeAdjust    ;left edge whole bytes start offset
                    504:         neg     ecx
                    505:         and     ecx,011b
                    506:         mov     edx,ebx
                    507:         sub     edx,ecx                 ;ignore odd leading bytes
                    508:         mov     eax,edx
                    509:         shr     edx,2                   ;# of whole dwords across (not counting
                    510:                                         ; odd leading & trailing bytes)
                    511:         mov     ulWholeDwords,edx
                    512:         and     eax,011b                ;# of odd (fractional) trailing bytes
                    513:         shl     ecx,2
                    514:         or      ecx,eax                 ;build a look-up index from the number
                    515:                                         ; of leading and trailing bytes
                    516:         mov     ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/
                    517:         mov     pfnWholeBytes,ecx          ; back alignment
                    518:         mov     ecx,offset whole_bytes_rep_wide
                    519:                                         ;set up to call routine to perform wide
                    520:                                         ; whole bytes fill
                    521: start_vec_set:
                    522:         mov     pfnStartDrawing,ecx     ; all partial (non-whole) edges
                    523: 
                    524:         mov     ecx,pdsurf
                    525:         mov     eax,[ecx].dsurf_lNextScan
                    526:         mov     ulScanWidth,eax         ;local copy of scan line width
                    527:         sub     eax,ebx                 ;EAX = delta to next scan
                    528:         mov     ulNextScan,eax
                    529: 
                    530: 
                    531: ;-----------------------------------------------------------------------;
                    532: ; Fill this rectangle.
                    533: ;-----------------------------------------------------------------------;
                    534: 
                    535:         cmp     byte ptr jInvertDestFirst,1
                    536:                                         ;is this an invert-dest-plus-something-
                    537:                                         ; else rop that requires two passes?
                    538:         jz      short do_invert_dest_rop ;yes, special case with two passes
                    539: 
                    540: do_single_pass:
                    541:         call    draw_banks
                    542: 
                    543: 
                    544: ;-----------------------------------------------------------------------;
                    545: ; See if there are any more rectangles to fill.
                    546: ;-----------------------------------------------------------------------;
                    547: 
                    548:         add     prcl,(size RECTL) ;point to the next rectangle, if there is one
                    549:         dec     culRcl            ;count down the rectangles to fill
                    550:         jnz     fill_rect_loop
                    551: 
                    552: 
                    553: ;-----------------------------------------------------------------------;
                    554: ; We have filled all rectangles.  Restore the VGA to its default state.
                    555: ;-----------------------------------------------------------------------;
                    556: 
                    557:         mov     edx,VGA_BASE + GRAF_ADDR
                    558:         mov     eax,0000h + GRAF_ENAB_SR ;disable set/reset
                    559:         out     dx,ax
                    560:        mov     eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8)
                    561:        out     dx,ax                   ;restore read mode 0 and write mode 0
                    562:         cmp     byte ptr jALUfunc,DR_SET ;is the logical function already SET?
                    563:         jz      short vTrgBlts_done               ;yes, no need to reset it
                    564:         mov     eax,(DR_SET shl 8) + GRAF_DATA_ROT ;set the logical function to
                    565:         out     dx,ax                             ; SET
                    566: vTrgBlts_done:
                    567:         cRet    vTrgBlt
                    568: 
                    569: 
                    570: ;-----------------------------------------------------------------------;
                    571: ; Handles rops that require two passes, the first being a destination
                    572: ; inversion pass.
                    573: ;-----------------------------------------------------------------------;
                    574: 
                    575:         align   4
                    576: do_invert_dest_rop:
                    577: 
                    578: ; Set up the VGA's hardware for inversion
                    579: 
                    580:         mov     edx,VGA_BASE + GRAF_ADDR
                    581:         mov     eax,0ff00h + GRAF_SET_RESET ;set/reset = 0ffh to invert in
                    582:         out     dx,ax                       ; conjunction with XOR
                    583:         mov     eax,(DR_XOR shl 8) + GRAF_DATA_ROT
                    584:         out     dx,ax                   ;logical function = XOR to invert
                    585: 
                    586: ; Invert the destination
                    587: 
                    588:         call    draw_banks
                    589: 
                    590: ; Restore the VGA's hardware to the state required for the second pass.
                    591: 
                    592:         mov     edx,VGA_BASE + GRAF_ADDR
                    593:         mov     ah,byte ptr jDrawingColor
                    594:         mov     al,GRAF_SET_RESET       ;set/reset = color to write
                    595:         out     dx,ax
                    596:         mov     ah,byte ptr jALUFunc
                    597:         mov     al,GRAF_DATA_ROT
                    598:         out     dx,ax                   ;set the ALU logical function
                    599: 
                    600: ; Perform the second pass to finish the rop.
                    601: 
                    602:         jmp     do_single_pass
                    603: 
                    604: 
                    605: ;-----------------------------------------------------------------------;
                    606: ; Fills all banks in the current fill rectangle. Called once per fill
                    607: ; rectangle, except for destination-inversion-plus-something-else rops.
                    608: ;-----------------------------------------------------------------------;
                    609: 
                    610:         align   4
                    611: draw_banks:
                    612: 
                    613: ;-----------------------------------------------------------------------;
                    614: ; Map in the bank containing the top scan to fill, if it's not mapped in
                    615: ; already.
                    616: ;-----------------------------------------------------------------------;
                    617: 
                    618:         mov     edi,prcl                ;point to rectangle to fill
                    619:         mov     ecx,pdsurf              ;point to surface
                    620:         mov     eax,[edi].yTop          ;top scan line of fill
                    621:         mov     ulCurrentTopScan,eax    ;this will be the fill top in 1st bank
                    622: 
                    623:         cmp     eax,[ecx].dsurf_rcl1WindowClip.yTop ;is fill top less than
                    624:                                                     ; current bank?
                    625:         jl      short map_init_bank             ;yes, map in proper bank
                    626:         cmp     eax,[ecx].dsurf_rcl1WindowClip.yBottom ;fill top greater than
                    627:                                                        ; current bank?
                    628:         jl      short init_bank_mapped          ;no, proper bank already mapped
                    629: map_init_bank:
                    630: 
                    631: ; Map in the bank containing the top scan line of the fill.
                    632: 
                    633:         ptrCall <dword ptr [ecx].dsurf_pfnBankControl>,<ecx,eax,JustifyTop>
                    634: 
                    635: init_bank_mapped:
                    636: 
                    637: ;-----------------------------------------------------------------------;
                    638: ; Main loop for processing fill in each bank.
                    639: ;-----------------------------------------------------------------------;
                    640: 
                    641: ; Compute the starting address and scan line count for the initial bank.
                    642: 
                    643:         mov     eax,pdsurf              ;EAX->target surface
                    644:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
                    645:         cmp     ebx,[eax].dsurf_rcl1WindowClip.yBottom
                    646:                                         ;which comes first, the bottom of the
                    647:                                         ; dest rect or the bottom of the
                    648:                                         ; current bank?
                    649:         jl      short BottomScanSet     ;fill bottom comes first, so draw to
                    650:                                         ; that; this is the last bank in fill
                    651:         mov     ebx,[eax].dsurf_rcl1WindowClip.yBottom
                    652:                                         ;bank bottom comes first; draw to
                    653:                                         ; bottom of bank
                    654: BottomScanSet:
                    655:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
                    656:         sub     ebx,edi                 ;# of scans to fill in bank
                    657:         imul    edi,ulScanWidth         ;offset of starting scan line
                    658: 
                    659: ; Note that the start of the bitmap will change each time through the
                    660: ; bank loop, because the start of the bitmap is varied to map the
                    661: ; desired scan line to the banking window.
                    662: 
                    663:         add     edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
                    664:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
                    665: 
                    666: ; We have computed the starting address and scan count. Time to start drawing
                    667: ; in the initial bank.
                    668: 
                    669:         jmp     pfnStartDrawing
                    670: 
                    671: 
                    672: ;-----------------------------------------------------------------------;
                    673: ; Whole byte fills.
                    674: ;-----------------------------------------------------------------------;
                    675: 
                    676: ;-----------------------------------------------------------------------;
                    677: ; Handles replace-type whole byte fills wider than the maximum special
                    678: ; case width.
                    679: ;
                    680: ; The destination is not involved, so a STOS (or equivalent) can be used
                    681: ; (no read needed before write).
                    682: ;-----------------------------------------------------------------------;
                    683: 
                    684:         align   4
                    685: whole_bytes_rep_wide:
                    686:         push    ebx                     ;save scan count
                    687:         push    edi                     ;save starting address
                    688: 
                    689:         mov     eax,pfnWholeBytes       ;point to entry table for unrolled
                    690:                                         ; loop for whole byte width
                    691:         SET_UP_UNROLL_VARS ebx,ecx, ebx,[eax], LOOP_UNROLL_SHIFT
                    692:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
                    693:         mov     esi,ulWholeDwords       ;whole dwords width
                    694:        mov     eax,-1                  ;this will become the Bit Mask,
                    695:                                        ; enabling drawing to all bits
                    696:         mov     edx,ulNextScan          ;offset from end of one scan line to
                    697:                                         ; start of next
                    698:         call    ecx                     ;draw the wide whole bytes
                    699: 
                    700:         pop     edi                     ;restore screen pointer
                    701:         pop     ebx                     ;restore fill scan count
                    702:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
                    703: 
                    704: 
                    705: ;-----------------------------------------------------------------------;
                    706: ; Handles both replace and non-replace whole byte fills narrow enough to
                    707: ; special case.
                    708: ;-----------------------------------------------------------------------;
                    709: 
                    710:         align   4
                    711: whole_bytes_special:
                    712:         push    ebx                     ;save scan count
                    713:         push    edi                     ;save starting address
                    714: 
                    715:         mov     eax,pfnWholeBytes       ;point to entry table for unrolled
                    716:                                         ; loop for whole byte width
                    717:         SET_UP_UNROLL_VARS ebx,edx, ebx,[eax], LOOP_UNROLL_SHIFT
                    718:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
                    719:         mov     ecx,ulScanWidth         ;offset to next scan line
                    720:        mov     eax,-1                  ;this will become the Bit Mask,
                    721:                                        ; enabling drawing to all bits
                    722:         call    edx                     ;draw the whole bytes
                    723: 
                    724:         pop     edi                     ;restore screen pointer
                    725:         pop     ebx                     ;restore fill scan count
                    726:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
                    727: 
                    728: 
                    729: ;-----------------------------------------------------------------------;
                    730: ; Handles non-replace whole byte fills wider than the maximum special case
                    731: ; width.
                    732: ;
                    733: ; The destination is involved, so a MOVSB (or equivalent) must be
                    734: ; performed in order to do a read before write to give the ALUs something
                    735: ; to work with.
                    736: ;-----------------------------------------------------------------------;
                    737: 
                    738:         align   4
                    739: whole_bytes_non_replace_wide:
                    740:         push    ebx                     ;save scan count
                    741:         push    edi                     ;save starting address
                    742: 
                    743:         SET_UP_UNROLL_VARS ebx,ecx, ebx,pfnDrawWideRWEntry, LOOP_UNROLL_SHIFT
                    744:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
                    745:         mov     eax,ulWholeBytes        ;whole bytes width
                    746:         mov     edx,ulNextScan          ;offset from end of one scan line to
                    747:                                         ; start of next
                    748:         call    ecx                     ;draw the wide whole bytes
                    749: 
                    750:         pop     edi                     ;restore screen pointer
                    751:         pop     ebx                     ;restore fill scan count
                    752:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
                    753: 
                    754: 
                    755: ;-----------------------------------------------------------------------;
                    756: ; Process any left/right columns that have to be done.
                    757: ;
                    758: ;   Currently:
                    759: ;       EBX =   height to fill, in scans
                    760: ;       EDI --> first byte of left edge
                    761: ;-----------------------------------------------------------------------;
                    762: 
                    763: ;-----------------------------------------------------------------------;
                    764: ; Handle case where both edges are partial (non-whole) bytes.
                    765: ;-----------------------------------------------------------------------;
                    766:         align   4
                    767:        public do_both_edge_bytes
                    768: do_both_edge_bytes:
                    769: 
                    770: ; Set up variables for entering unrolled loop.
                    771: 
                    772:         SET_UP_UNROLL_VARS ebx,edx, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
                    773:         mov     pfnDraw1WideVector,edx
                    774: 
                    775:         mov     ecx,ulScanWidth         ;offset from one scan to next
                    776: 
                    777:         mov     esi,ulWholeBytes        ;ESI = # of whole bytes
                    778:         lea     esi,[esi+edi+1]         ;--> start for right edge
                    779:         mov     al,byte ptr ulMasks     ;this will become the Bit Mask for the
                    780:                                        ; left edge
                    781:         push    ebx                     ;preserve scan line count
                    782:         call    edx                     ;jump into the unrolled loop to draw
                    783:         pop     ebx                     ;restore scan line count
                    784: 
                    785:         mov     edi,esi                 ;point to first right edge byte
                    786:         mov     al,byte ptr ulMasks+1   ;this will become the Bit Mask for the
                    787:                                        ; right edge
                    788:         push    offset edges_done       ;return here
                    789:         jmp     pfnDraw1WideVector      ;jump into the unrolled loop to draw
                    790: 
                    791: ;-----------------------------------------------------------------------;
                    792: ; Handle case where only the left edge is partial (non-whole).
                    793: ;-----------------------------------------------------------------------;
                    794:         align   4
                    795: do_left_edge_bytes:
                    796: 
                    797: ; Set up variables for entering unrolled loop.
                    798: 
                    799:         SET_UP_UNROLL_VARS ebx,esi, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
                    800: 
                    801:         mov     ecx,ulScanWidth         ;offset from one scan to next
                    802:         mov     al,byte ptr ulMasks     ;this will become the Bit Mask for the
                    803:                                        ; left edge
                    804:         push    offset edges_done       ;return here
                    805:         jmp     esi                     ;jump into the unrolled loop to draw
                    806: 
                    807: ;-----------------------------------------------------------------------;
                    808: ; Handle case where only the right edge is partial (non-whole).
                    809: ;-----------------------------------------------------------------------;
                    810:         align   4
                    811: do_right_edge_bytes:
                    812: 
                    813: ; Set up variables for entering unrolled loop.
                    814: 
                    815:         SET_UP_UNROLL_VARS ebx,esi, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
                    816: 
                    817:         mov     ecx,ulScanWidth         ;offset from one scan to next
                    818:         add     edi,ulWholeBytes        ;--> start for right edge (remember,
                    819:                                         ; left edge is whole, so the left edge
                    820:                                         ; byte is included in the whole byte
                    821:                                         ; count)
                    822:         mov     al,byte ptr ulMasks+1   ;this will become the Bit Mask for the
                    823:                                        ; right edge
                    824:         call    esi                     ;jump into the unrolled loop to draw
                    825: 
                    826: ;-----------------------------------------------------------------------;
                    827: ; We have done all partial edges.
                    828: ;-----------------------------------------------------------------------;
                    829: 
                    830: edges_done:
                    831: 
                    832: ;-----------------------------------------------------------------------;
                    833: ; See if there are any more banks to process.
                    834: ;-----------------------------------------------------------------------;
                    835: 
                    836: check_next_bank:
                    837: 
                    838:         mov     edi,pdsurf
                    839:         mov     eax,[edi].dsurf_rcl1WindowClip.yBottom ;is the fill bottom in
                    840:         cmp     ulBottomScan,eax                       ; the current bank?
                    841:         jle     short banks_done        ;yes, so we're done
                    842:                                         ;no, map in the next bank and fill it
                    843:         mov     ulCurrentTopScan,eax    ;remember where the top of the bank
                    844:                                         ; we're about to map in is (same as
                    845:                                         ; bottom of bank we just did)
                    846: 
                    847:         ptrCall <dword ptr [edi].dsurf_pfnBankControl>,<edi,eax,JustifyTop>
                    848:                                         ;map in the bank
                    849: 
                    850: ; Compute the starting address and scan line count in this bank.
                    851: 
                    852:         mov     eax,pdsurf              ;EAX->target surface
                    853:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
                    854:         cmp     ebx,[eax].dsurf_rcl1WindowClip.yBottom
                    855:                                         ;which comes first, the bottom of the
                    856:                                         ; dest rect or the bottom of the
                    857:                                         ; current bank?
                    858:         jl      short BottomScanSet2    ;fill bottom comes first, so draw to
                    859:                                         ; that; this is the last bank in fill
                    860:         mov     ebx,[eax].dsurf_rcl1WindowClip.yBottom
                    861:                                         ;bank bottom comes first; draw to
                    862:                                         ; bottom of bank
                    863: BottomScanSet2:
                    864:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
                    865:         sub     ebx,edi                 ;# of scans to fill in bank
                    866:         imul    edi,ulScanWidth         ;offset of starting scan line
                    867: 
                    868: ; Note that the start of the bitmap will change each time through the
                    869: ; bank loop, because the start of the bitmap is varied to map the
                    870: ; desired scan line to the banking window.
                    871: 
                    872:         add     edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
                    873:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
                    874: 
                    875: ; Draw in the new bank.
                    876: 
                    877:         jmp     pfnStartDrawing
                    878: 
                    879: 
                    880: ;-----------------------------------------------------------------------;
                    881: ; Done with all banks in this fill.
                    882: 
                    883: banks_done:
                    884:         PLAIN_RET
                    885: 
                    886: endProc vTrgBlt
                    887: 
                    888: 
                    889: ;-----------------------------------------------------------------------;
                    890: ; Unrolled loops.
                    891: ; There are two kinds of unrolled loops: read-before-write (to load the
                    892: ;  latches), and write-only (for replace-type rops).
                    893: ;-----------------------------------------------------------------------;
                    894: 
                    895: 
                    896: ;-----------------------------------------------------------------------;
                    897: ; Unrolled drawing stuff for cases where read before write is required,
                    898: ; to load the latches.
                    899: ;-----------------------------------------------------------------------;
                    900: 
                    901: ; Tables of entry points into the unrolled read-before-write loops: one
                    902: ; table each for the 1-, 2-, 3-, and 4-wide loops, and one for the
                         ; 5-or-wider loop.
                         ;
                         ; Each line passes the name of the table, the label prefix used by the
                         ; matching unrolled loop (RW1 is the prefix given to UNROLL_LOOP in
                         ; draw_1_wide_rw_loop), and LOOP_UNROLL_COUNT. UNROLL_LOOP_ENTRY_TABLE is
                         ; defined outside this section; presumably it emits one entry-point
                         ; address per unrolled copy -- confirm against the macro definition.
                         ;
                         ; Within vTrgBlt, pfnDraw1WideRWEntry is handed to SET_UP_UNROLL_VARS by
                         ; the edge-drawing code, and pfnDrawWideRWEntry by
                         ; whole_bytes_non_replace_wide.
                    903: 
                    904:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideRWEntry,RW1,LOOP_UNROLL_COUNT
                    905:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideRWEntry,RW2,LOOP_UNROLL_COUNT
                    906:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideRWEntry,RW3,LOOP_UNROLL_COUNT
                    907:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideRWEntry,RW4,LOOP_UNROLL_COUNT
                    908:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideRWEntry,RWWIDE,LOOP_UNROLL_COUNT
                    909: 
                    910: ;-----------------------------------------------------------------------;
                    911: ; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
                    912: ;
                    913: ; Entry:
                    914: ;      AL = pixel mask
                    915: ;       EBX = unrolled loop count
                    916: ;       ECX = scan line width in bytes
                    917: ;       EDI = start offset
                    918: ;
                    919: ; EBX, EDI modified. All other registers preserved.
                    920: 
                    921: ;-----------------------------------------------------------------------;
                    922: ; Macro to draw one read before write byte, then advance to next scan line.
                    923: 
                    924: DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                    925: &ENTRY_LABEL&ENTRY_INDEX&:
                    926:         and     [edi],al                ;the read returns 0xFF, so AL is written
                    927:                                        ; as-is; because we're in write mode 3,
                    928:                                        ; AL becomes the Bit Mask
                    929:         add     edi,ecx                 ;point to the next scan line (ECX = scan width)
                    930:         endm    ;-----------------------------------;
                    931: 
                    932: ; 1-wide read before write loop; repeats until EBX (unrolled loop count) is 0.
                    933: 
                    934:         align   4
                    935: draw_1_wide_rw_loop     proc    near
                    936:         UNROLL_LOOP     DRAW_1_WIDE_RW,RW1,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                    937:         dec     ebx                     ;one pass through the unrolled body done
                    938:         jnz     draw_1_wide_rw_loop     ;more passes remain, back to the top
                    939: 
                    940:         ret
                    941: 
                    942: draw_1_wide_rw_loop     endp
                    943: 
                    944: ;-----------------------------------------------------------------------;
                    945: ; Macro to draw two read before write bytes, then advance to next scan line.
                    946: 
                    947: DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                    948: &ENTRY_LABEL&ENTRY_INDEX&:
                    949:         and     [edi],al                ;read then write byte 0 with AL (pixel mask)
                    950:         and     [edi+1],al              ;read then write byte 1 with AL
                    951:         add     edi,ecx                 ;point to the next scan line
                    952:         endm    ;-----------------------------------;
                    953: 
                    954: ; 2-wide read before write loop; repeats until EBX (unrolled loop count) is 0.
                    955: 
                    956:         align   4
                    957: draw_2_wide_rw_loop     proc    near
                    958:         UNROLL_LOOP     DRAW_2_WIDE_RW,RW2,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                    959:         dec     ebx                     ;one pass through the unrolled body done
                    960:         jnz     draw_2_wide_rw_loop     ;more passes remain, back to the top
                    961: 
                    962:         ret
                    963: 
                    964: draw_2_wide_rw_loop     endp
                    965: 
                    966: ;-----------------------------------------------------------------------;
                    967: ; Macro to draw three read before write bytes, then advance to next scan line.
                    968: 
                    969: DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                    970: &ENTRY_LABEL&ENTRY_INDEX&:
                    971:         and     [edi],al                ;read then write byte 0 with AL (pixel mask)
                    972:         and     [edi+1],al              ;read then write byte 1 with AL
                    973:         and     [edi+2],al              ;read then write byte 2 with AL
                    974:         add     edi,ecx                 ;point to the next scan line
                    975:         endm    ;-----------------------------------;
                    976: 
                    977: ; 3-wide read before write loop; repeats until EBX (unrolled loop count) is 0.
                    978: 
                    979:         align   4
                    980: draw_3_wide_rw_loop     proc    near
                    981:         UNROLL_LOOP     DRAW_3_WIDE_RW,RW3,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                    982:         dec     ebx                     ;one pass through the unrolled body done
                    983:         jnz     draw_3_wide_rw_loop     ;more passes remain, back to the top
                    984: 
                    985:         ret
                    986: 
                    987: draw_3_wide_rw_loop     endp
                    988: 
                    989: ;-----------------------------------------------------------------------;
                    990: ; Macro to draw four read before write bytes, then advance to next scan line.
                    991: 
                    992: DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                    993: &ENTRY_LABEL&ENTRY_INDEX&:
                    994:         and     [edi],al                ;read then write byte 0 with AL (pixel mask)
                    995:         and     [edi+1],al              ;read then write byte 1 with AL
                    996:         and     [edi+2],al              ;read then write byte 2 with AL
                    997:         and     [edi+3],al              ;read then write byte 3 with AL
                    998:         add     edi,ecx                 ;point to the next scan line
                    999:         endm    ;-----------------------------------;
                   1000: 
                   1001: ; 4-wide read before write loop; repeats until EBX (unrolled loop count) is 0.
                   1002: 
                   1003:         align   4
                   1004: draw_4_wide_rw_loop     proc    near
                   1005:         UNROLL_LOOP     DRAW_4_WIDE_RW,RW4,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1006:         dec     ebx                     ;one pass through the unrolled body done
                   1007:         jnz     draw_4_wide_rw_loop     ;more passes remain, back to the top
                   1008: 
                   1009:         ret
                   1010: 
                   1011: draw_4_wide_rw_loop     endp
                   1012: 
                   1013: ;-----------------------------------------------------------------------;
                   1014: ; Unrolled 5-or-wider read before write loop.
                   1015: ;
                   1016: ; Entry:
                   1017: ;       EAX = # of bytes to fill across scan line (needed only by 5-or-wider
                   1018: ;               handler)
                   1019: ;       EBX = unrolled loop count
                   1020: ;       EDX = offset from end of one scan line to the start of the next
                   1021: ;       EDI = start offset
                   1022: ;
                   1023: ; EBX, ECX, ESI, EDI modified. All other registers preserved.
                   1024: 
                   1025: ;-----------------------------------------------------------------------;
                   1026: ; Macro to draw five or more read before write bytes, then advance to
                   1027: ; next scan line. (Actually, will handle any number of bytes,
                   1028: ; including 0, but there are special-case handlers for narrow cases.)
                   1029: ; Works because reads of display memory return 0ffh, which then becomes the
                   1030: ; Bit Mask as it's written in write mode 3.
                   1031: 
                   1032: DRAW_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
                   1033: &ENTRY_LABEL&ENTRY_INDEX&:
                   1034:         mov     esi,edi                 ;source = destination: copy each byte onto itself
                   1035:         mov     ecx,eax                 ;ECX = # of bytes to fill across this scan line
                   1036:         rep     movsb                   ;read then write each byte; assumes DF clear - TODO confirm
                   1037:         add     edi,edx                 ;skip from end of this fill to start of the next scan
                   1038:         endm    ;-----------------------------------;
                   1039: 
                   1040: ; 5-or-wider read before write loop; repeats until EBX (unrolled loop count) is 0.
                   1041: 
                   1042:         align   4
                   1043: draw_wide_rw_loop proc  near
                   1044:         UNROLL_LOOP     DRAW_WIDE_RW,RWWIDE,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1045:         dec     ebx                     ;one pass through the unrolled body done
                   1046:         jnz     draw_wide_rw_loop       ;more passes remain, back to the top
                   1047: 
                   1048:         ret
                   1049: 
                   1050: draw_wide_rw_loop endp
                   1051: 
                   1052: 
                   1053: ;-----------------------------------------------------------------------;
                   1054: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up),
                   1055: ; for cases where read before write is NOT required.
                   1056: ;-----------------------------------------------------------------------;
                   1057: 
                   1058: ; Tables of entry points into the unrolled 1- through 8-wide write-only loops.
                   1059: ; Note that there may be separate entry tables for various alignments of a
                   1060: ; specific width, in cases where performance can be improved by using different
                   1061: ; code for different alignments.
                   1062: 
                   1063:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideWEntry,W1,LOOP_UNROLL_COUNT         ;1 wide
                   1064:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideWEntry,W2,LOOP_UNROLL_COUNT         ;2 wide
                   1065:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT ;3 wide, even start
                   1066:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWOddEntry,W3_ODD,LOOP_UNROLL_COUNT  ;3 wide, odd start
                   1067:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideWEntry,W4,LOOP_UNROLL_COUNT         ;4 wide
                   1068:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT ;5 wide, even start
                   1069:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWOddEntry,W5_ODD,LOOP_UNROLL_COUNT  ;5 wide, odd start
                   1070:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_0Entry,W6_MOD3_0,LOOP_UNROLL_COUNT ;6 wide, MOD 3 == 0
                   1071:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_1Entry,W6_MOD3_1,LOOP_UNROLL_COUNT ;6 wide, MOD 3 == 1 or 3
                   1072:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_2Entry,W6_MOD3_2,LOOP_UNROLL_COUNT ;6 wide, MOD 3 == 2
                   1073:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_0Entry,W7_MOD3_0,LOOP_UNROLL_COUNT ;7 wide, MOD 3 == 0
                   1074:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_1Entry,W7_MOD3_1,LOOP_UNROLL_COUNT ;7 wide, MOD 3 == 1
                   1075:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_2Entry,W7_MOD3_2,LOOP_UNROLL_COUNT ;7 wide, MOD 3 == 2
                   1076:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_3Entry,W7_MOD3_3,LOOP_UNROLL_COUNT ;7 wide, MOD 3 == 3
                   1077:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_0Entry,W8_MOD3_0,LOOP_UNROLL_COUNT ;8 wide, MOD 3 == 0
                   1078:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_1Entry,W8_MOD3_1,LOOP_UNROLL_COUNT ;8 wide, MOD 3 == 1
                   1079:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_2Entry,W8_MOD3_2,LOOP_UNROLL_COUNT ;8 wide, MOD 3 == 2
                   1080:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_3Entry,W8_MOD3_3,LOOP_UNROLL_COUNT ;8 wide, MOD 3 == 3
                   1081: 
                   1082: 
                   1083: ;-----------------------------------------------------------------------;
                   1084: ; Unrolled 1- through 8-wide write-only edge-drawing loops.
                   1085: ;
                   1086: ; Entry:
                   1087: ;      AL/AX/EAX = pixel mask (if AX or EAX, then 0xFFFF or 0xFFFFFFFF)
                   1088: ;       EBX = unrolled loop count
                   1089: ;       ECX = scan line width in bytes
                   1090: ;       EDI = start offset
                   1091: ;
                   1092: ; EBX, EDI modified. All other registers preserved.
                   1093: 
                   1094: ;-----------------------------------------------------------------------;
                   1095: ; Macro to draw one write-only byte, then advance to next scan line.
                   1096: 
                   1097: DRAW_1_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
                   1098: &ENTRY_LABEL&ENTRY_INDEX&:
                   1099:         mov     [edi],al                ;store AL (the pixel mask) with no prior
                   1100:                                        ; read of the byte. NOTE(review): the old
                   1101:                                        ; write mode 3 remark came from the RW macro
                   1102:         add     edi,ecx                 ;point to the next scan line
                   1103:         endm    ;-----------------------------------;
                   1104: 
                   1105: ; 1-wide write-only loop; repeats until EBX (unrolled loop count) is 0.
                   1106: 
                   1107:         align   4
                   1108: draw_1_wide_w_loop     proc    near
                   1109:         UNROLL_LOOP     DRAW_1_WIDE_W,W1,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1110:         dec     ebx                     ;one pass through the unrolled body done
                   1111:         jnz     draw_1_wide_w_loop      ;more passes remain, back to the top
                   1112: 
                   1113:         ret
                   1114: 
                   1115: draw_1_wide_w_loop     endp
                   1116: 
                   1117: ;-----------------------------------------------------------------------;
                   1118: ; Macro to draw two write-only bytes, then advance to next scan line.
                   1119: 
                   1120: DRAW_2_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
                   1121: &ENTRY_LABEL&ENTRY_INDEX&:
                   1122:         mov     [edi],ax                ;bytes 0-1 in a single word store
                   1123:         add     edi,ecx                 ;point to the next scan line
                   1124:         endm    ;-----------------------------------;
                   1125: 
                   1126: ; 2-wide write-only loop; repeats until EBX (unrolled loop count) is 0.
                   1127: 
                   1128:         align   4
                   1129: draw_2_wide_w_loop     proc    near
                   1130:         UNROLL_LOOP     DRAW_2_WIDE_W,W2,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1131:         dec     ebx                     ;one pass through the unrolled body done
                   1132:         jnz     draw_2_wide_w_loop      ;more passes remain, back to the top
                   1133: 
                   1134:         ret
                   1135: 
                   1136: draw_2_wide_w_loop     endp
                   1137: 
                   1138: ;-----------------------------------------------------------------------;
                   1139: ; Macro to draw three write-only bytes, then advance to next scan line.
                   1140: ; Optimized for even start address.
                   1141: 
                   1142: DRAW_3_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
                   1143: &ENTRY_LABEL&ENTRY_INDEX&:
                   1144:         mov     [edi],ax                ;bytes 0-1; word-aligned when EDI is even
                   1145:         mov     [edi+2],al              ;byte 2
                   1146:         add     edi,ecx                 ;point to the next scan line
                   1147:         endm    ;-----------------------------------;
                   1148: 
                   1149: ; 3-wide write-only, starting at an even address; repeats until EBX is 0.
                   1150: 
                   1151:         align   4
                   1152: draw_3_wide_w_even_loop     proc    near
                   1153:         UNROLL_LOOP     DRAW_3_WIDE_W_EVEN,W3_EVEN,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1154:         dec     ebx                     ;one pass through the unrolled body done
                   1155:         jnz     draw_3_wide_w_even_loop ;more passes remain, back to the top
                   1156: 
                   1157:         ret
                   1158: 
                   1159: draw_3_wide_w_even_loop     endp
                   1160: 
                   1161: ;-----------------------------------------------------------------------;
                   1162: ; Macro to draw three write-only bytes, then advance to next scan line.
                   1163: ; Optimized for odd start address.
                   1164: 
                   1165: DRAW_3_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
                   1166: &ENTRY_LABEL&ENTRY_INDEX&:
                   1167:         mov     [edi],al                ;byte 0
                   1168:         mov     [edi+1],ax              ;bytes 1-2; word-aligned when EDI is odd
                   1169:         add     edi,ecx                 ;point to the next scan line
                   1170:         endm    ;-----------------------------------;
                   1171: 
                   1172: ; 3-wide write-only, starting at an odd address; repeats until EBX is 0.
                   1173: 
                   1174:         align   4
                   1175: draw_3_wide_w_odd_loop     proc    near
                   1176:         UNROLL_LOOP     DRAW_3_WIDE_W_ODD,W3_ODD,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1177:         dec     ebx                     ;one pass through the unrolled body done
                   1178:         jnz     draw_3_wide_w_odd_loop  ;more passes remain, back to the top
                   1179: 
                   1180:         ret
                   1181: 
                   1182: draw_3_wide_w_odd_loop     endp
                   1183: 
                   1184: 
                   1185: ;-----------------------------------------------------------------------;
                   1186: ; Macro to draw four write-only bytes, then advance to next scan line.
                   1187: 
                   1188: DRAW_4_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
                   1189: &ENTRY_LABEL&ENTRY_INDEX&:
                   1190:         mov     [edi],eax               ;bytes 0-3 in a single dword store
                   1191:         add     edi,ecx                 ;point to the next scan line
                   1192:         endm    ;-----------------------------------;
                   1193: 
                   1194: ; 4-wide write-only loop; repeats until EBX (unrolled loop count) is 0.
                   1195: 
                   1196:         align   4
                   1197: draw_4_wide_w_loop     proc    near
                   1198:         UNROLL_LOOP     DRAW_4_WIDE_W,W4,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1199:         dec     ebx                     ;one pass through the unrolled body done
                   1200:         jnz     draw_4_wide_w_loop      ;more passes remain, back to the top
                   1201: 
                   1202:         ret
                   1203: 
                   1204: draw_4_wide_w_loop     endp
                   1205: 
                   1206: 
                   1207: ;-----------------------------------------------------------------------;
                   1208: ; Macro to draw five write-only bytes, then advance to next scan line.
                   1209: ; Optimized for even start address.
                   1210: 
                   1211: DRAW_5_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
                   1212: &ENTRY_LABEL&ENTRY_INDEX&:
                   1213:         mov     [edi],eax               ;bytes 0-3 as one dword
                   1214:         mov     [edi+4],al              ;byte 4
                   1215:         add     edi,ecx                 ;point to the next scan line
                   1216:         endm    ;-----------------------------------;
                   1217: 
                   1218: ; 5-wide write-only, starting at an even address; repeats until EBX is 0.
                   1219: 
                   1220:         align   4
                   1221: draw_5_wide_w_even_loop     proc    near
                   1222:         UNROLL_LOOP     DRAW_5_WIDE_W_EVEN,W5_EVEN,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1223:         dec     ebx                     ;one pass through the unrolled body done
                   1224:         jnz     draw_5_wide_w_even_loop ;more passes remain, back to the top
                   1225: 
                   1226:         ret
                   1227: 
                   1228: draw_5_wide_w_even_loop     endp
                   1229: 
                   1230: 
                   1231: ;-----------------------------------------------------------------------;
                   1232: ; Macro to draw five write-only bytes, then advance to next scan line.
                   1233: ; Optimized for odd start address.
                   1234: 
                   1235: DRAW_5_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
                   1236: &ENTRY_LABEL&ENTRY_INDEX&:
                   1237:         mov     [edi],al                ;byte 0
                   1238:         mov     [edi+1],eax             ;bytes 1-4 as one dword
                   1239:         add     edi,ecx                 ;point to the next scan line
                   1240:         endm    ;-----------------------------------;
                   1241: 
                   1242: ; 5-wide write-only, starting at an odd address; repeats until EBX is 0.
                   1243: 
                   1244:         align   4
                   1245: draw_5_wide_w_odd_loop     proc    near
                   1246:         UNROLL_LOOP     DRAW_5_WIDE_W_ODD,W5_ODD,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1247:         dec     ebx                     ;one pass through the unrolled body done
                   1248:         jnz     draw_5_wide_w_odd_loop  ;more passes remain, back to the top
                   1249: 
                   1250:         ret
                   1251: 
                   1252: draw_5_wide_w_odd_loop     endp
                   1253: 
                   1254: 
                   1255: ;-----------------------------------------------------------------------;
                   1256: ; Macro to draw six write-only bytes, then advance to next scan line.
                   1257: ; Optimized for start address MOD 3 == 0.
                   1258: 
                   1259: DRAW_6_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
                   1260: &ENTRY_LABEL&ENTRY_INDEX&:
                   1261:         mov     [edi],eax               ;bytes 0-3; dword-aligned if (EDI AND 3) == 0
                   1262:         mov     [edi+4],ax              ;bytes 4-5
                   1263:         add     edi,ecx                 ;point to the next scan line
                   1264:         endm    ;-----------------------------------;
                   1265: 
                   1266: ; 6-wide write-only, starting at MOD 3 == 0 (presumably address AND 3 - confirm).
                   1267: 
                   1268:         align   4
                   1269: draw_6_wide_w_mod3_0_loop     proc    near
                   1270:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_0,W6_MOD3_0,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1271:         dec     ebx                     ;one pass through the unrolled body done
                   1272:         jnz     draw_6_wide_w_mod3_0_loop ;more passes remain, back to the top
                   1273: 
                   1274:         ret
                   1275: 
                   1276: draw_6_wide_w_mod3_0_loop     endp
                   1277: 
                   1278: 
                   1279: ;-----------------------------------------------------------------------;
                   1280: ; Macro to draw six write-only bytes, then advance to next scan line.
                   1281: ; Optimized for start address MOD 3 == 1 or 3.
                   1282: 
                   1283: DRAW_6_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
                   1284: &ENTRY_LABEL&ENTRY_INDEX&:
                   1285:         mov     [edi],al                ;byte 0
                   1286:         mov     [edi+1],eax             ;bytes 1-4; an even address when EDI is odd
                   1287:         mov     [edi+5],al              ;byte 5
                   1288:         add     edi,ecx                 ;point to the next scan line
                   1289:         endm    ;-----------------------------------;
                   1290: 
                   1291: ; 6-wide write-only, starting at MOD 3 == 1 or 3 (presumably address AND 3 - confirm).
                   1292: 
                   1293:         align   4
                   1294: draw_6_wide_w_mod3_1_loop     proc    near
                   1295:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_1,W6_MOD3_1,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1296:         dec     ebx                     ;one pass through the unrolled body done
                   1297:         jnz     draw_6_wide_w_mod3_1_loop ;more passes remain, back to the top
                   1298: 
                   1299:         ret
                   1300: 
                   1301: draw_6_wide_w_mod3_1_loop     endp
                   1302: 
                   1303: 
                   1304: ;-----------------------------------------------------------------------;
                   1305: ; Macro to draw six write-only bytes, then advance to next scan line.
                   1306: ; Optimized for start address MOD 3 == 2.
                   1307: 
                   1308: DRAW_6_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
                   1309: &ENTRY_LABEL&ENTRY_INDEX&:
                   1310:         mov     [edi],ax                ;bytes 0-1
                   1311:         mov     [edi+2],eax             ;bytes 2-5; dword-aligned if (EDI AND 3) == 2
                   1312:         add     edi,ecx                 ;point to the next scan line
                   1313:         endm    ;-----------------------------------;
                   1314: 
                   1315: ; 6-wide write-only, starting at MOD 3 == 2 (presumably address AND 3 - confirm).
                   1316: 
                   1317:         align   4
                   1318: draw_6_wide_w_mod3_2_loop     proc    near
                   1319:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_2,W6_MOD3_2,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1320:         dec     ebx                     ;one pass through the unrolled body done
                   1321:         jnz     draw_6_wide_w_mod3_2_loop ;more passes remain, back to the top
                   1322: 
                   1323:         ret
                   1324: 
                   1325: draw_6_wide_w_mod3_2_loop     endp
                   1326: 
                   1327: 
                   1328: ;-----------------------------------------------------------------------;
                   1329: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1330: ; Optimized for start address MOD 3 == 0.
                   1331: 
                   1332: DRAW_7_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
                   1333: &ENTRY_LABEL&ENTRY_INDEX&:
                   1334:         mov     [edi],eax               ;bytes 0-3; dword-aligned if (EDI AND 3) == 0
                   1335:         mov     [edi+4],ax              ;bytes 4-5
                   1336:         mov     [edi+6],al              ;byte 6
                   1337:         add     edi,ecx                 ;point to the next scan line
                   1338:         endm    ;-----------------------------------;
                   1339: 
                   1340: ; 7-wide write-only, starting at MOD 3 == 0 (presumably address AND 3 - confirm).
                   1341: 
                   1342:         align   4
                   1343: draw_7_wide_w_mod3_0_loop     proc    near
                   1344:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_0,W7_MOD3_0,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1345:         dec     ebx                     ;one pass through the unrolled body done
                   1346:         jnz     draw_7_wide_w_mod3_0_loop ;more passes remain, back to the top
                   1347: 
                   1348:         ret
                   1349: 
                   1350: draw_7_wide_w_mod3_0_loop     endp
                   1351: 
                   1352: 
                   1353: ;-----------------------------------------------------------------------;
                   1354: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1355: ; Optimized for start address MOD 3 == 1.
                   1356: 
                   1357: DRAW_7_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
                   1358: &ENTRY_LABEL&ENTRY_INDEX&:
                   1359:         mov     [edi],al                ;byte 0
                   1360:         mov     [edi+1],ax              ;bytes 1-2; word-aligned if (EDI AND 3) == 1
                   1361:         mov     [edi+3],eax             ;bytes 3-6; dword-aligned if (EDI AND 3) == 1
                   1362:         add     edi,ecx                 ;point to the next scan line
                   1363:         endm    ;-----------------------------------;
                   1364: 
                   1365: ; 7-wide write-only, starting at MOD 3 == 1 (presumably address AND 3 - confirm).
                   1366: 
                   1367:         align   4
                   1368: draw_7_wide_w_mod3_1_loop     proc    near
                   1369:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_1,W7_MOD3_1,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1370:         dec     ebx                     ;one pass through the unrolled body done
                   1371:         jnz     draw_7_wide_w_mod3_1_loop ;more passes remain, back to the top
                   1372: 
                   1373:         ret
                   1374: 
                   1375: draw_7_wide_w_mod3_1_loop     endp
                   1376: 
                   1377: 
                   1378: ;-----------------------------------------------------------------------;
                   1379: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1380: ; Optimized for start address MOD 3 == 2.
                   1381: 
                   1382: DRAW_7_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
                   1383: &ENTRY_LABEL&ENTRY_INDEX&:
                   1384:         mov     [edi],ax                ;bytes 0-1
                   1385:         mov     [edi+2],eax             ;bytes 2-5; dword-aligned if (EDI AND 3) == 2
                   1386:         mov     [edi+6],al              ;byte 6
                   1387:         add     edi,ecx                 ;point to the next scan line
                   1388:         endm    ;-----------------------------------;
                   1389: 
                   1390: ; 7-wide write-only, starting at MOD 3 == 2 (presumably address AND 3 - confirm).
                   1391: 
                   1392:         align   4
                   1393: draw_7_wide_w_mod3_2_loop     proc    near
                   1394:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_2,W7_MOD3_2,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1395:         dec     ebx                     ;one pass through the unrolled body done
                   1396:         jnz     draw_7_wide_w_mod3_2_loop ;more passes remain, back to the top
                   1397: 
                   1398:         ret
                   1399: 
                   1400: draw_7_wide_w_mod3_2_loop     endp
                   1401: 
                   1402: 
                   1403: ;-----------------------------------------------------------------------;
                   1404: ; Macro to draw seven write-only bytes, then advance to next scan line.
                   1405: ; Optimized for start address MOD 3 == 3.
                   1406: 
                   1407: DRAW_7_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
                   1408: &ENTRY_LABEL&ENTRY_INDEX&:
                   1409:         mov     [edi],al                ;byte 0
                   1410:         mov     [edi+1],eax             ;bytes 1-4; dword-aligned if (EDI AND 3) == 3
                   1411:         mov     [edi+5],ax              ;bytes 5-6
                   1412:         add     edi,ecx                 ;point to the next scan line
                   1413:         endm    ;-----------------------------------;
                   1414: 
                   1415: ; 7-wide write-only, starting at MOD 3 == 3 (presumably address AND 3 - confirm).
                   1416: 
                   1417:         align   4
                   1418: draw_7_wide_w_mod3_3_loop     proc    near
                   1419:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_3,W7_MOD3_3,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1420:         dec     ebx                     ;one pass through the unrolled body done
                   1421:         jnz     draw_7_wide_w_mod3_3_loop ;more passes remain, back to the top
                   1422: 
                   1423:         ret
                   1424: 
                   1425: draw_7_wide_w_mod3_3_loop     endp
                   1426: 
                   1427: 
                   1428: ;-----------------------------------------------------------------------;
                   1429: ; Macro to draw eight write-only bytes, then advance to next scan line.
                   1430: ; Optimized for start address MOD 3 == 0.
                   1431: 
                   1432: DRAW_8_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
                   1433: &ENTRY_LABEL&ENTRY_INDEX&:
                   1434:         mov     [edi],eax               ;bytes 0-3; dword-aligned if (EDI AND 3) == 0
                   1435:         mov     [edi+4],eax             ;bytes 4-7
                   1436:         add     edi,ecx                 ;point to the next scan line
                   1437:         endm    ;-----------------------------------;
                   1438: 
                   1439: ; 8-wide write-only, starting at MOD 3 == 0 (presumably address AND 3 - confirm).
                   1440: 
                   1441:         align   4
                   1442: draw_8_wide_w_mod3_0_loop     proc    near
                   1443:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_0,W8_MOD3_0,LOOP_UNROLL_COUNT ;presumably LOOP_UNROLL_COUNT copies - confirm
                   1444:         dec     ebx                     ;one pass through the unrolled body done
                   1445:         jnz     draw_8_wide_w_mod3_0_loop ;more passes remain, back to the top
                   1446: 
                   1447:         ret
                   1448: 
                   1449: draw_8_wide_w_mod3_0_loop     endp
                   1450: 
                   1451: 
                   1452: ;-----------------------------------------------------------------------;
                   1453: ; Macro to draw eight write-only bytes, then advance to next scan line.
                   1454: ; Optimized for start address AND 3 == 1: byte, word, dword, byte.
                   1455: 
                   1456: DRAW_8_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
                   1457: &ENTRY_LABEL&ENTRY_INDEX&:
                   1458:         mov     [edi],al                ;byte 0
                   1459:         mov     [edi+1],ax              ;bytes 1-2
                   1460:         mov     [edi+3],eax             ;bytes 3-6 (dword-aligned when EDI AND 3 == 1)
                   1461:         mov     [edi+7],al              ;byte 7
                   1462:         add     edi,ecx                 ;point to the next scan line
                   1463:         endm    ;-----------------------------------;
                   1464: 
                   1465: ; 8-wide write-only, starting at address AND 3 == 1.
                   1466: 
                   1467:         align   4
                   1468: draw_8_wide_w_mod3_1_loop     proc    near
                   1469:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_1,W8_MOD3_1,LOOP_UNROLL_COUNT
                   1470:         dec     ebx                     ;one pass through the unrolled body done
                   1471:         jnz     draw_8_wide_w_mod3_1_loop ;loop until EBX reaches zero
                   1472: 
                   1473:         ret
                   1474: 
                   1475: draw_8_wide_w_mod3_1_loop     endp
                   1476: 
                   1477: 
                   1478: ;-----------------------------------------------------------------------;
                   1479: ; Macro to draw eight write-only bytes, then advance to next scan line.
                   1480: ; Optimized for start address AND 3 == 2: word, dword, word.
                   1481: 
                   1482: DRAW_8_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
                   1483: &ENTRY_LABEL&ENTRY_INDEX&:
                   1484:         mov     [edi],ax                ;bytes 0-1
                   1485:         mov     [edi+2],eax             ;bytes 2-5 (dword-aligned when EDI AND 3 == 2)
                   1486:         mov     [edi+6],ax              ;bytes 6-7
                   1487:         add     edi,ecx                 ;point to the next scan line
                   1488:         endm    ;-----------------------------------;
                   1489: 
                   1490: ; 8-wide write-only, starting at address AND 3 == 2.
                   1491: 
                   1492:         align   4
                   1493: draw_8_wide_w_mod3_2_loop     proc    near
                   1494:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_2,W8_MOD3_2,LOOP_UNROLL_COUNT
                   1495:         dec     ebx                     ;one pass through the unrolled body done
                   1496:         jnz     draw_8_wide_w_mod3_2_loop ;loop until EBX reaches zero
                   1497: 
                   1498:         ret
                   1499: 
                   1500: draw_8_wide_w_mod3_2_loop     endp
                   1501: 
                   1502: 
                   1503: ;-----------------------------------------------------------------------;
                   1504: ; Macro to draw eight write-only bytes, then advance to next scan line.
                   1505: ; Optimized for start address AND 3 == 3: byte, dword, word, byte.
                   1506: 
                   1507: DRAW_8_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
                   1508: &ENTRY_LABEL&ENTRY_INDEX&:
                   1509:         mov     [edi],al                ;byte 0
                   1510:         mov     [edi+1],eax             ;bytes 1-4 (dword-aligned when EDI AND 3 == 3)
                   1511:         mov     [edi+5],ax              ;bytes 5-6
                   1512:         mov     [edi+7],al              ;byte 7
                   1513:         add     edi,ecx                 ;point to the next scan line
                   1514:         endm    ;-----------------------------------;
                   1515: 
                   1516: ; 8-wide write-only, starting at address AND 3 == 3.
                   1517: 
                   1518:         align   4
                   1519: draw_8_wide_w_mod3_3_loop     proc    near
                   1520:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_3,W8_MOD3_3,LOOP_UNROLL_COUNT
                   1521:         dec     ebx                     ;one pass through the unrolled body done
                   1522:         jnz     draw_8_wide_w_mod3_3_loop ;loop until EBX reaches zero
                   1523: 
                   1524:         ret
                   1525: 
                   1526: draw_8_wide_w_mod3_3_loop     endp
                   1527: 
                   1528: ;-----------------------------------------------------------------------;
                   1529: ; Unrolled loop stuff for wide replace-type rops (arbitrary width).
                   1530: ;-----------------------------------------------------------------------;
                   1531: 
                   1532: ; Tables of entry points into unrolled wide write-only loops, one table per W_<leading><trailing> byte-count case.
                   1533:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW00Entry,W_00,LOOP_UNROLL_COUNT
                   1534:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW01Entry,W_01,LOOP_UNROLL_COUNT
                   1535:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW02Entry,W_02,LOOP_UNROLL_COUNT
                   1536:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW03Entry,W_03,LOOP_UNROLL_COUNT
                   1537:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW10Entry,W_10,LOOP_UNROLL_COUNT
                   1538:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW11Entry,W_11,LOOP_UNROLL_COUNT
                   1539:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW12Entry,W_12,LOOP_UNROLL_COUNT
                   1540:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW13Entry,W_13,LOOP_UNROLL_COUNT
                   1541:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW20Entry,W_20,LOOP_UNROLL_COUNT
                   1542:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW21Entry,W_21,LOOP_UNROLL_COUNT
                   1543:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW22Entry,W_22,LOOP_UNROLL_COUNT
                   1544:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW23Entry,W_23,LOOP_UNROLL_COUNT
                   1545:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW30Entry,W_30,LOOP_UNROLL_COUNT
                   1546:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW31Entry,W_31,LOOP_UNROLL_COUNT
                   1547:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW32Entry,W_32,LOOP_UNROLL_COUNT
                   1548:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW33Entry,W_33,LOOP_UNROLL_COUNT
                   1549: 
                   1550: ;-----------------------------------------------------------------------;
                   1551: ; Macro to draw n write-only bytes, 0 leading bytes, 0 trailing bytes,
                   1552: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1553: 
                   1554: DRAW_WIDE_W_00 macro ENTRY_LABEL,ENTRY_INDEX
                   1555: &ENTRY_LABEL&ENTRY_INDEX&:
                   1556:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1557:         rep     stosd           ;fill all whole bytes as dwords from EAX
                   1558:         add     edi,edx         ;point to the next scan line
                   1559:         endm    ;-----------------------------------;
                   1560: 
                   1561: ; N-wide write-only, 0 leading bytes, 0 trailing bytes.
                   1562: ;  EAX = 0ffffh (data stored by STOSD)
                   1563: ;  EBX = count of scans to fill
                   1564: ;  EDX = offset from end of one scan's fill to start of next
                   1565: ;  ESI = # of dwords to fill
                   1566: ;  EDI = target address to fill
                   1567: 
                   1568:         align   4
                   1569: draw_wide_w_00_loop     proc    near
                   1570:         UNROLL_LOOP     DRAW_WIDE_W_00,W_00,LOOP_UNROLL_COUNT
                   1571:         dec     ebx             ;one pass through the unrolled body done
                   1572:         jnz     draw_wide_w_00_loop ;loop until EBX reaches zero
                   1573: 
                   1574:         ret
                   1575: 
                   1576: draw_wide_w_00_loop     endp
                   1577: 
                   1578: 
                   1579: ;-----------------------------------------------------------------------;
                   1580: ; Macro to draw n write-only bytes, 0 leading bytes, 1 trailing byte,
                   1581: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1582: 
                   1583: DRAW_WIDE_W_01 macro ENTRY_LABEL,ENTRY_INDEX
                   1584: &ENTRY_LABEL&ENTRY_INDEX&:
                   1585:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1586:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1587:         stosb                   ;fill the trailing byte from AL
                   1588:         add     edi,edx         ;point to the next scan line
                   1589:         endm    ;-----------------------------------;
                   1590: 
                   1591: ; N-wide write-only, 0 leading bytes, 1 trailing byte.
                   1592: ;  EAX = fill data (STOSD stores EAX, STOSB stores AL)
                   1593: ;  EBX = count of scans to fill
                   1594: ;  EDX = offset from end of one scan's fill to start of next
                   1595: ;  ESI = # of dwords to fill
                   1596: ;  EDI = target address to fill
                   1597: 
                   1598:         align   4
                   1599: draw_wide_w_01_loop     proc    near
                   1600:         UNROLL_LOOP     DRAW_WIDE_W_01,W_01,LOOP_UNROLL_COUNT
                   1601:         dec     ebx             ;one pass through the unrolled body done
                   1602:         jnz     draw_wide_w_01_loop ;loop until EBX reaches zero
                   1603: 
                   1604:         ret
                   1605: 
                   1606: draw_wide_w_01_loop     endp
                   1607: 
                   1608: 
                   1609: ;-----------------------------------------------------------------------;
                   1610: ; Macro to draw n write-only bytes, 0 leading bytes, 2 trailing bytes,
                   1611: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1612: 
                   1613: DRAW_WIDE_W_02 macro ENTRY_LABEL,ENTRY_INDEX
                   1614: &ENTRY_LABEL&ENTRY_INDEX&:
                   1615:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1616:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1617:         stosw                   ;fill the trailing word from AX
                   1618:         add     edi,edx         ;point to the next scan line
                   1619:         endm    ;-----------------------------------;
                   1620: 
                   1621: ; N-wide write-only, 0 leading bytes, 2 trailing bytes.
                   1622: ;  EAX = fill data (STOSD stores EAX, STOSW stores AX)
                   1623: ;  EBX = count of scans to fill
                   1624: ;  EDX = offset from end of one scan's fill to start of next
                   1625: ;  ESI = # of dwords to fill
                   1626: ;  EDI = target address to fill
                   1627: 
                   1628:         align   4
                   1629: draw_wide_w_02_loop     proc    near
                   1630:         UNROLL_LOOP     DRAW_WIDE_W_02,W_02,LOOP_UNROLL_COUNT
                   1631:         dec     ebx             ;one pass through the unrolled body done
                   1632:         jnz     draw_wide_w_02_loop ;loop until EBX reaches zero
                   1633: 
                   1634:         ret
                   1635: 
                   1636: draw_wide_w_02_loop     endp
                   1637: 
                   1638: 
                   1639: ;-----------------------------------------------------------------------;
                   1640: ; Macro to draw n write-only bytes, 0 leading bytes, 3 trailing bytes,
                   1641: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1642: 
                   1643: DRAW_WIDE_W_03 macro ENTRY_LABEL,ENTRY_INDEX
                   1644: &ENTRY_LABEL&ENTRY_INDEX&:
                   1645:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1646:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1647:         stosw                   ;fill the trailing word from AX
                   1648:         stosb                   ;fill the trailing byte from AL
                   1649:         add     edi,edx         ;point to the next scan line
                   1650:         endm    ;-----------------------------------;
                   1651: 
                   1652: ; N-wide write-only, 0 leading bytes, 3 trailing bytes.
                   1653: ;  EAX = fill data (STOSD/STOSW/STOSB store EAX/AX/AL)
                   1654: ;  EBX = count of scans to fill
                   1655: ;  EDX = offset from end of one scan's fill to start of next
                   1656: ;  ESI = # of dwords to fill
                   1657: ;  EDI = target address to fill
                   1658: 
                   1659:         align   4
                   1660: draw_wide_w_03_loop     proc    near
                   1661:         UNROLL_LOOP     DRAW_WIDE_W_03,W_03,LOOP_UNROLL_COUNT
                   1662:         dec     ebx             ;one pass through the unrolled body done
                   1663:         jnz     draw_wide_w_03_loop ;loop until EBX reaches zero
                   1664: 
                   1665:         ret
                   1666: 
                   1667: draw_wide_w_03_loop     endp
                   1668: 
                   1669: 
                   1670: ;-----------------------------------------------------------------------;
                   1671: ; Macro to draw n write-only bytes, 1 leading byte, 0 trailing bytes,
                   1672: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1673: 
                   1674: DRAW_WIDE_W_10 macro ENTRY_LABEL,ENTRY_INDEX
                   1675: &ENTRY_LABEL&ENTRY_INDEX&:
                   1676:         stosb                   ;fill the leading byte from AL
                   1677:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1678:         rep     stosd           ;fill all whole bytes as dwords from EAX
                   1679:         add     edi,edx         ;point to the next scan line
                   1680:         endm    ;-----------------------------------;
                   1681: 
                   1682: ; N-wide write-only, 1 leading byte, 0 trailing bytes.
                   1683: ;  EAX = fill data (STOSD stores EAX, STOSB stores AL)
                   1684: ;  EBX = count of scans to fill
                   1685: ;  EDX = offset from end of one scan's fill to start of next
                   1686: ;  ESI = # of dwords to fill
                   1687: ;  EDI = target address to fill
                   1688: 
                   1689:         align   4
                   1690: draw_wide_w_10_loop     proc    near
                   1691:         UNROLL_LOOP     DRAW_WIDE_W_10,W_10,LOOP_UNROLL_COUNT
                   1692:         dec     ebx             ;one pass through the unrolled body done
                   1693:         jnz     draw_wide_w_10_loop ;loop until EBX reaches zero
                   1694: 
                   1695:         ret
                   1696: 
                   1697: draw_wide_w_10_loop     endp
                   1698: 
                   1699: 
                   1700: ;-----------------------------------------------------------------------;
                   1701: ; Macro to draw n write-only bytes, 1 leading byte, 1 trailing byte,
                   1702: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1703: 
                   1704: DRAW_WIDE_W_11 macro ENTRY_LABEL,ENTRY_INDEX
                   1705: &ENTRY_LABEL&ENTRY_INDEX&:
                   1706:         stosb                   ;fill the leading byte from AL
                   1707:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1708:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1709:         stosb                   ;fill the trailing byte from AL
                   1710:         add     edi,edx         ;point to the next scan line
                   1711:         endm    ;-----------------------------------;
                   1712: 
                   1713: ; N-wide write-only, 1 leading byte, 1 trailing byte.
                   1714: ;  EAX = fill data (STOSD stores EAX, STOSB stores AL)
                   1715: ;  EBX = count of scans to fill
                   1716: ;  EDX = offset from end of one scan's fill to start of next
                   1717: ;  ESI = # of dwords to fill
                   1718: ;  EDI = target address to fill
                   1719: 
                   1720:         align   4
                   1721: draw_wide_w_11_loop     proc    near
                   1722:         UNROLL_LOOP     DRAW_WIDE_W_11,W_11,LOOP_UNROLL_COUNT
                   1723:         dec     ebx             ;one pass through the unrolled body done
                   1724:         jnz     draw_wide_w_11_loop ;loop until EBX reaches zero
                   1725: 
                   1726:         ret
                   1727: 
                   1728: draw_wide_w_11_loop     endp
                   1729: 
                   1730: 
                   1731: ;-----------------------------------------------------------------------;
                   1732: ; Macro to draw n write-only bytes, 1 leading byte, 2 trailing bytes,
                   1733: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1734: 
                   1735: DRAW_WIDE_W_12 macro ENTRY_LABEL,ENTRY_INDEX
                   1736: &ENTRY_LABEL&ENTRY_INDEX&:
                   1737:         stosb                   ;fill the leading byte from AL
                   1738:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1739:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1740:         stosw                   ;fill the trailing word from AX
                   1741:         add     edi,edx         ;point to the next scan line
                   1742:         endm    ;-----------------------------------;
                   1743: 
                   1744: ; N-wide write-only, 1 leading byte, 2 trailing bytes.
                   1745: ;  EAX = fill data (STOSD/STOSW/STOSB store EAX/AX/AL)
                   1746: ;  EBX = count of scans to fill
                   1747: ;  EDX = offset from end of one scan's fill to start of next
                   1748: ;  ESI = # of dwords to fill
                   1749: ;  EDI = target address to fill
                   1750: 
                   1751:         align   4
                   1752: draw_wide_w_12_loop     proc    near
                   1753:         UNROLL_LOOP     DRAW_WIDE_W_12,W_12,LOOP_UNROLL_COUNT
                   1754:         dec     ebx             ;one pass through the unrolled body done
                   1755:         jnz     draw_wide_w_12_loop ;loop until EBX reaches zero
                   1756: 
                   1757:         ret
                   1758: 
                   1759: draw_wide_w_12_loop     endp
                   1760: 
                   1761: 
                   1762: ;-----------------------------------------------------------------------;
                   1763: ; Macro to draw n write-only bytes, 1 leading byte, 3 trailing bytes,
                   1764: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1765: 
                   1766: DRAW_WIDE_W_13 macro ENTRY_LABEL,ENTRY_INDEX
                   1767: &ENTRY_LABEL&ENTRY_INDEX&:
                   1768:         stosb                   ;fill the leading byte from AL
                   1769:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1770:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1771:         stosw                   ;fill the trailing word from AX
                   1772:         stosb                   ;fill the trailing byte from AL
                   1773:         add     edi,edx         ;point to the next scan line
                   1774:         endm    ;-----------------------------------;
                   1775: 
                   1776: ; N-wide write-only, 1 leading byte, 3 trailing bytes.
                   1777: ;  EAX = fill data (STOSD/STOSW/STOSB store EAX/AX/AL)
                   1778: ;  EBX = count of scans to fill
                   1779: ;  EDX = offset from end of one scan's fill to start of next
                   1780: ;  ESI = # of dwords to fill
                   1781: ;  EDI = target address to fill
                   1782: 
                   1783:         align   4
                   1784: draw_wide_w_13_loop     proc    near
                   1785:         UNROLL_LOOP     DRAW_WIDE_W_13,W_13,LOOP_UNROLL_COUNT
                   1786:         dec     ebx             ;one pass through the unrolled body done
                   1787:         jnz     draw_wide_w_13_loop ;loop until EBX reaches zero
                   1788: 
                   1789:         ret
                   1790: 
                   1791: draw_wide_w_13_loop     endp
                   1792: 
                   1793: 
                   1794: ;-----------------------------------------------------------------------;
                   1795: ; Macro to draw n write-only bytes, 2 leading bytes, 0 trailing bytes,
                   1796: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1797: 
                   1798: DRAW_WIDE_W_20 macro ENTRY_LABEL,ENTRY_INDEX
                   1799: &ENTRY_LABEL&ENTRY_INDEX&:
                   1800:         stosw                   ;fill the leading word from AX
                   1801:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1802:         rep     stosd           ;fill all whole bytes as dwords from EAX
                   1803:         add     edi,edx         ;point to the next scan line
                   1804:         endm    ;-----------------------------------;
                   1805: 
                   1806: ; N-wide write-only, 2 leading bytes, 0 trailing bytes.
                   1807: ;  EAX = fill data (STOSD stores EAX, STOSW stores AX)
                   1808: ;  EBX = count of scans to fill
                   1809: ;  EDX = offset from end of one scan's fill to start of next
                   1810: ;  ESI = # of dwords to fill
                   1811: ;  EDI = target address to fill
                   1812: 
                   1813:         align   4
                   1814: draw_wide_w_20_loop     proc    near
                   1815:         UNROLL_LOOP     DRAW_WIDE_W_20,W_20,LOOP_UNROLL_COUNT
                   1816:         dec     ebx             ;one pass through the unrolled body done
                   1817:         jnz     draw_wide_w_20_loop ;loop until EBX reaches zero
                   1818: 
                   1819:         ret
                   1820: 
                   1821: draw_wide_w_20_loop     endp
                   1822: 
                   1823: 
                   1824: ;-----------------------------------------------------------------------;
                   1825: ; Macro to draw n write-only bytes, 2 leading bytes, 1 trailing byte,
                   1826: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1827: 
                   1828: DRAW_WIDE_W_21 macro ENTRY_LABEL,ENTRY_INDEX
                   1829: &ENTRY_LABEL&ENTRY_INDEX&:
                   1830:         stosw                   ;fill the leading word from AX
                   1831:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1832:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1833:         stosb                   ;fill the trailing byte from AL
                   1834:         add     edi,edx         ;point to the next scan line
                   1835:         endm    ;-----------------------------------;
                   1836: 
                   1837: ; N-wide write-only, 2 leading bytes, 1 trailing byte.
                   1838: ;  EAX = fill data (STOSD/STOSW/STOSB store EAX/AX/AL)
                   1839: ;  EBX = count of scans to fill
                   1840: ;  EDX = offset from end of one scan's fill to start of next
                   1841: ;  ESI = # of dwords to fill
                   1842: ;  EDI = target address to fill
                   1843: 
                   1844:         align   4
                   1845: draw_wide_w_21_loop     proc    near
                   1846:         UNROLL_LOOP     DRAW_WIDE_W_21,W_21,LOOP_UNROLL_COUNT
                   1847:         dec     ebx             ;one pass through the unrolled body done
                   1848:         jnz     draw_wide_w_21_loop ;loop until EBX reaches zero
                   1849: 
                   1850:         ret
                   1851: 
                   1852: draw_wide_w_21_loop     endp
                   1853: 
                   1854: 
                   1855: ;-----------------------------------------------------------------------;
                   1856: ; Macro to draw n write-only bytes, 2 leading bytes, 2 trailing bytes,
                   1857: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1858: 
                   1859: DRAW_WIDE_W_22 macro ENTRY_LABEL,ENTRY_INDEX
                   1860: &ENTRY_LABEL&ENTRY_INDEX&:
                   1861:         stosw                   ;fill the leading word from AX
                   1862:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1863:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1864:         stosw                   ;fill the trailing word from AX
                   1865:         add     edi,edx         ;point to the next scan line
                   1866:         endm    ;-----------------------------------;
                   1867: 
                   1868: ; N-wide write-only, 2 leading bytes, 2 trailing bytes.
                   1869: ;  EAX = fill data (STOSD stores EAX, STOSW stores AX)
                   1870: ;  EBX = count of scans to fill
                   1871: ;  EDX = offset from end of one scan's fill to start of next
                   1872: ;  ESI = # of dwords to fill
                   1873: ;  EDI = target address to fill
                   1874: 
                   1875:         align   4
                   1876: draw_wide_w_22_loop     proc    near
                   1877:         UNROLL_LOOP     DRAW_WIDE_W_22,W_22,LOOP_UNROLL_COUNT
                   1878:         dec     ebx             ;one pass through the unrolled body done
                   1879:         jnz     draw_wide_w_22_loop ;loop until EBX reaches zero
                   1880: 
                   1881:         ret
                   1882: 
                   1883: draw_wide_w_22_loop     endp
                   1884: 
                   1885: 
                   1886: ;-----------------------------------------------------------------------;
                   1887: ; Macro to draw n write-only bytes, 2 leading bytes, 3 trailing bytes,
                   1888: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1889: 
                   1890: DRAW_WIDE_W_23 macro ENTRY_LABEL,ENTRY_INDEX
                   1891: &ENTRY_LABEL&ENTRY_INDEX&:
                   1892:         stosw                   ;fill the leading word from AX
                   1893:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1894:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1895:         stosw                   ;fill the trailing word from AX
                   1896:         stosb                   ;fill the trailing byte from AL
                   1897:         add     edi,edx         ;point to the next scan line
                   1898:         endm    ;-----------------------------------;
                   1899: 
                   1900: ; N-wide write-only, 2 leading bytes, 3 trailing bytes.
                   1901: ;  EAX = fill data (STOSD/STOSW/STOSB store EAX/AX/AL)
                   1902: ;  EBX = count of scans to fill
                   1903: ;  EDX = offset from end of one scan's fill to start of next
                   1904: ;  ESI = # of dwords to fill
                   1905: ;  EDI = target address to fill
                   1906: 
                   1907:         align   4
                   1908: draw_wide_w_23_loop     proc    near
                   1909:         UNROLL_LOOP     DRAW_WIDE_W_23,W_23,LOOP_UNROLL_COUNT
                   1910:         dec     ebx             ;one pass through the unrolled body done
                   1911:         jnz     draw_wide_w_23_loop ;loop until EBX reaches zero
                   1912: 
                   1913:         ret
                   1914: 
                   1915: draw_wide_w_23_loop     endp
                   1916: 
                   1917: 
                   1918: ;-----------------------------------------------------------------------;
                   1919: ; Macro to draw n write-only bytes, 3 leading bytes, 0 trailing bytes,
                   1920: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1921: 
                   1922: DRAW_WIDE_W_30 macro ENTRY_LABEL,ENTRY_INDEX
                   1923: &ENTRY_LABEL&ENTRY_INDEX&:
                   1924:         stosb                   ;fill the leading byte from AL
                   1925:         stosw                   ;fill the leading word from AX
                   1926:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1927:         rep     stosd           ;fill all whole bytes as dwords from EAX
                   1928:         add     edi,edx         ;point to the next scan line
                   1929:         endm    ;-----------------------------------;
                   1930: 
                   1931: ; N-wide write-only, 3 leading bytes, 0 trailing bytes.
                   1932: ;  EAX = fill data (STOSD/STOSW/STOSB store EAX/AX/AL)
                   1933: ;  EBX = count of scans to fill
                   1934: ;  EDX = offset from end of one scan's fill to start of next
                   1935: ;  ESI = # of dwords to fill
                   1936: ;  EDI = target address to fill
                   1937: 
                   1938:         align   4
                   1939: draw_wide_w_30_loop     proc    near
                   1940:         UNROLL_LOOP     DRAW_WIDE_W_30,W_30,LOOP_UNROLL_COUNT
                   1941:         dec     ebx             ;one pass through the unrolled body done
                   1942:         jnz     draw_wide_w_30_loop ;loop until EBX reaches zero
                   1943: 
                   1944:         ret
                   1945: 
                   1946: draw_wide_w_30_loop     endp
                   1947: 
                   1948: 
                   1949: ;-----------------------------------------------------------------------;
                   1950: ; Macro to draw n write-only bytes, 3 leading bytes, 1 trailing byte,
                   1951: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1952: 
                   1953: DRAW_WIDE_W_31 macro ENTRY_LABEL,ENTRY_INDEX
                   1954: &ENTRY_LABEL&ENTRY_INDEX&:
                   1955:         stosb                   ;fill the leading byte from AL
                   1956:         stosw                   ;fill the leading word from AX
                   1957:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1958:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1959:         stosb                   ;fill the trailing byte from AL
                   1960:         add     edi,edx         ;point to the next scan line
                   1961:         endm    ;-----------------------------------;
                   1962: 
                   1963: ; N-wide write-only, 3 leading bytes, 1 trailing byte.
                   1964: ;  EAX = fill data (STOSD/STOSW/STOSB store EAX/AX/AL)
                   1965: ;  EBX = count of scans to fill
                   1966: ;  EDX = offset from end of one scan's fill to start of next
                   1967: ;  ESI = # of dwords to fill
                   1968: ;  EDI = target address to fill
                   1969: 
                   1970:         align   4
                   1971: draw_wide_w_31_loop     proc    near
                   1972:         UNROLL_LOOP     DRAW_WIDE_W_31,W_31,LOOP_UNROLL_COUNT
                   1973:         dec     ebx             ;one pass through the unrolled body done
                   1974:         jnz     draw_wide_w_31_loop ;loop until EBX reaches zero
                   1975: 
                   1976:         ret
                   1977: 
                   1978: draw_wide_w_31_loop     endp
                   1979: 
                   1980: 
                   1981: ;-----------------------------------------------------------------------;
                   1982: ; Macro to draw n write-only bytes, 3 leading bytes, 2 trailing bytes,
                   1983: ; then advance to next scan line.  Clobbers ECX (REP count).
                   1984: 
                   1985: DRAW_WIDE_W_32 macro ENTRY_LABEL,ENTRY_INDEX
                   1986: &ENTRY_LABEL&ENTRY_INDEX&:
                   1987:         stosb                   ;fill the leading byte from AL
                   1988:         stosw                   ;fill the leading word from AX
                   1989:         mov     ecx,esi         ;ECX = # of whole dwords (REP count)
                   1990:         rep     stosd           ;fill whole bytes as dwords from EAX
                   1991:         stosw                   ;fill the trailing word from AX
                   1992:         add     edi,edx         ;point to the next scan line
                   1993:         endm    ;-----------------------------------;
                   1994: 
                   1995: ; N-wide write-only, 3 leading bytess, 2 trailing byte.
                   1996: ;  EAX = # of dwords to fill
                   1997: ;  EBX = count of scans to fill
                   1998: ;  EDX = offset from end of one scan's fill to start of next
                   1999: ;  ESI = # of dwords to fill
                   2000: ;  EDI = target address to fill
                   2001: 
                   2002:         align   4
                   2003: draw_wide_w_32_loop     proc    near
                         ; Scan-fill loop whose body is generated by UNROLL_LOOP from the
                         ; DRAW_WIDE_W_32 macro, with W_32 and LOOP_UNROLL_COUNT as arguments.
                         ; NOTE(review): UNROLL_LOOP is defined outside this chunk; presumably
                         ; it expands DRAW_WIDE_W_32 LOOP_UNROLL_COUNT times with W_32<n>
                         ; entry labels - confirm against the definition.
                   2004:         UNROLL_LOOP     DRAW_WIDE_W_32,W_32,LOOP_UNROLL_COUNT
                   2005:         dec     ebx             ;one pass over the generated body done
                   2006:         jnz     draw_wide_w_32_loop ;repeat until EBX reaches 0
                   2007: 
                   2008:         ret                     ;near return to the caller
                   2009: 
                   2010: draw_wide_w_32_loop     endp
                   2011: 
                   2012: 
                   2013: ;-----------------------------------------------------------------------;
                   2014: ; Macro to draw n write-only bytes, 3 leading bytes, 3 trailing bytes,
                   2015: ; then advance to next scan line.
                   2016: 
                   2017: DRAW_WIDE_W_33 macro ENTRY_LABEL,ENTRY_INDEX
                         ; Emits the code to fill one scan: 3 leading bytes, ESI dwords, then
                         ; 3 trailing bytes, all taken from EAX/AX/AL via STOS at [EDI].
                         ; The two macro arguments are pasted together to form the label
                         ; placed at the top of the expansion.
                         ; Uses: EAX = fill value, ESI = dword count, EDX = scan-to-scan skip.
                         ; Changes: ECX (left at 0 by REP), EDI (advanced), flags (by ADD).
                         ; NOTE(review): assumes the direction flag is clear so STOS moves
                         ; EDI forward - confirm the caller guarantees this.
                   2018: &ENTRY_LABEL&ENTRY_INDEX&:
                   2019:         stosb                   ;store AL: 1st of the 3 leading bytes
                   2020:         stosw                   ;store AX: 2nd and 3rd leading bytes
                   2021:         mov     ecx,esi         ;ECX = # of whole dwords (ESI kept for next scan)
                   2022:         rep     stosd           ;store EAX ECX times: whole bytes as dwords
                   2023:         stosw                   ;store AX: 1st and 2nd trailing bytes
                   2024:         stosb                   ;store AL: 3rd trailing byte
                   2025:         add     edi,edx         ;skip EDX bytes to the next scan's fill start
                   2026:         endm    ;-----------------------------------;
                   2027: 
                   2028: ; N-wide write-only, 3 leading bytes, 3 trailing bytes.
                   2029: ;  EAX = fill value (stored to the target by STOS)
                   2030: ;  EBX = count of scans to fill
                   2031: ;  EDX = offset from end of one scan's fill to start of next
                   2032: ;  ESI = # of dwords to fill
                   2033: ;  EDI = target address to fill
                   2034: 
                   2035:         align   4
                   2036: draw_wide_w_33_loop     proc    near
                         ; Scan-fill loop whose body is generated by UNROLL_LOOP from the
                         ; DRAW_WIDE_W_33 macro, with W_33 and LOOP_UNROLL_COUNT as arguments.
                         ; NOTE(review): UNROLL_LOOP is defined outside this chunk; presumably
                         ; it expands DRAW_WIDE_W_33 LOOP_UNROLL_COUNT times with W_33<n>
                         ; entry labels - confirm against the definition.
                   2037:         UNROLL_LOOP     DRAW_WIDE_W_33,W_33,LOOP_UNROLL_COUNT
                   2038:         dec     ebx             ;one pass over the generated body done
                   2039:         jnz     draw_wide_w_33_loop ;repeat until EBX reaches 0
                   2040: 
                   2041:         ret                     ;near return to the caller
                   2042: 
                   2043: draw_wide_w_33_loop     endp
                   2044: 
                   2045: 
                   2046: ;--------------------------Private-Routine------------------------------;
                   2047: ; comp_byte_interval
                   2048: ;
                   2049: ;   An interval will be computed for byte boundaries.
                   2050: ;
                   2051: ;   A first mask and a last mask will be calculated, and possibly
                   2052: ;   combined into the inner loop count.  If no first byte exists,
                   2053: ;   the start address will be incremented to adjust for it.
                   2054: ;
                   2055: ; Entry:
                   2056: ;       EBX = right coordinate (exclusive)
                   2057: ;       EDX = left coordinate  (inclusive)
                   2058: ; Returns:
                   2059: ;       EDI = offset to first byte to be altered in the scan
                   2060: ;       ESI = inner loop count
                   2061: ;       AL  = first byte mask (possibly 0)
                   2062: ;       AH  = last  byte mask (possibly 0)
                   2063: ; Error Returns:
                   2064: ;       None
                   2065: ; Registers Preserved:
                   2066: ;       ES,BP
                   2067: ; Registers Destroyed:
                   2068: ;       EAX,EBX,ECX,EDX,ESI,EDI,FLAGS
                   2069: ; Calls:
                   2070: ;       None
                   2071: ; History:
                   2072: ;       Sat 11-Apr-1987 20:39:10 -by-  Walt Moore [waltm]
                   2073: ;       Created.
                   2074: ;-----------------------------------------------------------------------;
                   2075: 
                   2076: cProc   comp_byte_interval
                         ; Converts the pixel interval [EDX, EBX) into a byte offset (EDI),
                         ; a first-byte mask (AL), a last-byte mask (AH) and a count of
                         ; fully covered bytes (ESI).  A mask of 0 means that edge byte has
                         ; been folded into the whole-byte count (or does not exist).
                   2077: 
                   2078:         sub     ebx,edx                 ;EBX = right - left = width in pixels
                   2079:         dec     ebx                     ;Make interval inclusive (width - 1)
                   2080:         mov     edi,edx                 ;Don't destroy starting X
                   2081:         shr     edi,3                   ;/8 for byte address of first byte
                   2082: 
                   2083:         and     edx,00000111b           ;EDX = bit index of left edge in its byte
                   2084:         add     ebx,edx                 ;EBX = last pixel's bit offset from first byte
                   2085:         mov     esi,ebx                 ;(save for inner loop count)
                   2086:         and     ebx,00000111b           ;EBX = bit index of right edge in its byte
                   2087:         mov     cl,dl                   ;Shift count for left side mask
                   2088:         mov     eax,0FFFFFFFFh          ;AL = 0FFh to shift into the left mask
                   2089:         mov     edx,eax                 ;EDX = -1, DL = 0FFh for CMP/SBB below
                   2090:         shr     al,cl                   ;AL = left side altered bits mask
                   2091:         mov     cl,bl                   ;Shift count for right side mask
                   2092:         mov     ah,80h                  ;Start with only the top bit set
                   2093:         sar     ah,cl                   ;AH = top CL+1 bits set (SAR copies bit 7)
                   2094:         shr     esi,3                   ;ESI = # of bytes touched - 1
                   2095:         jnz     short comp_byte_dont_combine ;More than one byte is touched
                   2096: 
                   2097: ; Only one byte will be affected.  Combine first/last masks, set loop count = 0
                   2098: 
                   2099:         and     al,ah                   ;Will use first byte mask only
                   2100:         xor     ah,ah                   ;Want last byte mask to be 0
                   2101:         inc     esi                     ;ESI = 1 so the DEC below leaves 0
                   2102: 
                   2103: comp_byte_dont_combine:
                   2104:         dec     esi                     ;ESI = bytes strictly between first and last
                   2105: 
                   2106: 
                   2107: ; If all pixels in the first byte are altered, combine the first byte into the
                   2108: ; inner loop and clear the first byte mask.  Ditto for the last byte mask.
                         ; DL = 0FFh and EDX = -1 here, so each SBB computes x + 1 - carry.
                         ; Each CMP is repeated because the preceding SBB overwrites the carry.
                   2109: 
                   2110:         cmp     al,dl                   ;Set 'C' if AL is not 0FFh
                   2111:         sbb     esi,edx                 ;No 'C': ESI + 1 (absorb first byte), else + 0
                   2112:         cmp     al,dl                   ;Redo compare, SBB changed the carry
                   2113:         sbb     al,dl                   ;No 'C': AL wraps 0FFh -> 0, else unchanged
                   2114: 
                   2115:         cmp     ah,dl                   ;Set 'C' if AH is not 0FFh
                   2116:         sbb     esi,edx                 ;No 'C': ESI + 1 (absorb last byte), else + 0
                   2117:         cmp     ah,dl                   ;Redo compare, SBB changed the carry
                   2118:         sbb     ah,dl                   ;No 'C': AH wraps 0FFh -> 0, else unchanged
                   2119:         cRet    comp_byte_interval
                   2120: 
                   2121: endProc comp_byte_interval
                   2122: 
                   2123: _TEXT$01   ends
                   2124: 
                   2125:         end
                   2126: 
                   2127: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.