Annotation of ntddk/src/video/displays/vga/i386/vgablts.asm, revision 1.1

1.1     ! root        1: ;---------------------------Module-Header------------------------------;
        !             2: ; Module Name: vgablts.asm
        !             3: ;
        !             4: ; Copyright (c) 1992 Microsoft Corporation
        !             5: ;-----------------------------------------------------------------------;
        !             6: ;-----------------------------------------------------------------------;
        !             7: ; VOID vTrgBlt(PDEVSURF pdsurf, ULONG culRcl, RECTL * prcl, MIX ulMix,
        !             8: ;              ULONG ulClr);
        !             9: ; Input:
        !            10: ;  pdsurf - surface to which to draw
        !            11: ;  culRcl - # of rectangles to fill
        !            12: ;  prcl   - pointer to list of rectangles to fill
        !            13: ;  ulMix  - mix rop with which to fill
        !            14: ;  ulClr  - color with which to fill
        !            15: ;
        !            16: ; Performs accelerated solid area fills for all mixes.
        !            17: ;
        !            18: ;-----------------------------------------------------------------------;
        !            19: ;
        !            20: ; Note: Assumes all rectangles have positive heights and widths. Will not
        !            21: ; work properly if this is not the case.
        !            22: ;
        !            23: ;-----------------------------------------------------------------------;
        !            24: ;
        !            25: ; Note: Cases where the width of the whole bytes fill is equal to the
        !            26: ; width of the bitmap could be sped up by using a single REP MOVS or REP
        !            27: ; STOS, but how often does WIN32 do a fill that's the width of the screen?
        !            28: ; Not very.
        !            29: ;
        !            30: ;-----------------------------------------------------------------------;
        !            31: 
        !            32:         comment $
        !            33: 
        !            34: The overall approach of this module is to accept a list of rectangles to
        !            35: fill, set up the VGA hardware for the desired fill, and then fill the
        !            36: rectangles one at a time. Each rectangle fill is set up for everything
        !            37: but vertical parameters, and then decomposed into the sections that
        !            38: intersect each VGA bank; each section is drawn in turn. The drawing code
        !            39: is heavily unrolled for performance, and vectors are set up so that the
        !            40: drawing code appropriate for the desired fill is essentially threaded
        !            41: together.
        !            42: 
        !            43:         commend $
        !            44: 
        !            45: ;-----------------------------------------------------------------------;
        !            46: 
        !            47: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
        !            48: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
        !            49: ; times unrolling. This is the only thing you need to change to control
        !            50: ; unrolling.
        !            51: 
        !            52: LOOP_UNROLL_SHIFT equ 2                 ;2**2 = 4 times unrolling
        !            53: 
        !            54: ;-----------------------------------------------------------------------;
        !            55: 
        !            56:                 .386
        !            57: 
        !            58: ifndef  DOS_PLATFORM
        !            59:         .model  small,c
        !            60: else
        !            61: ifdef   STD_CALL
        !            62:         .model  small,c
        !            63: else
        !            64:         .model  small,pascal
        !            65: endif;  STD_CALL
        !            66: endif;  DOS_PLATFORM
        !            67: 
        !            68:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
        !            69:         assume fs:nothing,gs:nothing
        !            70: 
        !            71:         .xlist
        !            72:         include stdcall.inc             ;calling convention cmacros
        !            73:         include i386\egavga.inc
        !            74:         include i386\strucs.inc
        !            75:         include i386\unroll.inc
        !            76:         include i386\ropdefs.inc
        !            77: 
        !            78:         .list
        !            79: 
        !            80: ;-----------------------------------------------------------------------;
        !            81: 
        !            82:         .data
        !            83: 
        !            84: ;-----------------------------------------------------------------------;
        !            85: ; Left edge clip masks, indexed by the intrabyte start address (xLeft AND 7),
        !            86: ; 0 through 7. A whole byte left edge (entry 0) is flagged as 0ffh.
        !            87:        public jLeftMask
        !            88: jLeftMask       label   byte
        !            89:         db      0ffh,07fh,03fh,01fh,00fh,007h,003h,001h ;entry n: low 8-n bits set
        !            90: 
        !            91: ;-----------------------------------------------------------------------;
        !            92: ; Right edge clip masks, indexed by the intrabyte end address (xRight AND 7,
        !            93: ; non-inclusive), 0 through 7. A whole byte right edge (entry 0) is 0ffh.
        !            94:        public jRightMask
        !            95: jRightMask      label   byte
        !            96:         db      0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh ;entry n>0: high n bits set
        !            97: 
        !            98: ;-----------------------------------------------------------------------;
        !            99: ; Tables used to set up for the desired raster op. Note that entries for raster
        !           100: ; ops that aren't handled here are generally correct, except that they ignore
        !           101: ; need for inversion of the destination, which those rops require.
        !           102: 
        !           103: ; Table ANDed with the drawing color, indexed by mix; a 0 entry forces the
        !           104: ; color off (R2_BLACK (0)). The first entry is ignored; there is no mix 0.
        !           105:        public jForceOffTable
        !           106: jForceOffTable  db         0                            ;mix 0 (unused)
        !           107:                 db         0,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 1-8
        !           108:                 db      0ffh,0ffh,   0,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 9-16 (mix 11 is also 0)
        !           109: 
        !           110: ;-----------------------------------------------------------------------;
        !           111: ; Table ORed into the drawing color, indexed by mix; the 0ffh entries (mixes 6
        !           112: ; and 16) force the color on for R2_NOT (Dn) and R2_WHITE (1). Entry 0 is unused.
        !           113:        public  jForceOnTable
        !           114: jForceOnTable   db      0, 0,0,0,0,0,0ffh,0,0,0,0,0,0,0,0,0,0ffh
        !           115: 
        !           116: ;-----------------------------------------------------------------------;
        !           117: ; Table XORed with the drawing color, indexed by mix; the 0ffh entries (mixes
        !           118: ; 2, 3, 4, 8, 10 and 12) invert the passed-in color for Pn mixes. Entry 0 is unused.
        !           119:        public  jNotTable
        !           120: jNotTable       db      0, 0,0ffh,0ffh,0ffh,0,0,0,0ffh,0,0ffh,0,0ffh,0,0,0,0
        !           121: 
        !           122: ;-----------------------------------------------------------------------;
        !           123: ; Table of VGA ALU logical functions, indexed by mix. Note that Dn is
        !           124: ; handled as a separate preceding inversion pass when part of a more complex
        !           125: ; mix.
        !           126: ; The first entry is ignored; there is no mix 0.
        !           127:        public jALUFuncTable    
        !           128: jALUFuncTable   db      0                               ;mix 0 (unused)
        !           129:                 db      DR_SET,DR_AND,DR_AND,DR_SET     ;mixes 1-4
        !           130:                 db      DR_AND,DR_XOR,DR_XOR,DR_OR      ;mixes 5-8
        !           131:                 db      DR_AND,DR_XOR,     0,DR_OR      ;mixes 9-12 (literal 0 for mix 11)
        !           132:                 db      DR_SET,DR_OR ,DR_OR ,DR_SET     ;mixes 13-16
        !           133: 
        !           134: ;-----------------------------------------------------------------------;
        !           135: ; Table indexed by mix; 1 entries (mixes 2, 5, 8 and 14) mark rops that require
        !           136: ; two passes, one to invert the destination and then another to finish the rop.
        !           137: ; The first entry is ignored; there is no mix 0.
        !           138:        public  jInvertDest
        !           139: jInvertDest     db      0, 0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0
        !           140: 
        !           141: ;-----------------------------------------------------------------------;
        !           142: ; Table of routines to be called to draw edges, according to which edges are
        !           143: ; partial and which edges are whole bytes.
        !           144:         align   4                       ;index bit 0 = 1: left edge partial
        !           145: pfnEdgeDrawing  label   dword           ;index bit 1 = 1: right edge whole
        !           146:         dd      do_right_edge_bytes     ;0: left whole, right not whole
        !           147:         dd      do_both_edge_bytes      ;1: left partial, right not whole
        !           148:         dd      check_next_bank         ;2: left whole, right whole
        !           149:         dd      do_left_edge_bytes      ;3: left partial, right whole
        !           150: 
        !           151: ;-----------------------------------------------------------------------;
        !           152: ; Table of pointers to tables used to find entry points in unrolled wide
        !           153: ; whole byte code.
        !           154: 
        !           155:         align   4
        !           156: pfnWideWholeRep label   dword           ;NOTE(review): index scheme is not
        !           157:         dd      pfnDrawWideW00Entry     ; visible here; confirm at lookup site
        !           158:         dd      pfnDrawWideW01Entry
        !           159:         dd      pfnDrawWideW02Entry
        !           160:         dd      pfnDrawWideW03Entry
        !           161:         dd      pfnDrawWideW10Entry
        !           162:         dd      pfnDrawWideW11Entry
        !           163:         dd      pfnDrawWideW12Entry
        !           164:         dd      pfnDrawWideW13Entry
        !           165:         dd      pfnDrawWideW20Entry
        !           166:         dd      pfnDrawWideW21Entry
        !           167:         dd      pfnDrawWideW22Entry
        !           168:         dd      pfnDrawWideW23Entry
        !           169:         dd      pfnDrawWideW30Entry
        !           170:         dd      pfnDrawWideW31Entry
        !           171:         dd      pfnDrawWideW32Entry
        !           172:         dd      pfnDrawWideW33Entry
        !           173: 
        !           174: ;-----------------------------------------------------------------------;
        !           175: ; Table of pointers to tables used to find entry points in narrow, special-
        !           176: ; cased unrolled non-replace whole byte code, indexed by whole byte count.
        !           177: 
        !           178: ; Note: The breakpoint where one should switch from special-casing to
        !           179: ;  REP MOVSB is purely a guess on my part. 5 seemed reasonable.
        !           180: 
        !           181:         align   4
        !           182: pfnWholeBytesNonReplaceEntries  label   dword
        !           183:         dd      0                       ;we never get a 0-wide case
        !           184:         dd      pfnDraw1WideRWEntry
        !           185:         dd      pfnDraw2WideRWEntry
        !           186:         dd      pfnDraw3WideRWEntry
        !           187:         dd      pfnDraw4WideRWEntry
        !           188: MAX_NON_REPLACE_SPECIAL equ     ($-pfnWholeBytesNonReplaceEntries)/4 ;# of entries; whole byte counts >= this are not special-cased
        !           189: 
        !           190: ;-----------------------------------------------------------------------;
        !           191: ; Table of pointers to tables used to find entry points in narrow, special-
        !           192: ; cased unrolled replace whole byte code.
        !           193: 
        !           194: ; Note: The breakpoint where one should switch from special-casing to
        !           195: ;  REP STOS is purely a guess on my part. 8 seemed reasonable.
        !           196: 
        !           197: ; Start address MOD 4 (i.e. AND 3) is 0.
        !           198:         align   4
        !           199: pfnWholeBytesMod0ReplaceEntries  label   dword
        !           200:         dd      0                       ;we never get a 0-wide case
        !           201:         dd      pfnDraw1WideWEntry
        !           202:         dd      pfnDraw2WideWEntry
        !           203:         dd      pfnDraw3WideWEvenEntry
        !           204:         dd      pfnDraw4WideWEntry
        !           205:         dd      pfnDraw5WideWEvenEntry
        !           206:         dd      pfnDraw6WideWMod3_0Entry
        !           207:         dd      pfnDraw7WideWMod3_0Entry
        !           208:         dd      pfnDraw8WideWMod3_0Entry
        !           209: MAX_REPLACE_SPECIAL equ     ($-pfnWholeBytesMod0ReplaceEntries)/4 ;# of entries; whole byte counts >= this go to is_wide_replace
        !           210: 
        !           211: ; Start address MOD 4 (i.e. AND 3) is 1.
        !           212:         align   4
        !           213: pfnWholeBytesMod1ReplaceEntries  label   dword
        !           214:         dd      0                       ;we never get a 0-wide case
        !           215:         dd      pfnDraw1WideWEntry
        !           216:         dd      pfnDraw2WideWEntry
        !           217:         dd      pfnDraw3WideWOddEntry
        !           218:         dd      pfnDraw4WideWEntry
        !           219:         dd      pfnDraw5WideWOddEntry
        !           220:         dd      pfnDraw6WideWMod3_1Entry
        !           221:         dd      pfnDraw7WideWMod3_1Entry
        !           222:         dd      pfnDraw8WideWMod3_1Entry
        !           223: 
        !           224: ; Start address MOD 4 (i.e. AND 3) is 2.
        !           225:         align   4
        !           226: pfnWholeBytesMod2ReplaceEntries  label   dword
        !           227:         dd      0                       ;we never get a 0-wide case
        !           228:         dd      pfnDraw1WideWEntry
        !           229:         dd      pfnDraw2WideWEntry
        !           230:         dd      pfnDraw3WideWEvenEntry
        !           231:         dd      pfnDraw4WideWEntry
        !           232:         dd      pfnDraw5WideWEvenEntry
        !           233:         dd      pfnDraw6WideWMod3_2Entry
        !           234:         dd      pfnDraw7WideWMod3_2Entry
        !           235:         dd      pfnDraw8WideWMod3_2Entry
        !           236: 
        !           237: ; Start address MOD 4 (i.e. AND 3) is 3.
        !           238:         align   4
        !           239: pfnWholeBytesMod3ReplaceEntries  label   dword ;one entry per whole byte width
        !           240:         dd      0                       ;we never get a 0-wide case
        !           241:         dd      pfnDraw1WideWEntry
        !           242:         dd      pfnDraw2WideWEntry
        !           243:         dd      pfnDraw3WideWOddEntry
        !           244:         dd      pfnDraw4WideWEntry
        !           245:         dd      pfnDraw5WideWOddEntry
        !           246:         dd      pfnDraw6WideWMod3_3Entry ;alignment-3 variant, like 7 and 8 wide
        !           247:         dd      pfnDraw7WideWMod3_3Entry
        !           248:         dd      pfnDraw8WideWMod3_3Entry
        !           249: 
        !           250: ; Master look-up table, one entry per start address alignment (MOD 4, i.e.
        !           251: ; AND 3), of entry tables for narrow, special-cased unrolled replace whole byte code.
        !           252:         align   4
        !           253: pfnWholeBytesReplaceMaster      label   dword
        !           254:         dd      pfnWholeBytesMod0ReplaceEntries ;alignment 0
        !           255:         dd      pfnWholeBytesMod1ReplaceEntries ;alignment 1
        !           256:         dd      pfnWholeBytesMod2ReplaceEntries ;alignment 2
        !           257:         dd      pfnWholeBytesMod3ReplaceEntries ;alignment 3
        !           258: 
        !           259: ;-----------------------------------------------------------------------;
        !           260: 
        !           261:                 .code
        !           262: 
        !           263: _TEXT$01   SEGMENT DWORD USE32 PUBLIC 'CODE'
        !           264:            ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
        !           265: 
        !           266: ;-----------------------------------------------------------------------;
        !           267: 
        !           268: cProc   vTrgBlt,20,<         \
        !           269:         uses    esi edi ebx, \
        !           270:         pdsurf: ptr DEVSURF, \
        !           271:         culRcl: dword,       \
        !           272:         prcl:   ptr RECTL,   \
        !           273:         ulMix:  dword,       \
        !           274:         ulColor:dword        >
        !           275: 
        !           276:         local   ulRowOffset :dword      ;Offset from start of scan line of
        !           277:                                         ; first byte to fill
        !           278:         local   ulWholeBytes :dword     ;# of whole bytes to fill
        !           279:         local   ulWholeDwords :dword    ;# of whole dwords to fill
        !           280:         local   pfnWholeFn  :dword      ;pointer to routine used to draw
        !           281:                                         ; whole bytes
        !           282:         local   ulScanWidth :dword      ;offset from start of one scan to start
        !           283:                                         ; of next
        !           284:         local   ulNextScan  :dword      ;offset from end of one scan line's
        !           285:                                         ; fill to start of next
        !           286:         local   ulCurrentTopScan :dword ;top scan line to fill in current bank
        !           287:         local   ulMasks     :dword      ;low byte = left mask, next byte =
        !           288:                                         ; right mask
        !           289:         local   ulBottomScan :dword     ;bottom scan line of fill rectangle
        !           290:         local   pfnDraw1WideVector :dword ;address at which to enter unrolled
        !           291:                                           ; edge loop
        !           292:         local   jALUFunc   :dword       ;VGA ALU logical operation (SET, AND,
        !           293:                                         ; OR, or XOR)
        !           294:         local   pfnStartDrawing :dword  ;pointer to function to call to start
        !           295:                                         ; drawing
        !           296:         local   pfnContinueDrawing :dword ;pointer to function to call to
        !           297:                                         ; continue drawing after doing whole
        !           298:                                         ; bytes
        !           299:         local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
        !           300:                                         ; address past the left edge when the
        !           301:                                         ; left edge is partial
        !           302:         local   pfnWholeBytes :dword    ;pointer to table of entry points
        !           303:                                         ; into unrolled loops for whole byte
        !           304:                                         ; filling
        !           305:         local   jInvertDestFirst :dword ;1 if the rop requires a pass to invert
        !           306:                                         ; the destination before the normal
        !           307:                                         ; pass
        !           308:         local   jDrawingColor :dword    ;color with which we're drawing
        !           309:                                         ; dword to finish out fill
        !           310: 
        !           311: ;-----------------------------------------------------------------------;
        !           312: 
        !           313:         cld
        !           314: 
        !           315: ;-----------------------------------------------------------------------;
        !           316: ; Make sure there's something to draw; clip enumerations can be empty.
        !           317: ;-----------------------------------------------------------------------;
        !           318: 
        !           319:         cmp     culRcl,0                ;any rects to fill?
        !           320:         jz      vTrgBlts_done           ;no, we're done
        !           321: 
        !           322: 
        !           323: ;-----------------------------------------------------------------------;
        !           324: ; Set up variables that are constant for the entire time we're in this
        !           325: ; module.
        !           326: ;-----------------------------------------------------------------------;
        !           327: 
        !           328: ;-----------------------------------------------------------------------;
        !           329: ; Set up for the desired raster op.
        !           330: ;-----------------------------------------------------------------------;
        !           331: 
        !           332:         sub     ebx,ebx                 ;ignore any background mix; we're only
        !           333:         mov     bl,byte ptr ulMix       ; concerned with the foreground in this
        !           334:                                         ; module
        !           335:         cmp     ebx,R2_NOP              ;is this NOP?
        !           336:         jz      vTrgBlts_done           ;yes, we're done
        !           337:         mov     al,jInvertDest[ebx]          ;remember whether we need to
        !           338:         mov     byte ptr jInvertDestFirst,al ; invert the destination before
        !           339:                                              ; finishing the rop
        !           340:         mov     ah,byte ptr ulColor     ;get the drawing color
        !           341:         and     ah,jForceOffTable[ebx]  ;force color to 0 if necessary
        !           342:                                         ; (R2_BLACK)
        !           343:         or      ah,jForceOnTable[ebx]   ;force color to 0ffh if necessary
        !           344:                                         ; (R2_WHITE, R2_NOT)
        !           345:         xor     ah,jNotTable[ebx]       ;invert color if necessary (any Pn mix)
        !           346:                                         ;at this point, AH has the color we
        !           347:                                         ; want to draw with; set up the VGA
        !           348:                                         ; hardware to draw with that color
        !           349:         mov     byte ptr jDrawingColor,ah ;remember drawing color for restoring
        !           350:                                           ; after inversion
        !           351:         mov     edx,VGA_BASE + GRAF_ADDR
        !           352:         mov     al,GRAF_SET_RESET       ;set/reset = color to write
        !           353:         out     dx,ax
        !           354:         mov     eax,0F00h + GRAF_ENAB_SR ;enable set/reset for all planes, so
        !           355:         out     dx,ax                   ; set/reset color we just set becomes
        !           356:                                         ; the drawing color, regardless of the
        !           357:                                         ; value written by the CPU
        !           358: 
        !           359:         mov     ah,jALUFuncTable[ebx]   ;get the ALU logical function
        !           360:         and     ah,ah                   ;is the logical function DR_SET?
        !           361:         .errnz  DR_SET
        !           362:         jz      short skip_ALU_set      ;yes, don't have to set because that's
        !           363:                                         ; the VGA's default state
        !           364:         mov     al,GRAF_DATA_ROT
        !           365:         out     dx,ax                   ;set the ALU logical function
        !           366: skip_ALU_set:
        !           367:         mov     byte ptr jALUFunc,ah    ;remember the ALU logical function
        !           368: 
        !           369:        mov     eax,GRAF_MODE + ((M_AND_WRITE + M_COLOR_READ) SHL 8)
        !           370:        out     dx,ax                   ;write mode 3 so we can do the masking
        !           371:                                        ; without OUTs, read mode 1 so we can
        !           372:                                        ; read 0xFF from memory always, for
        !           373:                                        ; ANDing (because Color Don't Care is
        !           374:                                        ; all zeros)
        !           375: 
        !           376: ;-----------------------------------------------------------------------;
        !           377: ; Fill the current rectangle with the specified raster op and color.
        !           378: ;-----------------------------------------------------------------------;
        !           379: 
        !           380: fill_rect_loop:
        !           381: 
        !           382: ;-----------------------------------------------------------------------;
        !           383: ; Set up variables that are constant from bank to bank during a single
        !           384: ; fill.
        !           385: ;-----------------------------------------------------------------------;
        !           386: 
        !           387: ;-----------------------------------------------------------------------;
        !           388: ; Set up masks and widths.
        !           389: ;-----------------------------------------------------------------------;
        !           390: 
        !           391:         mov     edi,prcl                ;point to rectangle to fill
        !           392:         mov     eax,[edi].yBottom
        !           393:         mov     ulBottomScan,eax        ;remember the bottom scan line of fill
        !           394: 
        !           395:         mov     ebx,[edi].xRight        ;right edge of fill (non-inclusive)
        !           396:         mov     ecx,ebx
        !           397:         and     ecx,0111b               ;intrabyte address of right edge
        !           398:         mov     ah,jRightMask[ecx]      ;right edge mask
        !           399: 
        !           400:         mov     esi,[edi].xLeft         ;left edge of fill (inclusive)
        !           401:         mov     ecx,esi
        !           402:         shr     ecx,3                   ;/8 for start offset from left edge
        !           403:                                         ; of scan line
        !           404:         mov     ulRowOffset,ecx         ;remember offset from start of scan
        !           405:                                         ; line
        !           406:         sub     ebx,esi                 ;width in pixels of fill
        !           407: 
        !           408:         and     esi,0111b               ;intrabyte address of left edge
        !           409:         mov     al,jLeftMask[esi]       ;left edge mask
        !           410: 
        !           411:         dec     ebx                     ;make inclusive on right
        !           412:         add     ebx,esi                 ;inclusive width, starting counting at
        !           413:                                         ; the beginning of the left edge byte
        !           414:         shr     ebx,3                   ;width of fill in bytes touched - 1
        !           415:         jnz     short more_than_1_byte  ;more than 1 byte is involved
        !           416: 
        !           417: ; Only one byte will be affected. Combine first/last masks.
        !           418: 
        !           419:         and     al,ah                   ;we'll use first byte mask only
        !           420:         xor     ah,ah                   ;want last byte mask to be 0
        !           421:         inc     ebx                     ;so there's one count to subtract below
        !           422:                                         ; if this isn't a whole edge byte
        !           423: more_than_1_byte:
        !           424: 
        !           425: ; If all pixels in the left edge are altered, combine the first byte into the
        !           426: ; whole byte count and clear the first byte mask, because we can handle solid
        !           427: ; edge bytes faster as part of the whole bytes. Ditto for the right edge.
        !           428: 
        !           429:         sub     ecx,ecx                 ;edge whole-status accumulator
        !           430:         cmp     al,-1                   ;is left edge a whole byte or partial?
        !           431:         adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
        !           432:         sub     ebx,ecx                 ;if left edge partial, deduct it from
        !           433:                                         ; the whole bytes count
        !           434:         mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
        !           435:                                         ; it's partial when pointing to the
        !           436:                                         ; whole bytes
        !           437:         and     ah,ah                   ;is right edge mask 0, meaning this
        !           438:                                         ; fill is only 1 byte wide?
        !           439:         jz      short save_masks        ;yes, no need to do anything
        !           440:         cmp     ah,-1                   ;is right edge a whole byte or partial?
        !           441:         jnz     short save_masks        ;partial
        !           442:         add     ecx,2                   ;bit 1 of ECX=0 if right edge partial,
        !           443:                                         ; 1 if whole;
        !           444:                                         ;bit 0=1 if left edge partial, 0 whole
        !           445:         inc     ebx                     ;if right edge whole, include it in the
        !           446:                                         ; whole bytes count
        !           447: save_masks:
        !           448:         mov     ulMasks,eax             ;save left and right clip masks
        !           449:         mov     ulWholeBytes,ebx        ;save # of whole bytes
        !           450: 
        !           451:         mov     ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw
        !           452:         mov     pfnContinueDrawing,ecx    ; all partial (non-whole) edges
        !           453: 
        !           454:         and     ebx,ebx                 ;any whole bytes?
        !           455:         jz      short start_vec_set     ;no
        !           456:                                         ;yes, so draw the whole bytes before
        !           457:                                         ; the edge bytes
        !           458: 
        !           459: ; The whole bytes loop depends on the type of operation being done. If the
        !           460: ; operation is one which uses DR_SET, then we can use a STOS-type operation,
        !           461: ; else we have to use a MOVSB-type operation (to load the latches with the
        !           462: ; existing contents of display memory to allow the ALUs to work).
        !           463: 
        !           464:         cmp     byte ptr jALUFunc,DR_SET ;is it a replace-type rop?
        !           465:         jz      short is_replace_type   ;yes
        !           466:                                         ;no, set up for non-replace whole bytes
        !           467:         mov     ecx,offset whole_bytes_non_replace_wide
        !           468:                                         ;assume too wide to special-case
        !           469:         cmp     ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case?
        !           470:         jnb     short start_vec_set     ;yes
        !           471:         mov     ecx,pfnWholeBytesNonReplaceEntries[ebx*4] ;no, point to entry
        !           472:         mov     pfnWholeBytes,ecx       ; table for width
        !           473:         mov     ecx,offset whole_bytes_special
        !           474:                                         ;set up to call special routine to fill
        !           475:                                         ; whole bytes
        !           476:         jmp     short start_vec_set
        !           477: 
        !           478:         align   4
        !           479: is_replace_type:                        ;set up for replace-type rop
        !           480:         cmp     ebx,MAX_REPLACE_SPECIAL ;too wide to special case?
        !           481:         jnb     short is_wide_replace   ;yes
        !           482:                                         ;narrow enough to special case. Look up
        !           483:                                         ; the entry table for the special case
        !           484:                                         ; based on the start alignment
        !           485:         mov     ecx,ulRowOffset
        !           486:         add     ecx,ulLeftEdgeAdjust    ;left edge whole bytes start offset
        !           487:         and     ecx,011b                ;left edge whole bytes start alignment
        !           488:                                         ; MOD 4
        !           489:         mov     ecx,pfnWholeBytesReplaceMaster[ecx*4] ;look up table of entry
        !           490:                                                       ; tables for alignment
        !           491:         mov     ecx,[ecx+ebx*4]         ;look up entry table for width
        !           492:         mov     pfnWholeBytes,ecx       ; table for width
        !           493:         mov     ecx,offset whole_bytes_special
        !           494:                                         ;set up to call special routine to fill
        !           495:                                         ; whole bytes
        !           496:         jmp     short start_vec_set
        !           497: 
        !           498:         align   4
        !           499: is_wide_replace:                        ;set up for wide replace-type op
        !           500:                                         ;Note: assumes there is at least one
        !           501:                                         ; full dword involved!
        !           502:         mov     ecx,ulRowOffset
        !           503:         add     ecx,ulLeftEdgeAdjust    ;left edge whole bytes start offset
        !           504:         neg     ecx
        !           505:         and     ecx,011b
        !           506:         mov     edx,ebx
        !           507:         sub     edx,ecx                 ;ignore odd leading bytes
        !           508:         mov     eax,edx
        !           509:         shr     edx,2                   ;# of whole dwords across (not counting
        !           510:                                         ; odd leading & trailing bytes)
        !           511:         mov     ulWholeDwords,edx
        !           512:         and     eax,011b                ;# of odd (fractional) trailing bytes
        !           513:         shl     ecx,2
        !           514:         or      ecx,eax                 ;build a look-up index from the number
        !           515:                                         ; of leading and trailing bytes
        !           516:         mov     ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/
        !           517:         mov     pfnWholeBytes,ecx          ; back alignment
        !           518:         mov     ecx,offset whole_bytes_rep_wide
        !           519:                                         ;set up to call routine to perform wide
        !           520:                                         ; whole bytes fill
        !           521: start_vec_set:
        !           522:         mov     pfnStartDrawing,ecx     ;routine that starts drawing in each bank
        !           523: 
        !           524:         mov     ecx,pdsurf
        !           525:         mov     eax,[ecx].dsurf_lNextScan
        !           526:         mov     ulScanWidth,eax         ;local copy of scan line width
        !           527:         sub     eax,ebx                 ;EAX = delta to next scan
        !           528:         mov     ulNextScan,eax
        !           529: 
        !           530: 
        !           531: ;-----------------------------------------------------------------------;
        !           532: ; Fill this rectangle.
        !           533: ;-----------------------------------------------------------------------;
        !           534: 
        !           535:         cmp     byte ptr jInvertDestFirst,1
        !           536:                                         ;is this an invert-dest-plus-something-
        !           537:                                         ; else rop that requires two passes?
        !           538:         jz      short do_invert_dest_rop ;yes, special case with two passes
        !           539: 
        !           540: do_single_pass:
        !           541:         call    draw_banks
        !           542: 
        !           543: 
        !           544: ;-----------------------------------------------------------------------;
        !           545: ; See if there are any more rectangles to fill.
        !           546: ;-----------------------------------------------------------------------;
        !           547: 
        !           548:         add     prcl,(size RECTL) ;point to the next rectangle, if there is one
        !           549:         dec     culRcl            ;count down the rectangles to fill
        !           550:         jnz     fill_rect_loop
        !           551: 
        !           552: 
        !           553: ;-----------------------------------------------------------------------;
        !           554: ; We have filled all rectangles.  Restore the VGA to its default state.
        !           555: ;-----------------------------------------------------------------------;
        !           556: 
        !           557:         mov     edx,VGA_BASE + GRAF_ADDR
        !           558:         mov     eax,0000h + GRAF_ENAB_SR ;disable set/reset
        !           559:         out     dx,ax
        !           560:        mov     eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8)
        !           561:        out     dx,ax                   ;restore read mode 0 and write mode 0
        !           562:         cmp     byte ptr jALUfunc,DR_SET ;is the logical function already SET?
        !           563:         jz      short vTrgBlts_done               ;yes, no need to reset it
        !           564:         mov     eax,(DR_SET shl 8) + GRAF_DATA_ROT ;set the logical function to
        !           565:         out     dx,ax                             ; SET
        !           566: vTrgBlts_done:
        !           567:         cRet    vTrgBlt
        !           568: 
        !           569: 
        !           570: ;-----------------------------------------------------------------------;
        !           571: ; Handles rops that require two passes, the first being a destination
        !           572: ; inversion pass.
        !           573: ;-----------------------------------------------------------------------;
        !           574: 
        !           575:         align   4
        !           576: do_invert_dest_rop:
        !           577: 
        !           578: ; Set up the VGA's hardware for inversion
        !           579: 
        !           580:         mov     edx,VGA_BASE + GRAF_ADDR
        !           581:         mov     eax,0ff00h + GRAF_SET_RESET ;set/reset = 0ffh to invert in
        !           582:         out     dx,ax                       ; conjunction with XOR
        !           583:         mov     eax,(DR_XOR shl 8) + GRAF_DATA_ROT
        !           584:         out     dx,ax                   ;logical function = XOR to invert
        !           585: 
        !           586: ; Invert the destination
        !           587: 
        !           588:         call    draw_banks
        !           589: 
        !           590: ; Restore the VGA's hardware to the state required for the second pass.
        !           591: 
        !           592:         mov     edx,VGA_BASE + GRAF_ADDR
        !           593:         mov     ah,byte ptr jDrawingColor
        !           594:         mov     al,GRAF_SET_RESET       ;set/reset = color to write
        !           595:         out     dx,ax
        !           596:         mov     ah,byte ptr jALUFunc
        !           597:         mov     al,GRAF_DATA_ROT
        !           598:         out     dx,ax                   ;set the ALU logical function
        !           599: 
        !           600: ; Perform the second pass to finish the rop.
        !           601: 
        !           602:         jmp     do_single_pass
        !           603: 
        !           604: 
        !           605: ;-----------------------------------------------------------------------;
        !           606: ; Fills all banks in the current fill rectangle. Called once per fill
        !           607: ; rectangle, except for destination-inversion-plus-something-else rops.
        !           608: ;-----------------------------------------------------------------------;
        !           609: 
        !           610:         align   4
        !           611: draw_banks:
        !           612: 
        !           613: ;-----------------------------------------------------------------------;
        !           614: ; Map in the bank containing the top scan to fill, if it's not mapped in
        !           615: ; already.
        !           616: ;-----------------------------------------------------------------------;
        !           617: 
        !           618:         mov     edi,prcl                ;point to rectangle to fill
        !           619:         mov     ecx,pdsurf              ;point to surface
        !           620:         mov     eax,[edi].yTop          ;top scan line of fill
        !           621:         mov     ulCurrentTopScan,eax    ;this will be the fill top in 1st bank
        !           622: 
        !           623:         cmp     eax,[ecx].dsurf_rcl1WindowClip.yTop ;is fill top less than
        !           624:                                                     ; current bank?
        !           625:         jl      short map_init_bank             ;yes, map in proper bank
        !           626:         cmp     eax,[ecx].dsurf_rcl1WindowClip.yBottom ;fill top greater than
        !           627:                                                        ; current bank?
        !           628:         jl      short init_bank_mapped          ;no, proper bank already mapped
        !           629: map_init_bank:
        !           630: 
        !           631: ; Map in the bank containing the top scan line of the fill.
        !           632: 
        !           633:         ptrCall <dword ptr [ecx].dsurf_pfnBankControl>,<ecx,eax,JustifyTop>
        !           634: 
        !           635: init_bank_mapped:
        !           636: 
        !           637: ;-----------------------------------------------------------------------;
        !           638: ; Main loop for processing fill in each bank.
        !           639: ;-----------------------------------------------------------------------;
        !           640: 
        !           641: ; Compute the starting address and scan line count for the initial bank.
        !           642: 
        !           643:         mov     eax,pdsurf              ;EAX->target surface
        !           644:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
        !           645:         cmp     ebx,[eax].dsurf_rcl1WindowClip.yBottom
        !           646:                                         ;which comes first, the bottom of the
        !           647:                                         ; dest rect or the bottom of the
        !           648:                                         ; current bank?
        !           649:         jl      short BottomScanSet     ;fill bottom comes first, so draw to
        !           650:                                         ; that; this is the last bank in fill
        !           651:         mov     ebx,[eax].dsurf_rcl1WindowClip.yBottom
        !           652:                                         ;bank bottom comes first; draw to
        !           653:                                         ; bottom of bank
        !           654: BottomScanSet:
        !           655:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
        !           656:         sub     ebx,edi                 ;# of scans to fill in bank
        !           657:         imul    edi,ulScanWidth         ;offset of starting scan line
        !           658: 
        !           659: ; Note that the start of the bitmap will change each time through the
        !           660: ; bank loop, because the start of the bitmap is varied to map the
        !           661: ; desired scan line to the banking window.
        !           662: 
        !           663:         add     edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
        !           664:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
        !           665: 
        !           666: ; We have computed the starting address and scan count. Time to start drawing
        !           667: ; in the initial bank.
        !           668: 
        !           669:         jmp     pfnStartDrawing
        !           670: 
        !           671: 
        !           672: ;-----------------------------------------------------------------------;
        !           673: ; Whole byte fills.
        !           674: ;-----------------------------------------------------------------------;
        !           675: 
        !           676: ;-----------------------------------------------------------------------;
        !           677: ; Handles replace-type whole byte fills wider than the maximum special
        !           678: ; case width.
        !           679: ;
        !           680: ; The destination is not involved, so a STOS (or equivalent) can be used
        !           681: ; (no read needed before write).
        !           682: ;-----------------------------------------------------------------------;
        !           683: 
        !           684:         align   4
        !           685: whole_bytes_rep_wide:
        !           686:         push    ebx                     ;save scan count
        !           687:         push    edi                     ;save starting address
        !           688: 
        !           689:         mov     eax,pfnWholeBytes       ;point to entry table for unrolled
        !           690:                                         ; loop for whole byte width
        !           691:         SET_UP_UNROLL_VARS ebx,ecx, ebx,[eax], LOOP_UNROLL_SHIFT
        !           692:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
        !           693:         mov     esi,ulWholeDwords       ;whole dwords width
        !           694:        mov     eax,-1                  ;this will become the Bit Mask,
        !           695:                                        ; enabling drawing to all bits
        !           696:         mov     edx,ulNextScan          ;offset from end of one scan line to
        !           697:                                         ; start of next
        !           698:         call    ecx                     ;draw the wide whole bytes
        !           699: 
        !           700:         pop     edi                     ;restore screen pointer
        !           701:         pop     ebx                     ;restore fill scan count
        !           702:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
        !           703: 
        !           704: 
        !           705: ;-----------------------------------------------------------------------;
        !           706: ; Handles both replace and non-replace whole byte fills narrow enough to
        !           707: ; special case.
        !           708: ;-----------------------------------------------------------------------;
        !           709: 
        !           710:         align   4
        !           711: whole_bytes_special:
        !           712:         push    ebx                     ;save scan count
        !           713:         push    edi                     ;save starting address
        !           714: 
        !           715:         mov     eax,pfnWholeBytes       ;point to entry table for unrolled
        !           716:                                         ; loop for whole byte width
        !           717:         SET_UP_UNROLL_VARS ebx,edx, ebx,[eax], LOOP_UNROLL_SHIFT
        !           718:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
        !           719:         mov     ecx,ulScanWidth         ;offset to next scan line
        !           720:        mov     eax,-1                  ;this will become the Bit Mask,
        !           721:                                        ; enabling drawing to all bits
        !           722:         call    edx                     ;draw the whole bytes
        !           723: 
        !           724:         pop     edi                     ;restore screen pointer
        !           725:         pop     ebx                     ;restore fill scan count
        !           726:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
        !           727: 
        !           728: 
        !           729: ;-----------------------------------------------------------------------;
        !           730: ; Handles non-replace whole byte fills wider than the maximum special case
        !           731: ; width.
        !           732: ;
        !           733: ; The destination is involved, so a MOVSB (or equivalent) must be
        !           734: ; performed in order to do a read before write to give the ALUs something
        !           735: ; to work with.
        !           736: ;-----------------------------------------------------------------------;
        !           737: 
        !           738:         align   4
        !           739: whole_bytes_non_replace_wide:
        !           740:         push    ebx                     ;save scan count
        !           741:         push    edi                     ;save starting address
        !           742: 
        !           743:         SET_UP_UNROLL_VARS ebx,ecx, ebx,pfnDrawWideRWEntry, LOOP_UNROLL_SHIFT
        !           744:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
        !           745:         mov     eax,ulWholeBytes        ;whole bytes width
        !           746:         mov     edx,ulNextScan          ;offset from end of one scan line to
        !           747:                                         ; start of next
        !           748:         call    ecx                     ;draw the wide whole bytes
        !           749: 
        !           750:         pop     edi                     ;restore screen pointer
        !           751:         pop     ebx                     ;restore fill scan count
        !           752:         jmp     pfnContinueDrawing      ;either keep drawing or we're done
        !           753: 
        !           754: 
        !           755: ;-----------------------------------------------------------------------;
        !           756: ; Process any left/right columns that have to be done.
        !           757: ;
        !           758: ;   Currently:
        !           759: ;       EBX =   height to fill, in scans
        !           760: ;       EDI --> first byte of left edge
        !           761: ;-----------------------------------------------------------------------;
        !           762: 
        !           763: ;-----------------------------------------------------------------------;
        !           764: ; Handle case where both edges are partial (non-whole) bytes.
        !           765: ;-----------------------------------------------------------------------;
        !           766:         align   4
        !           767:        public do_both_edge_bytes
        !           768: do_both_edge_bytes:
        !           769: 
        !           770: ; Set up variables for entering unrolled loop.
        !           771: 
        !           772:         SET_UP_UNROLL_VARS ebx,edx, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
        !           773:         mov     pfnDraw1WideVector,edx
        !           774: 
        !           775:         mov     ecx,ulScanWidth         ;offset from one scan to next
        !           776: 
        !           777:         mov     esi,ulWholeBytes        ;ESI = # of whole bytes
        !           778:         lea     esi,[esi+edi+1]         ;--> start for right edge
        !           779:         mov     al,byte ptr ulMasks     ;this will become the Bit Mask for the
        !           780:                                        ; left edge
        !           781:         push    ebx                     ;preserve scan line count
        !           782:         call    edx                     ;jump into the unrolled loop to draw
        !           783:         pop     ebx                     ;restore scan line count
        !           784: 
        !           785:         mov     edi,esi                 ;point to first right edge byte
        !           786:         mov     al,byte ptr ulMasks+1   ;this will become the Bit Mask for the
        !           787:                                        ; right edge
        !           788:         push    offset edges_done       ;return here
        !           789:         jmp     pfnDraw1WideVector      ;jump into the unrolled loop to draw
        !           790: 
        !           791: ;-----------------------------------------------------------------------;
        !           792: ; Handle case where only the left edge is partial (non-whole).
        !           793: ;-----------------------------------------------------------------------;
        !           794:         align   4
        !           795: do_left_edge_bytes:
        !           796: 
        !           797: ; Set up variables for entering unrolled loop.
        !           798: 
        !           799:         SET_UP_UNROLL_VARS ebx,esi, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
        !           800: 
        !           801:         mov     ecx,ulScanWidth         ;offset from one scan to next
        !           802:         mov     al,byte ptr ulMasks     ;this will become the Bit Mask for the
        !           803:                                        ; left edge
        !           804:         push    offset edges_done       ;return here
        !           805:         jmp     esi                     ;jump into the unrolled loop to draw
        !           806: 
        !           807: ;-----------------------------------------------------------------------;
        !           808: ; Handle case where only the right edge is partial (non-whole).
        !           809: ;-----------------------------------------------------------------------;
        !           810:         align   4
        !           811: do_right_edge_bytes:
        !           812: 
        !           813: ; Set up variables for entering unrolled loop.
        !           814: 
        !           815:         SET_UP_UNROLL_VARS ebx,esi, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
        !           816: 
        !           817:         mov     ecx,ulScanWidth         ;offset from one scan to next
        !           818:         add     edi,ulWholeBytes        ;--> start for right edge (remember,
        !           819:                                         ; left edge is whole, so the left edge
        !           820:                                         ; byte is included in the whole byte
        !           821:                                         ; count)
        !           822:         mov     al,byte ptr ulMasks+1   ;this will become the Bit Mask for the
        !           823:                                        ; right edge
        !           824:         call    esi                     ;jump into the unrolled loop to draw
        !           825: 
        !           826: ;-----------------------------------------------------------------------;
        !           827: ; We have done all partial edges.
        !           828: ;-----------------------------------------------------------------------;
        !           829: 
        !           830: edges_done:
        !           831: 
        !           832: ;-----------------------------------------------------------------------;
        !           833: ; See if there are any more banks to process.
        !           834: ;-----------------------------------------------------------------------;
        !           835: 
        !           836: check_next_bank:
        !           837: 
        !           838:         mov     edi,pdsurf
        !           839:         mov     eax,[edi].dsurf_rcl1WindowClip.yBottom ;is the fill bottom in
        !           840:         cmp     ulBottomScan,eax                       ; the current bank?
        !           841:         jle     short banks_done        ;yes, so we're done
        !           842:                                         ;no, map in the next bank and fill it
        !           843:         mov     ulCurrentTopScan,eax    ;remember where the top of the bank
        !           844:                                         ; we're about to map in is (same as
        !           845:                                         ; bottom of bank we just did)
        !           846: 
        !           847:         ptrCall <dword ptr [edi].dsurf_pfnBankControl>,<edi,eax,JustifyTop>
        !           848:                                         ;map in the bank
        !           849: 
        !           850: ; Compute the starting address and scan line count in this bank.
        !           851: 
        !           852:         mov     eax,pdsurf              ;EAX->target surface
        !           853:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
        !           854:         cmp     ebx,[eax].dsurf_rcl1WindowClip.yBottom
        !           855:                                         ;which comes first, the bottom of the
        !           856:                                         ; dest rect or the bottom of the
        !           857:                                         ; current bank?
        !           858:         jl      short BottomScanSet2    ;fill bottom comes first, so draw to
        !           859:                                         ; that; this is the last bank in fill
        !           860:         mov     ebx,[eax].dsurf_rcl1WindowClip.yBottom
        !           861:                                         ;bank bottom comes first; draw to
        !           862:                                         ; bottom of bank
        !           863: BottomScanSet2:
        !           864:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
        !           865:         sub     ebx,edi                 ;# of scans to fill in bank
        !           866:         imul    edi,ulScanWidth         ;offset of starting scan line
        !           867: 
        !           868: ; Note that the start of the bitmap will change each time through the
        !           869: ; bank loop, because the start of the bitmap is varied to map the
        !           870: ; desired scan line to the banking window.
        !           871: 
        !           872:         add     edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
        !           873:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
        !           874: 
        !           875: ; Draw in the new bank.
        !           876: 
        !           877:         jmp     pfnStartDrawing
        !           878: 
        !           879: 
        !           880: ;-----------------------------------------------------------------------;
        !           881: ; Done with all banks in this fill.
        !           882: 
        !           883: banks_done:
        !           884:         PLAIN_RET
        !           885: 
        !           886: endProc vTrgBlt
        !           887: 
        !           888: 
        !           889: ;-----------------------------------------------------------------------;
        !           890: ; Unrolled loops.
        !           891: ; There are two kinds of unrolled loops: read-before-write (to load the
        !           892: ;  latches), and write-only (for replace-type rops).
        !           893: ;-----------------------------------------------------------------------;
        !           894: 
        !           895: 
        !           896: ;-----------------------------------------------------------------------;
        !           897: ; Unrolled drawing stuff for cases where read before write is required,
        !           898: ; to load the latches.
        !           899: ;-----------------------------------------------------------------------;
        !           900: 
        !           901: ; Tables of entry points into unrolled 1-, 2-, 3-, and 4-wide, and 5-or-wider
        !           902: ; read before write loops.
        !           903: 
        !           904:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideRWEntry,RW1,LOOP_UNROLL_COUNT
        !           905:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideRWEntry,RW2,LOOP_UNROLL_COUNT
        !           906:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideRWEntry,RW3,LOOP_UNROLL_COUNT
        !           907:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideRWEntry,RW4,LOOP_UNROLL_COUNT
        !           908:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideRWEntry,RWWIDE,LOOP_UNROLL_COUNT
        !           909: 
        !           910: ;-----------------------------------------------------------------------;
        !           911: ; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
        !           912: ;
        !           913: ; Entry:
        !           914: ;      AL = pixel mask
        !           915: ;       EBX = unrolled loop count
        !           916: ;       ECX = scan line width in bytes
        !           917: ;       EDI = start offset
        !           918: ;
        !           919: ; EBX, EDI modified. All other registers preserved.
        !           920: 
        !           921: ;-----------------------------------------------------------------------;
        !           922: ; Macro to draw one read before write byte at [EDI], then advance EDI by ECX to the next scan line. The code is labelled ENTRY_LABEL followed by ENTRY_INDEX.
        !           923: 
        !           924: DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !           925: &ENTRY_LABEL&ENTRY_INDEX&:
        !           926:         and     [edi],al                ;the read half of the AND loads the latches; we always read 0xFF, so AL is written
        !           927:                                        ; as-is; because we're in write mode 3,
        !           928:                                        ; AL becomes the Bit Mask
        !           929:         add     edi,ecx                 ;point to the next scan line
        !           930:         endm    ;-----------------------------------;
        !           931: 
        !           932: ; 1-wide read/write loop: runs the unrolled DRAW_1_WIDE_RW code until EBX (unrolled loop count) reaches zero. AL = pixel mask, ECX = scan line width in bytes, EDI = start offset; EBX and EDI are modified.
        !           933: 
        !           934:         align   4
        !           935: draw_1_wide_rw_loop     proc    near
        !           936:         UNROLL_LOOP     DRAW_1_WIDE_RW,RW1,LOOP_UNROLL_COUNT
        !           937:         dec     ebx                     ;one pass through the unrolled code is done
        !           938:         jnz     draw_1_wide_rw_loop     ;EBX not yet zero: run the unrolled code again
        !           939: 
        !           940:         ret                             ;EBX reached zero
        !           941: 
        !           942: draw_1_wide_rw_loop     endp
        !           943: 
        !           944: ;-----------------------------------------------------------------------;
        !           945: ; Macro to draw two read before write bytes at [EDI] and [EDI+1], then advance EDI by ECX to the next scan line. The code is labelled ENTRY_LABEL followed by ENTRY_INDEX.
        !           946: 
        !           947: DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !           948: &ENTRY_LABEL&ENTRY_INDEX&:
        !           949:         and     [edi],al                ;byte 0: read, AND with AL, write back
        !           950:         and     [edi+1],al              ;byte 1
        !           951:         add     edi,ecx                 ;point to the next scan line
        !           952:         endm    ;-----------------------------------;
        !           953: 
        !           954: ; 2-wide read/write loop: runs the unrolled DRAW_2_WIDE_RW code until EBX (unrolled loop count) reaches zero. AL = pixel mask, ECX = scan line width in bytes, EDI = start offset; EBX and EDI are modified.
        !           955: 
        !           956:         align   4
        !           957: draw_2_wide_rw_loop     proc    near
        !           958:         UNROLL_LOOP     DRAW_2_WIDE_RW,RW2,LOOP_UNROLL_COUNT
        !           959:         dec     ebx                     ;one pass through the unrolled code is done
        !           960:         jnz     draw_2_wide_rw_loop     ;EBX not yet zero: run the unrolled code again
        !           961: 
        !           962:         ret                             ;EBX reached zero
        !           963: 
        !           964: draw_2_wide_rw_loop     endp
        !           965: 
        !           966: ;-----------------------------------------------------------------------;
        !           967: ; Macro to draw three read before write bytes at [EDI] through [EDI+2], then advance EDI by ECX to the next scan line. The code is labelled ENTRY_LABEL followed by ENTRY_INDEX.
        !           968: 
        !           969: DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !           970: &ENTRY_LABEL&ENTRY_INDEX&:
        !           971:         and     [edi],al                ;byte 0: read, AND with AL, write back
        !           972:         and     [edi+1],al              ;byte 1
        !           973:         and     [edi+2],al              ;byte 2
        !           974:         add     edi,ecx                 ;point to the next scan line
        !           975:         endm    ;-----------------------------------;
        !           976: 
        !           977: ; 3-wide read/write loop: runs the unrolled DRAW_3_WIDE_RW code until EBX (unrolled loop count) reaches zero. AL = pixel mask, ECX = scan line width in bytes, EDI = start offset; EBX and EDI are modified.
        !           978: 
        !           979:         align   4
        !           980: draw_3_wide_rw_loop     proc    near
        !           981:         UNROLL_LOOP     DRAW_3_WIDE_RW,RW3,LOOP_UNROLL_COUNT
        !           982:         dec     ebx                     ;one pass through the unrolled code is done
        !           983:         jnz     draw_3_wide_rw_loop     ;EBX not yet zero: run the unrolled code again
        !           984: 
        !           985:         ret                             ;EBX reached zero
        !           986: 
        !           987: draw_3_wide_rw_loop     endp
        !           988: 
        !           989: ;-----------------------------------------------------------------------;
        !           990: ; Macro to draw four read before write bytes at [EDI] through [EDI+3], then advance EDI by ECX to the next scan line. The code is labelled ENTRY_LABEL followed by ENTRY_INDEX.
        !           991: 
        !           992: DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !           993: &ENTRY_LABEL&ENTRY_INDEX&:
        !           994:         and     [edi],al                ;byte 0: read, AND with AL, write back
        !           995:         and     [edi+1],al              ;byte 1
        !           996:         and     [edi+2],al              ;byte 2
        !           997:         and     [edi+3],al              ;byte 3
        !           998:         add     edi,ecx                 ;point to the next scan line
        !           999:         endm    ;-----------------------------------;
        !          1000: 
        !          1001: ; 4-wide read/write loop: runs the unrolled DRAW_4_WIDE_RW code until EBX (unrolled loop count) reaches zero. AL = pixel mask, ECX = scan line width in bytes, EDI = start offset; EBX and EDI are modified.
        !          1002: 
        !          1003:         align   4
        !          1004: draw_4_wide_rw_loop     proc    near
        !          1005:         UNROLL_LOOP     DRAW_4_WIDE_RW,RW4,LOOP_UNROLL_COUNT
        !          1006:         dec     ebx                     ;one pass through the unrolled code is done
        !          1007:         jnz     draw_4_wide_rw_loop     ;EBX not yet zero: run the unrolled code again
        !          1008: 
        !          1009:         ret                             ;EBX reached zero
        !          1010: 
        !          1011: draw_4_wide_rw_loop     endp
        !          1012: 
        !          1013: ;-----------------------------------------------------------------------;
        !          1014: ; Unrolled 5-or-wider read before write loop.
        !          1015: ;
        !          1016: ; Entry:
        !          1017: ;       EAX = # of bytes to fill across scan line (needed only by 5-or-wider
        !          1018: ;               handler)
        !          1019: ;       EBX = unrolled loop count
        !          1020: ;       EDX = offset from end of one scan line to the start of the next
        !          1021: ;       EDI = start offset
        !          1022: ;
        !          1023: ; EBX, ECX, ESI, EDI modified. All other registers preserved.
        !          1024: 
        !          1025: ;-----------------------------------------------------------------------;
        !          1026: ; Macro to draw five or more read before write bytes, then advance to
        !          1027: ; next scan line. (Actually, will handle any number of bytes,
        !          1028: ; including 0, but there are special-case handlers for narrow cases.)
        !          1029: ; Works because reads of display memory return 0ffh, which then becomes the
        !          1030: ; Bit Mask as it's written in write mode 3. Uses EAX (byte count) and EDX (offset to next scan line); modifies ECX, ESI, and EDI.
        !          1031: 
        !          1032: DRAW_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !          1033: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1034:         mov     esi,edi                 ;source = destination: each byte is read, then written back in place
        !          1035:         mov     ecx,eax                 ;ECX = # of bytes to fill across the scan line
        !          1036:         rep     movsb                   ;read/write ECX bytes; NOTE(review): relies on DF being clear and ES matching DS -- neither is set in this view, confirm in caller
        !          1037:         add     edi,edx                 ;EDX = offset from end of this scan line to start of the next
        !          1038:         endm    ;-----------------------------------;
        !          1039: 
        !          1040: ; 5-or-wider read/write loop: runs the unrolled DRAW_WIDE_RW code until EBX (unrolled loop count) reaches zero. EAX = # of bytes per scan line, EDX = offset to next scan line, EDI = start offset; EBX, ECX, ESI, EDI are modified.
        !          1041: 
        !          1042:         align   4
        !          1043: draw_wide_rw_loop proc  near
        !          1044:         UNROLL_LOOP     DRAW_WIDE_RW,RWWIDE,LOOP_UNROLL_COUNT
        !          1045:         dec     ebx                     ;one pass through the unrolled code is done
        !          1046:         jnz     draw_wide_rw_loop       ;EBX not yet zero: run the unrolled code again
        !          1047: 
        !          1048:         ret                             ;EBX reached zero
        !          1049: 
        !          1050: draw_wide_rw_loop endp
        !          1051: 
        !          1052: 
        !          1053: ;-----------------------------------------------------------------------;
        !          1054: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up),
        !          1055: ; for cases where read before write is NOT required.
        !          1056: ;-----------------------------------------------------------------------;
        !          1057: 
        !          1058: ; Tables of entry points into unrolled 1- through 8-wide write-only loops.
        !          1059: ; Note that there may be separate entry tables for various alignments of a
        !          1060: ; specific width, in cases where performance can be improved by using different
        !          1061: ; code for different alignments (the _EVEN/_ODD and _MOD3_n tables below).
        !          1062: 
        !          1063:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideWEntry,W1,LOOP_UNROLL_COUNT
        !          1064:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideWEntry,W2,LOOP_UNROLL_COUNT
        !          1065:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT
        !          1066:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWOddEntry,W3_ODD,LOOP_UNROLL_COUNT
        !          1067:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideWEntry,W4,LOOP_UNROLL_COUNT
        !          1068:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT
        !          1069:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWOddEntry,W5_ODD,LOOP_UNROLL_COUNT
        !          1070:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_0Entry,W6_MOD3_0,LOOP_UNROLL_COUNT
        !          1071:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_1Entry,W6_MOD3_1,LOOP_UNROLL_COUNT
        !          1072:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_2Entry,W6_MOD3_2,LOOP_UNROLL_COUNT
        !          1073:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_0Entry,W7_MOD3_0,LOOP_UNROLL_COUNT
        !          1074:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_1Entry,W7_MOD3_1,LOOP_UNROLL_COUNT
        !          1075:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_2Entry,W7_MOD3_2,LOOP_UNROLL_COUNT
        !          1076:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_3Entry,W7_MOD3_3,LOOP_UNROLL_COUNT
        !          1077:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_0Entry,W8_MOD3_0,LOOP_UNROLL_COUNT
        !          1078:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_1Entry,W8_MOD3_1,LOOP_UNROLL_COUNT
        !          1079:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_2Entry,W8_MOD3_2,LOOP_UNROLL_COUNT
        !          1080:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_3Entry,W8_MOD3_3,LOOP_UNROLL_COUNT
        !          1081: 
        !          1082: 
        !          1083: ;-----------------------------------------------------------------------;
        !          1084: ; Unrolled 1- through 8-wide write-only edge-drawing loops.
        !          1085: ;
        !          1086: ; Entry:
        !          1087: ;      AL/AX/EAX = pixel mask (if AX or EAX, then 0xFFFF or 0xFFFFFFFF)
        !          1088: ;       EBX = unrolled loop count
        !          1089: ;       ECX = scan line width in bytes
        !          1090: ;       EDI = start offset
        !          1091: ;
        !          1092: ; EBX, EDI modified. All other registers preserved.
        !          1093: 
        !          1094: ;-----------------------------------------------------------------------;
        !          1095: ; Macro to draw one write-only byte at [EDI], then advance EDI by ECX to the next scan line. The code is labelled ENTRY_LABEL followed by ENTRY_INDEX.
        !          1096: 
        !          1097: DRAW_1_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
        !          1098: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1099:         mov     [edi],al                ;plain store, no read of display memory: AL is written
        !          1100:                                        ; as-is; because we're in write mode 3,
        !          1101:                                        ; AL becomes the Bit Mask
        !          1102:         add     edi,ecx                 ;point to the next scan line
        !          1103:         endm    ;-----------------------------------;
        !          1104: 
        !          1105: ; 1-wide write-only loop: runs the unrolled DRAW_1_WIDE_W code until EBX (unrolled loop count) reaches zero. EBX and EDI are modified.
        !          1106: 
        !          1107:         align   4
        !          1108: draw_1_wide_w_loop     proc    near
        !          1109:         UNROLL_LOOP     DRAW_1_WIDE_W,W1,LOOP_UNROLL_COUNT
        !          1110:         dec     ebx                     ;one pass through the unrolled code is done
        !          1111:         jnz     draw_1_wide_w_loop      ;EBX not yet zero: run the unrolled code again
        !          1112: 
        !          1113:         ret                             ;EBX reached zero
        !          1114: 
        !          1115: draw_1_wide_w_loop     endp
        !          1116: 
        !          1117: ;-----------------------------------------------------------------------;
        !          1118: ; Macro to draw two write-only bytes with a single word store of AX, then advance EDI by ECX to the next scan line.
        !          1119: 
        !          1120: DRAW_2_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
        !          1121: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1122:         mov     [edi],ax                ;bytes 0-1
        !          1123:         add     edi,ecx                 ;point to the next scan line
        !          1124:         endm    ;-----------------------------------;
        !          1125: 
        !          1126: ; 2-wide write-only loop: runs the unrolled DRAW_2_WIDE_W code until EBX (unrolled loop count) reaches zero. EBX and EDI are modified.
        !          1127: 
        !          1128:         align   4
        !          1129: draw_2_wide_w_loop     proc    near
        !          1130:         UNROLL_LOOP     DRAW_2_WIDE_W,W2,LOOP_UNROLL_COUNT
        !          1131:         dec     ebx                     ;one pass through the unrolled code is done
        !          1132:         jnz     draw_2_wide_w_loop      ;EBX not yet zero: run the unrolled code again
        !          1133: 
        !          1134:         ret                             ;EBX reached zero
        !          1135: 
        !          1136: draw_2_wide_w_loop     endp
        !          1137: 
        !          1138: ;-----------------------------------------------------------------------;
        !          1139: ; Macro to draw three write-only bytes, then advance EDI by ECX to the next scan line.
        !          1140: ; Optimized for even start address: the word store comes first, at [EDI].
        !          1141: 
        !          1142: DRAW_3_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
        !          1143: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1144:         mov     [edi],ax                ;bytes 0-1 (word)
        !          1145:         mov     [edi+2],al              ;byte 2
        !          1146:         add     edi,ecx                 ;point to the next scan line
        !          1147:         endm    ;-----------------------------------;
        !          1148: 
        !          1149: ; 3-wide write-only loop, starting at an even address: runs the unrolled DRAW_3_WIDE_W_EVEN code until EBX (unrolled loop count) reaches zero.
        !          1150: 
        !          1151:         align   4
        !          1152: draw_3_wide_w_even_loop     proc    near
        !          1153:         UNROLL_LOOP     DRAW_3_WIDE_W_EVEN,W3_EVEN,LOOP_UNROLL_COUNT
        !          1154:         dec     ebx                     ;one pass through the unrolled code is done
        !          1155:         jnz     draw_3_wide_w_even_loop ;EBX not yet zero: run the unrolled code again
        !          1156: 
        !          1157:         ret                             ;EBX reached zero
        !          1158: 
        !          1159: draw_3_wide_w_even_loop     endp
        !          1160: 
        !          1161: ;-----------------------------------------------------------------------;
        !          1162: ; Macro to draw three write-only bytes, then advance EDI by ECX to the next scan line.
        !          1163: ; Optimized for odd start address: a leading byte store puts the word store at [EDI+1], an even address.
        !          1164: 
        !          1165: DRAW_3_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
        !          1166: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1167:         mov     [edi],al                ;byte 0
        !          1168:         mov     [edi+1],ax              ;bytes 1-2 (word)
        !          1169:         add     edi,ecx                 ;point to the next scan line
        !          1170:         endm    ;-----------------------------------;
        !          1171: 
        !          1172: ; 3-wide write-only loop, starting at an odd address: runs the unrolled DRAW_3_WIDE_W_ODD code until EBX (unrolled loop count) reaches zero.
        !          1173: 
        !          1174:         align   4
        !          1175: draw_3_wide_w_odd_loop     proc    near
        !          1176:         UNROLL_LOOP     DRAW_3_WIDE_W_ODD,W3_ODD,LOOP_UNROLL_COUNT
        !          1177:         dec     ebx                     ;one pass through the unrolled code is done
        !          1178:         jnz     draw_3_wide_w_odd_loop  ;EBX not yet zero: run the unrolled code again
        !          1179: 
        !          1180:         ret                             ;EBX reached zero
        !          1181: 
        !          1182: draw_3_wide_w_odd_loop     endp
        !          1183: 
        !          1184: 
        !          1185: ;-----------------------------------------------------------------------;
        !          1186: ; Macro to draw four write-only bytes with a single dword store of EAX, then advance EDI by ECX to the next scan line.
        !          1187: 
        !          1188: DRAW_4_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
        !          1189: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1190:         mov     [edi],eax               ;bytes 0-3
        !          1191:         add     edi,ecx                 ;point to the next scan line
        !          1192:         endm    ;-----------------------------------;
        !          1193: 
        !          1194: ; 4-wide write-only loop: runs the unrolled DRAW_4_WIDE_W code until EBX (unrolled loop count) reaches zero. EBX and EDI are modified.
        !          1195: 
        !          1196:         align   4
        !          1197: draw_4_wide_w_loop     proc    near
        !          1198:         UNROLL_LOOP     DRAW_4_WIDE_W,W4,LOOP_UNROLL_COUNT
        !          1199:         dec     ebx                     ;one pass through the unrolled code is done
        !          1200:         jnz     draw_4_wide_w_loop      ;EBX not yet zero: run the unrolled code again
        !          1201: 
        !          1202:         ret                             ;EBX reached zero
        !          1203: 
        !          1204: draw_4_wide_w_loop     endp
        !          1205: 
        !          1206: 
        !          1207: ;-----------------------------------------------------------------------;
        !          1208: ; Macro to draw five write-only bytes, then advance EDI by ECX to the next scan line.
        !          1209: ; Optimized for even start address: the dword store comes first, at [EDI].
        !          1210: 
        !          1211: DRAW_5_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
        !          1212: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1213:         mov     [edi],eax               ;bytes 0-3 (dword)
        !          1214:         mov     [edi+4],al              ;byte 4
        !          1215:         add     edi,ecx                 ;point to the next scan line
        !          1216:         endm    ;-----------------------------------;
        !          1217: 
        !          1218: ; 5-wide write-only loop, starting at an even address: runs the unrolled DRAW_5_WIDE_W_EVEN code until EBX (unrolled loop count) reaches zero.
        !          1219: 
        !          1220:         align   4
        !          1221: draw_5_wide_w_even_loop     proc    near
        !          1222:         UNROLL_LOOP     DRAW_5_WIDE_W_EVEN,W5_EVEN,LOOP_UNROLL_COUNT
        !          1223:         dec     ebx                     ;one pass through the unrolled code is done
        !          1224:         jnz     draw_5_wide_w_even_loop ;EBX not yet zero: run the unrolled code again
        !          1225: 
        !          1226:         ret                             ;EBX reached zero
        !          1227: 
        !          1228: draw_5_wide_w_even_loop     endp
        !          1229: 
        !          1230: 
        !          1231: ;-----------------------------------------------------------------------;
        !          1232: ; Macro to draw five write-only bytes, then advance EDI by ECX to the next scan line.
        !          1233: ; Optimized for odd start address: a leading byte store puts the dword store at [EDI+1], an even address.
        !          1234: 
        !          1235: DRAW_5_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
        !          1236: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1237:         mov     [edi],al                ;byte 0
        !          1238:         mov     [edi+1],eax             ;bytes 1-4 (dword)
        !          1239:         add     edi,ecx                 ;point to the next scan line
        !          1240:         endm    ;-----------------------------------;
        !          1241: 
        !          1242: ; 5-wide write-only loop, starting at an odd address: runs the unrolled DRAW_5_WIDE_W_ODD code until EBX (unrolled loop count) reaches zero.
        !          1243: 
        !          1244:         align   4
        !          1245: draw_5_wide_w_odd_loop     proc    near
        !          1246:         UNROLL_LOOP     DRAW_5_WIDE_W_ODD,W5_ODD,LOOP_UNROLL_COUNT
        !          1247:         dec     ebx                     ;one pass through the unrolled code is done
        !          1248:         jnz     draw_5_wide_w_odd_loop  ;EBX not yet zero: run the unrolled code again
        !          1249: 
        !          1250:         ret                             ;EBX reached zero
        !          1251: 
        !          1252: draw_5_wide_w_odd_loop     endp
        !          1253: 
        !          1254: 
        !          1255: ;-----------------------------------------------------------------------;
        !          1256: ; Macro to draw six write-only bytes, then advance EDI by ECX to the next scan line.
        !          1257: ; Optimized for start address MOD 3 == 0. NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits) -- confirm at the dispatch code.
        !          1258: 
        !          1259: DRAW_6_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
        !          1260: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1261:         mov     [edi],eax               ;bytes 0-3 (dword)
        !          1262:         mov     [edi+4],ax              ;bytes 4-5 (word)
        !          1263:         add     edi,ecx                 ;point to the next scan line
        !          1264:         endm    ;-----------------------------------;
        !          1265: 
        !          1266: ; 6-wide write-only loop, starting at MOD 3 == 0: runs the unrolled DRAW_6_WIDE_W_MOD3_0 code until EBX (unrolled loop count) reaches zero.
        !          1267: 
        !          1268:         align   4
        !          1269: draw_6_wide_w_mod3_0_loop     proc    near
        !          1270:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_0,W6_MOD3_0,LOOP_UNROLL_COUNT
        !          1271:         dec     ebx                     ;one pass through the unrolled code is done
        !          1272:         jnz     draw_6_wide_w_mod3_0_loop ;EBX not yet zero: run the unrolled code again
        !          1273: 
        !          1274:         ret                             ;EBX reached zero
        !          1275: 
        !          1276: draw_6_wide_w_mod3_0_loop     endp
        !          1277: 
        !          1278: 
        !          1279: ;-----------------------------------------------------------------------;
        !          1280: ; Macro to draw six write-only bytes, then advance EDI by ECX to the next scan line.
        !          1281: ; Optimized for start address MOD 3 == 1 or 3 (odd start addresses). NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits) -- confirm at the dispatch code.
        !          1282: 
        !          1283: DRAW_6_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
        !          1284: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1285:         mov     [edi],al                ;byte 0
        !          1286:         mov     [edi+1],eax             ;bytes 1-4 (dword)
        !          1287:         mov     [edi+5],al              ;byte 5
        !          1288:         add     edi,ecx                 ;point to the next scan line
        !          1289:         endm    ;-----------------------------------;
        !          1290: 
        !          1291: ; 6-wide write-only loop, starting at MOD 3 == 1 or 3: runs the unrolled DRAW_6_WIDE_W_MOD3_1 code until EBX (unrolled loop count) reaches zero.
        !          1292: 
        !          1293:         align   4
        !          1294: draw_6_wide_w_mod3_1_loop     proc    near
        !          1295:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_1,W6_MOD3_1,LOOP_UNROLL_COUNT
        !          1296:         dec     ebx                     ;one pass through the unrolled code is done
        !          1297:         jnz     draw_6_wide_w_mod3_1_loop ;EBX not yet zero: run the unrolled code again
        !          1298: 
        !          1299:         ret                             ;EBX reached zero
        !          1300: 
        !          1301: draw_6_wide_w_mod3_1_loop     endp
        !          1302: 
        !          1303: 
        !          1304: ;-----------------------------------------------------------------------;
        !          1305: ; Macro to draw six write-only bytes, then advance EDI by ECX to the next scan line.
        !          1306: ; Optimized for start address MOD 3 == 2. NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits) -- confirm at the dispatch code.
        !          1307: 
        !          1308: DRAW_6_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
        !          1309: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1310:         mov     [edi],ax                ;bytes 0-1 (word)
        !          1311:         mov     [edi+2],eax             ;bytes 2-5 (dword)
        !          1312:         add     edi,ecx                 ;point to the next scan line
        !          1313:         endm    ;-----------------------------------;
        !          1314: 
        !          1315: ; 6-wide write-only loop, starting at MOD 3 == 2: runs the unrolled DRAW_6_WIDE_W_MOD3_2 code until EBX (unrolled loop count) reaches zero.
        !          1316: 
        !          1317:         align   4
        !          1318: draw_6_wide_w_mod3_2_loop     proc    near
        !          1319:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_2,W6_MOD3_2,LOOP_UNROLL_COUNT
        !          1320:         dec     ebx                     ;one pass through the unrolled code is done
        !          1321:         jnz     draw_6_wide_w_mod3_2_loop ;EBX not yet zero: run the unrolled code again
        !          1322: 
        !          1323:         ret                             ;EBX reached zero
        !          1324: 
        !          1325: draw_6_wide_w_mod3_2_loop     endp
        !          1326: 
        !          1327: 
        !          1328: ;-----------------------------------------------------------------------;
        !          1329: ; Macro to draw seven write-only bytes, then advance EDI by ECX to the next scan line.
        !          1330: ; Optimized for start address MOD 3 == 0. NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits), since a MOD3_3 variant exists -- confirm at the dispatch code.
        !          1331: 
        !          1332: DRAW_7_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
        !          1333: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1334:         mov     [edi],eax               ;bytes 0-3 (dword)
        !          1335:         mov     [edi+4],ax              ;bytes 4-5 (word)
        !          1336:         mov     [edi+6],al              ;byte 6
        !          1337:         add     edi,ecx                 ;point to the next scan line
        !          1338:         endm    ;-----------------------------------;
        !          1339: 
        !          1340: ; 7-wide write-only loop, starting at MOD 3 == 0: runs the unrolled DRAW_7_WIDE_W_MOD3_0 code until EBX (unrolled loop count) reaches zero.
        !          1341: 
        !          1342:         align   4
        !          1343: draw_7_wide_w_mod3_0_loop     proc    near
        !          1344:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_0,W7_MOD3_0,LOOP_UNROLL_COUNT
        !          1345:         dec     ebx                     ;one pass through the unrolled code is done
        !          1346:         jnz     draw_7_wide_w_mod3_0_loop ;EBX not yet zero: run the unrolled code again
        !          1347: 
        !          1348:         ret                             ;EBX reached zero
        !          1349: 
        !          1350: draw_7_wide_w_mod3_0_loop     endp
        !          1351: 
        !          1352: 
        !          1353: ;-----------------------------------------------------------------------;
        !          1354: ; Macro to draw seven write-only bytes, then advance EDI by ECX to the next scan line.
        !          1355: ; Optimized for start address MOD 3 == 1. NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits), since a MOD3_3 variant exists -- confirm at the dispatch code.
        !          1356: 
        !          1357: DRAW_7_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
        !          1358: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1359:         mov     [edi],al                ;byte 0
        !          1360:         mov     [edi+1],ax              ;bytes 1-2 (word)
        !          1361:         mov     [edi+3],eax             ;bytes 3-6 (dword)
        !          1362:         add     edi,ecx                 ;point to the next scan line
        !          1363:         endm    ;-----------------------------------;
        !          1364: 
        !          1365: ; 7-wide write-only loop, starting at MOD 3 == 1: runs the unrolled DRAW_7_WIDE_W_MOD3_1 code until EBX (unrolled loop count) reaches zero.
        !          1366: 
        !          1367:         align   4
        !          1368: draw_7_wide_w_mod3_1_loop     proc    near
        !          1369:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_1,W7_MOD3_1,LOOP_UNROLL_COUNT
        !          1370:         dec     ebx                     ;one pass through the unrolled code is done
        !          1371:         jnz     draw_7_wide_w_mod3_1_loop ;EBX not yet zero: run the unrolled code again
        !          1372: 
        !          1373:         ret                             ;EBX reached zero
        !          1374: 
        !          1375: draw_7_wide_w_mod3_1_loop     endp
        !          1376: 
        !          1377: 
        !          1378: ;-----------------------------------------------------------------------;
        !          1379: ; Macro to draw seven write-only bytes, then advance EDI by ECX to the next scan line.
        !          1380: ; Optimized for start address MOD 3 == 2. NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits), since a MOD3_3 variant exists -- confirm at the dispatch code.
        !          1381: 
        !          1382: DRAW_7_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
        !          1383: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1384:         mov     [edi],ax                ;bytes 0-1 (word)
        !          1385:         mov     [edi+2],eax             ;bytes 2-5 (dword)
        !          1386:         mov     [edi+6],al              ;byte 6
        !          1387:         add     edi,ecx                 ;point to the next scan line
        !          1388:         endm    ;-----------------------------------;
        !          1389: 
        !          1390: ; 7-wide write-only loop, starting at MOD 3 == 2: runs the unrolled DRAW_7_WIDE_W_MOD3_2 code until EBX (unrolled loop count) reaches zero.
        !          1391: 
        !          1392:         align   4
        !          1393: draw_7_wide_w_mod3_2_loop     proc    near
        !          1394:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_2,W7_MOD3_2,LOOP_UNROLL_COUNT
        !          1395:         dec     ebx                     ;one pass through the unrolled code is done
        !          1396:         jnz     draw_7_wide_w_mod3_2_loop ;EBX not yet zero: run the unrolled code again
        !          1397: 
        !          1398:         ret                             ;EBX reached zero
        !          1399: 
        !          1400: draw_7_wide_w_mod3_2_loop     endp
        !          1401: 
        !          1402: 
        !          1403: ;-----------------------------------------------------------------------;
        !          1404: ; Macro to draw seven write-only bytes, then advance EDI by ECX to the next scan line.
        !          1405: ; Optimized for start address MOD 3 == 3. NOTE(review): a true modulo-3 remainder cannot be 3, so "MOD 3" appears to mean start address AND 3 (the low two address bits) -- confirm at the dispatch code.
        !          1406: 
        !          1407: DRAW_7_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
        !          1408: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1409:         mov     [edi],al                ;byte 0
        !          1410:         mov     [edi+1],eax             ;bytes 1-4 (dword)
        !          1411:         mov     [edi+5],ax              ;bytes 5-6 (word)
        !          1412:         add     edi,ecx                 ;point to the next scan line
        !          1413:         endm    ;-----------------------------------;
        !          1414: 
        !          1415: ; 7-wide write-only loop, starting at MOD 3 == 3: runs the unrolled DRAW_7_WIDE_W_MOD3_3 code until EBX (unrolled loop count) reaches zero.
        !          1416: 
        !          1417:         align   4
        !          1418: draw_7_wide_w_mod3_3_loop     proc    near
        !          1419:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_3,W7_MOD3_3,LOOP_UNROLL_COUNT
        !          1420:         dec     ebx                     ;one pass through the unrolled code is done
        !          1421:         jnz     draw_7_wide_w_mod3_3_loop ;EBX not yet zero: run the unrolled code again
        !          1422: 
        !          1423:         ret                             ;EBX reached zero
        !          1424: 
        !          1425: draw_7_wide_w_mod3_3_loop     endp
        !          1426: 
        !          1427: 
        !          1428: ;-----------------------------------------------------------------------;
        !          1429: ; Macro to draw eight write-only bytes with two dword stores, then advance EDI by ECX to the next scan line.
        !          1430: ; Optimized for start address MOD 3 == 0. NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits) -- confirm at the dispatch code.
        !          1431: 
        !          1432: DRAW_8_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
        !          1433: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1434:         mov     [edi],eax               ;bytes 0-3 (dword)
        !          1435:         mov     [edi+4],eax             ;bytes 4-7 (dword)
        !          1436:         add     edi,ecx                 ;point to the next scan line
        !          1437:         endm    ;-----------------------------------;
        !          1438: 
        !          1439: ; 8-wide write-only loop, starting at MOD 3 == 0: runs the unrolled DRAW_8_WIDE_W_MOD3_0 code until EBX (unrolled loop count) reaches zero.
        !          1440: 
        !          1441:         align   4
        !          1442: draw_8_wide_w_mod3_0_loop     proc    near
        !          1443:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_0,W8_MOD3_0,LOOP_UNROLL_COUNT
        !          1444:         dec     ebx                     ;one pass through the unrolled code is done
        !          1445:         jnz     draw_8_wide_w_mod3_0_loop ;EBX not yet zero: run the unrolled code again
        !          1446: 
        !          1447:         ret                             ;EBX reached zero
        !          1448: 
        !          1449: draw_8_wide_w_mod3_0_loop     endp
        !          1450: 
        !          1451: 
        !          1452: ;-----------------------------------------------------------------------;
        !          1453: ; Macro to draw eight write-only bytes, then advance EDI by ECX to the next scan line.
        !          1454: ; Optimized for start address MOD 3 == 1. NOTE(review): "MOD 3" appears to mean start address AND 3 (the low two address bits) -- confirm at the dispatch code.
        !          1455: 
        !          1456: DRAW_8_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
        !          1457: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1458:         mov     [edi],al                ;byte 0
        !          1459:         mov     [edi+1],ax              ;bytes 1-2 (word)
        !          1460:         mov     [edi+3],eax             ;bytes 3-6 (dword)
        !          1461:         mov     [edi+7],al              ;byte 7
        !          1462:         add     edi,ecx                 ;point to the next scan line
        !          1463:         endm    ;-----------------------------------;
        !          1464: 
        !          1465: ; 8-wide write-only, starting at MOD 3 == 0.
        !          1466: 
        !          1467:         align   4
        !          1468: draw_8_wide_w_mod3_1_loop     proc    near
        !          1469:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_1,W8_MOD3_1,LOOP_UNROLL_COUNT
        !          1470:         dec     ebx
        !          1471:         jnz     draw_8_wide_w_mod3_1_loop
        !          1472: 
        !          1473:         ret
        !          1474: 
        !          1475: draw_8_wide_w_mod3_1_loop     endp
        !          1476: 
        !          1477: 
        !          1478: ;-----------------------------------------------------------------------;
        !          1479: ; Macro to draw eight write-only bytes, then advance to next scan line.
        !          1480: ; Optimized for start address AND 3 == 2 (that is, address MOD 4 == 2).
        !          1481: 
        !          1482: DRAW_8_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
        !          1483: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1484:         mov     [edi],ax                ;bytes 0-1 as one word (EAX = fill data)
        !          1485:         mov     [edi+2],eax             ;bytes 2-5 as one dword (dword aligned when start AND 3 == 2)
        !          1486:         mov     [edi+6],ax              ;bytes 6-7 as one word
        !          1487:         add     edi,ecx                 ;point to the next scan line (EDI was not advanced by the stores)
        !          1488:         endm    ;-----------------------------------;
        !          1489: 
        !          1490: ; 8-wide write-only, starting at address AND 3 == 2.
        !          1491: 
        !          1492:         align   4
        !          1493: draw_8_wide_w_mod3_2_loop     proc    near
        !          1494:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_2,W8_MOD3_2,LOOP_UNROLL_COUNT
        !          1495:         dec     ebx                     ;one pass through the unrolled body done
        !          1496:         jnz     draw_8_wide_w_mod3_2_loop ;repeat until EBX reaches zero
        !          1497: 
        !          1498:         ret
        !          1499: 
        !          1500: draw_8_wide_w_mod3_2_loop     endp
        !          1501: 
        !          1502: 
        !          1503: ;-----------------------------------------------------------------------;
        !          1504: ; Macro to draw eight write-only bytes, then advance to next scan line.
        !          1505: ; Optimized for start address AND 3 == 3 (that is, address MOD 4 == 3).
        !          1506: 
        !          1507: DRAW_8_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
        !          1508: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1509:         mov     [edi],al                ;byte 0 (EAX = fill data)
        !          1510:         mov     [edi+1],eax             ;bytes 1-4 as one dword (dword aligned when start AND 3 == 3)
        !          1511:         mov     [edi+5],ax              ;bytes 5-6 as one word
        !          1512:         mov     [edi+7],al              ;byte 7
        !          1513:         add     edi,ecx                 ;point to the next scan line (EDI was not advanced by the stores)
        !          1514:         endm    ;-----------------------------------;
        !          1515: 
        !          1516: ; 8-wide write-only, starting at address AND 3 == 3.
        !          1517: 
        !          1518:         align   4
        !          1519: draw_8_wide_w_mod3_3_loop     proc    near
        !          1520:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_3,W8_MOD3_3,LOOP_UNROLL_COUNT
        !          1521:         dec     ebx                     ;one pass through the unrolled body done
        !          1522:         jnz     draw_8_wide_w_mod3_3_loop ;repeat until EBX reaches zero
        !          1523: 
        !          1524:         ret
        !          1525: 
        !          1526: draw_8_wide_w_mod3_3_loop     endp
        !          1527: 
        !          1528: ;-----------------------------------------------------------------------;
        !          1529: ; Unrolled loop stuff for wide replace-type rops (arbitrary width).
        !          1530: ;-----------------------------------------------------------------------;
        !          1531: 
        !          1532: ; Tables of entry points into unrolled wide write-only loops.
        !          1533:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW00Entry,W_00,LOOP_UNROLL_COUNT ;0 leading, 0 trailing bytes
        !          1534:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW01Entry,W_01,LOOP_UNROLL_COUNT ;0 leading, 1 trailing byte
        !          1535:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW02Entry,W_02,LOOP_UNROLL_COUNT ;0 leading, 2 trailing bytes
        !          1536:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW03Entry,W_03,LOOP_UNROLL_COUNT ;0 leading, 3 trailing bytes
        !          1537:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW10Entry,W_10,LOOP_UNROLL_COUNT ;1 leading, 0 trailing bytes
        !          1538:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW11Entry,W_11,LOOP_UNROLL_COUNT ;1 leading, 1 trailing byte
        !          1539:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW12Entry,W_12,LOOP_UNROLL_COUNT ;1 leading, 2 trailing bytes
        !          1540:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW13Entry,W_13,LOOP_UNROLL_COUNT ;1 leading, 3 trailing bytes
        !          1541:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW20Entry,W_20,LOOP_UNROLL_COUNT ;2 leading, 0 trailing bytes
        !          1542:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW21Entry,W_21,LOOP_UNROLL_COUNT ;2 leading, 1 trailing byte
        !          1543:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW22Entry,W_22,LOOP_UNROLL_COUNT ;2 leading, 2 trailing bytes
        !          1544:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW23Entry,W_23,LOOP_UNROLL_COUNT ;2 leading, 3 trailing bytes
        !          1545:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW30Entry,W_30,LOOP_UNROLL_COUNT ;3 leading, 0 trailing bytes
        !          1546:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW31Entry,W_31,LOOP_UNROLL_COUNT ;3 leading, 1 trailing byte
        !          1547:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW32Entry,W_32,LOOP_UNROLL_COUNT ;3 leading, 2 trailing bytes
        !          1548:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW33Entry,W_33,LOOP_UNROLL_COUNT ;3 leading, 3 trailing bytes
        !          1549: 
        !          1550: ;-----------------------------------------------------------------------;
        !          1551: ; Macro to draw n write-only bytes, 0 leading bytes, 0 trailing bytes,
        !          1552: ; then advance to next scan line.
        !          1553: 
        !          1554: DRAW_WIDE_W_00 macro ENTRY_LABEL,ENTRY_INDEX
        !          1555: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1556:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1557:         rep     stosd           ;fill all whole bytes as dwords (EDI advances past them)
        !          1558:         add     edi,edx         ;point to the next scan line
        !          1559:         endm    ;-----------------------------------;
        !          1560: 
        !          1561: ; N-wide write-only, 0 leading bytes, 0 trailing bytes.
        !          1562: ;  EAX = fill data written by STOSD (0ffffh per the original note - confirm in caller)
        !          1563: ;  EBX = count of scans to fill
        !          1564: ;  EDX = offset from end of one scan's fill to start of next
        !          1565: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1566: ;  EDI = target address to fill
        !          1567: 
        !          1568:         align   4
        !          1569: draw_wide_w_00_loop     proc    near
        !          1570:         UNROLL_LOOP     DRAW_WIDE_W_00,W_00,LOOP_UNROLL_COUNT
        !          1571:         dec     ebx             ;one pass through the unrolled body done
        !          1572:         jnz     draw_wide_w_00_loop ;repeat until EBX reaches zero
        !          1573: 
        !          1574:         ret
        !          1575: 
        !          1576: draw_wide_w_00_loop     endp
        !          1577: 
        !          1578: 
        !          1579: ;-----------------------------------------------------------------------;
        !          1580: ; Macro to draw n write-only bytes, 0 leading bytes, 1 trailing byte,
        !          1581: ; then advance to next scan line.
        !          1582: 
        !          1583: DRAW_WIDE_W_01 macro ENTRY_LABEL,ENTRY_INDEX
        !          1584: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1585:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1586:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1587:         stosb                   ;fill the trailing byte
        !          1588:         add     edi,edx         ;point to the next scan line
        !          1589:         endm    ;-----------------------------------;
        !          1590: 
        !          1591: ; N-wide write-only, 0 leading bytes, 1 trailing byte.
        !          1592: ;  EAX = fill data written by the STOS instructions
        !          1593: ;  EBX = count of scans to fill
        !          1594: ;  EDX = offset from end of one scan's fill to start of next
        !          1595: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1596: ;  EDI = target address to fill
        !          1597: 
        !          1598:         align   4
        !          1599: draw_wide_w_01_loop     proc    near
        !          1600:         UNROLL_LOOP     DRAW_WIDE_W_01,W_01,LOOP_UNROLL_COUNT
        !          1601:         dec     ebx             ;one pass through the unrolled body done
        !          1602:         jnz     draw_wide_w_01_loop ;repeat until EBX reaches zero
        !          1603: 
        !          1604:         ret
        !          1605: 
        !          1606: draw_wide_w_01_loop     endp
        !          1607: 
        !          1608: 
        !          1609: ;-----------------------------------------------------------------------;
        !          1610: ; Macro to draw n write-only bytes, 0 leading bytes, 2 trailing bytes,
        !          1611: ; then advance to next scan line.
        !          1612: 
        !          1613: DRAW_WIDE_W_02 macro ENTRY_LABEL,ENTRY_INDEX
        !          1614: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1615:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1616:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1617:         stosw                   ;fill the trailing word
        !          1618:         add     edi,edx         ;point to the next scan line
        !          1619:         endm    ;-----------------------------------;
        !          1620: 
        !          1621: ; N-wide write-only, 0 leading bytes, 2 trailing bytes.
        !          1622: ;  EAX = fill data written by the STOS instructions
        !          1623: ;  EBX = count of scans to fill
        !          1624: ;  EDX = offset from end of one scan's fill to start of next
        !          1625: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1626: ;  EDI = target address to fill
        !          1627: 
        !          1628:         align   4
        !          1629: draw_wide_w_02_loop     proc    near
        !          1630:         UNROLL_LOOP     DRAW_WIDE_W_02,W_02,LOOP_UNROLL_COUNT
        !          1631:         dec     ebx             ;one pass through the unrolled body done
        !          1632:         jnz     draw_wide_w_02_loop ;repeat until EBX reaches zero
        !          1633: 
        !          1634:         ret
        !          1635: 
        !          1636: draw_wide_w_02_loop     endp
        !          1637: 
        !          1638: 
        !          1639: ;-----------------------------------------------------------------------;
        !          1640: ; Macro to draw n write-only bytes, 0 leading bytes, 3 trailing bytes,
        !          1641: ; then advance to next scan line.
        !          1642: 
        !          1643: DRAW_WIDE_W_03 macro ENTRY_LABEL,ENTRY_INDEX
        !          1644: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1645:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1646:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1647:         stosw                   ;fill the first two trailing bytes as a word
        !          1648:         stosb                   ;fill the third trailing byte
        !          1649:         add     edi,edx         ;point to the next scan line
        !          1650:         endm    ;-----------------------------------;
        !          1651: 
        !          1652: ; N-wide write-only, 0 leading bytes, 3 trailing bytes.
        !          1653: ;  EAX = fill data written by the STOS instructions
        !          1654: ;  EBX = count of scans to fill
        !          1655: ;  EDX = offset from end of one scan's fill to start of next
        !          1656: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1657: ;  EDI = target address to fill
        !          1658: 
        !          1659:         align   4
        !          1660: draw_wide_w_03_loop     proc    near
        !          1661:         UNROLL_LOOP     DRAW_WIDE_W_03,W_03,LOOP_UNROLL_COUNT
        !          1662:         dec     ebx             ;one pass through the unrolled body done
        !          1663:         jnz     draw_wide_w_03_loop ;repeat until EBX reaches zero
        !          1664: 
        !          1665:         ret
        !          1666: 
        !          1667: draw_wide_w_03_loop     endp
        !          1668: 
        !          1669: 
        !          1670: ;-----------------------------------------------------------------------;
        !          1671: ; Macro to draw n write-only bytes, 1 leading byte, 0 trailing bytes,
        !          1672: ; then advance to next scan line.
        !          1673: 
        !          1674: DRAW_WIDE_W_10 macro ENTRY_LABEL,ENTRY_INDEX
        !          1675: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1676:         stosb                   ;fill the leading byte
        !          1677:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1678:         rep     stosd           ;fill all whole bytes as dwords (EDI advances past them)
        !          1679:         add     edi,edx         ;point to the next scan line
        !          1680:         endm    ;-----------------------------------;
        !          1681: 
        !          1682: ; N-wide write-only, 1 leading byte, 0 trailing bytes.
        !          1683: ;  EAX = fill data written by the STOS instructions
        !          1684: ;  EBX = count of scans to fill
        !          1685: ;  EDX = offset from end of one scan's fill to start of next
        !          1686: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1687: ;  EDI = target address to fill
        !          1688: 
        !          1689:         align   4
        !          1690: draw_wide_w_10_loop     proc    near
        !          1691:         UNROLL_LOOP     DRAW_WIDE_W_10,W_10,LOOP_UNROLL_COUNT
        !          1692:         dec     ebx             ;one pass through the unrolled body done
        !          1693:         jnz     draw_wide_w_10_loop ;repeat until EBX reaches zero
        !          1694: 
        !          1695:         ret
        !          1696: 
        !          1697: draw_wide_w_10_loop     endp
        !          1698: 
        !          1699: 
        !          1700: ;-----------------------------------------------------------------------;
        !          1701: ; Macro to draw n write-only bytes, 1 leading byte, 1 trailing byte,
        !          1702: ; then advance to next scan line.
        !          1703: 
        !          1704: DRAW_WIDE_W_11 macro ENTRY_LABEL,ENTRY_INDEX
        !          1705: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1706:         stosb                   ;fill the leading byte
        !          1707:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1708:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1709:         stosb                   ;fill the trailing byte
        !          1710:         add     edi,edx         ;point to the next scan line
        !          1711:         endm    ;-----------------------------------;
        !          1712: 
        !          1713: ; N-wide write-only, 1 leading byte, 1 trailing byte.
        !          1714: ;  EAX = fill data written by the STOS instructions
        !          1715: ;  EBX = count of scans to fill
        !          1716: ;  EDX = offset from end of one scan's fill to start of next
        !          1717: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1718: ;  EDI = target address to fill
        !          1719: 
        !          1720:         align   4
        !          1721: draw_wide_w_11_loop     proc    near
        !          1722:         UNROLL_LOOP     DRAW_WIDE_W_11,W_11,LOOP_UNROLL_COUNT
        !          1723:         dec     ebx             ;one pass through the unrolled body done
        !          1724:         jnz     draw_wide_w_11_loop ;repeat until EBX reaches zero
        !          1725: 
        !          1726:         ret
        !          1727: 
        !          1728: draw_wide_w_11_loop     endp
        !          1729: 
        !          1730: 
        !          1731: ;-----------------------------------------------------------------------;
        !          1732: ; Macro to draw n write-only bytes, 1 leading byte, 2 trailing bytes,
        !          1733: ; then advance to next scan line.
        !          1734: 
        !          1735: DRAW_WIDE_W_12 macro ENTRY_LABEL,ENTRY_INDEX
        !          1736: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1737:         stosb                   ;fill the leading byte
        !          1738:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1739:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1740:         stosw                   ;fill the trailing word
        !          1741:         add     edi,edx         ;point to the next scan line
        !          1742:         endm    ;-----------------------------------;
        !          1743: 
        !          1744: ; N-wide write-only, 1 leading byte, 2 trailing bytes.
        !          1745: ;  EAX = fill data written by the STOS instructions
        !          1746: ;  EBX = count of scans to fill
        !          1747: ;  EDX = offset from end of one scan's fill to start of next
        !          1748: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1749: ;  EDI = target address to fill
        !          1750: 
        !          1751:         align   4
        !          1752: draw_wide_w_12_loop     proc    near
        !          1753:         UNROLL_LOOP     DRAW_WIDE_W_12,W_12,LOOP_UNROLL_COUNT
        !          1754:         dec     ebx             ;one pass through the unrolled body done
        !          1755:         jnz     draw_wide_w_12_loop ;repeat until EBX reaches zero
        !          1756: 
        !          1757:         ret
        !          1758: 
        !          1759: draw_wide_w_12_loop     endp
        !          1760: 
        !          1761: 
        !          1762: ;-----------------------------------------------------------------------;
        !          1763: ; Macro to draw n write-only bytes, 1 leading byte, 3 trailing bytes,
        !          1764: ; then advance to next scan line.
        !          1765: 
        !          1766: DRAW_WIDE_W_13 macro ENTRY_LABEL,ENTRY_INDEX
        !          1767: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1768:         stosb                   ;fill the leading byte
        !          1769:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1770:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1771:         stosw                   ;fill the first two trailing bytes as a word
        !          1772:         stosb                   ;fill the third trailing byte
        !          1773:         add     edi,edx         ;point to the next scan line
        !          1774:         endm    ;-----------------------------------;
        !          1775: 
        !          1776: ; N-wide write-only, 1 leading byte, 3 trailing bytes.
        !          1777: ;  EAX = fill data written by the STOS instructions
        !          1778: ;  EBX = count of scans to fill
        !          1779: ;  EDX = offset from end of one scan's fill to start of next
        !          1780: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1781: ;  EDI = target address to fill
        !          1782: 
        !          1783:         align   4
        !          1784: draw_wide_w_13_loop     proc    near
        !          1785:         UNROLL_LOOP     DRAW_WIDE_W_13,W_13,LOOP_UNROLL_COUNT
        !          1786:         dec     ebx             ;one pass through the unrolled body done
        !          1787:         jnz     draw_wide_w_13_loop ;repeat until EBX reaches zero
        !          1788: 
        !          1789:         ret
        !          1790: 
        !          1791: draw_wide_w_13_loop     endp
        !          1792: 
        !          1793: 
        !          1794: ;-----------------------------------------------------------------------;
        !          1795: ; Macro to draw n write-only bytes, 2 leading bytes, 0 trailing bytes,
        !          1796: ; then advance to next scan line.
        !          1797: 
        !          1798: DRAW_WIDE_W_20 macro ENTRY_LABEL,ENTRY_INDEX
        !          1799: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1800:         stosw                   ;fill the leading word
        !          1801:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1802:         rep     stosd           ;fill all whole bytes as dwords (EDI advances past them)
        !          1803:         add     edi,edx         ;point to the next scan line
        !          1804:         endm    ;-----------------------------------;
        !          1805: 
        !          1806: ; N-wide write-only, 2 leading bytes, 0 trailing bytes.
        !          1807: ;  EAX = fill data written by the STOS instructions
        !          1808: ;  EBX = count of scans to fill
        !          1809: ;  EDX = offset from end of one scan's fill to start of next
        !          1810: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1811: ;  EDI = target address to fill
        !          1812: 
        !          1813:         align   4
        !          1814: draw_wide_w_20_loop     proc    near
        !          1815:         UNROLL_LOOP     DRAW_WIDE_W_20,W_20,LOOP_UNROLL_COUNT
        !          1816:         dec     ebx             ;one pass through the unrolled body done
        !          1817:         jnz     draw_wide_w_20_loop ;repeat until EBX reaches zero
        !          1818: 
        !          1819:         ret
        !          1820: 
        !          1821: draw_wide_w_20_loop     endp
        !          1822: 
        !          1823: 
        !          1824: ;-----------------------------------------------------------------------;
        !          1825: ; Macro to draw n write-only bytes, 2 leading bytes, 1 trailing byte,
        !          1826: ; then advance to next scan line.
        !          1827: 
        !          1828: DRAW_WIDE_W_21 macro ENTRY_LABEL,ENTRY_INDEX
        !          1829: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1830:         stosw                   ;fill the leading word
        !          1831:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1832:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1833:         stosb                   ;fill the trailing byte
        !          1834:         add     edi,edx         ;point to the next scan line
        !          1835:         endm    ;-----------------------------------;
        !          1836: 
        !          1837: ; N-wide write-only, 2 leading bytes, 1 trailing byte.
        !          1838: ;  EAX = fill data written by the STOS instructions
        !          1839: ;  EBX = count of scans to fill
        !          1840: ;  EDX = offset from end of one scan's fill to start of next
        !          1841: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1842: ;  EDI = target address to fill
        !          1843: 
        !          1844:         align   4
        !          1845: draw_wide_w_21_loop     proc    near
        !          1846:         UNROLL_LOOP     DRAW_WIDE_W_21,W_21,LOOP_UNROLL_COUNT
        !          1847:         dec     ebx             ;one pass through the unrolled body done
        !          1848:         jnz     draw_wide_w_21_loop ;repeat until EBX reaches zero
        !          1849: 
        !          1850:         ret
        !          1851: 
        !          1852: draw_wide_w_21_loop     endp
        !          1853: 
        !          1854: 
        !          1855: ;-----------------------------------------------------------------------;
        !          1856: ; Macro to draw n write-only bytes, 2 leading bytes, 2 trailing bytes,
        !          1857: ; then advance to next scan line.
        !          1858: 
        !          1859: DRAW_WIDE_W_22 macro ENTRY_LABEL,ENTRY_INDEX
        !          1860: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1861:         stosw                   ;fill the leading word
        !          1862:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1863:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1864:         stosw                   ;fill the trailing word
        !          1865:         add     edi,edx         ;point to the next scan line
        !          1866:         endm    ;-----------------------------------;
        !          1867: 
        !          1868: ; N-wide write-only, 2 leading bytes, 2 trailing bytes.
        !          1869: ;  EAX = fill data written by the STOS instructions
        !          1870: ;  EBX = count of scans to fill
        !          1871: ;  EDX = offset from end of one scan's fill to start of next
        !          1872: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1873: ;  EDI = target address to fill
        !          1874: 
        !          1875:         align   4
        !          1876: draw_wide_w_22_loop     proc    near
        !          1877:         UNROLL_LOOP     DRAW_WIDE_W_22,W_22,LOOP_UNROLL_COUNT
        !          1878:         dec     ebx             ;one pass through the unrolled body done
        !          1879:         jnz     draw_wide_w_22_loop ;repeat until EBX reaches zero
        !          1880: 
        !          1881:         ret
        !          1882: 
        !          1883: draw_wide_w_22_loop     endp
        !          1884: 
        !          1885: 
        !          1886: ;-----------------------------------------------------------------------;
        !          1887: ; Macro to draw n write-only bytes, 2 leading bytes, 3 trailing bytes,
        !          1888: ; then advance to next scan line.
        !          1889: 
        !          1890: DRAW_WIDE_W_23 macro ENTRY_LABEL,ENTRY_INDEX
        !          1891: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1892:         stosw                   ;fill the leading word
        !          1893:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1894:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1895:         stosw                   ;fill the first two trailing bytes as a word
        !          1896:         stosb                   ;fill the third trailing byte
        !          1897:         add     edi,edx         ;point to the next scan line
        !          1898:         endm    ;-----------------------------------;
        !          1899: 
        !          1900: ; N-wide write-only, 2 leading bytes, 3 trailing bytes.
        !          1901: ;  EAX = fill data written by the STOS instructions
        !          1902: ;  EBX = count of scans to fill
        !          1903: ;  EDX = offset from end of one scan's fill to start of next
        !          1904: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1905: ;  EDI = target address to fill
        !          1906: 
        !          1907:         align   4
        !          1908: draw_wide_w_23_loop     proc    near
        !          1909:         UNROLL_LOOP     DRAW_WIDE_W_23,W_23,LOOP_UNROLL_COUNT
        !          1910:         dec     ebx             ;one pass through the unrolled body done
        !          1911:         jnz     draw_wide_w_23_loop ;repeat until EBX reaches zero
        !          1912: 
        !          1913:         ret
        !          1914: 
        !          1915: draw_wide_w_23_loop     endp
        !          1916: 
        !          1917: 
        !          1918: ;-----------------------------------------------------------------------;
        !          1919: ; Macro to draw n write-only bytes, 3 leading bytes, 0 trailing bytes,
        !          1920: ; then advance to next scan line.
        !          1921: 
        !          1922: DRAW_WIDE_W_30 macro ENTRY_LABEL,ENTRY_INDEX
        !          1923: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1924:         stosb                   ;fill the first leading byte
        !          1925:         stosw                   ;fill the other two leading bytes as a word
        !          1926:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1927:         rep     stosd           ;fill all whole bytes as dwords (EDI advances past them)
        !          1928:         add     edi,edx         ;point to the next scan line
        !          1929:         endm    ;-----------------------------------;
        !          1930: 
        !          1931: ; N-wide write-only, 3 leading bytes, 0 trailing bytes.
        !          1932: ;  EAX = fill data written by the STOS instructions
        !          1933: ;  EBX = count of scans to fill
        !          1934: ;  EDX = offset from end of one scan's fill to start of next
        !          1935: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1936: ;  EDI = target address to fill
        !          1937: 
        !          1938:         align   4
        !          1939: draw_wide_w_30_loop     proc    near
        !          1940:         UNROLL_LOOP     DRAW_WIDE_W_30,W_30,LOOP_UNROLL_COUNT
        !          1941:         dec     ebx             ;one pass through the unrolled body done
        !          1942:         jnz     draw_wide_w_30_loop ;repeat until EBX reaches zero
        !          1943: 
        !          1944:         ret
        !          1945: 
        !          1946: draw_wide_w_30_loop     endp
        !          1947: 
        !          1948: 
        !          1949: ;-----------------------------------------------------------------------;
        !          1950: ; Macro to draw n write-only bytes, 3 leading bytes, 1 trailing byte,
        !          1951: ; then advance to next scan line.
        !          1952: 
        !          1953: DRAW_WIDE_W_31 macro ENTRY_LABEL,ENTRY_INDEX
        !          1954: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1955:         stosb                   ;fill the first leading byte
        !          1956:         stosw                   ;fill the other two leading bytes as a word
        !          1957:         mov     ecx,esi         ;ECX = # of whole dwords for this scan
        !          1958:         rep     stosd           ;fill whole bytes as dwords (EDI advances past them)
        !          1959:         stosb                   ;fill the trailing byte
        !          1960:         add     edi,edx         ;point to the next scan line
        !          1961:         endm    ;-----------------------------------;
        !          1962: 
        !          1963: ; N-wide write-only, 3 leading bytes, 1 trailing byte.
        !          1964: ;  EAX = fill data written by the STOS instructions
        !          1965: ;  EBX = count of scans to fill
        !          1966: ;  EDX = offset from end of one scan's fill to start of next
        !          1967: ;  ESI = # of dwords to fill (copied to ECX for each scan; ECX is destroyed)
        !          1968: ;  EDI = target address to fill
        !          1969: 
        !          1970:         align   4
        !          1971: draw_wide_w_31_loop     proc    near
        !          1972:         UNROLL_LOOP     DRAW_WIDE_W_31,W_31,LOOP_UNROLL_COUNT
        !          1973:         dec     ebx             ;one pass through the unrolled body done
        !          1974:         jnz     draw_wide_w_31_loop ;repeat until EBX reaches zero
        !          1975: 
        !          1976:         ret
        !          1977: 
        !          1978: draw_wide_w_31_loop     endp
        !          1979: 
        !          1980: 
        !          1981: ;-----------------------------------------------------------------------;
        !          1982: ; Macro to draw n write-only bytes, 3 leading bytes, 2 trailing bytes,
        !          1983: ; then advance to next scan line.
        !          1984: 
        !          1985: DRAW_WIDE_W_32 macro ENTRY_LABEL,ENTRY_INDEX
        !          1986: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1987:         stosb                   ;fill the leading byte
        !          1988:         stosw                   ;fill the leading word
        !          1989:         mov     ecx,esi         ;# of whole dwords
        !          1990:         rep     stosd           ;fill whole bytes as dwords
        !          1991:         stosw                   ;fill the trailing word
        !          1992:         add     edi,edx         ;point to the next scan line
        !          1993:         endm    ;-----------------------------------;
        !          1994: 
        !          1995: ; N-wide write-only, 3 leading bytess, 2 trailing byte.
        !          1996: ;  EAX = # of dwords to fill
        !          1997: ;  EBX = count of scans to fill
        !          1998: ;  EDX = offset from end of one scan's fill to start of next
        !          1999: ;  ESI = # of dwords to fill
        !          2000: ;  EDI = target address to fill
        !          2001: 
        !          2002:         align   4
        !          2003: draw_wide_w_32_loop     proc    near    ;fill loop: repeats the unrolled W_32 scan body while EBX != 0
        !          2004:         UNROLL_LOOP     DRAW_WIDE_W_32,W_32,LOOP_UNROLL_COUNT
        !          2005:         dec     ebx                     ;one pass through the unrolled body done (presumably EBX counts passes, not single scans -- confirm against UNROLL_LOOP and the caller)
        !          2006:         jnz     draw_wide_w_32_loop     ;EBX not yet 0: run the unrolled body again from the top
        !          2007: 
        !          2008:         ret                             ;EBX reached 0: near return to the caller
        !          2009: 
        !          2010: draw_wide_w_32_loop     endp
        !          2011: 
        !          2012: 
        !          2013: ;-----------------------------------------------------------------------;
        !          2014: ; Macro to draw n write-only bytes, 3 leading bytes, 3 trailing bytes,
        !          2015: ; then advance to next scan line.
        !          2016: 
        !          2017: DRAW_WIDE_W_33 macro ENTRY_LABEL,ENTRY_INDEX     ;one scan: 3 bytes, ESI dwords, 3 bytes, then step to next scan (assumes DF clear -- confirm in caller)
        !          2018: &ENTRY_LABEL&ENTRY_INDEX&:                      ;label built from the macro's two arguments
        !          2019:         stosb                   ;store AL at EDI: 1st of the 3 leading bytes
        !          2020:         stosw                   ;store AX: 2nd and 3rd leading bytes
        !          2021:         mov     ecx,esi         ;reload # of whole dwords (REP counts ECX down to 0 each scan)
        !          2022:         rep     stosd           ;store EAX ECX times: the whole dwords
        !          2023:         stosw                   ;store AX: 1st and 2nd trailing bytes
        !          2024:         stosb                   ;store AL: 3rd trailing byte
        !          2025:         add     edi,edx         ;EDX = distance from end of this scan's fill to start of next
        !          2026:         endm    ;-----------------------------------;
        !          2027: 
        !          2028: ; N-wide write-only, 3 leading bytes, 3 trailing bytes.
        !          2029: ;  EAX = value stored by the STOS instructions (not a count; see ESI)
        !          2030: ;  EBX = count of scans to fill
        !          2031: ;  EDX = offset from end of one scan's fill to start of next
        !          2032: ;  ESI = # of dwords to fill
        !          2033: ;  EDI = target address to fill
        !          2034: 
        !          2035:         align   4
        !          2036: draw_wide_w_33_loop     proc    near    ;fill loop: repeats the unrolled W_33 scan body while EBX != 0
        !          2037:         UNROLL_LOOP     DRAW_WIDE_W_33,W_33,LOOP_UNROLL_COUNT
        !          2038:         dec     ebx                     ;one pass through the unrolled body done (presumably EBX counts passes, not single scans -- confirm against UNROLL_LOOP and the caller)
        !          2039:         jnz     draw_wide_w_33_loop     ;EBX not yet 0: run the unrolled body again from the top
        !          2040: 
        !          2041:         ret                             ;EBX reached 0: near return to the caller
        !          2042: 
        !          2043: draw_wide_w_33_loop     endp
        !          2044: 
        !          2045: 
        !          2046: ;--------------------------Private-Routine------------------------------;
        !          2047: ; comp_byte_interval
        !          2048: ;
        !          2049: ;   An interval will be computed for byte boundaries.
        !          2050: ;
        !          2051: ;   A first mask and a last mask will be calculated.  A mask that covers
        !          2052: ;   its whole byte is returned as 0 and that byte is combined into the
        !          2053: ;   inner loop count instead; the start offset in EDI is not adjusted.
        !          2054: ;
        !          2055: ; Entry:
        !          2056: ;       EBX = right coordinate (exclusive)
        !          2057: ;       EDX = left coordinate  (inclusive)
        !          2058: ; Returns:
        !          2059: ;       EDI = offset to first byte to be altered in the scan
        !          2060: ;       ESI = inner loop count
        !          2061: ;       AL  = first byte mask (possibly 0)
        !          2062: ;       AH  = last  byte mask (possibly 0)
        !          2063: ; Error Returns:
        !          2064: ;       None
        !          2065: ; Registers Preserved:
        !          2066: ;       ES,BP
        !          2067: ; Registers Destroyed:
        !          2068: ;       AX,BX,CX,DX,SI,DI,FLAGS
        !          2069: ; Calls:
        !          2070: ;       None
        !          2071: ; History:
        !          2072: ;       Sat 11-Apr-1987 20:39:10 -by-  Walt Moore [waltm]
        !          2073: ;       Created.
        !          2074: ;-----------------------------------------------------------------------;
        !          2075: 
        !          2076: cProc   comp_byte_interval              ;in: EBX = right X (exclusive), EDX = left X; out: EDI = byte offset, ESI = whole-byte count, AL/AH = first/last masks
        !          2077: 
        !          2078:         sub     ebx,edx                 ;EBX = width of interval in pixels
        !          2079:         dec     ebx                     ;EBX = width - 1 (distance from first to last pixel)
        !          2080:         mov     edi,edx                 ;Copy starting X; EDX is masked below
        !          2081:         shr     edi,3                   ;EDI = starting X / 8 = byte offset of first byte
        !          2082: 
        !          2083:         and     edx,00000111b           ;EDX = bit index (0-7) of left pixel within its byte
        !          2084:         add     ebx,edx                 ;EBX = bit index of right pixel, counted from the first byte
        !          2085:         mov     esi,ebx                 ;(save for inner loop count)
        !          2086:         and     ebx,00000111b           ;EBX = bit index (0-7) of right pixel within its byte
        !          2087:         mov     cl,dl                   ;CL = shift count for the left mask
        !          2088:         mov     eax,0FFFFFFFFh          ;AL = 0FFh to shift into the left mask
        !          2089:         mov     edx,eax                 ;EDX = -1, DL = 0FFh: operands for the CMP/SBB tests below
        !          2090:         shr     al,cl                   ;AL = left side altered bits mask (high CL bits cleared)
        !          2091:         mov     cl,bl                   ;CL = shift count for the right mask
        !          2092:         mov     ah,80h                  ;Start with only the top bit set
        !          2093:         sar     ah,cl                   ;AH = right side altered bits mask (SAR copies the top bit: high CL+1 bits set)
        !          2094:         shr     esi,3                   ;ESI = byte index of last byte relative to first
        !          2095:         jnz     short comp_byte_dont_combine ;nonzero: first and last bytes are different bytes
        !          2096: 
        !          2097: ; Only one byte will be affected.  Combine first/last masks, set loop count = 0
        !          2098: 
        !          2099:         and     al,ah                   ;Will use first byte mask only
        !          2100:         xor     ah,ah                   ;Want last byte mask to be 0
        !          2101:         inc     esi                     ;Cancels the DEC below so the count stays 0
        !          2102: 
        !          2103: comp_byte_dont_combine:
        !          2104:         dec     esi                     ;ESI = # of bytes strictly between first and last (might be 0)
        !          2105: 
        !          2106: 
        !          2107: ; If all pixels in the first byte are altered, combine the first byte into the
        !          2108: ; inner loop and clear the first byte mask.  Ditto for the last byte mask.
        !          2109: 
        !          2110:         cmp     al,dl                   ;Set 'C' if AL below 0FFh (not all pixels 1)
        !          2111:         sbb     esi,edx                 ;ESI - (-1) - 'C': count + 1 if AL was 0FFh, else unchanged
        !          2112:         cmp     al,dl                   ;Redo the compare: the SBB above changed 'C'
        !          2113:         sbb     al,dl                   ;AL - 0FFh - 'C': AL = 0 if it was 0FFh, else unchanged
        !          2114: 
        !          2115:         cmp     ah,dl                   ;Set 'C' if AH below 0FFh (not all pixels 1)
        !          2116:         sbb     esi,edx                 ;ESI - (-1) - 'C': count + 1 if AH was 0FFh, else unchanged
        !          2117:         cmp     ah,dl                   ;Redo the compare: the SBB above changed 'C'
        !          2118:         sbb     ah,dl                   ;AH - 0FFh - 'C': AH = 0 if it was 0FFh, else unchanged
        !          2119:         cRet    comp_byte_interval      ;Return with EDI, ESI, AL, AH as computed above
        !          2120: 
        !          2121: endProc comp_byte_interval
        !          2122: 
        !          2123: _TEXT$01   ends
        !          2124: 
        !          2125:         end
        !          2126: 
        !          2127: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.