Annotation of ntddk/src/video/displays/vga256/i386/vgablts.asm, revision 1.1

1.1     ! root        1: ;---------------------------Module-Header------------------------------;
        !             2: ; Module Name: vgablts.asm
        !             3: ;
        !             4: ; Copyright (c) 1992-1993 Microsoft Corporation
        !             5: ;-----------------------------------------------------------------------;
        !             6: ;-----------------------------------------------------------------------;
        !             7: ; VOID vTrgBlt(PDEV * ppdev, ULONG culRcl, RECTL * prcl, MIX ulMix,
        !             8: ;              ULONG ulClr, POINTL * pptlBrush)
        !             9: ; Input:
        !            10: ;  ppdev     - pointer to PDEV for surface to which to draw
        !            11: ;  culRcl    - # of rectangles to fill
        !            12: ;  prcl      - pointer to list of rectangles to fill
        !            13: ;  ulMix     - mix rop with which to fill
        !            14: ;  ulClr     - color with which to fill
        !            15: ;  pptlBrush - not used
        !            16: ;
        !            17: ; Performs accelerated solid area fills for all mixes.
        !            18: ;
        !            19: ;-----------------------------------------------------------------------;
        !            20: ;
        !            21: ; Note: Assumes all rectangles have positive heights and widths. Will not
        !            22: ; work properly if this is not the case.
        !            23: ;
        !            24: ;-----------------------------------------------------------------------;
        !            25: ;
        !            26: ; Note: Cases where the width of the whole bytes fill is equal to the
        !            27: ; width of the bitmap could be sped up by using a single REP MOVS or REP
        !            28: ; STOS, but how often does WIN32 do a fill that's the width of the screen?
        !            29: ; Not very.
        !            30: ;
        !            31: ;-----------------------------------------------------------------------;
        !            32: 
        !            33:         comment $
        !            34: 
        !            35: The overall approach of this module is to accept a list of rectangles to
        !            36: fill, set up the VGA hardware for the desired fill, and then fill the
        !            37: rectangles one at a time. Each rectangle fill is set up for everything
        !            38: but vertical parameters, and then decomposed into the sections that
        !            39: intersect each VGA bank; each section is drawn in turn. The drawing code
        !            40: is heavily unrolled for performance, and vectors are set up so that the
        !            41: drawing code appropriate for the desired fill is essentially threaded
        !            42: together.
        !            43: 
        !            44:         commend $
        !            45: 
        !            46: ;-----------------------------------------------------------------------;
        !            47: 
        !            48: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
        !            49: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
        !            50: ; times unrolling. This is the only thing you need to change to control
        !            51: ; unrolling.
        !            52: 
        !            53: LOOP_UNROLL_SHIFT equ 2         ;log2 of unroll count: 2**2 = 4 times unrolling
        !            54: 
        !            55: ;-----------------------------------------------------------------------;
        !            56: 
        !            57:                 .386
        !            58: 
        !            59: ifndef  DOS_PLATFORM
        !            60:         .model  small,c
        !            61: else
        !            62: ifdef   STD_CALL
        !            63:         .model  small,c
        !            64: else
        !            65:         .model  small,pascal
        !            66: endif;  STD_CALL
        !            67: endif;  DOS_PLATFORM
        !            68: 
        !            69:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
        !            70:         assume fs:nothing,gs:nothing
        !            71: 
        !            72:         .xlist
        !            73:         include stdcall.inc             ;calling convention cmacros
        !            74:         include i386\strucs.inc
        !            75:         include i386\driver.inc
        !            76:         include i386\egavga.inc
        !            77:         include i386\unroll.inc
        !            78:         include i386\ropdefs.inc
        !            79: 
        !            80:         .list
        !            81: 
        !            82: ;-----------------------------------------------------------------------;
        !            83: 
        !            84:         .data
        !            85: 
        !            86: ;-----------------------------------------------------------------------;
        !            87: ; Left edge clip masks, indexed by the intrabyte start address (xLeft AND 3).
        !            88: ; Index 0 is the whole byte case, which is flagged as 0ffh.
        !            89:         public jLeftMask
        !            90: jLeftMask       label   byte
        !            91:         db      0ffh,0eh,0ch,08h        ;start 0 (whole),1,2,3: keeps bits start..3
        !            92: 
        !            93: ;-----------------------------------------------------------------------;
        !            94: ; Right edge clip masks, indexed by the intrabyte end address (xRight AND 3,
        !            95: ; non-inclusive). Index 0 is the whole byte case, which is flagged as 0ffh.
        !            96:         public jRightMask
        !            97: jRightMask      label   byte
        !            98:         db      0ffh,01h,03h,07h        ;end 0 (whole),1,2,3: keeps bits 0..end-1
        !            99: 
        !           100: ;-----------------------------------------------------------------------;
        !           101: ; Tables used to set up for the desired raster op. Note that entries for raster
        !           102: ; ops that aren't handled here are generally correct, except that they ignore
        !           103: ; need for inversion of the destination, which those rops require.
        !           104: 
        !           105: ; Table used to force off the drawing color for R2_BLACK (0). vTrgBlt ANDs
        !           106: ; the color with the entry for the mix. The first entry is ignored; there is no mix 0.
        !           107:         public jForceOffTable
        !           108: jForceOffTable  db         0            ;mix 0 (unused)
        !           109:                 db         000h,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 1-8: mix 1 forces the color to 0
        !           110:                 db         0ffh,0ffh,000h,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 9-16: mix 11 is also 0 (presumably R2_NOP, which vTrgBlt rejects early -- confirm)
        !           111: 
        !           112: ;-----------------------------------------------------------------------;
        !           113: ; Table used to force on the drawing color for R2_NOT (Dn) and R2_WHITE (1).
        !           114: ; The first entry is ignored; there is no mix 0.
        !           115:         public  jForceOnTable           ;vTrgBlt ORs the entry for the mix into the color
        !           116: jForceOnTable   db      0, 0,0,0,0,0,0ffh,0,0,0,0,0,0,0,0,0,0ffh ;0ffh for mixes 6 and 16 only
        !           117: 
        !           118: ;-----------------------------------------------------------------------;
        !           119: ; Table used to invert the passed-in drawing color for Pn mixes.
        !           120: ; The first entry is ignored; there is no mix 0.
        !           121:         public  jNotTable               ;vTrgBlt XORs the entry for the mix into the color
        !           122: jNotTable       db      0, 0,0ffh,0ffh,0ffh,0,0,0,0ffh,0,0ffh,0,0ffh,0,0,0,0 ;0ffh for mixes 2,3,4,8,10,12
        !           123: 
        !           124: ;-----------------------------------------------------------------------;
        !           125: ; Table of VGA ALU logical functions corresponding to mixes. Note that Dn is
        !           126: ; handled as a separate preceding inversion pass when part of a more complex
        !           127: ; mix.
        !           128: ; The first entry is ignored; there is no mix 0.
        !           129:         public jALUFuncTable            ;a 0 entry (DR_SET) makes vTrgBlt skip programming the ALU function
        !           130: jALUFuncTable   db      0               ;mix 0 (unused)
        !           131:                 db      DR_SET,DR_AND,DR_AND,DR_SET     ;mixes 1-4
        !           132:                 db      DR_AND,DR_XOR,DR_XOR,DR_OR      ;mixes 5-8
        !           133:                 db      DR_AND,DR_XOR,     0,DR_OR      ;mixes 9-12 (mix 11 is a literal 0)
        !           134:                 db      DR_SET,DR_OR ,DR_OR ,DR_SET     ;mixes 13-16
        !           135: 
        !           136: ;-----------------------------------------------------------------------;
        !           137: ; Entries of 1 mark rops that require two passes, one to invert the
        !           138: ; destination and then another to finish the rop.
        !           139: ; The first entry is ignored; there is no mix 0.
        !           140:         public  jInvertDest             ;vTrgBlt copies the entry for the mix to jInvertDestFirst
        !           141: jInvertDest     db      0, 0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0 ;1 for mixes 2,5,8,14
        !           142: 
        !           143: ;-----------------------------------------------------------------------;
        !           143: ; Table of routines to be called to draw edges, according to which edges are
        !           144: ; partial and which edges are whole bytes. Index: bit 0 = 1 if the left edge is partial, bit 1 = 1 if the right edge is whole.
        !           145:         align   4
        !           146: pfnEdgeDrawing  label   dword
        !           147:         dd      do_right_edge_bytes     ;0: left whole, right not whole
        !           148:         dd      do_both_edge_bytes      ;1: left partial, right not whole
        !           149:         dd      check_next_bank         ;2: left whole, right whole
        !           150:         dd      do_left_edge_bytes      ;3: left partial, right whole
        !           152: 
        !           153: ;-----------------------------------------------------------------------;
        !           153: ; Table of pointers to tables used to find entry points in unrolled wide
        !           154: ; whole byte code. NOTE(review): the code that indexes this table is not nearby; the Wxy names suggest two 0-3 indices -- confirm.
        !           155: 
        !           156:         align   4
        !           157: pfnWideWholeRep label   dword
        !           158:         dd      pfnDrawWideW00Entry
        !           159:         dd      pfnDrawWideW01Entry
        !           160:         dd      pfnDrawWideW02Entry
        !           161:         dd      pfnDrawWideW03Entry
        !           162:         dd      pfnDrawWideW10Entry
        !           163:         dd      pfnDrawWideW11Entry
        !           164:         dd      pfnDrawWideW12Entry
        !           165:         dd      pfnDrawWideW13Entry
        !           166:         dd      pfnDrawWideW20Entry
        !           167:         dd      pfnDrawWideW21Entry
        !           168:         dd      pfnDrawWideW22Entry
        !           169:         dd      pfnDrawWideW23Entry
        !           170:         dd      pfnDrawWideW30Entry
        !           171:         dd      pfnDrawWideW31Entry
        !           172:         dd      pfnDrawWideW32Entry
        !           173:         dd      pfnDrawWideW33Entry
        !           175: 
        !           176: ;-----------------------------------------------------------------------;
        !           176: ; Table of pointers to tables used to find entry points in narrow,
        !           177: ; special-cased unrolled non-replace whole byte code. Indexed by the
        !           178: ; number of whole bytes in the fill.
        !           179: ; Note: The breakpoint where one should switch from special-casing to
        !           180: ;  REP MOVSB is purely a guess on my part. 5 seemed reasonable.
        !           181: 
        !           182:         align   4
        !           183: pfnWholeBytesNonReplaceEntries  label   dword
        !           184:         dd      0                       ;we never get a 0-wide case
        !           185:         dd      pfnDraw1WideRWEntry     ;1 whole byte wide
        !           186:         dd      pfnDraw2WideRWEntry     ;2 whole bytes wide
        !           187:         dd      pfnDraw3WideRWEntry     ;3 whole bytes wide
        !           188:         dd      pfnDraw4WideRWEntry     ;4 whole bytes wide
        !           189: MAX_NON_REPLACE_SPECIAL equ     ($-pfnWholeBytesNonReplaceEntries)/4 ;# of table entries (5); vTrgBlt uses the wide code for widths >= this
        !           191: 
        !           192: ;-----------------------------------------------------------------------;
        !           192: ; Table of pointers to tables used to find entry points in narrow, special-
        !           193: ; cased unrolled replace whole byte code.
        !           194: 
        !           195: ; Note: The breakpoint where one should switch from special-casing to
        !           196: ;  REP STOS is purely a guess on my part. 8 seemed reasonable.
        !           197: 
        !           198: ; Start address MOD 4 is 0 (first of the four possible alignments, 0-3).
        !           199:         align   4
        !           200: pfnWholeBytesMod0ReplaceEntries  label   dword
        !           201:         dd      0                       ;we never get a 0-wide case
        !           202:         dd      pfnDraw1WideWEntry
        !           203:         dd      pfnDraw2WideWEntry
        !           204:         dd      pfnDraw3WideWEvenEntry
        !           205:         dd      pfnDraw4WideWEntry
        !           206:         dd      pfnDraw5WideWEvenEntry
        !           207:         dd      pfnDraw6WideWMod3_0Entry
        !           208:         dd      pfnDraw7WideWMod3_0Entry
        !           209:         dd      pfnDraw8WideWMod3_0Entry
        !           210: MAX_REPLACE_SPECIAL equ     ($-pfnWholeBytesMod0ReplaceEntries)/4 ;# of entries in this table (9, covering widths 0-8)
        !           212: 
        !           212: ; Start address MOD 4 is 1 (second of the four possible alignments, 0-3).
        !           213:         align   4
        !           214: pfnWholeBytesMod1ReplaceEntries  label   dword
        !           215:         dd      0                       ;we never get a 0-wide case
        !           216:         dd      pfnDraw1WideWEntry
        !           217:         dd      pfnDraw2WideWEntry
        !           218:         dd      pfnDraw3WideWOddEntry
        !           219:         dd      pfnDraw4WideWEntry
        !           220:         dd      pfnDraw5WideWOddEntry
        !           221:         dd      pfnDraw6WideWMod3_1Entry
        !           222:         dd      pfnDraw7WideWMod3_1Entry
        !           223:         dd      pfnDraw8WideWMod3_1Entry
        !           225: 
        !           225: ; Start address MOD 4 is 2 (third of the four possible alignments, 0-3).
        !           226:         align   4
        !           227: pfnWholeBytesMod2ReplaceEntries  label   dword
        !           228:         dd      0                       ;we never get a 0-wide case
        !           229:         dd      pfnDraw1WideWEntry
        !           230:         dd      pfnDraw2WideWEntry
        !           231:         dd      pfnDraw3WideWEvenEntry
        !           232:         dd      pfnDraw4WideWEntry
        !           233:         dd      pfnDraw5WideWEvenEntry
        !           234:         dd      pfnDraw6WideWMod3_2Entry
        !           235:         dd      pfnDraw7WideWMod3_2Entry
        !           236:         dd      pfnDraw8WideWMod3_2Entry
        !           238: 
        !           238: ; Start address MOD 4 is 3 (last of the four possible alignments, 0-3).
        !           239:         align   4
        !           240: pfnWholeBytesMod3ReplaceEntries  label   dword
        !           241:         dd      0                       ;we never get a 0-wide case
        !           242:         dd      pfnDraw1WideWEntry
        !           243:         dd      pfnDraw2WideWEntry
        !           244:         dd      pfnDraw3WideWOddEntry
        !           245:         dd      pfnDraw4WideWEntry
        !           246:         dd      pfnDraw5WideWOddEntry
        !           247:         dd      pfnDraw6WideWMod3_1Entry ;NOTE(review): uses the _1 table, unlike the 7- and 8-wide _3 entries below -- confirm this is intentional
        !           248:         dd      pfnDraw7WideWMod3_3Entry
        !           249:         dd      pfnDraw8WideWMod3_3Entry
        !           251: 
        !           251: ; Master MOD 4 alignment look-up table for entry tables for four possible
        !           252: ; alignments for narrow, special-cased unrolled replace whole byte code.
        !           253:         align   4
        !           254: pfnWholeBytesReplaceMaster      label   dword
        !           255:         dd      pfnWholeBytesMod0ReplaceEntries ;start address MOD 4 = 0
        !           256:         dd      pfnWholeBytesMod1ReplaceEntries ;start address MOD 4 = 1
        !           257:         dd      pfnWholeBytesMod2ReplaceEntries ;start address MOD 4 = 2
        !           258:         dd      pfnWholeBytesMod3ReplaceEntries ;start address MOD 4 = 3
        !           260: 
        !           261: ;-----------------------------------------------------------------------;
        !           262: 
        !           263:                 .code
        !           264: 
        !           265: ;-----------------------------------------------------------------------;
        !           266: 
        !           267: cProc   vTrgBlt,24,<         \
        !           268:         uses    esi edi ebx, \
        !           269:         ppdev:    ptr,       \
        !           270:         culRcl:   dword,     \
        !           271:         prcl:     ptr RECTL, \
        !           272:         ulMix:    dword,     \
        !           273:         ulColor:  dword,     \
        !           274:         pptlBrsuh:ptr POINTL >
        !           275: 
        !           276:         local   ulRowOffset :dword      ;Offset from start of scan line of
        !           277:                                         ; first byte to fill
        !           278:         local   ulWholeBytes :dword     ;# of whole bytes to fill
        !           279:         local   ulWholeDwords :dword    ;# of whole dwords to fill
        !           280:         local   pfnWholeFn  :dword      ;pointer to routine used to draw
        !           281:                                         ; whole bytes
        !           282:         local   ulScanWidth :dword      ;offset from start of one scan to start
        !           283:                                         ; of next
        !           284:         local   ulNextScan  :dword      ;offset from end of one scan line's
        !           285:                                         ; fill to start of next
        !           286:         local   ulCurrentTopScan :dword ;top scan line to fill in current bank
        !           287:         local   ulMasks     :dword      ;low byte = right mask, high byte =
        !           288:                                         ; left mask
        !           289:         local   ulBottomScan :dword     ;bottom scan line of fill rectangle
        !           290:         local   pfnDraw1WideVector :dword ;address at which to enter unrolled
        !           291:                                           ; edge loop
        !           292:         local   jALUFunc   :dword       ;VGA ALU logical operation (SET, AND,
        !           293:                                         ; OR, or XOR)
        !           294:         local   pfnStartDrawing :dword  ;pointer to function to call to start
        !           295:                                         ; drawing
        !           296:         local   pfnContinueDrawing :dword ;pointer to function to call to
        !           297:                                         ; continue drawing after doing whole
        !           298:                                         ; bytes
        !           299:         local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
        !           300:                                         ; address past the left edge when the
        !           301:                                         ; left edge is partial
        !           302:         local   pfnWholeBytes :dword    ;pointer to table of entry points
        !           303:                                         ; into unrolled loops for whole byte
        !           304:                                         ; filling
        !           305:         local   jInvertDestFirst :dword ;1 if the rop requires a pass to invert
        !           306:                                         ; the destination before the normal
        !           307:                                         ; pass
        !           308:         local   ulDrawingColor :dword   ;color byte with which to fill,
        !           309:                                         ; replicated to a dword
        !           310:         local   ppfnDrawEdgeTable :dword ;points to table to be used to look up
        !           311:                                          ; unrolled entry points for edge
        !           312:                                          ; bytes (pfnDraw1WideRWEntry or
        !           313:                                          ; pfnDraw1WideWEntry)
        !           314: 
        !           315: ;-----------------------------------------------------------------------;
        !           316: ; CLD is assumed on entry.
        !           317: ;-----------------------------------------------------------------------;
        !           318: 
        !           319: ;-----------------------------------------------------------------------;
        !           320: ; Make sure there's something to draw; clip enumerations can be empty.
        !           321: ;-----------------------------------------------------------------------;
        !           322: 
        !           323:         cmp     culRcl,0                ;any rects to fill?
        !           324:         jz      vTrgBlts_done           ;no, we're done
        !           325: 
        !           326: 
        !           327: ;-----------------------------------------------------------------------;
        !           328: ; Set up variables that are constant for the entire time we're in this
        !           329: ; module.
        !           330: ;-----------------------------------------------------------------------;
        !           331: 
        !           332: ;-----------------------------------------------------------------------;
        !           333: ; Set up for the desired raster op.
        !           334: ;-----------------------------------------------------------------------;
        !           335: 
        !           336:         sub     ebx,ebx                 ;ignore any background mix; we're only
        !           337:         mov     bl,byte ptr ulMix       ; concerned with the foreground in this
        !           338:                                         ; module
        !           339:         cmp     ebx,R2_NOP              ;is this NOP?
        !           340:         jz      vTrgBlts_done           ;yes, we're done
        !           341:         mov     al,jInvertDest[ebx]          ;remember whether we need to
        !           342:         mov     byte ptr jInvertDestFirst,al ; invert the destination before
        !           343:                                              ; finishing the rop
        !           344:         mov     ah,byte ptr ulColor     ;get the drawing color
        !           345:         and     ah,jForceOffTable[ebx]  ;force color to 0 if necessary
        !           346:                                         ; (R2_BLACK)
        !           347:         or      ah,jForceOnTable[ebx]   ;force color to 0ffh if necessary
        !           348:                                         ; (R2_WHITE, R2_NOT)
        !           349:         xor     ah,jNotTable[ebx]       ;invert color if necessary (any Pn mix)
        !           350:                                         ;at this point, AH has the color we
        !           351:                                         ; want to draw with; set up the VGA
        !           352:                                         ; hardware to draw with that color
        !           353:         mov     al,ah                   ;replicate the drawing color to a dword
        !           354:         mov     edx,eax
        !           355:         shl     eax,16
        !           356:         mov     ax,dx
        !           357:         mov     ulDrawingColor,eax      ;remember drawing color
        !           358: 
        !           359:         mov     ppfnDrawEdgeTable,offset pfnDraw1WideWEntry
        !           360:                                         ;assume replace-type rop, so we can
        !           361:                                         ; draw edge bytes with the write-
        !           362:                                         ; without-read code pointed to by this
        !           363:                                         ; table
        !           364:         mov     ah,jALUFuncTable[ebx]   ;get the ALU logical function
        !           365:         and     ah,ah                   ;is the logical function DR_SET?
        !           366:         .errnz  DR_SET
        !           367:         jz      short skip_ALU_set      ;yes, don't have to set because that's
        !           368:                                         ; the VGA's default state
        !           369:         mov     edx,VGA_BASE + GRAF_ADDR
        !           370:         mov     al,GRAF_DATA_ROT
        !           371:         out     dx,ax                   ;set the ALU logical function
        !           372:         mov     ppfnDrawEdgeTable,offset pfnDraw1WideRWEntry
        !           373:                                         ;draw edge bytes with the code pointed
        !           374:                                         ; to by this table (read/write)
        !           375: skip_ALU_set:
        !           376:         mov     byte ptr jALUFunc,ah    ;remember the ALU logical function
        !           377: 
        !           378: ;-----------------------------------------------------------------------;
        !           379: ; Fill the current rectangle with the specified raster op and color.
        !           380: ;-----------------------------------------------------------------------;
        !           381: 
        !           382: fill_rect_loop:
        !           383: 
        !           384: ;-----------------------------------------------------------------------;
        !           385: ; Set up variables that are constant from bank to bank during a single
        !           386: ; fill.
        !           387: ;-----------------------------------------------------------------------;
        !           388: 
        !           389: ;-----------------------------------------------------------------------;
        !           390: ; Set up masks and widths.
        !           391: ;-----------------------------------------------------------------------;
        !           392: 
        !           393:         mov     edi,prcl                ;point to rectangle to fill
        !           394:         mov     eax,[edi].yBottom
        !           395:         mov     ulBottomScan,eax        ;remember the bottom scan line of fill
        !           396: 
        !           397:         mov     ebx,[edi].xRight        ;right edge of fill (non-inclusive)
        !           398:         mov     ecx,ebx
        !           399:         and     ecx,011b                ;intrabyte address of right edge
        !           400:         mov     ah,jRightMask[ecx]      ;right edge mask
        !           401: 
        !           402:         mov     esi,[edi].xLeft         ;left edge of fill (inclusive)
        !           403:         mov     ecx,esi
        !           404:         shr     ecx,2                   ;/4 for start offset from left edge
        !           405:                                         ; of scan line
        !           406:         mov     ulRowOffset,ecx         ;remember offset from start of scan
        !           407:                                         ; line
        !           408:         sub     ebx,esi                 ;width in pixels of fill
        !           409: 
        !           410:         and     esi,011b                ;intrabyte address of left edge
        !           411:         mov     al,jLeftMask[esi]       ;left edge mask
        !           412: 
        !           413:         dec     ebx                     ;make inclusive on right
        !           414:         add     ebx,esi                 ;inclusive width, starting counting at
        !           415:                                         ; the beginning of the left edge byte
        !           416:         shr     ebx,2                   ;width of fill in bytes touched - 1
        !           417:         jnz     short more_than_1_byte  ;more than 1 byte is involved
        !           418: 
        !           419: ; Only one byte will be affected. Combine first/last masks.
        !           420: 
        !           421:         and     al,ah                   ;we'll use first byte mask only
        !           422:         xor     ah,ah                   ;want last byte mask to be 0
        !           423:         inc     ebx                     ;so there's one count to subtract below
        !           424:                                         ; if this isn't a whole edge byte
        !           425: more_than_1_byte:
        !           426: 
        !           427: ; If all pixels in the left edge are altered, combine the first byte into the
        !           428: ; whole byte count and clear the first byte mask, because we can handle solid
        !           429: ; edge bytes faster as part of the whole bytes. Ditto for the right edge.
        !           430: 
        !           431:         sub     ecx,ecx                 ;edge whole-status accumulator
        !           432:         cmp     al,-1                   ;is left edge a whole byte or partial?
        !           433:         adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
        !           434:         sub     ebx,ecx                 ;if left edge partial, deduct it from
        !           435:                                         ; the whole bytes count
        !           436:         mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
        !           437:                                         ; it's partial when pointing to the
        !           438:                                         ; whole bytes
        !           439:         and     ah,ah                   ;is right edge mask 0, meaning this
        !           440:                                         ; fill is only 1 byte wide?
        !           441:         jz      short save_masks        ;yes, no need to do anything
        !           442:         cmp     ah,-1                   ;is right edge a whole byte or partial?
        !           443:         jnz     short save_masks        ;partial
        !           444:         add     ecx,2                   ;bit 1 of ECX=0 if right edge partial,
        !           445:                                         ; 1 if whole;
        !           446:                                         ;bit 0=1 if left edge partial, 0 whole
        !           447:         inc     ebx                     ;if right edge whole, include it in the
        !           448:                                         ; whole bytes count
        !           449: save_masks:
        !           450:         mov     ulMasks,eax             ;save left and right clip masks
        !           451:         mov     ulWholeBytes,ebx        ;save # of whole bytes
        !           452: 
        !           453:         mov     ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw
        !           454:         mov     pfnContinueDrawing,ecx    ; all partial (non-whole) edges
        !           455: 
        !           456:         and     ebx,ebx                 ;any whole bytes?
        !           457:         jz      short start_vec_set     ;no, so drawing starts with the edge routine still in ECX
        !           458:                                         ;yes, so draw the whole bytes before
        !           459:                                         ; the edge bytes
        !           460: 
        !           461: ; The whole bytes loop depends on the type of operation being done. If the
        !           462: ; operation is one which uses DR_SET, then a write-only (STOS-type) operation
        !           463: ; suffices; otherwise each byte must be read before it is written (MOVSB-type),
        !           464: ; to load the latches with the existing display memory so the ALUs can work.
        !           465: 
        !           466:         cmp     byte ptr jALUFunc,DR_SET ;is it a replace-type rop?
        !           467:         jz      short is_replace_type   ;yes
        !           468:                                         ;no, set up for non-replace whole bytes
        !           469:         mov     ecx,offset whole_bytes_non_replace_wide
        !           470:                                         ;assume too wide to special-case
        !           471:         cmp     ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case?
        !           472:         jnb     short start_vec_set     ;yes (unsigned compare), start with the wide routine
        !           473:         mov     ecx,pfnWholeBytesNonReplaceEntries[ebx*4] ;no, point to entry
        !           474:         mov     pfnWholeBytes,ecx       ; table for width; whole_bytes_special reads it back
        !           475:         mov     ecx,offset whole_bytes_special
        !           476:                                         ;set up to call special routine to fill
        !           477:                                         ; whole bytes
        !           478:         jmp     short start_vec_set
        !           479: 
        !           480:         align   4
        !           481: is_replace_type:                        ;set up for replace-type rop
        !           482:         cmp     ebx,MAX_REPLACE_SPECIAL ;too wide to special case?
        !           483:         jnb     short is_wide_replace   ;yes (unsigned compare)
        !           484:                                         ;narrow enough to special case. Look up
        !           485:                                         ; the entry table for the special case
        !           486:                                         ; based on the start alignment
        !           487:         mov     ecx,ulRowOffset
        !           488:         add     ecx,ulLeftEdgeAdjust    ;left edge whole bytes start offset
        !           489:         and     ecx,011b                ;left edge whole bytes start alignment
        !           490:                                         ; MOD 4 (low two bits of the offset)
        !           491:         mov     ecx,pfnWholeBytesReplaceMaster[ecx*4] ;look up table of entry
        !           492:                                                       ; tables for alignment
        !           493:         mov     ecx,[ecx+ebx*4]         ;look up entry table for width
        !           494:         mov     pfnWholeBytes,ecx       ; and save it; whole_bytes_special reads it back
        !           495:         mov     ecx,offset whole_bytes_special
        !           496:                                         ;set up to call special routine to fill
        !           497:                                         ; whole bytes
        !           498:         jmp     short start_vec_set
        !           499: 
        !           500:         align   4
        !           501: is_wide_replace:                        ;set up for wide replace-type op
        !           502:                                         ;Note: assumes there is at least one
        !           503:                                         ; full dword involved!
        !           504:         mov     ecx,ulRowOffset
        !           505:         add     ecx,ulLeftEdgeAdjust    ;left edge whole bytes start offset
        !           506:         neg     ecx
        !           507:         and     ecx,011b                ;ECX = (-offset) MOD 4 = # of odd leading bytes needed to bring the offset up to a multiple of 4
        !           508:         mov     edx,ebx
        !           509:         sub     edx,ecx                 ;ignore odd leading bytes
        !           510:         mov     eax,edx
        !           511:         shr     edx,2                   ;# of whole dwords across (not counting
        !           512:                                         ; odd leading & trailing bytes)
        !           513:         mov     ulWholeDwords,edx
        !           514:         and     eax,011b                ;# of odd (fractional) trailing bytes
        !           515:         shl     ecx,2
        !           516:         or      ecx,eax                 ;build a look-up index from the number
        !           517:                                         ; of leading and trailing bytes: (leading * 4) + trailing
        !           518:         mov     ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/
        !           519:         mov     pfnWholeBytes,ecx          ; back alignment
        !           520:         mov     ecx,offset whole_bytes_rep_wide
        !           521:                                         ;set up to call routine to perform wide
        !           522:                                         ; whole bytes fill
        !           523: start_vec_set:
        !           524:         mov     pfnStartDrawing,ecx     ;routine with which drawing starts in each bank: a whole bytes routine, or the edge routine if there are no whole bytes
        !           525: 
        !           526:         mov     ecx,ppdev
        !           527:         mov     eax,[ecx].pdev_lPlanarNextScan
        !           528:         mov     ulScanWidth,eax         ;local copy of scan line width
        !           529:         sub     eax,ebx                 ;EAX = scan width minus whole bytes width = delta from the end of the whole bytes on one scan to their start on the next
        !           530:         mov     ulNextScan,eax
        !           531: 
        !           532: 
        !           533: ;-----------------------------------------------------------------------;
        !           534: ; Fill this rectangle, in one pass or (for invert-dest-first rops) two.
        !           535: ;-----------------------------------------------------------------------;
        !           536: 
        !           537:         cmp     byte ptr jInvertDestFirst,1
        !           538:                                         ;is this an invert-dest-plus-something-
        !           539:                                         ; else rop that requires two passes?
        !           540:         jz      short do_invert_dest_rop ;yes, special case with two passes
        !           541: 
        !           542: do_single_pass:                         ;do_invert_dest_rop also jumps here for its second pass
        !           543:         call    draw_banks              ;fill the rectangle at prcl in every bank it spans
        !           544: 
        !           545: 
        !           546: ;-----------------------------------------------------------------------;
        !           547: ; See if there are any more rectangles to fill.
        !           548: ;-----------------------------------------------------------------------;
        !           549: 
        !           550:         add     prcl,(size RECTL) ;point to the next rectangle, if there is one
        !           551:         dec     culRcl            ;count down the rectangles to fill
        !           552:         jnz     fill_rect_loop    ;more rectangles remain, go fill the next one
        !           553: 
        !           554: 
        !           555: ;-----------------------------------------------------------------------;
        !           556: ; We have filled all rectangles.  Restore the VGA to its default state:
        !           557: ; the ALU logical function goes back to SET unless it already is SET.
        !           558: ;-----------------------------------------------------------------------;
        !           559:         cmp     byte ptr jALUFunc,DR_SET ;is the logical function already SET?
        !           560:         jnz     short @F                 ;no, need to reset it
        !           561:         cRet    vTrgBlt                  ;yes, no need to reset it
        !           562: 
        !           563:         align   4
        !           564: @@:
        !           565:         mov     edx,VGA_BASE + GRAF_ADDR
        !           566:         mov     eax,(DR_SET shl 8) + GRAF_DATA_ROT ;AL = GRAF_DATA_ROT index, AH = DR_SET:
        !           567:         out     dx,ax                              ; set the logical function to SET
        !           568: vTrgBlts_done:
        !           569:         cRet    vTrgBlt
        !           570: 
        !           571: 
        !           572: ;-----------------------------------------------------------------------;
        !           573: ; Handles rops that require two passes, the first being a destination
        !           574: ; inversion pass. The second pass is the ordinary fill at do_single_pass.
        !           575: ;-----------------------------------------------------------------------;
        !           576: 
        !           577:         align   4
        !           578: do_invert_dest_rop:
        !           579: 
        !           580: ; Set up the VGA's hardware for inversion
        !           581: 
        !           582:         mov     eax,ulDrawingColor      ;remember the normal drawing color
        !           583:         push    eax                     ; on the stack across the first pass
        !           584:         mov     ulDrawingColor,-1       ;with XOR, this flips all bits
        !           585: 
        !           586:         mov     edx,VGA_BASE + GRAF_ADDR
        !           587:         mov     eax,(DR_XOR shl 8) + GRAF_DATA_ROT ;AL = GRAF_DATA_ROT index, AH = DR_XOR
        !           588:         out     dx,ax                   ;logical function = XOR to invert
        !           589: 
        !           590: ; Invert the destination
        !           591: 
        !           592:         call    draw_banks              ;first pass: XOR the whole rectangle with all 1 bits
        !           593: 
        !           594: ; Restore the VGA's hardware to the state required for the second pass.
        !           595: 
        !           596:         mov     edx,VGA_BASE + GRAF_ADDR
        !           597:         mov     ah,byte ptr jALUFunc    ;AH = the rop's own logical function
        !           598:         mov     al,GRAF_DATA_ROT        ;AL = register index
        !           599:         out     dx,ax                   ;set the ALU logical function back to
        !           600:                                         ; proper state for the rest of the rop
        !           601: 
        !           602:         pop     eax
        !           603:         mov     ulDrawingColor,eax      ;restore the normal drawing color
        !           604: 
        !           605: ; Perform the second pass to finish the rop.
        !           606: 
        !           607:         jmp     do_single_pass          ;this also continues on to the next rectangle
        !           608: 
        !           609: 
        !           610: ;-----------------------------------------------------------------------;
        !           611: ; Fills all banks in the current fill rectangle. Called once per fill
        !           612: ; rectangle, except for destination-inversion-plus-something-else rops, which call it once per pass.
        !           613: ;-----------------------------------------------------------------------;
        !           614: 
        !           615:         align   4
        !           616: draw_banks:
        !           617: 
        !           618: ;-----------------------------------------------------------------------;
        !           619: ; Map in the bank containing the top scan to fill, if it's not mapped in
        !           620: ; already.
        !           621: ;-----------------------------------------------------------------------;
        !           622: 
        !           623:         mov     edi,prcl                ;point to rectangle to fill
        !           624:         mov     ecx,ppdev               ;point to PDEV
        !           625:         mov     eax,[edi].yTop          ;top scan line of fill
        !           626:         mov     ulCurrentTopScan,eax    ;this will be the fill top in 1st bank
        !           627: 
        !           628:         cmp     eax,[ecx].pdev_rcl1PlanarClip.yTop ;is fill top above the top of the
        !           629:                                                    ; current bank?
        !           630:         jl      short map_init_bank             ;yes, map in proper bank
        !           631:         cmp     eax,[ecx].pdev_rcl1PlanarClip.yBottom ;fill top at or past the bottom of
        !           632:                                                       ; the current bank?
        !           633:         jl      short init_bank_mapped          ;no, proper bank already mapped
        !           634: map_init_bank:
        !           635: 
        !           636: ; Map in the bank containing the top scan line of the fill (scan line in EAX).
        !           637: 
        !           638:         ptrCall <dword ptr [ecx].pdev_pfnPlanarControl>,<ecx,eax,JustifyTop>
        !           639: 
        !           640: init_bank_mapped:
        !           641: 
        !           642: ;-----------------------------------------------------------------------;
        !           643: ; Main loop for processing fill in each bank.
        !           644: ;-----------------------------------------------------------------------;
        !           645: 
        !           646: ; Compute the starting address and scan line count for the initial bank.
        !           647: 
        !           648:         mov     eax,ppdev               ;point to PDEV
        !           649:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
        !           650:         cmp     ebx,[eax].pdev_rcl1PlanarClip.yBottom
        !           651:                                         ;which comes first, the bottom of the
        !           652:                                         ; dest rect or the bottom of the
        !           653:                                         ; current bank?
        !           654:         jl      short BottomScanSet     ;fill bottom comes first, so draw to
        !           655:                                         ; that; this is the last bank in fill
        !           656:         mov     ebx,[eax].pdev_rcl1PlanarClip.yBottom
        !           657:                                         ;bank bottom comes first; draw to
        !           658:                                         ; bottom of bank
        !           659: BottomScanSet:
        !           660:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
        !           661:         sub     ebx,edi                 ;EBX = # of scans to fill in bank
        !           662:         imul    edi,ulScanWidth         ;offset of starting scan line
        !           663: 
        !           664: ; Note that the start of the bitmap will change each time through the
        !           665: ; bank loop, because the start of the bitmap is varied to map the
        !           666: ; desired scan line to the banking window.
        !           667: 
        !           668:         add     edi,[eax].pdev_pvBitmapStart ;start of scan in bitmap
        !           669:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
        !           670: 
        !           671: ; We have computed the starting address and scan count. Time to start drawing
        !           672: ; in the initial bank, with EBX = scan count and EDI = start address.
        !           673: 
        !           674:         jmp     pfnStartDrawing         ;whole bytes routine, or the edge routine if there are no whole bytes
        !           675: 
        !           676: 
        !           677: ;-----------------------------------------------------------------------;
        !           678: ; Whole byte fills.
        !           679: ;-----------------------------------------------------------------------;
        !           680: 
        !           681: ;-----------------------------------------------------------------------;
        !           682: ; Handles replace-type whole byte fills wider than the maximum special
        !           683: ; case width.
        !           684: ;
        !           685: ; The destination is not involved, so a STOS (or equivalent) can be used
        !           686: ; (no read needed before write). On entry EBX = scan count, EDI = start of fill.
        !           687: ;-----------------------------------------------------------------------;
        !           688: 
        !           689:         align   4
        !           690: whole_bytes_rep_wide:
        !           691:         push    ebx                     ;save scan count
        !           692:         push    edi                     ;save starting address
        !           693: 
        !           694:         mov     eax,pfnWholeBytes       ;value looked up in pfnWideWholeRep for the leading/trailing byte counts
        !           695:                                         ; NOTE(review): SET_UP_UNROLL_VARS is defined elsewhere; presumably it leaves the loop count in EBX and the entry vector in ECX - confirm
        !           696:         SET_UP_UNROLL_VARS ebx,ecx, ebx,[eax], LOOP_UNROLL_SHIFT
        !           697:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
        !           698:         mov     esi,ulWholeDwords       ;whole dwords width
        !           699:         mov     edx,ulNextScan          ;offset from end of one scan line to
        !           700:                                         ; start of next
        !           701:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
        !           702: 
        !           703:         call    ecx                     ;draw the wide whole bytes
        !           704: 
        !           705:         pop     edi                     ;restore screen pointer
        !           706:         pop     ebx                     ;restore fill scan count
        !           707:         jmp     pfnContinueDrawing      ;go draw the partial edges selected for this fill
        !           708: 
        !           709: 
        !           710: ;-----------------------------------------------------------------------;
        !           711: ; Handles both replace and non-replace whole byte fills narrow enough to
        !           712: ; special case. On entry EBX = scan count, EDI = start of fill.
        !           713: ;-----------------------------------------------------------------------;
        !           714: 
        !           715:         align   4
        !           716: whole_bytes_special:
        !           717:         push    ebx                     ;save scan count
        !           718:         push    edi                     ;save starting address
        !           719: 
        !           720:         mov     eax,pfnWholeBytes       ;point to entry table for unrolled
        !           721:                                         ; loop for whole byte width (chosen when the fill was set up)
        !           722:         SET_UP_UNROLL_VARS ebx,edx, ebx,[eax], LOOP_UNROLL_SHIFT
        !           723:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
        !           724:         mov     ecx,ulScanWidth         ;offset to next scan line
        !           725:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
        !           726: 
        !           727:         call    edx                     ;draw the whole bytes
        !           728: 
        !           729:         pop     edi                     ;restore screen pointer
        !           730:         pop     ebx                     ;restore fill scan count
        !           731:         jmp     pfnContinueDrawing      ;go draw the partial edges selected for this fill
        !           732: 
        !           733: 
        !           734: ;-----------------------------------------------------------------------;
        !           735: ; Handles non-replace whole byte fills wider than the maximum special case
        !           736: ; width. On entry EBX = scan count, EDI = start of fill.
        !           737: ;
        !           738: ; The destination is involved, so a MOVSB (or equivalent) must be
        !           739: ; performed in order to do a read before write to give the ALUs something
        !           740: ; to work with.
        !           741: ;-----------------------------------------------------------------------;
        !           742: 
        !           743:         align   4
        !           744: whole_bytes_non_replace_wide:
        !           745:         push    ebx                     ;save scan count
        !           746:         push    edi                     ;save starting address
        !           747: 
        !           748:         add     edi,ulLeftEdgeAdjust    ;point to first whole byte to fill
        !           749:         mov     esi,ulWholeBytes        ;whole bytes width
        !           750:         mov     edx,ulNextScan          ;offset from end of one scan line to
        !           751:                                         ; start of next
        !           752:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
        !           753: 
        !           754: ;-----------------------------------------------------------------------;
        !           755: ; Wide read before write loop.
        !           756: ;
        !           757: ; Entry:
        !           758: ;       AL  = color with which to fill
        !           759: ;       EBX = # of scan lines to fill
        !           760: ;       EDX = offset from end of one scan line to the start of the next
        !           761: ;       ESI = # of bytes to fill across each scan line
        !           762: ;       EDI = start offset
        !           763: ;
        !           764: ; AH, EBX, ECX, EDI modified. All other registers preserved.
        !           765: 
        !           766: ; Wide read/write: outer loop per scan line, inner loop per byte.
        !           767: 
        !           768: draw_wide_rw_loop:
        !           769:         mov     ecx,esi         ;ECX = bytes left to fill on this scan line
        !           770: @@:
        !           771:         mov     ah,[edi]        ;latch the target address. The data read
        !           772:                                 ; doesn't matter
        !           773:         mov     [edi],al        ;merge the drawing color with the latched
        !           774:                                 ; target address according to the selected ALU
        !           775:                                 ; function, and write the result to display
        !           776:                                 ; memory
        !           777:         inc     edi             ;point to the next byte
        !           778:         dec     ecx
        !           779:         jnz     @B              ;do the rest of this scan line
        !           780:         add     edi,edx         ;point to the first whole byte on the next scan line
        !           781:         dec     ebx
        !           782:         jnz     draw_wide_rw_loop ;do the remaining scan lines
        !           783: 
        !           784:         pop     edi                     ;restore screen pointer
        !           785:         pop     ebx                     ;restore fill scan count
        !           786:         jmp     pfnContinueDrawing      ;go draw the partial edges selected for this fill
        !           787: 
        !           788: 
        !           789: ;-----------------------------------------------------------------------;
        !           790: ; Process any left/right columns that have to be done.
        !           791: ;
        !           792: ;   Currently:
        !           793: ;       EBX =   height to fill, in scans
        !           794: ;       EDI --> first byte of left edge
        !           795: ;-----------------------------------------------------------------------;
        !           796: 
        !           797: ;-----------------------------------------------------------------------;
        !           798: ; Handle case where both edges are partial (non-whole) bytes. We don't
        !           799: ; have to read before write because we're using the Map Mask, not the
        !           800: ; Bit Mask. The left edge column is drawn first, then the right.
        !           801: ;-----------------------------------------------------------------------;
        !           802:         align   4
        !           803:         public do_both_edge_bytes
        !           804: do_both_edge_bytes:
        !           805: 
        !           806: ; Set up variables for entering unrolled loop.
        !           807: 
        !           808:         mov     al,byte ptr ulMasks     ;this will become the Map Mask for the
        !           809:                                         ; left edge
        !           810:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
        !           811:         out     dx,al                   ;set Map Mask for left edge
        !           812: 
        !           813:         mov     ecx,ppfnDrawEdgeTable
        !           814:         SET_UP_UNROLL_VARS ebx,edx, ebx,[ecx], LOOP_UNROLL_SHIFT
        !           815:         mov     pfnDraw1WideVector,edx  ;remember the loop entry point for the right edge pass
        !           816: 
        !           817:         mov     ecx,ulScanWidth         ;offset from one scan to next
        !           818: 
        !           819:         mov     esi,ulWholeBytes        ;ESI = # of whole bytes
        !           820:         lea     esi,[esi+edi+1]         ;--> start for right edge (+1 skips the left edge byte)
        !           821:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
        !           822: 
        !           823:         push    ebx                     ;preserve scan line count
        !           824:         call    edx                     ;jump into the unrolled loop to draw
        !           825:         pop     ebx                     ;restore scan line count
        !           826: 
        !           827:         mov     edi,esi                 ;point to first right edge byte
        !           828:         mov     al,byte ptr ulMasks+1   ;this will become the Map Mask for the
        !           829:                                         ; right edge
        !           830:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
        !           831:         out     dx,al                   ;set Map Mask for right edge
        !           832: 
        !           833:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
        !           834: 
        !           835:         push    offset edges_done       ;return here
        !           836:         jmp     pfnDraw1WideVector      ;jump into the unrolled loop to draw
        !           837: 
        !           838: ;-----------------------------------------------------------------------;
        !           839: ; Handle case where only the left edge is partial (non-whole).
        !           840: ;-----------------------------------------------------------------------;
        !           841:         align   4
        !           842: do_left_edge_bytes:
        !           843: 
        !           844: ; Set up variables for entering unrolled loop.
        !           845: 
        !           846:         mov     ecx,ppfnDrawEdgeTable
        !           847:         SET_UP_UNROLL_VARS ebx,esi, ebx,[ecx], LOOP_UNROLL_SHIFT
        !           848: 
        !           849:         mov     ecx,ulScanWidth         ;offset from one scan to next
        !           850:         mov     al,byte ptr ulMasks     ;this will become the Map Mask for the
        !           851:                                         ; left edge
        !           852:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
        !           853:         out     dx,al                   ;set Map Mask for left edge
        !           854: 
        !           855:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
        !           856: 
        !           857:         push    offset edges_done       ;return here
        !           858:         jmp     esi                     ;jump into the unrolled loop to draw
        !           859: 
        !           860: ;-----------------------------------------------------------------------;
        !           861: ; Handle case where only the right edge is partial (non-whole).
        !           862: ;-----------------------------------------------------------------------;
        !           863:         align   4
        !           864: do_right_edge_bytes:
        !           865: 
        !           866: ; Set up variables for entering unrolled loop.
        !           867: 
        !           868:         mov     ecx,ppfnDrawEdgeTable
        !           869:         SET_UP_UNROLL_VARS ebx,esi, ebx,[ecx], LOOP_UNROLL_SHIFT
        !           870: 
        !           871:         mov     ecx,ulScanWidth         ;offset from one scan to next
        !           872:         add     edi,ulWholeBytes        ;--> start for right edge (remember,
        !           873:                                         ; left edge is whole, so the left edge
        !           874:                                         ; byte is included in the whole byte
        !           875:                                         ; count)
        !           876:         mov     al,byte ptr ulMasks+1   ;this will become the Map Mask for the
        !           877:                                         ; right edge
        !           878:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
        !           879:         out     dx,al                   ;set Map Mask for right edge
        !           880: 
        !           881:         mov     eax,ulDrawingColor      ;each byte is color with which to fill
        !           882: 
        !           883:         call    esi                     ;call into the unrolled loop to draw; on return, fall through to edges_done
        !           884: 
        !           885: ;-----------------------------------------------------------------------;
        !           886: ; We have done all partial edges. Reached by falling through from do_right_edge_bytes, or as the return address pushed by do_both_edge_bytes and do_left_edge_bytes.
        !           887: ;-----------------------------------------------------------------------;
        !           888: 
        !           889: edges_done:
        !           890: 
        !           891:         mov     edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
        !           892:         mov     al,MM_ALL               ;restore the default Map Mask of all
        !           893:         out     dx,al                   ; planes enabled
        !           894: 
        !           895: ;-----------------------------------------------------------------------;
        !           896: ; See if there are any more banks to process. If so, map in the next one and restart drawing there.
        !           897: ;-----------------------------------------------------------------------;
        !           898: 
        !           899: check_next_bank:
        !           900: 
        !           901:         mov     edi,ppdev
        !           902:         mov     eax,[edi].pdev_rcl1PlanarClip.yBottom ;is the fill bottom in
        !           903:         cmp     ulBottomScan,eax                      ; the current bank?
        !           904:         jle     short banks_done        ;yes, so we're done
        !           905:                                         ;no, map in the next bank and fill it
        !           906:         mov     ulCurrentTopScan,eax    ;remember where the top of the bank
        !           907:                                         ; we're about to map in is (same as
        !           908:                                         ; bottom of bank we just did)
        !           909: 
        !           910:         ptrCall <dword ptr [edi].pdev_pfnPlanarControl>,<edi,eax,JustifyTop>
        !           911:                                         ;map in the bank
        !           912: 
        !           913: ; Compute the starting address and scan line count in this bank.
        !           914: 
        !           915:         mov     eax,ppdev               ;EAX->target surface
        !           916:         mov     ebx,ulBottomScan        ;bottom of destination rectangle
        !           917:         cmp     ebx,[eax].pdev_rcl1PlanarClip.yBottom
        !           918:                                         ;which comes first, the bottom of the
        !           919:                                         ; dest rect or the bottom of the
        !           920:                                         ; current bank?
        !           921:         jl      short BottomScanSet2    ;fill bottom comes first, so draw to
        !           922:                                         ; that; this is the last bank in fill
        !           923:         mov     ebx,[eax].pdev_rcl1PlanarClip.yBottom
        !           924:                                         ;bank bottom comes first; draw to
        !           925:                                         ; bottom of bank
        !           926: BottomScanSet2:
        !           927:         mov     edi,ulCurrentTopScan    ;top scan line to fill in current bank
        !           928:         sub     ebx,edi                 ;EBX = # of scans to fill in bank
        !           929:         imul    edi,ulScanWidth         ;offset of starting scan line
        !           930: 
        !           931: ; Note that the start of the bitmap will change each time through the
        !           932: ; bank loop, because the start of the bitmap is varied to map the
        !           933: ; desired scan line to the banking window.
        !           934: 
        !           935:         add     edi,[eax].pdev_pvBitmapStart ;start of scan in bitmap
        !           936:         add     edi,ulRowOffset         ;EDI = start offset of fill in bitmap
        !           937: 
        !           938: ; Draw in the new bank, with EBX = scan count and EDI = start address.
        !           939: 
        !           940:         jmp     pfnStartDrawing
        !           941: 
        !           942: 
        !           943: ;-----------------------------------------------------------------------;
        !           944: ; Done with all banks in this fill.
        !           945: 
        !           946: banks_done:
        !           947:         retn
        !           948: 
        !           949: endProc vTrgBlt
        !           950: 
        !           951: 
        !           952: ;-----------------------------------------------------------------------;
        !           953: ; Unrolled loops.
        !           954: ; There are two kinds of unrolled loops: read-before-write (to load the
        !           955: ;  latches), and write-only (for replace-type rops).
        !           956: ;-----------------------------------------------------------------------;
        !           957: 
        !           958: 
        !           959: ;-----------------------------------------------------------------------;
        !           960: ; Unrolled drawing stuff for cases where read before write is required,
        !           961: ; to load the latches.
        !           962: ;-----------------------------------------------------------------------;
        !           963: 
        !           964: ; Tables of entry points into the unrolled 1-, 2-, 3-, and 4-wide read before
        !           965: ; write loops, one table per width (no 5-or-wider table is built here).
        !           966: 
        !           967:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideRWEntry,RW1,LOOP_UNROLL_COUNT
        !           968:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideRWEntry,RW2,LOOP_UNROLL_COUNT
        !           969:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideRWEntry,RW3,LOOP_UNROLL_COUNT
        !           970:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideRWEntry,RW4,LOOP_UNROLL_COUNT
        !           971: 
        !           972: ;-----------------------------------------------------------------------;
        !           973: ; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
        !           974: ;
        !           975: ; Entry:
        !           976: ;       AL  = drawing color
        !           977: ;       EBX = unrolled loop count
        !           978: ;       ECX = scan line width in bytes
        !           979: ;       EDI = start offset
        !           980: ;
        !           981: ; AH, EBX, EDI modified (AH receives the discarded latch-load reads). All other registers preserved.
        !           982: 
        !           983: ;-----------------------------------------------------------------------;
        !           984: ; Macro to read-before-write one byte at [EDI], then advance EDI one scan line (AH is overwritten).
        !           985: 
        !           986: DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !           987: &ENTRY_LABEL&ENTRY_INDEX&:
        !           988:         mov     ah,[edi]        ;latch the target address. The data read
        !           989:                                 ; doesn't matter, so AH is just scratch
        !           990:         mov     [edi],al        ;merge the drawing color with the latched
        !           991:                                 ; target address according to the selected ALU
        !           992:                                 ; function, and write the result to display
        !           993:                                 ; memory
        !           994:         add     edi,ecx         ;point to the next scan line
        !           995:         endm    ;-----------------------------------;
        !           996: 
        !           997: ; 1-wide read/write. Repeats the unrolled body until EBX counts down to zero.
        !           998: 
        !           999:         align   4
        !          1000: draw_1_wide_rw_loop     proc    near
        !          1001:         UNROLL_LOOP     DRAW_1_WIDE_RW,RW1,LOOP_UNROLL_COUNT
        !          1002:         dec     ebx                     ;count off one pass through the unrolled body
        !          1003:         jnz     draw_1_wide_rw_loop     ;passes remain, so start again at the top
        !          1004: 
        !          1005:         ret
        !          1006: 
        !          1007: draw_1_wide_rw_loop     endp
        !          1008: 
        !          1009: ;-----------------------------------------------------------------------;
        !          1010: ; Macro to read-before-write two bytes at [EDI], then advance EDI one scan line (AH is overwritten).
        !          1011: 
        !          1012: DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !          1013: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1014:         mov     ah,[edi]                ;read byte 0 to load the latches; the data is discarded
        !          1015:         mov     [edi],al                ;write the drawing color to byte 0
        !          1016:         mov     ah,[edi+1]              ;load the latches from byte 1
        !          1017:         mov     [edi+1],al              ;write the drawing color to byte 1
        !          1018:         add     edi,ecx                 ;point to the next scan line
        !          1019:         endm    ;-----------------------------------;
        !          1020: 
        !          1021: ; 2-wide read/write. Repeats the unrolled body until EBX counts down to zero.
        !          1022: 
        !          1023:         align   4
        !          1024: draw_2_wide_rw_loop     proc    near
        !          1025:         UNROLL_LOOP     DRAW_2_WIDE_RW,RW2,LOOP_UNROLL_COUNT
        !          1026:         dec     ebx                     ;count off one pass through the unrolled body
        !          1027:         jnz     draw_2_wide_rw_loop     ;passes remain, so start again at the top
        !          1028: 
        !          1029:         ret
        !          1030: 
        !          1031: draw_2_wide_rw_loop     endp
        !          1032: 
        !          1033: ;-----------------------------------------------------------------------;
        !          1034: ; Macro to read-before-write three bytes at [EDI], then advance EDI one scan line (AH is overwritten).
        !          1035: 
        !          1036: DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !          1037: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1038:         mov     ah,[edi]                ;read byte 0 to load the latches; the data is discarded
        !          1039:         mov     [edi],al                ;write the drawing color to byte 0
        !          1040:         mov     ah,[edi+1]              ;load the latches from byte 1
        !          1041:         mov     [edi+1],al              ;write the drawing color to byte 1
        !          1042:         mov     ah,[edi+2]              ;load the latches from byte 2
        !          1043:         mov     [edi+2],al              ;write the drawing color to byte 2
        !          1044:         add     edi,ecx                 ;point to the next scan line
        !          1045:         endm    ;-----------------------------------;
        !          1046: 
        !          1047: ; 3-wide read/write. Repeats the unrolled body until EBX counts down to zero.
        !          1048: 
        !          1049:         align   4
        !          1050: draw_3_wide_rw_loop     proc    near
        !          1051:         UNROLL_LOOP     DRAW_3_WIDE_RW,RW3,LOOP_UNROLL_COUNT
        !          1052:         dec     ebx                     ;count off one pass through the unrolled body
        !          1053:         jnz     draw_3_wide_rw_loop     ;passes remain, so start again at the top
        !          1054: 
        !          1055:         ret
        !          1056: 
        !          1057: draw_3_wide_rw_loop     endp
        !          1058: 
        !          1059: ;-----------------------------------------------------------------------;
        !          1060: ; Macro to draw four read before write bytes, then advance to next scan line (AH is overwritten).
        !          1061: 
        !          1062: DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !          1063: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1064:         mov     ah,[edi]                ;read byte 0 to load the latches; the data is discarded
        !          1065:         mov     [edi],al                ;write the drawing color to byte 0
        !          1066:         mov     ah,[edi+1]              ;load the latches from byte 1
        !          1067:         mov     [edi+1],al              ;write the drawing color to byte 1
        !          1068:         mov     ah,[edi+2]              ;load the latches from byte 2
        !          1069:         mov     [edi+2],al              ;write the drawing color to byte 2
        !          1070:         mov     ah,[edi+3]              ;load the latches from byte 3
        !          1071:         mov     [edi+3],al              ;write the drawing color to byte 3
        !          1072:         add     edi,ecx                 ;point to the next scan line
        !          1073:         endm    ;-----------------------------------;
        !          1074: 
        !          1075: ; 4-wide read/write. Repeats the unrolled body until EBX counts down to zero.
        !          1076: 
        !          1077:         align   4
        !          1078: draw_4_wide_rw_loop     proc    near
        !          1079:         UNROLL_LOOP     DRAW_4_WIDE_RW,RW4,LOOP_UNROLL_COUNT
        !          1080:         dec     ebx                     ;count off one pass through the unrolled body
        !          1081:         jnz     draw_4_wide_rw_loop     ;passes remain, so start again at the top
        !          1082: 
        !          1083:         ret
        !          1084: 
        !          1085: draw_4_wide_rw_loop     endp
        !          1086: 
        !          1087: ;-----------------------------------------------------------------------;
        !          1088: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up),
        !          1089: ; for cases where read before write is NOT required.
        !          1090: ;-----------------------------------------------------------------------;
        !          1091: 
        !          1092: ; Tables of entry points into the unrolled 1- through 8-wide write-only loops.
        !          1093: ; Widths 3 and 5 have separate tables for even and odd start addresses, and
        !          1094: ; widths 6 through 8 have one table per MOD3_n alignment case, because performance
        !          1095: ; can be improved by using different code for different alignments.
        !          1096: 
        !          1097:         UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideWEntry,W1,LOOP_UNROLL_COUNT
        !          1098:         UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideWEntry,W2,LOOP_UNROLL_COUNT
        !          1099:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT
        !          1100:         UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWOddEntry,W3_ODD,LOOP_UNROLL_COUNT
        !          1101:         UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideWEntry,W4,LOOP_UNROLL_COUNT
        !          1102:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT
        !          1103:         UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWOddEntry,W5_ODD,LOOP_UNROLL_COUNT
        !          1104:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_0Entry,W6_MOD3_0,LOOP_UNROLL_COUNT
        !          1105:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_1Entry,W6_MOD3_1,LOOP_UNROLL_COUNT
        !          1106:         UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_2Entry,W6_MOD3_2,LOOP_UNROLL_COUNT
        !          1107:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_0Entry,W7_MOD3_0,LOOP_UNROLL_COUNT
        !          1108:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_1Entry,W7_MOD3_1,LOOP_UNROLL_COUNT
        !          1109:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_2Entry,W7_MOD3_2,LOOP_UNROLL_COUNT
        !          1110:         UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_3Entry,W7_MOD3_3,LOOP_UNROLL_COUNT
        !          1111:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_0Entry,W8_MOD3_0,LOOP_UNROLL_COUNT
        !          1112:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_1Entry,W8_MOD3_1,LOOP_UNROLL_COUNT
        !          1113:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_2Entry,W8_MOD3_2,LOOP_UNROLL_COUNT
        !          1114:         UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_3Entry,W8_MOD3_3,LOOP_UNROLL_COUNT
        !          1115: 
        !          1116: 
        !          1117: ;-----------------------------------------------------------------------;
        !          1118: ; Unrolled 1- through 8-wide write-only edge-drawing loops.
        !          1119: ;
        !          1120: ; Entry:
        !          1121: ;       EAX = fill color, replicated four times
        !          1122: ;       EBX = unrolled loop count
        !          1123: ;       ECX = scan line width in bytes
        !          1124: ;       EDI = start offset
        !          1125: ;
        !          1126: ; EBX, EDI modified. All other registers preserved.
        !          1127: 
        !          1128: ;-----------------------------------------------------------------------;
        !          1129: ; Macro to store one byte of fill color at [EDI] without reading first, then advance EDI one scan line.
        !          1130: 
        !          1131: DRAW_1_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
        !          1132: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1133:         mov     [edi],al                ;draw the pixel
        !          1134:         add     edi,ecx                 ;point to the next scan line
        !          1135:         endm    ;-----------------------------------;
        !          1136: 
        !          1137: ; 1-wide write-only. Repeats the unrolled body until EBX counts down to zero.
        !          1138: 
        !          1139:         align   4
        !          1140: draw_1_wide_w_loop     proc    near
        !          1141:         UNROLL_LOOP     DRAW_1_WIDE_W,W1,LOOP_UNROLL_COUNT
        !          1142:         dec     ebx                     ;count off one pass through the unrolled body
        !          1143:         jnz     draw_1_wide_w_loop      ;passes remain, so start again at the top
        !          1144: 
        !          1145:         ret
        !          1146: 
        !          1147: draw_1_wide_w_loop     endp
        !          1148: 
        !          1149: ;-----------------------------------------------------------------------;
        !          1150: ; Macro to store two bytes of fill color at [EDI] with one word write, then advance EDI one scan line.
        !          1151: 
        !          1152: DRAW_2_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
        !          1153: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1154:         mov     [edi],ax                ;bytes 0-1 in a single word store
        !          1155:         add     edi,ecx                 ;point to the next scan line
        !          1156:         endm    ;-----------------------------------;
        !          1157: 
        !          1158: ; 2-wide write-only. Repeats the unrolled body until EBX counts down to zero.
        !          1159: 
        !          1160:         align   4
        !          1161: draw_2_wide_w_loop     proc    near
        !          1162:         UNROLL_LOOP     DRAW_2_WIDE_W,W2,LOOP_UNROLL_COUNT
        !          1163:         dec     ebx                     ;count off one pass through the unrolled body
        !          1164:         jnz     draw_2_wide_w_loop      ;passes remain, so start again at the top
        !          1165: 
        !          1166:         ret
        !          1167: 
        !          1168: draw_2_wide_w_loop     endp
        !          1169: 
        !          1170: ;-----------------------------------------------------------------------;
        !          1171: ; Macro to draw three write-only bytes, then advance to next scan line.
        !          1172: ; Optimized for even start address: the word store comes first, so it is word-aligned.
        !          1173: 
        !          1174: DRAW_3_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
        !          1175: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1176:         mov     [edi],ax                ;bytes 0-1 as one word
        !          1177:         mov     [edi+2],al              ;byte 2
        !          1178:         add     edi,ecx                 ;point to the next scan line
        !          1179:         endm    ;-----------------------------------;
        !          1180: 
        !          1181: ; 3-wide write-only, starting at an even address. Loops until EBX reaches zero.
        !          1182: 
        !          1183:         align   4
        !          1184: draw_3_wide_w_even_loop     proc    near
        !          1185:         UNROLL_LOOP     DRAW_3_WIDE_W_EVEN,W3_EVEN,LOOP_UNROLL_COUNT
        !          1186:         dec     ebx                     ;count off one pass through the unrolled body
        !          1187:         jnz     draw_3_wide_w_even_loop ;passes remain, so start again at the top
        !          1188: 
        !          1189:         ret
        !          1190: 
        !          1191: draw_3_wide_w_even_loop     endp
        !          1192: 
        !          1193: ;-----------------------------------------------------------------------;
        !          1194: ; Macro to draw three write-only bytes, then advance to next scan line.
        !          1195: ; Optimized for odd start address: a leading byte store puts the word store on an even address.
        !          1196: 
        !          1197: DRAW_3_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
        !          1198: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1199:         mov     [edi],al                ;byte 0
        !          1200:         mov     [edi+1],ax              ;bytes 1-2 as one word
        !          1201:         add     edi,ecx                 ;point to the next scan line
        !          1202:         endm    ;-----------------------------------;
        !          1203: 
        !          1204: ; 3-wide write-only, starting at an odd address. Loops until EBX reaches zero.
        !          1205: 
        !          1206:         align   4
        !          1207: draw_3_wide_w_odd_loop     proc    near
        !          1208:         UNROLL_LOOP     DRAW_3_WIDE_W_ODD,W3_ODD,LOOP_UNROLL_COUNT
        !          1209:         dec     ebx                     ;count off one pass through the unrolled body
        !          1210:         jnz     draw_3_wide_w_odd_loop  ;passes remain, so start again at the top
        !          1211: 
        !          1212:         ret
        !          1213: 
        !          1214: draw_3_wide_w_odd_loop     endp
        !          1215: 
        !          1216: 
        !          1217: ;-----------------------------------------------------------------------;
        !          1218: ; Macro to store four bytes of fill color at [EDI] with one dword write, then advance EDI one scan line.
        !          1219: 
        !          1220: DRAW_4_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
        !          1221: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1222:         mov     [edi],eax               ;bytes 0-3 in a single dword store
        !          1223:         add     edi,ecx                 ;point to the next scan line
        !          1224:         endm    ;-----------------------------------;
        !          1225: 
        !          1226: ; 4-wide write-only. Repeats the unrolled body until EBX counts down to zero.
        !          1227: 
        !          1228:         align   4
        !          1229: draw_4_wide_w_loop     proc    near
        !          1230:         UNROLL_LOOP     DRAW_4_WIDE_W,W4,LOOP_UNROLL_COUNT
        !          1231:         dec     ebx                     ;count off one pass through the unrolled body
        !          1232:         jnz     draw_4_wide_w_loop      ;passes remain, so start again at the top
        !          1233: 
        !          1234:         ret
        !          1235: 
        !          1236: draw_4_wide_w_loop     endp
        !          1237: 
        !          1238: 
        !          1239: ;-----------------------------------------------------------------------;
        !          1240: ; Macro to draw five write-only bytes, then advance to next scan line.
        !          1241: ; Optimized for even start address: dword store first, single trailing byte last.
        !          1242: 
        !          1243: DRAW_5_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
        !          1244: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1245:         mov     [edi],eax               ;bytes 0-3 as one dword
        !          1246:         mov     [edi+4],al              ;byte 4
        !          1247:         add     edi,ecx                 ;point to the next scan line
        !          1248:         endm    ;-----------------------------------;
        !          1249: 
        !          1250: ; 5-wide write-only, starting at an even address. Loops until EBX reaches zero.
        !          1251: 
        !          1252:         align   4
        !          1253: draw_5_wide_w_even_loop     proc    near
        !          1254:         UNROLL_LOOP     DRAW_5_WIDE_W_EVEN,W5_EVEN,LOOP_UNROLL_COUNT
        !          1255:         dec     ebx                     ;count off one pass through the unrolled body
        !          1256:         jnz     draw_5_wide_w_even_loop ;passes remain, so start again at the top
        !          1257: 
        !          1258:         ret
        !          1259: 
        !          1260: draw_5_wide_w_even_loop     endp
        !          1261: 
        !          1262: 
        !          1263: ;-----------------------------------------------------------------------;
        !          1264: ; Macro to draw five write-only bytes, then advance to next scan line.
        !          1265: ; Optimized for odd start address: a leading byte store puts the dword store on an even address.
        !          1266: 
        !          1267: DRAW_5_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
        !          1268: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1269:         mov     [edi],al                ;byte 0
        !          1270:         mov     [edi+1],eax             ;bytes 1-4 as one dword
        !          1271:         add     edi,ecx                 ;point to the next scan line
        !          1272:         endm    ;-----------------------------------;
        !          1273: 
        !          1274: ; 5-wide write-only, starting at an odd address. Loops until EBX reaches zero.
        !          1275: 
        !          1276:         align   4
        !          1277: draw_5_wide_w_odd_loop     proc    near
        !          1278:         UNROLL_LOOP     DRAW_5_WIDE_W_ODD,W5_ODD,LOOP_UNROLL_COUNT
        !          1279:         dec     ebx                     ;count off one pass through the unrolled body
        !          1280:         jnz     draw_5_wide_w_odd_loop  ;passes remain, so start again at the top
        !          1281: 
        !          1282:         ret
        !          1283: 
        !          1284: draw_5_wide_w_odd_loop     endp
        !          1285: 
        !          1286: 
        !          1287: ;-----------------------------------------------------------------------;
        !          1288: ; Macro to draw six write-only bytes, then advance to next scan line.
        !          1289: ; Optimized for start address MOD 4 == 0 (labels say MOD3; presumably EDI AND 3, since cases run 0-3).
        !          1290: 
        !          1291: DRAW_6_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
        !          1292: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1293:         mov     [edi],eax               ;bytes 0-3 as one dword
        !          1294:         mov     [edi+4],ax              ;bytes 4-5 as one word
        !          1295:         add     edi,ecx                 ;point to the next scan line
        !          1296:         endm    ;-----------------------------------;
        !          1297: 
        !          1298: ; 6-wide write-only, starting at MOD 4 == 0. Loops until EBX reaches zero.
        !          1299: 
        !          1300:         align   4
        !          1301: draw_6_wide_w_mod3_0_loop     proc    near
        !          1302:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_0,W6_MOD3_0,LOOP_UNROLL_COUNT
        !          1303:         dec     ebx                     ;count off one pass through the unrolled body
        !          1304:         jnz     draw_6_wide_w_mod3_0_loop ;passes remain, so start again at the top
        !          1305: 
        !          1306:         ret
        !          1307: 
        !          1308: draw_6_wide_w_mod3_0_loop     endp
        !          1309: 
        !          1310: 
        !          1311: ;-----------------------------------------------------------------------;
        !          1312: ; Macro to draw six write-only bytes, then advance to next scan line.
        !          1313: ; Optimized for start address MOD 4 == 1 or 3 (labels say MOD3; presumably EDI AND 3, since cases run 0-3).
        !          1314: 
        !          1315: DRAW_6_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
        !          1316: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1317:         mov     [edi],al                ;byte 0, so the dword store starts on an even address
        !          1318:         mov     [edi+1],eax             ;bytes 1-4 as one dword
        !          1319:         mov     [edi+5],al              ;byte 5
        !          1320:         add     edi,ecx                 ;point to the next scan line
        !          1321:         endm    ;-----------------------------------;
        !          1322: 
        !          1323: ; 6-wide write-only, starting at MOD 4 == 1 or 3. Loops until EBX reaches zero.
        !          1324: 
        !          1325:         align   4
        !          1326: draw_6_wide_w_mod3_1_loop     proc    near
        !          1327:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_1,W6_MOD3_1,LOOP_UNROLL_COUNT
        !          1328:         dec     ebx                     ;count off one pass through the unrolled body
        !          1329:         jnz     draw_6_wide_w_mod3_1_loop ;passes remain, so start again at the top
        !          1330: 
        !          1331:         ret
        !          1332: 
        !          1333: draw_6_wide_w_mod3_1_loop     endp
        !          1334: 
        !          1335: 
        !          1336: ;-----------------------------------------------------------------------;
        !          1337: ; Macro to draw six write-only bytes, then advance to next scan line.
        !          1338: ; Optimized for start address MOD 4 == 2 (labels say MOD3; presumably EDI AND 3, since cases run 0-3).
        !          1339: 
        !          1340: DRAW_6_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
        !          1341: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1342:         mov     [edi],ax                ;bytes 0-1 as one word, reaching a dword boundary
        !          1343:         mov     [edi+2],eax             ;bytes 2-5 as one dword
        !          1344:         add     edi,ecx                 ;point to the next scan line
        !          1345:         endm    ;-----------------------------------;
        !          1346: 
        !          1347: ; 6-wide write-only, starting at MOD 4 == 2. Loops until EBX reaches zero.
        !          1348: 
        !          1349:         align   4
        !          1350: draw_6_wide_w_mod3_2_loop     proc    near
        !          1351:         UNROLL_LOOP     DRAW_6_WIDE_W_MOD3_2,W6_MOD3_2,LOOP_UNROLL_COUNT
        !          1352:         dec     ebx                     ;count off one pass through the unrolled body
        !          1353:         jnz     draw_6_wide_w_mod3_2_loop ;passes remain, so start again at the top
        !          1354: 
        !          1355:         ret
        !          1356: 
        !          1357: draw_6_wide_w_mod3_2_loop     endp
        !          1358: 
        !          1359: 
        !          1360: ;-----------------------------------------------------------------------;
        !          1361: ; Macro to draw seven write-only bytes, then advance to next scan line.
        !          1362: ; Optimized for start address MOD 4 == 0 (labels say MOD3; presumably EDI AND 3, since cases run 0-3).
        !          1363: 
        !          1364: DRAW_7_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
        !          1365: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1366:         mov     [edi],eax               ;bytes 0-3 as one dword
        !          1367:         mov     [edi+4],ax              ;bytes 4-5 as one word
        !          1368:         mov     [edi+6],al              ;byte 6
        !          1369:         add     edi,ecx                 ;point to the next scan line
        !          1370:         endm    ;-----------------------------------;
        !          1371: 
        !          1372: ; 7-wide write-only, starting at MOD 4 == 0. Loops until EBX reaches zero.
        !          1373: 
        !          1374:         align   4
        !          1375: draw_7_wide_w_mod3_0_loop     proc    near
        !          1376:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_0,W7_MOD3_0,LOOP_UNROLL_COUNT
        !          1377:         dec     ebx                     ;count off one pass through the unrolled body
        !          1378:         jnz     draw_7_wide_w_mod3_0_loop ;passes remain, so start again at the top
        !          1379: 
        !          1380:         ret
        !          1381: 
        !          1382: draw_7_wide_w_mod3_0_loop     endp
        !          1383: 
        !          1384: 
        !          1385: ;-----------------------------------------------------------------------;
        !          1386: ; Macro to draw seven write-only bytes, then advance to next scan line.
        !          1387: ; Optimized for start address MOD 4 == 1 (labels say MOD3; presumably EDI AND 3, since cases run 0-3).
        !          1388: 
        !          1389: DRAW_7_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
        !          1390: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1391:         mov     [edi],al                ;byte 0, reaching an even address
        !          1392:         mov     [edi+1],ax              ;bytes 1-2 as one word, reaching a dword boundary
        !          1393:         mov     [edi+3],eax             ;bytes 3-6 as one dword
        !          1394:         add     edi,ecx                 ;point to the next scan line
        !          1395:         endm    ;-----------------------------------;
        !          1396: 
        !          1397: ; 7-wide write-only, starting at MOD 4 == 1. Loops until EBX reaches zero.
        !          1398: 
        !          1399:         align   4
        !          1400: draw_7_wide_w_mod3_1_loop     proc    near
        !          1401:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_1,W7_MOD3_1,LOOP_UNROLL_COUNT
        !          1402:         dec     ebx                     ;count off one pass through the unrolled body
        !          1403:         jnz     draw_7_wide_w_mod3_1_loop ;passes remain, so start again at the top
        !          1404: 
        !          1405:         ret
        !          1406: 
        !          1407: draw_7_wide_w_mod3_1_loop     endp
        !          1408: 
        !          1409: 
        !          1410: ;-----------------------------------------------------------------------;
        !          1411: ; Macro to draw seven write-only bytes, then advance to next scan line.
        !          1412: ; Optimized for start address MOD 4 == 2 (labels say MOD3; presumably EDI AND 3, since cases run 0-3).
        !          1413: 
        !          1414: DRAW_7_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
        !          1415: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1416:         mov     [edi],ax                ;bytes 0-1 as one word, reaching a dword boundary
        !          1417:         mov     [edi+2],eax             ;bytes 2-5 as one dword
        !          1418:         mov     [edi+6],al              ;byte 6
        !          1419:         add     edi,ecx                 ;point to the next scan line
        !          1420:         endm    ;-----------------------------------;
        !          1421: 
        !          1422: ; 7-wide write-only, starting at MOD 4 == 2. Loops until EBX reaches zero.
        !          1423: 
        !          1424:         align   4
        !          1425: draw_7_wide_w_mod3_2_loop     proc    near
        !          1426:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_2,W7_MOD3_2,LOOP_UNROLL_COUNT
        !          1427:         dec     ebx                     ;count off one pass through the unrolled body
        !          1428:         jnz     draw_7_wide_w_mod3_2_loop ;passes remain, so start again at the top
        !          1429: 
        !          1430:         ret
        !          1431: 
        !          1432: draw_7_wide_w_mod3_2_loop     endp
        !          1433: 
        !          1434: 
        !          1435: ;-----------------------------------------------------------------------;
        !          1436: ; Macro to draw seven write-only bytes, then advance to next scan line.
        !          1437: ; Optimized for start address MOD 4 == 3 (labels say MOD3; presumably EDI AND 3, since cases run 0-3).
        !          1438: 
        !          1439: DRAW_7_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
        !          1440: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1441:         mov     [edi],al                ;byte 0, reaching a dword boundary
        !          1442:         mov     [edi+1],eax             ;bytes 1-4 as one dword
        !          1443:         mov     [edi+5],ax              ;bytes 5-6 as one word
        !          1444:         add     edi,ecx                 ;point to the next scan line
        !          1445:         endm    ;-----------------------------------;
        !          1446: 
        !          1447: ; 7-wide write-only, starting at MOD 4 == 3. Loops until EBX reaches zero.
        !          1448: 
        !          1449:         align   4
        !          1450: draw_7_wide_w_mod3_3_loop     proc    near
        !          1451:         UNROLL_LOOP     DRAW_7_WIDE_W_MOD3_3,W7_MOD3_3,LOOP_UNROLL_COUNT
        !          1452:         dec     ebx                     ;count off one pass through the unrolled body
        !          1453:         jnz     draw_7_wide_w_mod3_3_loop ;passes remain, so start again at the top
        !          1454: 
        !          1455:         ret
        !          1456: 
        !          1457: draw_7_wide_w_mod3_3_loop     endp
        !          1458: 
        !          1459: 
        !          1460: ;-----------------------------------------------------------------------;
        !          1461: ; Macro to draw eight write-only bytes, then advance to next scan line.
        !          1462: ; Optimized for start address MOD 3 == 0 (presumably address AND 3).
        !          1463: 
        !          1464: DRAW_8_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
        !          1465: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1466:         mov     [edi],eax               ;bytes 0-3
        !          1467:         mov     [edi+4],eax             ;bytes 4-7
        !          1468:         add     edi,ecx                 ;point to the next scan line
        !          1469:         endm    ;-----------------------------------;
        !          1470: 
        !          1471: ; 8-wide write-only, starting at MOD 3 == 0. EAX = data, ECX = scan delta.
        !          1472: 
        !          1473:         align   4
        !          1474: draw_8_wide_w_mod3_0_loop     proc    near
        !          1475:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_0,W8_MOD3_0,LOOP_UNROLL_COUNT
        !          1476:         dec     ebx                     ;EBX = loops remaining
        !          1477:         jnz     draw_8_wide_w_mod3_0_loop  ;back to the top until EBX is zero
        !          1478: 
        !          1479:         ret
        !          1480: 
        !          1481: draw_8_wide_w_mod3_0_loop     endp
        !          1482: 
        !          1483: 
        !          1484: ;-----------------------------------------------------------------------;
        !          1485: ; Macro to draw eight write-only bytes, then advance to next scan line.
        !          1486: ; Optimized for start address MOD 3 == 1 (presumably address AND 3).
        !          1487: 
        !          1488: DRAW_8_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
        !          1489: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1490:         mov     [edi],al                ;byte 0
        !          1491:         mov     [edi+1],ax              ;bytes 1-2
        !          1492:         mov     [edi+3],eax             ;bytes 3-6
        !          1493:         mov     [edi+7],al              ;byte 7
        !          1494:         add     edi,ecx                 ;point to the next scan line
        !          1495:         endm    ;-----------------------------------;
        !          1496: 
        !          1497: ; 8-wide write-only, starting at MOD 3 == 1. EAX = data, ECX = scan delta.
        !          1498: 
        !          1499:         align   4
        !          1500: draw_8_wide_w_mod3_1_loop     proc    near
        !          1501:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_1,W8_MOD3_1,LOOP_UNROLL_COUNT
        !          1502:         dec     ebx                     ;EBX = loops remaining
        !          1503:         jnz     draw_8_wide_w_mod3_1_loop  ;back to the top until EBX is zero
        !          1504: 
        !          1505:         ret
        !          1506: 
        !          1507: draw_8_wide_w_mod3_1_loop     endp
        !          1508: 
        !          1509: 
        !          1510: ;-----------------------------------------------------------------------;
        !          1511: ; Macro to draw eight write-only bytes, then advance to next scan line.
        !          1512: ; Optimized for start address MOD 3 == 2 (presumably address AND 3).
        !          1513: 
        !          1514: DRAW_8_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
        !          1515: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1516:         mov     [edi],ax                ;bytes 0-1
        !          1517:         mov     [edi+2],eax             ;bytes 2-5
        !          1518:         mov     [edi+6],ax              ;bytes 6-7
        !          1519:         add     edi,ecx                 ;point to the next scan line
        !          1520:         endm    ;-----------------------------------;
        !          1521: 
        !          1522: ; 8-wide write-only, starting at MOD 3 == 2. EAX = data, ECX = scan delta.
        !          1523: 
        !          1524:         align   4
        !          1525: draw_8_wide_w_mod3_2_loop     proc    near
        !          1526:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_2,W8_MOD3_2,LOOP_UNROLL_COUNT
        !          1527:         dec     ebx                     ;EBX = loops remaining
        !          1528:         jnz     draw_8_wide_w_mod3_2_loop  ;back to the top until EBX is zero
        !          1529: 
        !          1530:         ret
        !          1531: 
        !          1532: draw_8_wide_w_mod3_2_loop     endp
        !          1533: 
        !          1534: 
        !          1535: ;-----------------------------------------------------------------------;
        !          1536: ; Macro to draw eight write-only bytes, then advance to next scan line.
        !          1537: ; Optimized for start address MOD 3 == 3 (presumably address AND 3).
        !          1538: 
        !          1539: DRAW_8_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
        !          1540: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1541:         mov     [edi],al                ;byte 0
        !          1542:         mov     [edi+1],eax             ;bytes 1-4
        !          1543:         mov     [edi+5],ax              ;bytes 5-6
        !          1544:         mov     [edi+7],al              ;byte 7
        !          1545:         add     edi,ecx                 ;point to the next scan line
        !          1546:         endm    ;-----------------------------------;
        !          1547: 
        !          1548: ; 8-wide write-only, starting at MOD 3 == 3. EAX = data, ECX = scan delta.
        !          1549: 
        !          1550:         align   4
        !          1551: draw_8_wide_w_mod3_3_loop     proc    near
        !          1552:         UNROLL_LOOP     DRAW_8_WIDE_W_MOD3_3,W8_MOD3_3,LOOP_UNROLL_COUNT
        !          1553:         dec     ebx                     ;EBX = loops remaining
        !          1554:         jnz     draw_8_wide_w_mod3_3_loop  ;back to the top until EBX is zero
        !          1555: 
        !          1556:         ret
        !          1557: 
        !          1558: draw_8_wide_w_mod3_3_loop     endp
        !          1559: 
        !          1560: ;-----------------------------------------------------------------------;
        !          1561: ; Unrolled loop stuff for wide replace-type rops (arbitrary width).
        !          1562: ;-----------------------------------------------------------------------;
        !          1563: 
        !          1564: ; Tables of entry points into the unrolled wide write-only loops below (Wxy: x leading bytes, y trailing bytes).
        !          1565:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW00Entry,W_00,LOOP_UNROLL_COUNT
        !          1566:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW01Entry,W_01,LOOP_UNROLL_COUNT
        !          1567:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW02Entry,W_02,LOOP_UNROLL_COUNT
        !          1568:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW03Entry,W_03,LOOP_UNROLL_COUNT
        !          1569:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW10Entry,W_10,LOOP_UNROLL_COUNT
        !          1570:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW11Entry,W_11,LOOP_UNROLL_COUNT
        !          1571:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW12Entry,W_12,LOOP_UNROLL_COUNT
        !          1572:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW13Entry,W_13,LOOP_UNROLL_COUNT
        !          1573:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW20Entry,W_20,LOOP_UNROLL_COUNT
        !          1574:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW21Entry,W_21,LOOP_UNROLL_COUNT
        !          1575:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW22Entry,W_22,LOOP_UNROLL_COUNT
        !          1576:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW23Entry,W_23,LOOP_UNROLL_COUNT
        !          1577:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW30Entry,W_30,LOOP_UNROLL_COUNT
        !          1578:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW31Entry,W_31,LOOP_UNROLL_COUNT
        !          1579:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW32Entry,W_32,LOOP_UNROLL_COUNT
        !          1580:         UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW33Entry,W_33,LOOP_UNROLL_COUNT
        !          1581: 
        !          1582: ;-----------------------------------------------------------------------;
        !          1583: ; Macro to draw n write-only bytes, 0 leading bytes, 0 trailing bytes,
        !          1584: ; then advance to next scan line.
        !          1585: 
        !          1586: DRAW_WIDE_W_00 macro ENTRY_LABEL,ENTRY_INDEX
        !          1587: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1588:         mov     ecx,esi         ;# of whole dwords
        !          1589:         rep     stosd           ;fill all whole bytes as dwords
        !          1590:         add     edi,edx         ;point to the next scan line
        !          1591:         endm    ;-----------------------------------;
        !          1592: 
        !          1593: ; N-wide write-only, 0 leading bytes, 0 trailing bytes.
        !          1594: ;  EAX = 0ffffh (the value REP STOSD writes)
        !          1595: ;  EBX = count of scans to fill
        !          1596: ;  EDX = offset from end of one scan's fill to start of next
        !          1597: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1598: ;  EDI = target address to fill
        !          1599: 
        !          1600:         align   4
        !          1601: draw_wide_w_00_loop     proc    near
        !          1602:         UNROLL_LOOP     DRAW_WIDE_W_00,W_00,LOOP_UNROLL_COUNT
        !          1603:         dec     ebx             ;EBX = loops remaining
        !          1604:         jnz     draw_wide_w_00_loop  ;back to the top until EBX is zero
        !          1605: 
        !          1606:         ret
        !          1607: 
        !          1608: draw_wide_w_00_loop     endp
        !          1609: 
        !          1610: 
        !          1611: ;-----------------------------------------------------------------------;
        !          1612: ; Macro to draw n write-only bytes, 0 leading bytes, 1 trailing byte,
        !          1613: ; then advance to next scan line.
        !          1614: 
        !          1615: DRAW_WIDE_W_01 macro ENTRY_LABEL,ENTRY_INDEX
        !          1616: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1617:         mov     ecx,esi         ;# of whole dwords
        !          1618:         rep     stosd           ;fill whole bytes as dwords
        !          1619:         mov     [edi],al        ;fill the trailing byte
        !          1620:         inc     edi             ;step past the trailing byte
        !          1621:         add     edi,edx         ;point to the next scan line
        !          1622:         endm    ;-----------------------------------;
        !          1623: 
        !          1624: ; N-wide write-only, 0 leading bytes, 1 trailing byte.
        !          1625: ;  EAX = value to store (source of REP STOSD and the AL store)
        !          1626: ;  EBX = count of scans to fill
        !          1627: ;  EDX = offset from end of one scan's fill to start of next
        !          1628: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1629: ;  EDI = target address to fill
        !          1630: 
        !          1631:         align   4
        !          1632: draw_wide_w_01_loop     proc    near
        !          1633:         UNROLL_LOOP     DRAW_WIDE_W_01,W_01,LOOP_UNROLL_COUNT
        !          1634:         dec     ebx             ;EBX = loops remaining
        !          1635:         jnz     draw_wide_w_01_loop  ;back to the top until EBX is zero
        !          1636: 
        !          1637:         ret
        !          1638: 
        !          1639: draw_wide_w_01_loop     endp
        !          1640: 
        !          1641: 
        !          1642: ;-----------------------------------------------------------------------;
        !          1643: ; Macro to draw n write-only bytes, 0 leading bytes, 2 trailing bytes,
        !          1644: ; then advance to next scan line.
        !          1645: 
        !          1646: DRAW_WIDE_W_02 macro ENTRY_LABEL,ENTRY_INDEX
        !          1647: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1648:         mov     ecx,esi         ;# of whole dwords
        !          1649:         rep     stosd           ;fill whole bytes as dwords
        !          1650:         mov     [edi],ax        ;fill the trailing word
        !          1651:         add     edi,2           ;step past the trailing word
        !          1652:         add     edi,edx         ;point to the next scan line
        !          1653:         endm    ;-----------------------------------;
        !          1654: 
        !          1655: ; N-wide write-only, 0 leading bytes, 2 trailing bytes.
        !          1656: ;  EAX = value to store (source of REP STOSD and the AX store)
        !          1657: ;  EBX = count of scans to fill
        !          1658: ;  EDX = offset from end of one scan's fill to start of next
        !          1659: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1660: ;  EDI = target address to fill
        !          1661: 
        !          1662:         align   4
        !          1663: draw_wide_w_02_loop     proc    near
        !          1664:         UNROLL_LOOP     DRAW_WIDE_W_02,W_02,LOOP_UNROLL_COUNT
        !          1665:         dec     ebx             ;EBX = loops remaining
        !          1666:         jnz     draw_wide_w_02_loop  ;back to the top until EBX is zero
        !          1667: 
        !          1668:         ret
        !          1669: 
        !          1670: draw_wide_w_02_loop     endp
        !          1671: 
        !          1672: 
        !          1673: ;-----------------------------------------------------------------------;
        !          1674: ; Macro to draw n write-only bytes, 0 leading bytes, 3 trailing bytes,
        !          1675: ; then advance to next scan line.
        !          1676: 
        !          1677: DRAW_WIDE_W_03 macro ENTRY_LABEL,ENTRY_INDEX
        !          1678: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1679:         mov     ecx,esi         ;# of whole dwords
        !          1680:         rep     stosd           ;fill whole bytes as dwords
        !          1681:         mov     [edi],ax        ;fill the trailing word
        !          1682:         mov     [edi+2],al      ;fill the trailing byte
        !          1683:         add     edi,3           ;step past the three trailing bytes
        !          1684:         add     edi,edx         ;point to the next scan line
        !          1685:         endm    ;-----------------------------------;
        !          1686: 
        !          1687: ; N-wide write-only, 0 leading bytes, 3 trailing bytes.
        !          1688: ;  EAX = value to store (source of REP STOSD and the AX/AL stores)
        !          1689: ;  EBX = count of scans to fill
        !          1690: ;  EDX = offset from end of one scan's fill to start of next
        !          1691: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1692: ;  EDI = target address to fill
        !          1693: 
        !          1694:         align   4
        !          1695: draw_wide_w_03_loop     proc    near
        !          1696:         UNROLL_LOOP     DRAW_WIDE_W_03,W_03,LOOP_UNROLL_COUNT
        !          1697:         dec     ebx             ;EBX = loops remaining
        !          1698:         jnz     draw_wide_w_03_loop  ;back to the top until EBX is zero
        !          1699: 
        !          1700:         ret
        !          1701: 
        !          1702: draw_wide_w_03_loop     endp
        !          1703: 
        !          1704: 
        !          1705: ;-----------------------------------------------------------------------;
        !          1706: ; Macro to draw n write-only bytes, 1 leading byte, 0 trailing bytes,
        !          1707: ; then advance to next scan line.
        !          1708: 
        !          1709: DRAW_WIDE_W_10 macro ENTRY_LABEL,ENTRY_INDEX
        !          1710: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1711:         mov     [edi],al        ;fill the leading byte
        !          1712:         inc     edi             ;step past the leading byte
        !          1713:         mov     ecx,esi         ;# of whole dwords
        !          1714:         rep     stosd           ;fill all whole bytes as dwords
        !          1715:         add     edi,edx         ;point to the next scan line
        !          1716:         endm    ;-----------------------------------;
        !          1717: 
        !          1718: ; N-wide write-only, 1 leading byte, 0 trailing bytes.
        !          1719: ;  EAX = value to store (source of the AL store and REP STOSD)
        !          1720: ;  EBX = count of scans to fill
        !          1721: ;  EDX = offset from end of one scan's fill to start of next
        !          1722: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1723: ;  EDI = target address to fill
        !          1724: 
        !          1725:         align   4
        !          1726: draw_wide_w_10_loop     proc    near
        !          1727:         UNROLL_LOOP     DRAW_WIDE_W_10,W_10,LOOP_UNROLL_COUNT
        !          1728:         dec     ebx             ;EBX = loops remaining
        !          1729:         jnz     draw_wide_w_10_loop  ;back to the top until EBX is zero
        !          1730: 
        !          1731:         ret
        !          1732: 
        !          1733: draw_wide_w_10_loop     endp
        !          1734: 
        !          1735: 
        !          1736: ;-----------------------------------------------------------------------;
        !          1737: ; Macro to draw n write-only bytes, 1 leading byte, 1 trailing byte,
        !          1738: ; then advance to next scan line.
        !          1739: 
        !          1740: DRAW_WIDE_W_11 macro ENTRY_LABEL,ENTRY_INDEX
        !          1741: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1742:         mov     [edi],al        ;fill the leading byte
        !          1743:         inc     edi             ;step past the leading byte
        !          1744:         mov     ecx,esi         ;# of whole dwords
        !          1745:         rep     stosd           ;fill whole bytes as dwords
        !          1746:         mov     [edi],al        ;fill the trailing byte
        !          1747:         inc     edi             ;step past the trailing byte
        !          1748:         add     edi,edx         ;point to the next scan line
        !          1749:         endm    ;-----------------------------------;
        !          1750: 
        !          1751: ; N-wide write-only, 1 leading byte, 1 trailing byte.
        !          1752: ;  EAX = value to store (source of REP STOSD and the AL stores)
        !          1753: ;  EBX = count of scans to fill
        !          1754: ;  EDX = offset from end of one scan's fill to start of next
        !          1755: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1756: ;  EDI = target address to fill
        !          1757: 
        !          1758:         align   4
        !          1759: draw_wide_w_11_loop     proc    near
        !          1760:         UNROLL_LOOP     DRAW_WIDE_W_11,W_11,LOOP_UNROLL_COUNT
        !          1761:         dec     ebx             ;EBX = loops remaining
        !          1762:         jnz     draw_wide_w_11_loop  ;back to the top until EBX is zero
        !          1763: 
        !          1764:         ret
        !          1765: 
        !          1766: draw_wide_w_11_loop     endp
        !          1767: 
        !          1768: 
        !          1769: ;-----------------------------------------------------------------------;
        !          1770: ; Macro to draw n write-only bytes, 1 leading byte, 2 trailing bytes,
        !          1771: ; then advance to next scan line.
        !          1772: 
        !          1773: DRAW_WIDE_W_12 macro ENTRY_LABEL,ENTRY_INDEX
        !          1774: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1775:         mov     [edi],al        ;fill the leading byte
        !          1776:         inc     edi             ;step past the leading byte
        !          1777:         mov     ecx,esi         ;# of whole dwords
        !          1778:         rep     stosd           ;fill whole bytes as dwords
        !          1779:         mov     [edi],ax        ;fill the trailing word
        !          1780:         add     edi,2           ;step past the trailing word
        !          1781:         add     edi,edx         ;point to the next scan line
        !          1782:         endm    ;-----------------------------------;
        !          1783: 
        !          1784: ; N-wide write-only, 1 leading byte, 2 trailing bytes.
        !          1785: ;  EAX = value to store (source of REP STOSD and the AL/AX stores)
        !          1786: ;  EBX = count of scans to fill
        !          1787: ;  EDX = offset from end of one scan's fill to start of next
        !          1788: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1789: ;  EDI = target address to fill
        !          1790: 
        !          1791:         align   4
        !          1792: draw_wide_w_12_loop     proc    near
        !          1793:         UNROLL_LOOP     DRAW_WIDE_W_12,W_12,LOOP_UNROLL_COUNT
        !          1794:         dec     ebx             ;EBX = loops remaining
        !          1795:         jnz     draw_wide_w_12_loop  ;back to the top until EBX is zero
        !          1796: 
        !          1797:         ret
        !          1798: 
        !          1799: draw_wide_w_12_loop     endp
        !          1800: 
        !          1801: 
        !          1802: ;-----------------------------------------------------------------------;
        !          1803: ; Macro to draw n write-only bytes, 1 leading byte, 3 trailing bytes,
        !          1804: ; then advance to next scan line.
        !          1805: 
        !          1806: DRAW_WIDE_W_13 macro ENTRY_LABEL,ENTRY_INDEX
        !          1807: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1808:         mov     [edi],al        ;fill the leading byte
        !          1809:         inc     edi             ;step past the leading byte
        !          1810:         mov     ecx,esi         ;# of whole dwords
        !          1811:         rep     stosd           ;fill whole bytes as dwords
        !          1812:         mov     [edi],ax        ;fill the trailing word
        !          1813:         mov     [edi+2],al      ;fill the trailing byte
        !          1814:         add     edi,3           ;step past the three trailing bytes
        !          1815:         add     edi,edx         ;point to the next scan line
        !          1816:         endm    ;-----------------------------------;
        !          1817: 
        !          1818: ; N-wide write-only, 1 leading byte, 3 trailing bytes.
        !          1819: ;  EAX = value to store (source of REP STOSD and the AL/AX stores)
        !          1820: ;  EBX = count of scans to fill
        !          1821: ;  EDX = offset from end of one scan's fill to start of next
        !          1822: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1823: ;  EDI = target address to fill
        !          1824: 
        !          1825:         align   4
        !          1826: draw_wide_w_13_loop     proc    near
        !          1827:         UNROLL_LOOP     DRAW_WIDE_W_13,W_13,LOOP_UNROLL_COUNT
        !          1828:         dec     ebx             ;EBX = loops remaining
        !          1829:         jnz     draw_wide_w_13_loop  ;back to the top until EBX is zero
        !          1830: 
        !          1831:         ret
        !          1832: 
        !          1833: draw_wide_w_13_loop     endp
        !          1834: 
        !          1835: 
        !          1836: ;-----------------------------------------------------------------------;
        !          1837: ; Macro to draw n write-only bytes, 2 leading bytes, 0 trailing bytes,
        !          1838: ; then advance to next scan line.
        !          1839: 
        !          1840: DRAW_WIDE_W_20 macro ENTRY_LABEL,ENTRY_INDEX
        !          1841: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1842:         mov     [edi],ax        ;fill the leading word
        !          1843:         add     edi,2           ;step past the leading word
        !          1844:         mov     ecx,esi         ;# of whole dwords
        !          1845:         rep     stosd           ;fill all whole bytes as dwords
        !          1846:         add     edi,edx         ;point to the next scan line
        !          1847:         endm    ;-----------------------------------;
        !          1848: 
        !          1849: ; N-wide write-only, 2 leading bytes, 0 trailing bytes.
        !          1850: ;  EAX = value to store (source of the AX store and REP STOSD)
        !          1851: ;  EBX = count of scans to fill
        !          1852: ;  EDX = offset from end of one scan's fill to start of next
        !          1853: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1854: ;  EDI = target address to fill
        !          1855: 
        !          1856:         align   4
        !          1857: draw_wide_w_20_loop     proc    near
        !          1858:         UNROLL_LOOP     DRAW_WIDE_W_20,W_20,LOOP_UNROLL_COUNT
        !          1859:         dec     ebx             ;EBX = loops remaining
        !          1860:         jnz     draw_wide_w_20_loop  ;back to the top until EBX is zero
        !          1861: 
        !          1862:         ret
        !          1863: 
        !          1864: draw_wide_w_20_loop     endp
        !          1865: 
        !          1866: 
        !          1867: ;-----------------------------------------------------------------------;
        !          1868: ; Macro to draw n write-only bytes, 2 leading bytes, 1 trailing byte,
        !          1869: ; then advance to next scan line.
        !          1870: 
        !          1871: DRAW_WIDE_W_21 macro ENTRY_LABEL,ENTRY_INDEX
        !          1872: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1873:         mov     [edi],ax        ;fill the leading word
        !          1874:         add     edi,2           ;step past the leading word
        !          1875:         mov     ecx,esi         ;# of whole dwords
        !          1876:         rep     stosd           ;fill whole bytes as dwords
        !          1877:         mov     [edi],al        ;fill the trailing byte
        !          1878:         inc     edi             ;step past the trailing byte
        !          1879:         add     edi,edx         ;point to the next scan line
        !          1880:         endm    ;-----------------------------------;
        !          1881: 
        !          1882: ; N-wide write-only, 2 leading bytes, 1 trailing byte.
        !          1883: ;  EAX = value to store (source of REP STOSD and the AX/AL stores)
        !          1884: ;  EBX = count of scans to fill
        !          1885: ;  EDX = offset from end of one scan's fill to start of next
        !          1886: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1887: ;  EDI = target address to fill
        !          1888: 
        !          1889:         align   4
        !          1890: draw_wide_w_21_loop     proc    near
        !          1891:         UNROLL_LOOP     DRAW_WIDE_W_21,W_21,LOOP_UNROLL_COUNT
        !          1892:         dec     ebx             ;EBX = loops remaining
        !          1893:         jnz     draw_wide_w_21_loop  ;back to the top until EBX is zero
        !          1894: 
        !          1895:         ret
        !          1896: 
        !          1897: draw_wide_w_21_loop     endp
        !          1898: 
        !          1899: 
        !          1900: ;-----------------------------------------------------------------------;
        !          1901: ; Macro to draw n write-only bytes, 2 leading bytes, 2 trailing bytes,
        !          1902: ; then advance to next scan line.
        !          1903: 
        !          1904: DRAW_WIDE_W_22 macro ENTRY_LABEL,ENTRY_INDEX
        !          1905: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1906:         mov     [edi],ax        ;fill the leading word
        !          1907:         add     edi,2           ;step past the leading word
        !          1908:         mov     ecx,esi         ;# of whole dwords
        !          1909:         rep     stosd           ;fill whole bytes as dwords
        !          1910:         mov     [edi],ax        ;fill the trailing word
        !          1911:         add     edi,2           ;step past the trailing word
        !          1912:         add     edi,edx         ;point to the next scan line
        !          1913:         endm    ;-----------------------------------;
        !          1914: 
        !          1915: ; N-wide write-only, 2 leading bytes, 2 trailing bytes.
        !          1916: ;  EAX = value to store (source of REP STOSD and the AX stores)
        !          1917: ;  EBX = count of scans to fill
        !          1918: ;  EDX = offset from end of one scan's fill to start of next
        !          1919: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1920: ;  EDI = target address to fill
        !          1921: 
        !          1922:         align   4
        !          1923: draw_wide_w_22_loop     proc    near
        !          1924:         UNROLL_LOOP     DRAW_WIDE_W_22,W_22,LOOP_UNROLL_COUNT
        !          1925:         dec     ebx             ;EBX = loops remaining
        !          1926:         jnz     draw_wide_w_22_loop  ;back to the top until EBX is zero
        !          1927: 
        !          1928:         ret
        !          1929: 
        !          1930: draw_wide_w_22_loop     endp
        !          1931: 
        !          1932: 
        !          1933: ;-----------------------------------------------------------------------;
        !          1934: ; Macro to draw n write-only bytes, 2 leading bytes, 3 trailing bytes,
        !          1935: ; then advance to next scan line.
        !          1936: 
        !          1937: DRAW_WIDE_W_23 macro ENTRY_LABEL,ENTRY_INDEX
        !          1938: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1939:         mov     [edi],ax        ;fill the leading word
        !          1940:         add     edi,2           ;step past the leading word
        !          1941:         mov     ecx,esi         ;# of whole dwords
        !          1942:         rep     stosd           ;fill whole bytes as dwords
        !          1943:         mov     [edi],ax        ;fill the trailing word
        !          1944:         mov     [edi+2],al      ;fill the trailing byte
        !          1945:         add     edi,3           ;step past the three trailing bytes
        !          1946:         add     edi,edx         ;point to the next scan line
        !          1947:         endm    ;-----------------------------------;
        !          1948: 
        !          1949: ; N-wide write-only, 2 leading bytes, 3 trailing bytes.
        !          1950: ;  EAX = value to store (source of REP STOSD and the AX/AL stores)
        !          1951: ;  EBX = count of scans to fill
        !          1952: ;  EDX = offset from end of one scan's fill to start of next
        !          1953: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1954: ;  EDI = target address to fill
        !          1955: 
        !          1956:         align   4
        !          1957: draw_wide_w_23_loop     proc    near
        !          1958:         UNROLL_LOOP     DRAW_WIDE_W_23,W_23,LOOP_UNROLL_COUNT
        !          1959:         dec     ebx             ;EBX = loops remaining
        !          1960:         jnz     draw_wide_w_23_loop  ;back to the top until EBX is zero
        !          1961: 
        !          1962:         ret
        !          1963: 
        !          1964: draw_wide_w_23_loop     endp
        !          1965: 
        !          1966: 
        !          1967: ;-----------------------------------------------------------------------;
        !          1968: ; Macro to draw n write-only bytes, 3 leading bytes, 0 trailing bytes,
        !          1969: ; then advance to next scan line.
        !          1970: 
        !          1971: DRAW_WIDE_W_30 macro ENTRY_LABEL,ENTRY_INDEX
        !          1972: &ENTRY_LABEL&ENTRY_INDEX&:
        !          1973:         mov     [edi],al        ;fill the leading byte
        !          1974:         mov     [edi+1],ax      ;fill the leading word
        !          1975:         add     edi,3           ;step past the three leading bytes
        !          1976:         mov     ecx,esi         ;# of whole dwords
        !          1977:         rep     stosd           ;fill all whole bytes as dwords
        !          1978:         add     edi,edx         ;point to the next scan line
        !          1979:         endm    ;-----------------------------------;
        !          1980: 
        !          1981: ; N-wide write-only, 3 leading bytes, 0 trailing bytes.
        !          1982: ;  EAX = value to store (source of the AL/AX stores and REP STOSD)
        !          1983: ;  EBX = count of scans to fill
        !          1984: ;  EDX = offset from end of one scan's fill to start of next
        !          1985: ;  ESI = # of dwords to fill (copied to ECX for each scan)
        !          1986: ;  EDI = target address to fill
        !          1987: 
        !          1988:         align   4
        !          1989: draw_wide_w_30_loop     proc    near
        !          1990:         UNROLL_LOOP     DRAW_WIDE_W_30,W_30,LOOP_UNROLL_COUNT
        !          1991:         dec     ebx             ;EBX = loops remaining
        !          1992:         jnz     draw_wide_w_30_loop  ;back to the top until EBX is zero
        !          1993: 
        !          1994:         ret
        !          1995: 
        !          1996: draw_wide_w_30_loop     endp
        !          1997: 
        !          1998: 
        !          1999: ;-----------------------------------------------------------------------;
        !          2000: ; Macro to fill one scan, write-only (destination is never read): 3 leading
        !          2001: ; bytes, ESI whole dwords, 1 trailing byte; then advance to next scan line.
        !          2002: 
        !          2003: DRAW_WIDE_W_31 macro ENTRY_LABEL,ENTRY_INDEX
        !          2004: &ENTRY_LABEL&ENTRY_INDEX&:      ;label pasted together from the two macro arguments
        !          2005:         mov     [edi],al        ;fill the leading byte (byte 0) from AL
        !          2006:         mov     [edi+1],ax      ;fill the leading word (bytes 1-2) from AX
        !          2007:         add     edi,3           ;step past the 3 leading bytes (presumably onto a dword boundary - confirm)
        !          2008:         mov     ecx,esi         ;# of whole dwords (reloaded each scan: REP counts ECX down)
        !          2009:         rep     stosd           ;fill whole bytes as dwords from EAX
        !          2010:         mov     [edi],al        ;fill the trailing byte, right after the dword run (DF assumed clear)
        !          2011:         inc     edi             ;step past the trailing byte
        !          2012:         add     edi,edx         ;point to the next scan line (EDX = end-of-fill to next-start offset)
        !          2013:         endm    ;-----------------------------------;
        !          2014: 
        !          2015: ; N-wide write-only, 3 leading bytes, 1 trailing byte.
        !          2016: ;  EAX = fill value (DRAW_WIDE_W_31 stores it via AL, AX and STOSD)
        !          2017: ;  EBX = count of scans to fill (decremented once per pass of the loop below; NOTE(review): confirm how this interacts with LOOP_UNROLL_COUNT)
        !          2018: ;  EDX = offset from end of one scan's fill to start of next
        !          2019: ;  ESI = # of whole dwords to fill per scan
        !          2020: ;  EDI = target address to fill
        !          2021: 
        !          2022:         align   4
        !          2023: draw_wide_w_31_loop     proc    near
        !          2024:         UNROLL_LOOP     DRAW_WIDE_W_31,W_31,LOOP_UNROLL_COUNT
        !          2025:         dec     ebx             ;one more pass through the loop body done
        !          2026:         jnz     draw_wide_w_31_loop ;repeat from the top until EBX reaches zero
        !          2027: 
        !          2028:         ret
        !          2029: 
        !          2030: draw_wide_w_31_loop     endp
        !          2031: 
        !          2032: 
        !          2033: ;-----------------------------------------------------------------------;
        !          2034: ; Macro to fill one scan, write-only (destination is never read): 3 leading
        !          2035: ; bytes, ESI whole dwords, 2 trailing bytes; then advance to next scan line.
        !          2036: 
        !          2037: DRAW_WIDE_W_32 macro ENTRY_LABEL,ENTRY_INDEX
        !          2038: &ENTRY_LABEL&ENTRY_INDEX&:      ;label pasted together from the two macro arguments
        !          2039:         mov     [edi],al        ;fill the leading byte (byte 0) from AL
        !          2040:         mov     [edi+1],ax      ;fill the leading word (bytes 1-2) from AX
        !          2041:         add     edi,3           ;step past the 3 leading bytes (presumably onto a dword boundary - confirm)
        !          2042:         mov     ecx,esi         ;# of whole dwords (reloaded each scan: REP counts ECX down)
        !          2043:         rep     stosd           ;fill whole bytes as dwords from EAX
        !          2044:         mov     [edi],ax        ;fill the trailing word, right after the dword run (DF assumed clear)
        !          2045:         add     edi,2           ;step past the 2 trailing bytes
        !          2046:         add     edi,edx         ;point to the next scan line (EDX = end-of-fill to next-start offset)
        !          2047:         endm    ;-----------------------------------;
        !          2048: 
        !          2049: ; N-wide write-only, 3 leading bytes, 2 trailing bytes.
        !          2050: ;  EAX = fill value (DRAW_WIDE_W_32 stores it via AL, AX and STOSD)
        !          2051: ;  EBX = count of scans to fill (decremented once per pass of the loop below; NOTE(review): confirm how this interacts with LOOP_UNROLL_COUNT)
        !          2052: ;  EDX = offset from end of one scan's fill to start of next
        !          2053: ;  ESI = # of whole dwords to fill per scan
        !          2054: ;  EDI = target address to fill
        !          2055: 
        !          2056:         align   4
        !          2057: draw_wide_w_32_loop     proc    near
        !          2058:         UNROLL_LOOP     DRAW_WIDE_W_32,W_32,LOOP_UNROLL_COUNT
        !          2059:         dec     ebx             ;one more pass through the loop body done
        !          2060:         jnz     draw_wide_w_32_loop ;repeat from the top until EBX reaches zero
        !          2061: 
        !          2062:         ret
        !          2063: 
        !          2064: draw_wide_w_32_loop     endp
        !          2065: 
        !          2066: 
        !          2067: ;-----------------------------------------------------------------------;
        !          2068: ; Macro to fill one scan, write-only (destination is never read): 3 leading
        !          2069: ; bytes, ESI whole dwords, 3 trailing bytes; then advance to next scan line.
        !          2070: 
        !          2071: DRAW_WIDE_W_33 macro ENTRY_LABEL,ENTRY_INDEX
        !          2072: &ENTRY_LABEL&ENTRY_INDEX&:      ;label pasted together from the two macro arguments
        !          2073:         mov     [edi],al        ;fill the leading byte (byte 0) from AL
        !          2074:         mov     [edi+1],ax      ;fill the leading word (bytes 1-2) from AX
        !          2075:         add     edi,3           ;step past the 3 leading bytes (presumably onto a dword boundary - confirm)
        !          2076:         mov     ecx,esi         ;# of whole dwords (reloaded each scan: REP counts ECX down)
        !          2077:         rep     stosd           ;fill whole bytes as dwords from EAX
        !          2078:         mov     [edi],ax        ;fill the trailing word, right after the dword run (DF assumed clear)
        !          2079:         mov     [edi+2],al      ;fill the trailing byte (third trailing byte) from AL
        !          2080:         add     edi,3           ;step past the 3 trailing bytes
        !          2081:         add     edi,edx         ;point to the next scan line (EDX = end-of-fill to next-start offset)
        !          2082:         endm    ;-----------------------------------;
        !          2083: 
        !          2084: ; N-wide write-only, 3 leading bytes, 3 trailing bytes.
        !          2085: ;  EAX = fill value (DRAW_WIDE_W_33 stores it via AL, AX and STOSD)
        !          2086: ;  EBX = count of scans to fill (decremented once per pass of the loop below; NOTE(review): confirm how this interacts with LOOP_UNROLL_COUNT)
        !          2087: ;  EDX = offset from end of one scan's fill to start of next
        !          2088: ;  ESI = # of whole dwords to fill per scan
        !          2089: ;  EDI = target address to fill
        !          2090: 
        !          2091:         align   4
        !          2092: draw_wide_w_33_loop     proc    near
        !          2093:         UNROLL_LOOP     DRAW_WIDE_W_33,W_33,LOOP_UNROLL_COUNT
        !          2094:         dec     ebx             ;one more pass through the loop body done
        !          2095:         jnz     draw_wide_w_33_loop ;repeat from the top until EBX reaches zero
        !          2096: 
        !          2097:         ret
        !          2098: 
        !          2099: draw_wide_w_33_loop     endp
        !          2100: 
        !          2101:         end
        !          2102: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.