File:  [WindowsNT SDKs] / ntddk / src / video / displays / vga / i386 / alignblt.asm
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Thu Aug 9 18:31:12 2018 UTC (7 years, 9 months ago) by root
Branches: msft, MAIN
CVS tags: ntddk-nov-1993, HEAD
Microsoft Windows NT Build 511 (DDK SDK) 11-01-1993

;---------------------------Module-Header------------------------------;
; Module Name: alignblt.asm
;
; Copyright (c) 1992 Microsoft Corporation
;-----------------------------------------------------------------------;

;-----------------------------------------------------------------------;
; VOID vAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
;                      INT icopydir);
; Input:
;  pdsurf - surface on which to copy
;  prcldest - pointer to destination rectangle
;  pptlsrc - pointer to source upper left corner
;  icopydir - direction in which copy must proceed to avoid overlap problems
;             and synchronize with the clip enumeration visually, according to
;             constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and CD_LEFTUP in
;             WINDDI.H
;
; Performs accelerated aligned SRCCOPY VGA-to-VGA blts.
;
;-----------------------------------------------------------------------;
;
; Note: Assumes all rectangles have positive heights and widths. Will not
; work properly if this is not the case.
;
;-----------------------------------------------------------------------;

        comment $

The overall approach of this module for each rectangle to copy is:

1) Precalculate the masks and whole byte widths, and determine which of
partial left edge, partial right edge, and whole middle bytes are required
for this copy.

2) Set up the starting pointers for each of the areas (left, whole middle,
right), the start and stop scan lines, the copying direction (left-to-right
or right-to-left, and top-to-bottom or bottom-to-top), the threading
(sequence of calls required to do the left/whole/right components in the
proper sequence), based on the passed-in copy direction, which in turn is
dictated by the nature of the overlap between the source and destination.

3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
1 R/W window, unbanked), that sequences through the intersection of each
bank with the source and destination rectangles in the proper direction
(top-to-bottom or bottom-to-top, based on the passed-in copy direction),
and performs the copy in each such rectangle. The threading vector is used
to call the required routines (copy left/whole/right bytes). For 1 R/W and
1R/1W adapters, there is a second threading vector that is called when the
source and the destination are both adequately (for the copy purposes)
addressable simultaneously (because they're in the same bank), so there's
no need to copy through a temp buffer. Obviously, we want to avoid the temp
buffer whenever we can, because it's much slower and doesn't let us take
advantage of the VGA's hardware.

Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
each plane's bytes are not stored in the corresponding plane's temp buffer, but
rather consecutively in the plane 0 temp buffer. This is to reduce page
faulting, and also so that 1R/1W adapters only need a temp buffer large enough
to hold 4*tallest bank bytes (2K will do here, but nalgnblt.asm needs 4K).
1 R/W adapters still copy whole bytes through the full temp buffer, using all
four planes' temp buffers, so they require a temp buffer big enough to hold a
full bank (256K will do).

        commend $

;-----------------------------------------------------------------------;
; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
; times unrolling. This is the only thing you need to change to control
; unrolling. Note: does not affect loops that process in chunks, like edge
; loops.
;
; The setting below (2) therefore requests 2**2 = 4 times unrolling.

LOOP_UNROLL_SHIFT equ 2

;-----------------------------------------------------------------------;
; Maximum # of edge bytes to process before switching to next plane. Larger
; means faster, but there's more potential for flicker, since the raster scan
; has a better chance of catching bytes that have changed in some planes but
; not all planes.

EDGE_CHUNK_SIZE equ     16

;-----------------------------------------------------------------------;
; Macro to push the current threading sequence (string of routine calls) on the
; stack, then jump to the first threading entry. The threading pointer can be
; specified, or defaults to pCurrentThread. The return address can be
; immediately after the JMP, or can be specified.
;
; Arguments:
;  THREADING   - optional operand loaded into EAX as the pointer to a thread
;                descriptor (a DWORD count followed by one to three routine
;                addresses); pCurrentThread is used when this is blank
;  RETURN_ADDR - optional label pushed as the final return address; when
;                blank, a label placed right after the expansion is used
;
; Operation: the return address is pushed first, then the third and second
; routine addresses (when present) are pushed, and finally the macro jumps
; to the first routine. As each routine returns, it therefore enters the
; next routine in the thread, and the last return lands on the return
; address.
;
; Overwrites EAX, ECX, and EDX. The computed jump relies on the two PUSH
; instructions being exactly 3 bytes each; do not change their encoding.

THREAD_AND_START macro THREADING,RETURN_ADDR
        local   push_base, return_address

ifb <&RETURN_ADDR&>
        push    offset return_address   ;after all the threaded routines, we
                                        ; return here
else
        push    offset &RETURN_ADDR&    ;return here
endif

ifb <&THREADING&>
        mov     eax,pCurrentThread
else
        mov     eax,&THREADING&
endif

        mov     ecx,[eax]               ;# of routines to thread (at least 1)
        lea     ecx,[ecx*2+ecx]         ;pushes below are 3 bytes each
        mov     edx,offset push_base+3  ;EDX = push_base + 3 - 3*count, so:
        sub     edx,ecx                 ; count 1 -> push_base (no pushes)
                                        ; count 2 -> push_base-3 (one push)
                                        ; count 3 -> push_base-6 (two pushes)
        jmp     edx                     ;branch to push or jmp below

; Push the threading addresses on to the stack, so routines perform the
; threading as they return.

        push    dword ptr [eax+12]       ;3 byte instruction
        push    dword ptr [eax+8]
push_base:
        jmp     dword ptr [eax+4]        ;jump to the first threaded routine

        align   4
return_address:
        endm

;-----------------------------------------------------------------------;

                .386

ifndef  DOS_PLATFORM
        .model  small,c
else
ifdef   STD_CALL
        .model  small,c
else
        .model  small,pascal
endif;  STD_CALL
endif;  DOS_PLATFORM

        assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
        assume fs:nothing,gs:nothing

        .xlist
        include stdcall.inc             ;calling convention cmacros
        include i386\egavga.inc
        include i386\strucs.inc
        include i386\unroll.inc
        include i386\ropdefs.inc

        .list

;-----------------------------------------------------------------------;

        .data

; Thread descriptors. Each one names, in execution order, the routines that
; copy some combination of the left edge (L), the whole bytes (W), and the
; right edge (R). A "b" after a letter means that step goes through the temp
; buffer. Every descriptor has this layout:
;
; DWORD +0 = # of calls in thread (1, 2, or 3)
;       +4 = first call (required)
;       +8 = second call (optional)
;      +12 = third call (optional)

        align   4

; Threads in which no step uses the temp buffer.

Thread_L        dd      1, copy_left_edge
Thread_W        dd      1, copy_whole_bytes
Thread_R        dd      1, copy_right_edge

Thread_LR       dd      2, copy_left_edge,   copy_right_edge
Thread_RL       dd      2, copy_right_edge,  copy_left_edge
Thread_LW       dd      2, copy_left_edge,   copy_whole_bytes
Thread_WL       dd      2, copy_whole_bytes, copy_left_edge
Thread_WR       dd      2, copy_whole_bytes, copy_right_edge
Thread_RW       dd      2, copy_right_edge,  copy_whole_bytes

Thread_LWR      dd      3, copy_left_edge,  copy_whole_bytes, copy_right_edge
Thread_RWL      dd      3, copy_right_edge, copy_whole_bytes, copy_left_edge

; Threads in which at least one step uses the temp buffer.

Thread_Lb       dd      1, copy_left_edge_via_buffer
Thread_Wb       dd      1, copy_whole_bytes_via_buffer
Thread_Rb       dd      1, copy_right_edge_via_buffer

Thread_LbRb     dd      2, copy_left_edge_via_buffer
                dd      copy_right_edge_via_buffer
Thread_RbLb     dd      2, copy_right_edge_via_buffer
                dd      copy_left_edge_via_buffer
Thread_LbW      dd      2, copy_left_edge_via_buffer
                dd      copy_whole_bytes
Thread_LbWb     dd      2, copy_left_edge_via_buffer
                dd      copy_whole_bytes_via_buffer
Thread_WLb      dd      2, copy_whole_bytes
                dd      copy_left_edge_via_buffer
Thread_WbLb     dd      2, copy_whole_bytes_via_buffer
                dd      copy_left_edge_via_buffer
Thread_WRb      dd      2, copy_whole_bytes
                dd      copy_right_edge_via_buffer
Thread_WbRb     dd      2, copy_whole_bytes_via_buffer
                dd      copy_right_edge_via_buffer
Thread_RbW      dd      2, copy_right_edge_via_buffer
                dd      copy_whole_bytes
Thread_RbWb     dd      2, copy_right_edge_via_buffer
                dd      copy_whole_bytes_via_buffer

Thread_LbWRb    dd      3, copy_left_edge_via_buffer
                dd      copy_whole_bytes
                dd      copy_right_edge_via_buffer
Thread_LbWbRb   dd      3, copy_left_edge_via_buffer
                dd      copy_whole_bytes_via_buffer
                dd      copy_right_edge_via_buffer
Thread_RbWLb    dd      3, copy_right_edge_via_buffer
                dd      copy_whole_bytes
                dd      copy_left_edge_via_buffer
Thread_RbWbLb   dd      3, copy_right_edge_via_buffer
                dd      copy_whole_bytes_via_buffer
                dd      copy_left_edge_via_buffer

;-----------------------------------------------------------------------;
; MasterThreadTable: thread descriptor to use when the source and the
; destination are both addressable at once, so nothing has to go through
; the temp buffer (unbanked, 2 R/W, and sometimes 1 R/W and 1R/1W).
;
; The look-up index is a 4-bit field:
;
; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
; Bit 2 = 1 if left edge must be copied
; Bit 1 = 1 if whole bytes must be copied
; Bit 0 = 1 if right edge must be copied
;
; Each line below holds four consecutive entries; the trailing comment
; lists which parts (L/W/R) each entry copies, "-" marking an unused slot.

MasterThreadTable label dword

; Right-to-left copies (indices 0-7): right edge first, left edge last.

        dd      0,        Thread_R,  Thread_W,  Thread_RW   ;-, R,  W,  RW
        dd      Thread_L, Thread_RL, Thread_WL, Thread_RWL  ;L, RL, WL, RWL

; Left-to-right copies (indices 8-15): left edge first, right edge last.

        dd      0,        Thread_R,  Thread_W,  Thread_WR   ;-, R,  W,  WR
        dd      Thread_L, Thread_LR, Thread_LW, Thread_LWR  ;L, LR, LW, LWR


; MasterThreadTableViaBuffer: thread descriptor to use when the source and
; the destination are not both addressable at once, so the copy may have to
; go through the temp buffer (only for 1 R/W and 1R/1W, and only sometimes).
;
; The look-up index is a 6-bit field:
;
; Bit 5 = adapter type high bit
; Bit 4 = adapter type low bit
; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
; Bit 2 = 1 if left edge must be copied
; Bit 1 = 1 if whole bytes must be copied
; Bit 0 = 1 if right edge must be copied
;
; Each line below holds four consecutive entries. Within every adapter
; group the first two lines are the right-to-left entries and the last two
; are the left-to-right entries; the 0 entries are unused slots.

MasterThreadTableViaBuffer label dword

; Adapter type 0: unbanked (no need for buffer).

        dd      0,         Thread_R,    Thread_W,    Thread_RW      ;R->L
        dd      Thread_L,  Thread_RL,   Thread_WL,   Thread_RWL
        dd      0,         Thread_R,    Thread_W,    Thread_WR      ;L->R
        dd      Thread_L,  Thread_LR,   Thread_LW,   Thread_LWR

; Adapter type 1: 1 R/W banking window (everything goes through buffer).

        dd      0,         Thread_Rb,   Thread_Wb,   Thread_RbWb    ;R->L
        dd      Thread_Lb, Thread_RbLb, Thread_WbLb, Thread_RbWbLb
        dd      0,         Thread_Rb,   Thread_Wb,   Thread_WbRb    ;L->R
        dd      Thread_Lb, Thread_LbRb, Thread_LbWb, Thread_LbWbRb

; Adapter type 2: 1R/1W banking window (only the edges go through buffer).

        dd      0,         Thread_Rb,   Thread_W,    Thread_RbW     ;R->L
        dd      Thread_Lb, Thread_RbLb, Thread_WLb,  Thread_RbWLb
        dd      0,         Thread_Rb,   Thread_W,    Thread_WRb     ;L->R
        dd      Thread_Lb, Thread_LbRb, Thread_LbW,  Thread_LbWRb

; Adapter type 3: 2 R/W banking window (no need for buffer).

        dd      0,         Thread_R,    Thread_W,    Thread_RW      ;R->L
        dd      Thread_L,  Thread_RL,   Thread_WL,   Thread_RWL
        dd      0,         Thread_R,    Thread_W,    Thread_WR      ;L->R
        dd      Thread_L,  Thread_LR,   Thread_LW,   Thread_LWR


; Number of bit positions the adapter type is shifted left to form its field
; in a MasterThreadTableViaBuffer index.

ADAPTER_FIELD_SHIFT     equ     4

; OR this into a MasterThreadTable or MasterThreadTableViaBuffer index to
; select the left-to-right entries.

LEFT_TO_RIGHT_FIELD_SET equ     1000b


; Top-to-bottom bank loops, one entry per adapter type.

        align   4
TopToBottomLoopTable    dd      top_to_bottom_2RW   ;unbanked is same as 2RW
                        dd      top_to_bottom_1RW   ;1 R/W
                        dd      top_to_bottom_1R1W  ;1R/1W
                        dd      top_to_bottom_2RW   ;2 R/W


; Bottom-to-top bank loops, one entry per adapter type.

        align   4
BottomToTopLoopTable    dd      bottom_to_top_2RW   ;unbanked is same as 2RW
                        dd      bottom_to_top_1RW   ;1 R/W
                        dd      bottom_to_top_1R1W  ;1R/1W
                        dd      bottom_to_top_2RW   ;2 R/W


; Set-up routines, one entry per copy direction code.

        align   4
SetUpForCopyDirection   dd      left_to_right_top_to_bottom     ;CD_RIGHTDOWN
                        dd      right_to_left_top_to_bottom     ;CD_LEFTDOWN
                        dd      left_to_right_bottom_to_top     ;CD_RIGHTUP
                        dd      right_to_left_bottom_to_top     ;CD_LEFTUP

;-----------------------------------------------------------------------;
; Left edge clip masks, indexed by the intrabyte start address (0 through
; 7). An entry of all ones (index 0) flags a whole byte.

jLeftMaskTable  db      11111111b, 01111111b, 00111111b, 00011111b
                db      00001111b, 00000111b, 00000011b, 00000001b

;-----------------------------------------------------------------------;
; Right edge clip masks, indexed by the intrabyte end address
; (non-inclusive, 0 through 7). An entry of all ones (index 0) flags a
; whole byte.

jRightMaskTable db      11111111b, 10000000b, 11000000b, 11100000b
                db      11110000b, 11111000b, 11111100b, 11111110b

;-----------------------------------------------------------------------;

        .code

_TEXT$03   SEGMENT DWORD USE32 PUBLIC 'CODE'
           ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING

;-----------------------------------------------------------------------;

; vAlignedSrcCopy entry point. Caches the temp buffer pointers and the
; banking type from the DEVSURF in locals, calls copy_rect to perform the
; copy, then puts the bit mask, the plane write enables, and the direction
; flag back to their defaults before returning.
;
; The argument list is a single angle-bracketed text literal continued
; across lines; it must be closed with ">" after the last argument.

cProc   vAlignedSrcCopy,16,<        \
        uses    esi edi ebx,    \
        pdsurf: ptr DEVSURF,    \
        prcldest : ptr RECTL,   \
        pptlsrc : ptr POINTL,   \
        icopydir : dword        >

        local   culWholeBytesWidth : dword ;# of bytes to copy across each scan
        local   ulBlockHeight : dword   ;# of scans to copy per bank block
        local   ulWholeScanDelta : dword;offset from end of one whole bytes
                                        ; scan to start of next
        local   ulWholeBytesSrc : dword ;offset in bitmap of first source whole
                                        ; byte to copy from
        local   ulWholeBytesDest : dword;offset in bitmap of first dest whole
                                        ; byte to copy to
        local   ulLeftEdgeSrc : dword   ;offset in bitmap of first source left
                                        ; edge byte to copy from
        local   ulLeftEdgeDest : dword  ;offset in bitmap of first dest left
                                        ; edge byte to copy to
        local   ulRightEdgeSrc : dword  ;offset in bitmap of first source right
                                        ; edge byte to copy from
        local   ulRightEdgeDest : dword ;offset in bitmap of first dest right
                                        ; edge byte to copy to
        local   ulNextScan : dword      ;width of scan, in bytes
        local   jLeftMask : dword       ;left edge clip mask (only the low byte
                                        ; is written by copy_rect)
        local   jRightMask : dword      ;right edge clip mask (only the low
                                        ; byte is written by copy_rect)
        local   culTempCount : dword    ;handy temporary counter
        local   pTempEntry : dword      ;temporary storage for vector into
                                        ; unrolled loop
        local   pTempPlane : dword      ;pointer to storage in temp buffer for
                                        ; edge bytes (which are stored
                                        ; consecutively, not in each plane's
                                        ; temp buffer, to reduce possible page
                                        ; faulting)
        local   ppTempPlane0 : dword    ;pointer to pointer to storage in temp
                                        ; buffer for plane 0, immediately
                                        ; preceded by storage for planes 1, 2,
                                        ; and 3
        local   ppTempPlane3 : dword    ;like above, but for plane 3
        local   ulOffsetInBank : dword  ;offset relative to bank start
        local   pSrcAddr : dword        ;working pointer to first source
                                        ; byte to copy from
        local   pDestAddr : dword       ;working pointer to first dest
                                        ; byte to copy to
        local   ulCurrentJustification:dword ;justification used to map in
                                             ; banks; top for top to bottom
                                             ; copies, bottom for bottom to top
        local   ulCurrentSrcScan :dword ;scan line used to map in current
                                        ; source bank
        local   ulCurrentDestScan:dword ;scan line used to map in current dest
                                        ; bank
        local   ulLastDestScan :dword   ;scan in target rect at which we stop
                                        ; advancing through banks
        local   pCurrentThread : dword  ;pointer to data describing the
                                        ; threaded calls to be performed to
                                        ; perform the current copy
        local   pCurrentThreadViaBuffer:dword
                                        ;pointer to data describing the
                                        ; threaded calls to be performed to
                                        ; perform the current copy in the case
                                        ; where the source and destination are
                                        ; not simultaneously adequately
                                        ; accessible, so the copy has to go
                                        ; through a temp buffer (used only for
                                        ; 1 R/W and 1R/1W banking)
        local   ulAdapterType : dword   ;adapter type code, per VIDEO_BANK_TYPE
        local   ulLWRType : dword       ;whether left edge, whole bytes, and
                                        ; right edge are involved in the
                                        ; current operation;
                                        ; bit 2 = 1 if left edge involved
                                        ; bit 1 = 1 if whole bytes involved
                                        ; bit 0 = 1 if right edge involved
        local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
                                        ; address past the left edge when the
                                        ; left edge is partial

;-----------------------------------------------------------------------;

; Set pointers to temp buffer plane pointers (used only by 1 R/W and 1R/1W
; adapters), and other rectangle-independent variables.

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_pvBankBufferPlane0
        mov     pTempPlane,eax          ;edge bytes all go in plane 0's buffer
        lea     eax,[esi].dsurf_pvBankBufferPlane0
        mov     ppTempPlane0,eax        ;address of the plane 0 buffer pointer
        lea     eax,[esi].dsurf_pvBankBufferPlane3
        mov     ppTempPlane3,eax        ;address of the plane 3 buffer pointer

        mov     eax,[esi].dsurf_vbtBankingType
        mov     ulAdapterType,eax       ;indexes the adapter type tables

; Copy the rectangle.

        call    copy_rect

;-----------------------------------------------------------------------;
; Set the VGA registers back to their default state.
;-----------------------------------------------------------------------;

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        out     dx,ax           ;enable bit mask for all bits

        mov     dl,SEQ_DATA     ;only DL changes; DH is left as loaded above
        mov     al,MM_ALL
        out     dx,al           ;enable writes to all planes
                                ;NOTE(review): writes the data port without
                                ; setting the index, so presumably the
                                ; sequencer index is left pointing at the map
                                ; mask by the copy routines - confirm

        cld                     ;restore default direction flag

        cRet    vAlignedSrcCopy ;done


;***********************************************************************;
;
; Copies the specified rectangle.
;
; Reads the destination rectangle (prcldest) and the copy direction
; (icopydir) from the enclosing procedure's frame, then stores:
;
;  jLeftMask          - left edge clip mask (low byte only)
;  jRightMask         - right edge clip mask (low byte only); 0 if the
;                       copy touches only one byte per scan
;  culWholeBytesWidth - # of whole bytes per scan
;  ulLeftEdgeAdjust   - 1 if the left edge is partial, 0 if it is whole
;  ulLWRType          - bit 2 = left edge, bit 1 = whole bytes, bit 0 =
;                       right edge involved in the copy
;
; and finally jumps to the set-up routine for the copy direction through
; SetUpForCopyDirection.
;
; Overwrites EAX, EBX, ECX, ESI, and EDI.
;
;***********************************************************************;

        align   4
copy_rect:

; Set up masks and whole bytes count, and build left/whole/right index
; indicating which of those parts are involved in the copy.

        mov     edi,prcldest            ;point to rectangle to copy

        mov     ebx,[edi].xRight        ;right edge of copy (non-inclusive)
        mov     ecx,ebx
        and     ecx,0111b               ;intrabyte address of right edge
        mov     ah,jRightMaskTable[ecx] ;right edge mask

        mov     esi,[edi].xLeft         ;left edge of copy (inclusive)
        sub     ebx,esi                 ;width in pixels of copy

        and     esi,0111b               ;intrabyte address of left edge
        mov     al,jLeftMaskTable[esi]  ;left edge mask

        dec     ebx                     ;make inclusive on right
        add     ebx,esi                 ;inclusive width, starting counting at
                                        ; the beginning of the left edge byte
        shr     ebx,3                   ;width of copy in bytes touched - 1
        jnz     short more_than_1_byte  ;more than 1 byte is involved

; Only one byte will be affected. Combine first/last masks.

        and     al,ah                   ;we'll use first byte mask only
        xor     ah,ah                   ;want last byte mask to be 0 to
                                        ; indicate right edge not involved
        inc     ebx                     ;so there's one count to subtract below
                                        ; if this isn't a whole edge byte
more_than_1_byte:

; If all pixels in the left edge are altered, combine the first byte into the
; whole byte count, because we can handle solid edge bytes faster as part of
; the whole bytes. Ditto for the right edge.
;
; ECX accumulates the left/whole/right flags. Until save_masks, bit 0 is the
; partial-left-edge flag and bit 6 is the partial-right-edge flag. The ADC at
; save_masks doubles ECX and adds the whole bytes flag (left -> bit 1,
; right -> bit 7, whole -> bit 0), and the ROL of CL then brings the flags
; to their final places: left in bit 2, whole in bit 1, right in bit 0.

        sub     ecx,ecx                 ;edge whole-status accumulator
        cmp     al,-1                   ;is left edge a whole byte or partial?
                                        ; (CF set if AL is below 0ffh)
        adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
        sub     ebx,ecx                 ;if left edge partial, deduct it from
                                        ; the whole bytes count
        mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
                                        ; it's partial when pointing to the
                                        ; whole bytes
        and     ah,ah                   ;is right edge mask 0, meaning this
                                        ; copy is only 1 byte wide?
        jz      short save_masks        ;yes, no need to do anything
        or      ecx,40h                 ;assume there's a partial right edge
        cmp     ah,-1                   ;is right edge a whole byte or partial?
        jnz     short save_masks        ;partial, so leave the right edge flag
                                        ; (bit 6) set
        inc     ebx                     ;if right edge whole, include it in the
                                        ; whole bytes count
        and     ecx,not 40h             ;there's no partial right edge
save_masks:
        cmp     ebx,1                   ;do we have any whole bytes?
                                        ; (CF set if the count is 0)
        cmc                             ;CF set if whole byte count > 0
        adc     ecx,ecx                 ;if any whole bytes, set whole bytes
                                        ; bit in left/whole/right accumulator
        rol     cl,1                    ;align the left/whole/right bits
        mov     ulLWRType,ecx           ;save left/whole/right status

        mov     byte ptr jLeftMask,al   ;save left and right clip masks
        mov     byte ptr jRightMask,ah
        mov     culWholeBytesWidth,ebx  ;save # of whole bytes

; Copy the rectangle in the specified direction.

        mov     eax,icopydir            ;CD_* code; used as a table index
                                        ; with no range check
        jmp     SetUpForCopyDirection[eax*4]


;***********************************************************************;
;
; The following routines set up to handle the four possible copy
; directions.
;
;***********************************************************************;


;-----------------------------------------------------------------------;
; Set-up code for left-to-right, top-to-bottom copies.
;
; Clears the direction flag, selects the two threading descriptors from
; the left-to-right halves of the thread tables, records the start and
; end destination scans and the starting source scan, and computes the
; bitmap offsets of the left edge, whole bytes, and right edge for both
; the destination and the source. Then jumps to the top-to-bottom bank
; loop for the adapter type.
;
; Expects culWholeBytesWidth, ulLWRType, ulLeftEdgeAdjust, and
; ulAdapterType to be set already. Overwrites EAX, EDX, and ESI.
;-----------------------------------------------------------------------;

        align   4
left_to_right_top_to_bottom:

        cld                             ;we'll copy left to right

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_lNextScan
        mov     ulNextScan,eax          ;copy top to bottom
        sub     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
        mov     ulWholeScanDelta,eax    ; to start of next

        mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                                        ; right involvement in operation
        or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;threading when no buffering is needed
        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;factor adapter type into the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed

        mov     ulCurrentJustification,JustifyTop ;copy top to bottom

        mov     esi,prcldest
        mov     eax,[esi].yBottom
        mov     ulLastDestScan,eax      ;end at bottom of dest copy rect
        mov     eax,[esi].yTop
        mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
        mul     ulNextScan              ;offset in bitmap of top dest rect scan
                                        ; (MUL also overwrites EDX, which is
                                        ; reloaded on the next line)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
        add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                                        ; byte, unless the left edge is a whole
                                        ; byte and is thus part of the whole
                                        ; bytes already
        mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
        add     eax,culWholeBytesWidth  ;point to the right edge
        mov     ulRightEdgeDest,eax     ;where the right dest edge starts

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
        mul     ulNextScan              ;offset in bitmap of top source rect
                                        ; scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
        add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                                        ; byte, unless the left edge is a whole
                                        ; byte and is thus part of the whole
                                        ; bytes already
        mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
        add     eax,culWholeBytesWidth  ;point to the right edge
        mov     ulRightEdgeSrc,eax      ;where the right src edge starts

; Branch to the appropriate top-to-bottom bank enumeration loop.

        mov     eax,ulAdapterType
        jmp     TopToBottomLoopTable[eax*4]


;-----------------------------------------------------------------------;
; Set-up code for right-to-left, top-to-bottom copies.
;-----------------------------------------------------------------------;

        align   4
right_to_left_top_to_bottom:

; Entry fragment for copies that proceed right-to-left within a scan and
; top-to-bottom from scan to scan. It selects the copy threads, records the
; scan stepping values, computes the starting dest and source offsets for the
; left edge, whole bytes, and right edge, and then dispatches to the
; top-to-bottom bank loop for the current adapter type.
;
; EAX and EDX are scratch; ESI holds whichever index or structure pointer is
; currently being read.
;
; NOTE(review): the direction flag is still set when this fragment jumps away.
; Confirm that everything reached from the bank loops (including the routine
; invoked through dsurf_pfnBankControl2Window) tolerates DF=1.

        std                             ;string instructions step downward:
                                        ; copy right to left

; Select the threads. The 3-bit left/whole/right field is used as the index
; with the left-to-right field left clear, so the right-to-left entries are
; looked up. The adapter type is merged in for the via-buffer table only.

        mov     esi,ulLWRType           ;left/whole/right involvement flags
        mov     edx,ulAdapterType
        mov     eax,MasterThreadTable[esi*4]
        shl     edx,ADAPTER_FIELD_SHIFT ;position adapter type in the index
        mov     pCurrentThread,eax      ;threading when no buffering is needed
        or      esi,edx                 ;index = LWR flags + adapter type
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed

; Record the scan-to-scan stepping for a top-to-bottom copy.

        mov     esi,pdsurf
        mov     ulCurrentJustification,JustifyTop ;copy top to bottom
        mov     eax,[esi].dsurf_lNextScan
        mov     ulNextScan,eax          ;step forward one scan at a time
        add     eax,culWholeBytesWidth  ;the whole-byte copy walks backward
        mov     ulWholeScanDelta,eax    ; across the scan, so getting from the
                                        ; end of one whole-byte run to the
                                        ; start of the next takes a scan plus
                                        ; the run width

; Destination: scan range and starting offsets.

        mov     esi,prcldest
        mov     eax,[esi].yTop
        mov     edx,[esi].yBottom
        mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
        mov     ulLastDestScan,edx      ;end at bottom of dest copy rect
        mul     ulNextScan              ;offset in bitmap of top dest rect scan
                                        ; (EDX is overwritten by the multiply)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;left dest edge lives here
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte, unless it is
                                        ; a whole byte and so already counts
                                        ; as part of the whole bytes
        add     eax,culWholeBytesWidth  ;step past the whole bytes
        mov     ulRightEdgeDest,eax     ;right dest edge lives here
        dec     eax                     ;last whole byte sits just before it;
        mov     ulWholeBytesDest,eax    ; right-to-left whole copy starts there

; Source: starting scan and starting offsets, computed the same way.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
        mul     ulNextScan              ;offset in bitmap of top source scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;left src edge lives here
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte, unless it is
                                        ; a whole byte and so already counts
                                        ; as part of the whole bytes
        add     eax,culWholeBytesWidth  ;step past the whole bytes
        mov     ulRightEdgeSrc,eax      ;right src edge lives here
        dec     eax                     ;last whole byte sits just before it;
        mov     ulWholeBytesSrc,eax     ; right-to-left whole copy starts there

; Dispatch to the top-to-bottom bank enumeration loop for this adapter type.

        mov     eax,ulAdapterType
        jmp     TopToBottomLoopTable[eax*4]



;-----------------------------------------------------------------------;
; Set-up code for left-to-right, bottom-to-top copies.
;-----------------------------------------------------------------------;

        align   4
left_to_right_bottom_to_top:

; Entry fragment for copies that proceed left-to-right within a scan and
; bottom-to-top from scan to scan. It selects the copy threads, records the
; (negative) scan stepping values, computes the starting dest and source
; offsets on the bottom scan of each rectangle, and then dispatches to the
; bottom-to-top bank loop for the current adapter type.
;
; EAX and EDX are scratch, ESI holds whichever index or structure pointer is
; currently being read, and EDI holds pdsurf from the stepping set-up onward.

        cld                             ;string instructions step upward:
                                        ; copy left to right

; Select the threads. The 3-bit left/whole/right field plus the left-to-right
; field forms the index; the adapter type is merged in for the via-buffer
; table only.

        mov     esi,ulLWRType           ;left/whole/right involvement flags
        mov     edx,ulAdapterType
        or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
        shl     edx,ADAPTER_FIELD_SHIFT ;position adapter type in the index
        mov     eax,MasterThreadTable[esi*4]
        or      esi,edx                 ;index now includes the adapter type
        mov     pCurrentThread,eax      ;threading when no buffering is needed
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed

; Record the scan-to-scan stepping for a bottom-to-top copy.

        mov     edi,pdsurf
        mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
        mov     eax,[edi].dsurf_lNextScan
        neg     eax
        mov     ulNextScan,eax          ;step back one scan at a time
        sub     eax,culWholeBytesWidth  ;the whole-byte copy walks forward
        mov     ulWholeScanDelta,eax    ; across the scan, so getting from the
                                        ; end of one whole-byte run to the
                                        ; start of the next takes a scan back
                                        ; plus the run width back

; Destination: scan range and starting offsets on the bottom scan.

        mov     esi,prcldest
        mov     eax,[esi].yBottom
        mov     edx,[esi].yTop
        dec     eax                     ;yBottom is non-inclusive, so the first
                                        ; scan we copy is the one above it
        mov     ulLastDestScan,edx      ;end at top of dest copy rect
        mov     ulCurrentDestScan,eax   ;start at bottom of dest copy rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
                                        ; scan (uses the unnegated scan width;
                                        ; EDX is overwritten by the multiply)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;left dest edge lives here
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte, unless it is
                                        ; a whole byte and so already counts
                                        ; as part of the whole bytes
        mov     ulWholeBytesDest,eax    ;whole dest bytes start here
        add     eax,culWholeBytesWidth  ;step past the whole bytes
        mov     ulRightEdgeDest,eax     ;right dest edge lives here

; Source: the bottom source scan lies as far below ptl_y as the bottom dest
; scan lies below the dest top, i.e. ptl_y + (ulCurrentDestScan -
; ulLastDestScan), using the two values stored just above.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        add     eax,ulCurrentDestScan
        sub     eax,ulLastDestScan      ;advance to bottom of source rect
                                        ; (inclusive; first scan to copy from)
        mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source
                                        ; rect scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;left src edge lives here
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte, unless it is
                                        ; a whole byte and so already counts
                                        ; as part of the whole bytes
        mov     ulWholeBytesSrc,eax     ;whole src bytes start here
        add     eax,culWholeBytesWidth  ;step past the whole bytes
        mov     ulRightEdgeSrc,eax      ;right src edge lives here

; Dispatch to the bottom-to-top bank enumeration loop for this adapter type.

        mov     eax,ulAdapterType
        jmp     BottomToTopLoopTable[eax*4]


;-----------------------------------------------------------------------;
; Set-up code for right-to-left, bottom-to-top copies.
;-----------------------------------------------------------------------;

        align   4
right_to_left_bottom_to_top:

; Entry fragment for copies that proceed right-to-left within a scan and
; bottom-to-top from scan to scan. It selects the copy threads, records the
; (negative) scan stepping values, computes the starting dest and source
; offsets on the bottom scan of each rectangle, and then dispatches to the
; bottom-to-top bank loop for the current adapter type.
;
; EAX and EDX are scratch, ESI holds whichever index or structure pointer is
; currently being read, and EDI holds pdsurf from the stepping set-up onward.
;
; NOTE(review): the direction flag is still set when this fragment jumps away.
; Confirm that everything reached from the bank loops (including the routine
; invoked through dsurf_pfnBankControl2Window) tolerates DF=1.

        std                             ;string instructions step downward:
                                        ; copy right to left

; Select the threads. The 3-bit left/whole/right field is used as the index
; with the left-to-right field left clear, so the right-to-left entries are
; looked up. The adapter type is merged in for the via-buffer table only.

        mov     esi,ulLWRType           ;left/whole/right involvement flags
        mov     edx,ulAdapterType
        mov     eax,MasterThreadTable[esi*4]
        shl     edx,ADAPTER_FIELD_SHIFT ;position adapter type in the index
        mov     pCurrentThread,eax      ;threading when no buffering is needed
        or      esi,edx                 ;index = LWR flags + adapter type
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed

; Record the scan-to-scan stepping for a bottom-to-top copy.

        mov     edi,pdsurf
        mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
        mov     eax,[edi].dsurf_lNextScan
        neg     eax
        mov     ulNextScan,eax          ;step back one scan at a time
        add     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
        mov     ulWholeScanDelta,eax    ; to start of next: one scan back, plus
                                        ; the run width the copy walked
                                        ; backward over

; Destination: scan range and starting offsets on the bottom scan.

        mov     esi,prcldest
        mov     eax,[esi].yBottom
        mov     edx,[esi].yTop
        dec     eax                     ;yBottom is non-inclusive, so the first
                                        ; scan we copy is the one above it
        mov     ulLastDestScan,edx      ;end at top of dest copy rect
        mov     ulCurrentDestScan,eax   ;start at bottom of dest copy rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
                                        ; scan (uses the unnegated scan width;
                                        ; EDX is overwritten by the multiply)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;left dest edge lives here
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte, unless it is
                                        ; a whole byte and so already counts
                                        ; as part of the whole bytes
        add     eax,culWholeBytesWidth  ;step past the whole bytes
        mov     ulRightEdgeDest,eax     ;right dest edge lives here
        dec     eax                     ;last whole byte sits just before it;
        mov     ulWholeBytesDest,eax    ; right-to-left whole copy starts there

; Source: the bottom source scan lies as far below ptl_y as the bottom dest
; scan lies below the dest top, i.e. ptl_y + (ulCurrentDestScan -
; ulLastDestScan), using the two values stored just above.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        add     eax,ulCurrentDestScan
        sub     eax,ulLastDestScan      ;advance to bottom of source rect
                                        ; (inclusive; first scan to copy from)
        mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source
                                        ; rect scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;left src edge lives here
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte, unless it is
                                        ; a whole byte and so already counts
                                        ; as part of the whole bytes
        add     eax,culWholeBytesWidth  ;step past the whole bytes
        mov     ulRightEdgeSrc,eax      ;right src edge lives here
        dec     eax                     ;last whole byte sits just before it;
        mov     ulWholeBytesSrc,eax     ; right-to-left whole copy starts there

; Dispatch to the bottom-to-top bank enumeration loop for this adapter type.

        mov     eax,ulAdapterType
        jmp     BottomToTopLoopTable[eax*4]


;***********************************************************************;
;
; The following routines are the banking loops.
;
;***********************************************************************;


;-----------------------------------------------------------------------;
; Banking for 2 R/W and unbanked adapters, top to bottom.
;-----------------------------------------------------------------------;
        align   4
top_to_bottom_2RW:

; Top-to-bottom bank enumeration loop.
;
; Reads:  pdsurf, ulCurrentSrcScan, ulCurrentDestScan, ulLastDestScan
; Writes: ulBlockHeight, ulCurrentDestScan, ulCurrentSrcScan
;
; First makes sure the banks containing the starting source and dest scans
; are mapped. Then, on each pass, it copies as many scans as fit in both the
; mapped source bank and the mapped dest bank (stopping at the end of the dest
; rectangle), advances the current scans, and remaps whichever bank has been
; used up. A scan is treated as lying inside a bank when
; yTop <= scan < yBottom of the matching dsurf_rcl2WindowClipS/D rectangle.
; Leaves through PLAIN_RET once the dest scan reaches ulLastDestScan.

; We're going top to bottom. Map in the source and dest, top-justified.

        mov     ebx,pdsurf
        mov     edx,ulCurrentSrcScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top above the
                                                     ; current source bank?
        jl      short top_2RW_map_init_src_bank      ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;is source top below the
                                                        ; current source bank?
        jl      short top_2RW_init_src_bank_mapped
                                                ;no, proper bank already mapped
top_2RW_map_init_src_bank:

; Map bank containing the top source scan line into source window.
; Arguments passed: pdsurf (EBX), the scan (EDX), JustifyTop, MapSourceBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyTop,MapSourceBank>

top_2RW_init_src_bank_mapped:

        mov     edx,ulCurrentDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top above the
                                                     ; current dest bank?
        jl      short top_2RW_map_init_dest_bank     ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;is dest top below the
                                                        ; current dest bank?
        jl      short top_2RW_init_dest_bank_mapped
                                                ;no, proper bank already mapped
top_2RW_map_init_dest_bank:

; Map bank containing the top dest scan line into dest window.
; Arguments passed: pdsurf (EBX), the scan (EDX), JustifyTop, MapDestBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyTop,MapDestBank>

top_2RW_init_dest_bank_mapped:

; Bank-by-bank top-to-bottom copy loop.
; EBX = pdsurf on every entry to the loop (loaded above, or reloaded after
; the copy below).

top_2RW_bank_loop:

; Decide how far we can go before we run out of bank or rectangle to copy.
; ulBlockHeight = min(min(ulLastDestScan, dest bank yBottom) - current dest
; scan, source bank yBottom - current source scan).

        mov     edx,ulLastDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
        jl      short @F        ;copy rectangle bottom is in this bank
        mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
                                                        ; of bank, at least
@@:
        sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                                        ; the dest bank
        mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
        sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank

        cmp     edx,eax         ;unsigned compare of the two scan counts
        jb      short @F        ;source bank isn't limiting
        mov     edx,eax         ;source bank is limiting
@@:
        mov     ulBlockHeight,edx ;# of scans we'll do in this bank

; We're ready to copy this block.
; THREAD_AND_START is a macro defined outside this excerpt; with no arguments
; it presumably runs the thread in pCurrentThread and continues at the next
; instruction (TODO confirm against the macro definition). EBX is not assumed
; to survive it: pdsurf is reloaded before EBX is used again.

        THREAD_AND_START

; Any more scans to copy?

        mov     eax,ulCurrentDestScan
        mov     esi,ulBlockHeight       ;ESI = scans just copied; also needed
                                        ; below to advance the source scan
        add     eax,esi                 ;we've copied to dest up to here
        cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
        jz      short top_2RW_done      ;yes, we're done
        mov     ulCurrentDestScan,eax

; Now advance either or both banks, as needed.

        mov     ebx,pdsurf
        cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan past the end
                                                        ; of current dest bank?
        jl      short top_2RW_dest_bank_mapped    ;no, proper bank still mapped

; Map bank containing the current dest scan line into dest window.
; Arguments passed: pdsurf (EBX), the scan (EAX), JustifyTop, MapDestBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,eax,JustifyTop,MapDestBank>

top_2RW_dest_bank_mapped:

        add     esi,ulCurrentSrcScan    ;we've copied from source up to here
        mov     ulCurrentSrcScan,esi

        cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan past the end
                                                        ; of current src bank?
        jl      short top_2RW_src_bank_mapped     ;no, proper bank still mapped

; Map bank containing the current source scan line into source window.
; Arguments passed: pdsurf (EBX), the scan (ESI), JustifyTop, MapSourceBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,esi,JustifyTop,MapSourceBank>

top_2RW_src_bank_mapped:

        jmp     top_2RW_bank_loop       ;copy the next block

top_2RW_done:
        PLAIN_RET


;-----------------------------------------------------------------------;
; Banking for 2 R/W and unbanked adapters, bottom to top.
;-----------------------------------------------------------------------;
        align   4
bottom_to_top_2RW:

; Bottom-to-top bank enumeration loop.
;
; Reads:  pdsurf, ulCurrentSrcScan, ulCurrentDestScan, ulLastDestScan
; Writes: ulBlockHeight, ulCurrentDestScan, ulCurrentSrcScan
;
; First makes sure the banks containing the starting (bottom) source and dest
; scans are mapped. Then, on each pass, it copies as many scans as fit in both
; the mapped source bank and the mapped dest bank (stopping at the top of the
; dest rectangle), moves the current scans upward, and remaps whichever bank
; has been used up. The current scans are inclusive here, hence the +1 when
; counting scans. A scan is treated as lying inside a bank when
; yTop <= scan < yBottom of the matching dsurf_rcl2WindowClipS/D rectangle.
; Leaves through PLAIN_RET once the dest scan has moved above ulLastDestScan.

; We're going bottom to top. Map in the source and dest, bottom-justified.

        mov     ebx,pdsurf
        mov     edx,ulCurrentSrcScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom above the
                                                     ; current source bank?
        jl      short bot_2RW_map_init_src_bank      ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;is source bottom below
                                                        ; the current src bank?
        jl      short bot_2RW_init_src_bank_mapped
                                                ;no, proper bank already mapped
bot_2RW_map_init_src_bank:

; Map bank containing the bottom source scan line into source window.
; Arguments passed: pdsurf (EBX), the scan (EDX), JustifyBottom, MapSourceBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyBottom,MapSourceBank>

bot_2RW_init_src_bank_mapped:

        mov     edx,ulCurrentDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom above the
                                                     ; current dest bank?
        jl      short bot_2RW_map_init_dest_bank     ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;is dest bottom below
                                                        ; the current dst bank?
        jl      short bot_2RW_init_dest_bank_mapped
                                                ;no, proper bank already mapped
bot_2RW_map_init_dest_bank:

; Map bank containing the bottom dest scan line into dest window.
; Arguments passed: pdsurf (EBX), the scan (EDX), JustifyBottom, MapDestBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyBottom,MapDestBank>

bot_2RW_init_dest_bank_mapped:

; Bank-by-bank bottom-to-top copy loop.
; EBX = pdsurf on every entry to the loop (loaded above, or reloaded after
; the copy below).

bot_2RW_bank_loop:

; Decide how far we can go before we run out of bank or rectangle to copy.
; ulBlockHeight = min(current dest scan - max(ulLastDestScan, dest bank yTop)
; + 1, current source scan - source bank yTop + 1).

        mov     edx,ulLastDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
        jg      short @F        ;copy rectangle top is in this bank
        mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to top
                                                     ; of bank, at least
@@:
        neg     edx
        add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
        inc     edx                     ; the dest bank

        mov     eax,ulCurrentSrcScan
        sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
        inc     eax                     ;# of scans we can do in the src bank

        cmp     edx,eax         ;unsigned compare of the two scan counts
        jb      short @F        ;source bank isn't limiting
        mov     edx,eax         ;source bank is limiting
@@:
        mov     ulBlockHeight,edx ;# of scans we'll do in this bank

; We're ready to copy this block.
; THREAD_AND_START is a macro defined outside this excerpt; with no arguments
; it presumably runs the thread in pCurrentThread and continues at the next
; instruction (TODO confirm against the macro definition). EBX is not assumed
; to survive it: pdsurf is reloaded before EBX is used again.

        THREAD_AND_START

; Any more scans to copy?

        mov     eax,ulCurrentDestScan
        mov     esi,ulBlockHeight       ;ESI = scans just copied; also needed
                                        ; below to move the source scan
        sub     eax,esi                 ;we've copied to dest up to here
        cmp     ulLastDestScan,eax      ;are we past the dest rect top?
        jg      short bot_2RW_done      ;yes, we're done
        mov     ulCurrentDestScan,eax

; Now advance either or both banks, as needed.

        mov     ebx,pdsurf
        cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan above the top
                                                     ; of current dest bank?
        jge     short bot_2RW_dest_bank_mapped    ;no, proper bank still mapped

; Map bank containing the current dest scan line into dest window.
; Arguments passed: pdsurf (EBX), the scan (EAX), JustifyBottom, MapDestBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,eax,JustifyBottom,MapDestBank>

bot_2RW_dest_bank_mapped:

        mov     eax,ulCurrentSrcScan    ;EAX is reloaded here; it is not relied
                                        ; on to survive the call above
        sub     eax,esi         ;we've copied from source up to here
        mov     ulCurrentSrcScan,eax

        cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan above the top
                                                     ; of current src bank?
        jge     short bot_2RW_src_bank_mapped     ;no, proper bank still mapped

; Map bank containing the current source scan line into source window.
; Arguments passed: pdsurf (EBX), the scan (EAX), JustifyBottom, MapSourceBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,eax,JustifyBottom,MapSourceBank>

bot_2RW_src_bank_mapped:

        jmp     bot_2RW_bank_loop       ;copy the next block

bot_2RW_done:
        PLAIN_RET


;-----------------------------------------------------------------------;
; Banking for 1R/1W adapters, top to bottom.
;-----------------------------------------------------------------------;
        align   4
top_to_bottom_1R1W:

; Top-to-bottom bank enumeration loop for 1R/1W adapters.
;
; Reads:  pdsurf, ulCurrentSrcScan, ulCurrentDestScan, ulLastDestScan
; Writes: ulBlockHeight, ulCurrentDestScan, ulCurrentSrcScan
;
; Same structure as the 2 R/W top-to-bottom loop: make sure the banks holding
; the starting source and dest scans are mapped, then repeatedly copy as many
; scans as fit in both mapped banks (stopping at the end of the dest
; rectangle), advance the current scans, and remap whichever bank has been
; used up. The difference is the choice of threading for each block: when the
; two entries of dsurf_ulWindowBank differ, the copy goes through the
; via-buffer threading (pCurrentThreadViaBuffer); when they match, the default
; threading is used. A scan is treated as lying inside a bank when
; yTop <= scan < yBottom of the matching dsurf_rcl2WindowClipS/D rectangle.
; Leaves through PLAIN_RET once the dest scan reaches ulLastDestScan.

; We're going top to bottom. Map in the source and dest, top-justified.

        mov     ebx,pdsurf
        mov     edx,ulCurrentSrcScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top above the
                                                     ; current source bank?
        jl      short top_1R1W_map_init_src_bank      ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;is source top below the
                                                        ; current source bank?
        jl      short top_1R1W_init_src_bank_mapped
                                                ;no, proper bank already mapped
top_1R1W_map_init_src_bank:

; Map bank containing the top source scan line into source window.
; Arguments passed: pdsurf (EBX), the scan (EDX), JustifyTop, MapSourceBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyTop,MapSourceBank>

top_1R1W_init_src_bank_mapped:

        mov     edx,ulCurrentDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top above the
                                                     ; current dest bank?
        jl      short top_1R1W_map_init_dest_bank     ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;is dest top below the
                                                        ; current dest bank?
        jl      short top_1R1W_init_dest_bank_mapped
                                                ;no, proper bank already mapped
top_1R1W_map_init_dest_bank:

; Map bank containing the top dest scan line into dest window.
; Arguments passed: pdsurf (EBX), the scan (EDX), JustifyTop, MapDestBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyTop,MapDestBank>

top_1R1W_init_dest_bank_mapped:

; Bank-by-bank top-to-bottom copy loop.
; EBX = pdsurf on every entry to the loop (loaded above, or reloaded after
; the copy below).

top_1R1W_bank_loop:

; Decide how far we can go before we run out of bank or rectangle to copy.
; ulBlockHeight = min(min(ulLastDestScan, dest bank yBottom) - current dest
; scan, source bank yBottom - current source scan).

        mov     edx,ulLastDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
        jl      short @F        ;copy rectangle bottom is in this bank
        mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
                                                        ; of bank, at least
@@:
        sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                                        ; the dest bank
        mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
        sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank

        cmp     edx,eax         ;unsigned compare of the two scan counts
        jb      short @F        ;source bank isn't limiting
        mov     edx,eax         ;source bank is limiting
@@:
        mov     ulBlockHeight,edx ;# of scans we'll do in this bank

; We're ready to copy this block.
; Select different threading, depending on whether the source and destination
; are currently in the same bank; we can do edges faster if they are.
; The test compares the two dword entries of dsurf_ulWindowBank (offsets 0
; and 4).

        mov     eax,[ebx].dsurf_ulWindowBank
        cmp     eax,[ebx].dsurf_ulWindowBank[4]
        jz      short top_1R1W_copy_same_bank

; Source and dest are currently in different banks, must go through temp buffer.
; THREAD_AND_START is a macro defined outside this excerpt. Here it is given
; the thread variable to use and, apparently, the label at which to continue
; once the copy is finished (TODO confirm against the macro definition).

        THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans

; Source and dest are currently in the same bank.
; With no arguments the macro presumably uses pCurrentThread and continues at
; the next instruction (TODO confirm against the macro definition).

        align   4
top_1R1W_copy_same_bank:
        THREAD_AND_START

; Any more scans to copy?
; EBX is not assumed to survive the copy: pdsurf is reloaded below before EBX
; is used again.

top_1R1W_check_more_scans:

        mov     eax,ulCurrentDestScan
        mov     esi,ulBlockHeight       ;ESI = scans just copied; also needed
                                        ; below to advance the source scan
        add     eax,esi                 ;we've copied to dest up to here
        cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
        jz      short top_1R1W_done     ;yes, we're done
        mov     ulCurrentDestScan,eax

; Now advance either or both banks, as needed.

        mov     ebx,pdsurf
        cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan past the end
                                                        ; of current dest bank?
        jl      short top_1R1W_dest_bank_mapped   ;no, proper bank still mapped

; Map bank containing the current dest scan line into dest window.
; Arguments passed: pdsurf (EBX), the scan (EAX), JustifyTop, MapDestBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,eax,JustifyTop,MapDestBank>

top_1R1W_dest_bank_mapped:

        add     esi,ulCurrentSrcScan    ;we've copied from source up to here
        mov     ulCurrentSrcScan,esi

        cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan past the end
                                                        ; of current src bank?
        jl      short top_1R1W_src_bank_mapped     ;no, proper bank still mapped

; Map bank containing the current source scan line into source window.
; Arguments passed: pdsurf (EBX), the scan (ESI), JustifyTop, MapSourceBank.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,esi,JustifyTop,MapSourceBank>

top_1R1W_src_bank_mapped:

        jmp     top_1R1W_bank_loop      ;copy the next block

top_1R1W_done:
        PLAIN_RET


;-----------------------------------------------------------------------;
; Banking for 1R/1W adapters, bottom to top.
;-----------------------------------------------------------------------;
        align   4
bottom_to_top_1R1W:

; We're going bottom to top. Map in the source and dest, bottom-justified.

        mov     ebx,pdsurf
        mov     edx,ulCurrentSrcScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
                                                     ; current source bank?
        jl      short bot_1R1W_map_init_src_bank      ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
                                                        ; than current src bank?
        jl      short bot_1R1W_init_src_bank_mapped
                                                ;no, proper bank already mapped
bot_1R1W_map_init_src_bank:

; Map bank containing the bottom source scan line into source window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyBottom,MapSourceBank>

bot_1R1W_init_src_bank_mapped:

        mov     edx,ulCurrentDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
                                                     ; current dest bank?
        jl      short bot_1R1W_map_init_dest_bank     ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
                                                        ; than current dst bank?
        jl      short bot_1R1W_init_dest_bank_mapped
                                                ;no, proper bank already mapped
bot_1R1W_map_init_dest_bank:

; Map bank containing the bottom dest scan line into dest window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyBottom,MapDestBank>

bot_1R1W_init_dest_bank_mapped:

; Bank-by-bank bottom-to-top copy loop.

bot_1R1W_bank_loop:

; Decide how far we can go before we run out of bank or rectangle to copy.

        mov     edx,ulLastDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
        jg      short @F        ;copy rectangle top is in this bank
        mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
                                                     ; of bank, at least
@@:
        neg     edx
        add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
        inc     edx                     ; the dest bank

        mov     eax,ulCurrentSrcScan
        sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
        inc     eax                     ;# of scans we can do in the src bank

        cmp     edx,eax
        jb      short @F        ;source bank isn't limiting
        mov     edx,eax         ;source bank is limiting
@@:
        mov     ulBlockHeight,edx ;# of scans we'll do in this bank

; We're ready to copy this block.
; Select different threading, depending on whether the source and destination
; are currently in the same bank; we can do edges faster if they are.

        mov     al,byte ptr [ebx].dsurf_ulWindowBank
        cmp     al,byte ptr [ebx].dsurf_ulWindowBank[4]
        jz      short bot_1R1W_copy_same_bank

; Source and dest are currently in different banks, must go through temp buffer.

        THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans

; Source and dest are currently in the same bank.

        align   4
bot_1R1W_copy_same_bank:
        THREAD_AND_START

; Any more scans to copy?

        align   4
bot_1R1W_check_more_scans:

        mov     eax,ulCurrentDestScan
        mov     esi,ulBlockHeight
        sub     eax,esi                 ;we've copied to dest up to here
        cmp     ulLastDestScan,eax      ;are we past the dest rect top?
        jg      short bot_1R1W_done     ;yes, we're done
        mov     ulCurrentDestScan,eax

; Now advance either or both banks, as needed.

        mov     ebx,pdsurf
        cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
                                                     ; current dest bank?
        jge     short bot_1R1W_dest_bank_mapped   ;no, proper bank still mapped

; Map bank containing the current dest scan line into dest window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,eax,JustifyBottom,MapDestBank>

bot_1R1W_dest_bank_mapped:

        mov     eax,ulCurrentSrcScan
        sub     eax,esi         ;we've copied from source up to here
        mov     ulCurrentSrcScan,eax

        cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
                                                     ; current src bank?
        jge     short bot_1R1W_src_bank_mapped    ;no, proper bank still mapped

; Map bank containing the current source scan line into source window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,eax,JustifyBottom,MapSourceBank>

bot_1R1W_src_bank_mapped:

        jmp     bot_1R1W_bank_loop

bot_1R1W_done:
        PLAIN_RET


;-----------------------------------------------------------------------;
; Banking for 1 R/W adapters, top to bottom.
;
; Drives the copy one block of scans at a time, from ulCurrentDestScan
; toward ulLastDestScan. Each block is limited so that it stays inside the
; bank holding the current dest scan and inside the bank holding the current
; source scan. Source and dest share the single window, so they can be
; addressed together only when they fall in the same bank; otherwise the
; block is threaded through the temp buffer.
;
; Input:
;       pdsurf = surface; supplies dsurf_pfnBankControl and
;               dsurf_rcl1WindowClip (scan range of the mapped bank; a scan
;               is treated as mapped when yTop <= scan < yBottom)
;       ulCurrentDestScan = first (topmost) dest scan to copy
;       ulLastDestScan = dest scan at which to stop; it is not itself copied
;       ulCurrentSrcScan = source scan corresponding to ulCurrentDestScan
;
; Output:
;       ulBlockHeight = height of the most recent block
;       ulCurrentDestScan, ulCurrentSrcScan = advanced after every block
;               except the final one
;
; THREAD_AND_START is not defined in this part of the file; presumably it
; runs the selected sequence of copy routines for the block and then
; continues at the check-more-scans label -- confirm against the macro.
;-----------------------------------------------------------------------;
        align   4
top_to_bottom_1RW:

; We're going top to bottom. Map in the dest, top-justified, unless the bank
; that's already mapped contains the first dest scan.

        mov     ebx,pdsurf
        mov     esi,ulCurrentDestScan
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest top less than
                                                     ; current bank?
        jl      short top_1RW_map_init_dest_bank     ;yes, map in proper bank
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest top greater than
                                                        ; current bank?
        jl      short top_1RW_init_dest_bank_mapped
                                                ;no, proper bank already mapped
top_1RW_map_init_dest_bank:

; Map bank containing the top dest scan line into the single window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>

top_1RW_init_dest_bank_mapped:

; Bank-by-bank top-to-bottom copy loop.
; On every pass through here: EBX = pdsurf, ESI = current dest scan, and the
; bank containing the current dest scan is mapped in.

top_1RW_bank_loop:

; Decide how far we can go before we run out of bank or rectangle to copy.
; EDI = min(ulLastDestScan, bank bottom) - current dest scan.

        mov     edi,ulLastDestScan
        cmp     edi,[ebx].dsurf_rcl1WindowClip.yBottom
        jl      short @F        ;copy rectangle bottom is in this bank
        mov     edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end
                                                       ; of bank, at least
@@:
        sub     edi,esi   ;# of scans we can and want to do in the dest bank

; Now make sure source is mapped in. This is the condition the copying routines
; expect, and we need to figure out how far we can go in the source.
; EDX is the "different banks" flag: 0 if the source scan is already in the
; mapped (dest) bank, 1 if we had to switch banks to reach the source.

        sub     edx,edx                 ;assume source and dest are in the same
                                        ; bank
        mov     esi,ulCurrentSrcScan
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
                                                    ; current bank?
        jl      short top_1RW_map_src_Bank          ;yes, must map in
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
                                                       ; current bank?
        jl      short top_1RW_src_bank_mapped     ;no, proper bank still mapped

top_1RW_map_src_Bank:

; Map bank containing the current source scan line into the single window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
; EDX is not on that list, which is why the flag is set only after the call.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>

        mov     edx,1                   ;mark that source and dest are not in
                                        ; the same bank
top_1RW_src_bank_mapped:

; The window clip now describes the bank holding the source scan (which may
; or may not also be the dest bank).

        mov     eax,[ebx].dsurf_rcl1WindowClip.yBottom
        sub     eax,esi         ;# of scans we can do in the src bank

; Block height is the smaller of the dest-limited and source-limited counts
; (compared as unsigned values).

        cmp     edi,eax
        jb      short @F        ;source bank isn't limiting
        mov     edi,eax         ;source bank is limiting
@@:
        mov     ulBlockHeight,edi ;# of scans we'll do in this bank

; We're ready to copy this block.
; Select different threading, depending on whether the source and destination
; are currently in the same bank; we can do edges faster if they are.

        and     edx,edx         ;did we have to switch banks for the source?
        jz      short top_1RW_copy_same_bank ;no

; Source and dest are currently in different banks, must go through temp buffer.

        THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans

; Source and dest are currently in the same bank.

        align   4
top_1RW_copy_same_bank:
        THREAD_AND_START

; Any more scans to copy?

top_1RW_check_more_scans:

        mov     esi,ulCurrentDestScan
        mov     edi,ulBlockHeight
        add     esi,edi                 ;we've copied to dest up to here
        cmp     ulLastDestScan,esi      ;are we at the dest rect bottom?
        jz      short top_1RW_done      ;yes, we're done
        mov     ulCurrentDestScan,esi

; Now make sure the dest bank is mapped in. The bank loop expects the dest
; bank mapped and ESI = current dest scan when it is re-entered. EBX is
; reloaded because the copy routines in this file use it as scratch.

        mov     ebx,pdsurf
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
                                                    ; current bank?
        jl      short top_1RW_map_dest_bank         ;yes, map in dest bank
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
                                                        ; current bank?
        jl      short top_1RW_dest_bank_mapped   ;no, proper bank mapped

top_1RW_map_dest_bank:

; Map bank containing the current dest scan line into the single window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>

top_1RW_dest_bank_mapped:

; Only the source scan number is advanced here; whether the source bank needs
; mapping is decided at the top of the bank loop.

        add     ulCurrentSrcScan,edi    ;we've copied from source up to here

        jmp     top_1RW_bank_loop

top_1RW_done:
        PLAIN_RET


;-----------------------------------------------------------------------;
; Banking for 1 R/W adapters, bottom to top.
;
; Drives the copy one block of scans at a time, from ulCurrentDestScan
; upward to ulLastDestScan. Each block is limited so that it stays inside
; the bank holding the current dest scan and inside the bank holding the
; current source scan. Source and dest share the single window, so they can
; be addressed together only when they fall in the same bank; otherwise the
; block is threaded through the temp buffer.
;
; Input:
;       pdsurf = surface; supplies dsurf_pfnBankControl and
;               dsurf_rcl1WindowClip (scan range of the mapped bank; a scan
;               is treated as mapped when yTop <= scan < yBottom)
;       ulCurrentDestScan = first (bottommost) dest scan to copy
;       ulLastDestScan = last (topmost) dest scan to copy, inclusive
;       ulCurrentSrcScan = source scan corresponding to ulCurrentDestScan
;
; Output:
;       ulBlockHeight = height of the most recent block
;       ulCurrentDestScan, ulCurrentSrcScan = moved up after every block
;               except the final one
;
; THREAD_AND_START is not defined in this part of the file; presumably it
; runs the selected sequence of copy routines for the block and then
; continues at the check-more-scans label -- confirm against the macro.
;-----------------------------------------------------------------------;
        align   4
bottom_to_top_1RW:

; We're going bottom to top. Map in the dest, bottom-justified, unless the
; bank that's already mapped contains the first dest scan.

        mov     ebx,pdsurf
        mov     esi,ulCurrentDestScan
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest bottom less than
                                                     ; current dest bank?
        jl      short bot_1RW_map_init_dest_bank     ;yes, map in proper bank
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest bottom greater
                                                       ; than current dst bank?
        jl      short bot_1RW_init_dest_bank_mapped
                                                ;no, proper bank already mapped
bot_1RW_map_init_dest_bank:

; Map bank containing the bottom dest scan line into the single window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>

bot_1RW_init_dest_bank_mapped:

; Bank-by-bank bottom-to-top copy loop.
; On every pass through here: EBX = pdsurf, ESI = current dest scan, and the
; bank containing the current dest scan is mapped in.

bot_1RW_bank_loop:

; Decide how far we can go before we run out of bank or rectangle to copy.
; EDI = current dest scan - max(ulLastDestScan, bank top) + 1; both ends of
; the range are inclusive, hence the +1.

        mov     edi,ulLastDestScan
        cmp     edi,[ebx].dsurf_rcl1WindowClip.yTop
        jg      short @F        ;copy rectangle top is in this bank
        mov     edi,[ebx].dsurf_rcl1WindowClip.yTop ;dest extends to end
                                                    ; of bank, at least
@@:
        neg     edi
        add     edi,esi                 ;# of scans we can and want to do in
        inc     edi                     ; the dest bank

; Now make sure source is mapped in. This is the condition the copying routines
; expect, and we need to figure out how far we can go in the source.
; EDX is the "different banks" flag: 0 if the source scan is already in the
; mapped (dest) bank, 1 if we had to switch banks to reach the source.

        sub     edx,edx                 ;assume source and dest are in the same
                                        ; bank
        mov     esi,ulCurrentSrcScan
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
                                                    ; current bank?
        jl      short bot_1RW_map_src_Bank          ;yes, must map in
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
                                                       ; current bank?
        jl      short bot_1RW_src_bank_mapped     ;no, proper bank still mapped

bot_1RW_map_src_Bank:

; Map bank containing the current source scan line into the single window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
; EDX is not on that list, which is why the flag is set only after the call.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>

        mov     edx,1                   ;mark that source and dest are not in
                                        ; the same bank
bot_1RW_src_bank_mapped:

; The window clip now describes the bank holding the source scan (which may
; or may not also be the dest bank). ESI is turned from a scan number into a
; count here; it is reloaded in bot_1RW_check_more_scans before the loop
; needs the dest scan again.

        sub     esi,[ebx].dsurf_rcl1WindowClip.yTop
        inc     esi                     ;# of scans we can do in the src bank

; Block height is the smaller of the dest-limited and source-limited counts
; (compared as unsigned values).

        cmp     edi,esi
        jb      short @F        ;source bank isn't limiting
        mov     edi,esi         ;source bank is limiting
@@:
        mov     ulBlockHeight,edi ;# of scans we'll do in this bank

; We're ready to copy this block.
; Select different threading, depending on whether the source and destination
; are currently in the same bank; we can copy much faster if they are.

        and     edx,edx         ;did we have to switch banks for the source?
        jz      short bot_1RW_copy_same_bank ;no

; Source and dest are currently in different banks, must go through temp buffer.

        THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans

; Source and dest are currently in the same bank.

        align   4
bot_1RW_copy_same_bank:
        THREAD_AND_START

; Any more scans to copy?

        align   4
bot_1RW_check_more_scans:

        mov     esi,ulCurrentDestScan
        mov     edi,ulBlockHeight
        sub     esi,edi                 ;we've copied to dest up to here
        cmp     ulLastDestScan,esi      ;are we past the dest rect top?
        jg      short bot_1RW_done      ;yes, we're done
        mov     ulCurrentDestScan,esi

; Now make sure the dest bank is mapped in. The bank loop expects the dest
; bank mapped and ESI = current dest scan when it is re-entered. EBX is
; reloaded because the copy routines in this file use it as scratch.

        mov     ebx,pdsurf
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
                                                    ; current bank?
        jl      short bot_1RW_map_dest_bank         ;yes, map in dest bank
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
                                                        ; current bank?
        jl      short bot_1RW_dest_bank_mapped   ;no, proper bank mapped

bot_1RW_map_dest_bank:

; Map bank containing the current dest scan line into the single window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>

bot_1RW_dest_bank_mapped:

; Only the source scan number is moved up here; whether the source bank needs
; mapping is decided at the top of the bank loop.

        sub     ulCurrentSrcScan,edi    ;we've copied from source up to here

        jmp     bot_1RW_bank_loop

bot_1RW_done:
        PLAIN_RET


;***********************************************************************;
;
; The following routines are the low-level copying routines. They know
; almost nothing about banks (the routines that copy through a temp
; buffer know how to switch banks after filling the temp buffer, but
; that's it). Banking should be taken care of at a higher level.
;
;***********************************************************************;

;-----------------------------------------------------------------------;
; Copies a block of solid bytes from the source to the destination via the
; latches. Can only be used by 2 R/W or 1R/1W window banking, or by
; unbanked modes, or by 1 R/W adapters when the source and dest are in the
; same bank. 1 R/W adapters must go through an intermediate local buffer
; when the source and the destination aren't in the same bank.
;
; Input:
;       Direction Flag set for desired direction of copy
;       pdsurf = surface; supplies dsurf_pvBitmapStart2WindowS and
;               dsurf_pvBitmapStart2WindowD, which are added to the offsets
;               below to form the addresses actually read and written
;       culWholeBytesWidth = # of bytes to copy across each scan line
;       ulWholeScanDelta = distance to start of next scan from end of current
;       ulBlockHeight = # of scans to copy
;       ulWholeBytesSrc = start source offset in bitmap
;       ulWholeBytesDest = start dest offset in bitmap
;
; Output:
;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
;               scan processed
;
; Uses EAX, EBX, ECX, EDX, ESI, and EDI as scratch. Leaves the Graphics
; Controller Bit Mask at 0 and writes MM_ALL to the Sequencer data port.
;-----------------------------------------------------------------------;

        align   4
copy_whole_bytes:

; Set the bit mask to disable all bits, so we can copy through the latches.
; (Index in AL and data in AH go out together in one word OUT.)

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(000h shl 8) + GRAF_BIT_MASK
        out     dx,ax

; Set Map Mask to enable writes to all planes.
; Only the Sequencer data port is written; presumably the Sequencer Index was
; left pointing at the Map Mask by earlier code -- confirm against caller.

        mov     dl,SEQ_DATA
        mov     al,MM_ALL
        out     dx,al

; Set up to copy the whole bytes via the latches. These are the registers the
; unrolled loop below expects (see the register list ahead of
; whole_latches_loop).

        mov     eax,culWholeBytesWidth
        mov     ebx,ulBlockHeight
        mov     edx,ulWholeScanDelta

; Calculate full start addresses.

        mov     ecx,pdsurf
        mov     esi,ulWholeBytesSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     edi,ulWholeBytesDest
        add     edi,[ecx].dsurf_pvBitmapStart2WindowD

; SET_UP_UNROLL_VARS is not defined in this part of the file. Judging by its
; arguments and by the loop below, it presumably turns the scan count in EBX
; into a count of passes through the unrolled loop (left in EBX) and loads
; ECX with the entry point that handles the odd scans on the first pass --
; confirm against the macro definition.

        SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeLatchesEntry, \
                                LOOP_UNROLL_SHIFT
        jmp    ecx      ;copy the whole bytes

;-----------------------------------------------------------------------;
; Table of unrolled whole latched loop entry points.
;-----------------------------------------------------------------------;

        UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeLatchesEntry,WHOLE_LATCHES, \
                                LOOP_UNROLL_COUNT

;-----------------------------------------------------------------------;
; Unrolled loop for copying a block of whole bytes via the latches.
;
; Each expansion of COPY_WHOLE_LATCHES defines the label
; <ENTRY_LABEL><ENTRY_INDEX> and copies one scan line's worth of bytes. The
; data moved through AL/memory by MOVSB is irrelevant to what lands on the
; screen: with the Bit Mask at 0, each write stores the latches that the
; preceding read loaded.
;-----------------------------------------------------------------------;

COPY_WHOLE_LATCHES macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ecx,eax         ;# of whole bytes to copy
        rep     movsb           ;copy the bytes via the latches
        add     esi,edx         ;point to next source scan
        add     edi,edx         ;point to next dest scan
        endm    ;-----------------------------------;

;  EAX = # of bytes to copy
;  EBX = count of unrolled loop iterations
;  EDX = offset from end of one scan's fill to start of next
;  ESI = source address to copy from
;  EDI = target address to copy to

        align   4
whole_latches_loop:
        UNROLL_LOOP     COPY_WHOLE_LATCHES,WHOLE_LATCHES,LOOP_UNROLL_COUNT
        dec     ebx             ;count down passes through the unrolled loop
        jnz     whole_latches_loop

; Remember where we left off, for next time. ESI and EDI are addresses at
; this point; subtract the window starts to get back to offsets in the bitmap.

        mov     ecx,pdsurf
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     ulWholeBytesSrc,esi
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulWholeBytesDest,edi

        PLAIN_RET


;-----------------------------------------------------------------------;
; Copies a block of solid bytes from the source to the destination via
; the temp buffer. This should only be used by 1 R/W adapters, and then
; only when the source and dest are in different banks.
;
; All relevant bytes are copied from the source to a temp buffer that's an
; image of the source first. Then, we copy each of the four planes for one scan
; line from the temp buffer to the screen before going on to the next scan line.
; It would be faster to do all scans in one plane, then all in the next, and so
; on, but that would give nasty color effects from pixels that were changed in
; some planes but not in others. A compromise would be to do several scans at a
; pop per plane, as is done with the edge bytes; however, given that there can
; be 128 (or more) bytes across a single whole-bytes scan, if we do 16 scan
; lines per chunk, we're going to be performing up to 128*4*16 accesses per
; chunk; at an assumed 1 microsecond per access, that's 8 milliseconds per
; chunk, or about 1/2 of a frame time. We're definitely going to see flicker or
; sparkles from partially updated bytes at that point, in my opinion. Another
; alternative would be to dynamically adjust the number of scans processed at a
; pop per plane, depending on the copy width, with more scans copied for
; narrower widths. For all but very narrow copies, though, it seems to me that
; the actual copy time would far outweigh the time for the OUTs to switch
; planes, and the return for some rather complex code would be marginal.
;
; It would be nice if we copied bytes a word or dword at a time. However, it
; becomes rather complex handling fractional words or dwords, especially when
; copying right-to-left, so this is left for LATER. I haven't unrolled these
; loops because of the possibility of this further word/dword optimization;
; no point in fine-tuning sub-optimal code.
;
; Input:
;       Direction Flag set for desired direction of copy
;       pdsurf = surface; supplies dsurf_pvBitmapStart, dsurf_pvStart, and
;               dsurf_pfnBankControl
;       culWholeBytesWidth = # of bytes to copy across each scan line
;       ulWholeScanDelta = distance to start of next scan from end of current
;       ulNextScan = distance from the start of one scan to the start of the
;               next (used to step through the temp buffer)
;       ulBlockHeight = # of scans to copy
;       ulWholeBytesSrc = start source offset in bitmap
;       ulWholeBytesDest = start dest offset in bitmap
;       ulCurrentSrcScan, ulCurrentDestScan, ulCurrentJustification = passed
;               to the bank control function to map the dest bank and then
;               to put the source bank back
;       ppTempPlane0 = pointer to pointer to plane 0 storage in temp buffer
;       ppTempPlane3 = pointer to pointer to plane 3 storage in temp buffer
;               (the code steps down from here 4 bytes per plane, and indexes
;               up from ppTempPlane0 4 bytes per plane, so presumably
;               ppTempPlane3 = ppTempPlane0 + 12 -- confirm where these are
;               set up)
;       Expects the source bank to be mapped in; source bank is mapped in on
;               exit
;
; Output:
;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
;               scan processed
;
; Scratch variables: pSrcAddr, pDestAddr, ulOffsetInBank, culTempCount.
; Uses EAX, EBX, ECX, EDX, ESI, and EDI as scratch.
;
; NOTE(review): the bank control function is called with the Direction Flag
; in whatever state the caller set for the copy; confirm that the bank
; control routines don't depend on the Direction Flag being clear.
;-----------------------------------------------------------------------;

        align   4
copy_whole_bytes_via_buffer:

; Calculate start source address from bitmap start address and offset within
; bitmap.

        mov     ecx,pdsurf
        mov     eax,ulWholeBytesSrc
        add     eax,[ecx].dsurf_pvBitmapStart
        mov     pSrcAddr,eax
        sub     eax,[ecx].dsurf_pvStart
        mov     ulOffsetInBank,eax ;will come in handy because we treat the
                                   ; temp buffer as an image of the current
                                   ; bank

; First, copy all the bytes into the temporary buffer, one plane at a time
; (plane 3 down to plane 0), all scans of the block for each plane.

; Leave the GC Index pointing to the Read Map.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_READ_MAP
        out     dx,al

        mov     eax,3           ;start by copying plane 3
copy_whole_to_buffer_plane_loop:
        mov     ebx,ulBlockHeight  ;# of scans to copy
        mov     esi,pSrcAddr       ;source offset in screen
        mov     edi,ppTempPlane0
        mov     edi,[edi+eax*4]    ;pointer to current plane in temp buffer
        add     edi,ulOffsetInBank ;dest for plane in temp buffer

        mov     edx,VGA_BASE + GRAF_DATA
        out     dx,al            ;set Read Map to plane we're copying from.

; EAX and EDX are needed as loop constants below, so the plane index is
; parked on the stack for the duration of the scan loop.

        push    eax             ;remember plane index
        mov     eax,ulWholeScanDelta ;offset to next scan
        mov     edx,culWholeBytesWidth ;# of bytes per scan
copy_whole_to_buffer_scan_loop:
        mov     ecx,edx         ;# of bytes per scan
        rep     movsb           ;copy the scan line to the temp buffer
        add     esi,eax         ;point to next source scan
        add     edi,eax         ;point to next dest scan

        dec     ebx              ;count down scan lines
        jnz     copy_whole_to_buffer_scan_loop

        pop     eax             ;get back plane index
        dec     eax             ;count down planes
        jns     copy_whole_to_buffer_plane_loop ;loop until index goes below 0

; Remember where we left off, for next time. Every plane pass starts from
; pSrcAddr and takes the same steps, so ESI ends up at the scan after the
; block no matter which plane was done last.

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart
        mov     ulWholeBytesSrc,esi


; Now copy the temp buffer to the screen.

; Map in the destination bank, so we can read/write to it  and let the Bit Mask
; work.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
                <ebx,ulCurrentDestScan,ulCurrentJustification>

; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
; until now to calculate this, because the dest bank wasn't mapped earlier).

        mov     eax,ulWholeBytesDest
        add     eax,[ebx].dsurf_pvBitmapStart
        mov     pDestAddr,eax

; Set the bit mask to enable all bits.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        out     dx,ax

; Only the Sequencer data port is written from here on; presumably the
; Sequencer Index was left pointing at the Map Mask by earlier code --
; confirm against caller.

        mov     dl,SEQ_DATA     ;leave DX pointing to the SC Data reg

; Set up to copy the whole bytes from the buffer. The scan count lives in
; memory (culTempCount) because every register is busy in the loops below.

        mov     eax,ulBlockHeight ;# of scans to copy
        mov     culTempCount,eax

; Outer loop: one pass per scan line. Inner loop: one pass per plane, so all
; four planes of a scan are updated before moving on to the next scan.

copy_whole_from_buffer_scan_loop:

        mov     ebx,ppTempPlane3  ;point to plane 3's temp buffer offset
        mov     al,MM_C3        ;start by copying plane 3

copy_whole_from_buffer_plane_loop:

; Set Map Mask to enable writes to the plane we're copying.

        out     dx,al

; Select the corresponding plane from the temp buffer.

        mov     esi,[ebx]          ;point to plane start in temp buffer
        add     esi,ulOffsetInBank ;point to current scan start in temp buffer
        mov     edi,pDestAddr      ;point to destination start

        mov     ecx,culWholeBytesWidth  ;# of whole bytes to copy
        rep     movsb           ;copy the bytes from the buffer to the screen

; Do next plane, if any. AL is both the Map Mask value and the loop control:
; the single plane bit is shifted right once per plane and the loop ends when
; it has been shifted out.

        sub     ebx,4                   ;point to next temp buffer plane ptr
        shr     al,1                    ;advance to next plane
        jnz     copy_whole_from_buffer_plane_loop

; Remember where we left off, for next scan. EDI is at the end of the scan
; just written, so the end-to-start delta applies to it; ulOffsetInBank is a
; start-of-scan offset, so it moves by the start-to-start distance instead.

        add     edi,ulWholeScanDelta    ;point to next dest scan
        mov     pDestAddr,edi
        mov     eax,ulNextScan
        add     ulOffsetInBank,eax      ;next scan's start in temp buffer,
                                        ; relative to start of plane's storage

; Count down scan lines.

        dec     culTempCount
        jnz     copy_whole_from_buffer_scan_loop

; Remember where we left off, for next time.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart
        mov     ulWholeBytesDest,edi

; Put back the original source bank.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
                <ebx,ulCurrentSrcScan,ulCurrentJustification>

        PLAIN_RET


;-----------------------------------------------------------------------;
; copy_left_edge
;
; Copies the strip of left edge bytes for the current block straight from
; the source to the destination on the screen. Both the source and the
; destination must be readable and writable at the same time, so this is
; only for 2 R/W window banking or unbanked modes; 1 R/W and 1R/1W adapters
; have to go through an intermediate local buffer when the source and dest
; are in different banks. The work is done by copy_edge, which handles up to
; EDGE_CHUNK_SIZE bytes in each plane at a pop (more bytes might cause
; flicker).
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulLeftEdgeSrc = start source offset in bitmap
;       ulLeftEdgeDest = start dest offset in bitmap
;       jLeftMask = left edge clip mask
;
; Output:
;       ulLeftEdgeSrc and ulLeftEdgeDest are advanced to the scan after the
;               last scan processed
;-----------------------------------------------------------------------;

        align   4
copy_left_edge:

; Turn the saved offsets within the bitmap into real addresses, by adding in
; where the bitmap starts as seen through the source and dest windows.

        mov     ecx,pdsurf
        mov     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     esi,[ecx].dsurf_pvBitmapStart2WindowS
        add     edi,ulLeftEdgeDest      ;EDI = first dest byte of the edge
        add     esi,ulLeftEdgeSrc       ;ESI = first source byte of the edge

; Hand the strip to the shared edge copier, with the left clip mask in AH.

        mov     ah,byte ptr jLeftMask
        call    copy_edge

; ESI and EDI come back as the next source and dest addresses. Convert them
; back into offsets within the bitmap and save them for the next block.

        mov     ecx,pdsurf
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulLeftEdgeSrc,esi
        mov     ulLeftEdgeDest,edi

        PLAIN_RET


;-----------------------------------------------------------------------;
; copy_right_edge
;
; Copies the strip of right edge bytes for the current block straight from
; the source to the destination on the screen. Both the source and the
; destination must be readable and writable at the same time, so this is
; only for 2 R/W window banking or unbanked modes; 1 R/W and 1R/1W adapters
; have to go through an intermediate local buffer when the source and dest
; are in different banks. The work is done by copy_edge, which handles up to
; EDGE_CHUNK_SIZE bytes in each plane at a pop (more bytes might cause
; flicker).
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulRightEdgeSrc = start source offset in bitmap
;       ulRightEdgeDest = start dest offset in bitmap
;       jRightMask = right edge clip mask
;
; Output:
;       ulRightEdgeSrc and ulRightEdgeDest are advanced to the scan after
;               the last scan processed
;-----------------------------------------------------------------------;

        align   4
copy_right_edge:

; Turn the saved offsets within the bitmap into real addresses, by adding in
; where the bitmap starts as seen through the source and dest windows.

        mov     ecx,pdsurf
        mov     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     esi,[ecx].dsurf_pvBitmapStart2WindowS
        add     edi,ulRightEdgeDest     ;EDI = first dest byte of the edge
        add     esi,ulRightEdgeSrc      ;ESI = first source byte of the edge

; Hand the strip to the shared edge copier, with the right clip mask in AH.

        mov     ah,byte ptr jRightMask
        call    copy_edge

; ESI and EDI come back as the next source and dest addresses. Convert them
; back into offsets within the bitmap and save them for the next block.

        mov     ecx,pdsurf
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulRightEdgeSrc,esi
        mov     ulRightEdgeDest,edi

        PLAIN_RET


;-----------------------------------------------------------------------;
; Copies an edge from the source to the destination on the screen.
;
; The edge is one byte wide and ulBlockHeight scans tall. It is copied in
; chunks of at most EDGE_CHUNK_SIZE scans; within each chunk the planes are
; copied one at a time, starting with plane 3, before the next chunk is
; started.
;
; Entry:
;       AH = bit mask setting for edge
;       ESI = source address
;       EDI = destination address
;       ulBlockHeight = # of bytes to copy per plane (must be at least 1; a
;               count of 0 would index the entry table below its first
;               entry)
;       ulNextScan = scan width
;       Source readable, and destination readable and writable
;       NOTE(review): only the Sequencer Data register is written here, so
;               the Sequencer Index presumably already selects the Map Mask
;               - confirm against the callers
; Exit:
;       ESI = next source address
;       EDI = next destination address
;       GC Index left pointing to the Read Map
;
; Uses: EAX, EBX, ECX, EDX, pSrcAddr, pDestAddr
; Preserved: EBP
;-----------------------------------------------------------------------;

        align   4
copy_edge:
        mov     pSrcAddr,esi    ;each plane of a chunk restarts from these
        mov     pDestAddr,edi   ; saved chunk start addresses

; Set the clip mask for this edge (AL = GC index, AH = mask, written as one
; word).

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK
        out     dx,ax

; Leave the GC Index pointing to the Read Map.

        mov     al,GRAF_READ_MAP
        out     dx,al

        mov     ecx,offset copy_edge_rw_full_chunk
                                ;entry point into unrolled loop to copy first
                                ; chunk, assuming it's a full chunk
        mov     ebx,ulBlockHeight

; Copy the edge in a series of chunks.
; Here EBX = # of scans still to copy, ECX = unrolled loop entry point.

copy_edge_chunk_loop:

        sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                                    ; a full chunk
        jge     short @F            ;do a full chunk
        add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                                    ; scans
        mov     ecx,pfnCopyEdgeRWEntry[-4][ebx*4]
                                ;entry point into unrolled loop to copy desired
                                ; chunk size (table entry 0 is for 1 byte,
                                ; hence the -4 bias)
        sub     ebx,ebx         ;no scans after this
@@:
        push    ebx             ;remember remaining scan count

        mov     ah,MM_C3        ;start by copying plane 3
        mov     ebx,ulNextScan  ;EBX = scan width for the unrolled loop

; Here AH = Map Mask bit for the plane to copy, EBX = scan width,
; ECX = unrolled loop entry point.

copy_edge_plane_loop:

; Set Map Mask to enable writes to plane we're copying.

        mov     al,ah
        mov     dl,SEQ_DATA
        out     dx,al

; Set Read Map to same plane.

        shr     al,1                    ;map plane into ReadMask
        cmp     al,100b                 ;set Carry if not C3 (plane 3)
        adc     al,-1                   ;sub 1 only if C3
        mov     dl,GRAF_DATA
        out     dx,al

        mov     esi,pSrcAddr            ;restart at this chunk's first scan
        mov     edi,pDestAddr

        jmp     ecx                     ;enter the unrolled loop to copy this
                                        ; plane's bytes for this chunk


;-----------------------------------------------------------------------;
; Table of unrolled edge loop entry points. First entry point is to copy
; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
; (DEFINE_DD is defined outside this section; it presumably emits a dword
; holding the address of label EDGE_RW<INDEX>.)
;-----------------------------------------------------------------------;

pfnCopyEdgeRWEntry label dword
INDEX = 1
        rept    EDGE_CHUNK_SIZE
        DEFINE_DD       EDGE_RW,%INDEX
INDEX = INDEX+1
        endm


;-----------------------------------------------------------------------;
; Unrolled loop for copying a strip of edge bytes, with source and
; destination both readable and writable.
; Each expansion copies one byte and steps both pointers down one scan.
;-----------------------------------------------------------------------;

COPY_EDGE_RW macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     al,[esi]        ;get byte to copy
        add     esi,ebx         ;point to next source scan
        xchg    [edi],al        ;read before write so Bit Mask can operate
        add     edi,ebx         ;point to next dest scan
        endm    ;-----------------------------------;

;  EBX = scan line width
;  ESI = source address to copy from
;  EDI = target address to copy to
;  Bit Mask set to desired clipping
;  Read Map and Map Mask set to enable the desired plane for read and write
;  (UNROLL_LOOP is defined outside this section; it presumably expands
;  COPY_EDGE_RW EDGE_CHUNK_SIZE times, labelling each expansion so the table
;  above can enter part way through.)

        align   4
copy_edge_rw_full_chunk:
        UNROLL_LOOP     COPY_EDGE_RW,EDGE_RW,EDGE_CHUNK_SIZE

; Do next plane within this chunk, if any.

        shr     ah,1                    ;advance to next plane
        jnz     copy_edge_plane_loop    ;mask bit shifted out => all planes done

; Remember where we left off, for the next chunk.

        mov     pSrcAddr,esi
        mov     pDestAddr,edi

; Do next chunk within this bank block, if any.

        pop     ebx                     ;retrieve remaining scan count
        and     ebx,ebx                 ;any scans left?
        jnz     copy_edge_chunk_loop    ;more scans to do

        PLAIN_RET


;-----------------------------------------------------------------------;
; Copies a strip of left edge bytes from the source to the destination
; through an intermediate RAM buffer. This is the approach required by
; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
; cause flicker.
;
; Sequence: read the edge from the screen into the temp buffer, map the
; source bank onto the destination scan, write the temp buffer to the screen
; through the Bit Mask, then map the source bank back to the source scan.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulLeftEdgeSrc = start source offset in bitmap
;       ulLeftEdgeDest = start dest offset in bitmap
;       jLeftMask = left edge clip mask
;       pTempPlane = pointer to temp storage buffer
;       ulCurrentSrcScan = scan used to map in source bank
;       ulCurrentDestScan = scan used to map in dest bank
;       ulCurrentJustification = justification used to map in current bank
;       For 1 R/W adapters, expects the source bank to be mapped in; banking
;               is the same at exit as it was at entry
;
; Output:
;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
;               scan processed
;
; Note that this should never be called for an unbanked or 2 R/W adapter,
; because the source and dest are always both addressable simultaneously then.
;-----------------------------------------------------------------------;

        align   4
copy_left_edge_via_buffer:

; First, copy all the bytes into the temporary buffer.

; Calculate the start source address from the source window's bitmap start
; address and the offset within the bitmap. (The dest address is calculated
; further down, after the dest bank has been mapped in.)

        mov     ecx,pdsurf
        mov     esi,ulLeftEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge from the source to the temp buffer.

        call    copy_screen_to_buffered_edge

; Remember where we left off, for next time. ESI was advanced by the copy;
; convert it back to an offset within the bitmap.

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        mov     ulLeftEdgeSrc,esi

; Now copy the temp buffer to the screen.

; Map in the source bank to match the destination, so we can read/write to it
; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
; mapped by this call, which is fine.
; EBX (pdsurf) is used again after this call, so the bank routine is relied
; upon to preserve it.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>

; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
; until now to calculate this, because the dest bank wasn't mapped earlier).

        mov     edi,ulLeftEdgeDest
        add     edi,[ebx].dsurf_pvBitmapStart2WindowD

        mov     ah,byte ptr jLeftMask           ;clip mask for this edge
        call    copy_buffered_edge_to_screen    ;do the copy

; Remember where we left off, for next time. EDI was advanced by the copy;
; convert it back to an offset within the bitmap.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ulLeftEdgeDest,edi

; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
; will be mapped by this call, which is fine.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>

        PLAIN_RET


;-----------------------------------------------------------------------;
; Copies a strip of right edge bytes from the source to the destination
; through an intermediate RAM buffer. This is the approach required by
; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
; cause flicker.
;
; Sequence: read the edge from the screen into the temp buffer, map the
; source bank onto the destination scan, write the temp buffer to the screen
; through the Bit Mask, then map the source bank back to the source scan.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulRightEdgeSrc = start source offset in bitmap
;       ulRightEdgeDest = start dest offset in bitmap
;       jRightMask = right edge clip mask
;       pTempPlane = pointer to temp storage buffer
;       ulCurrentSrcScan = scan used to map in source bank
;       ulCurrentDestScan = scan used to map in dest bank
;       ulCurrentJustification = justification used to map in current bank
;       For 1 R/W adapters, expects the source bank to be mapped in; banking
;               is the same at exit as it was at entry
;
; Output:
;       Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
;               scan processed
;
; Note that this should never be called for an unbanked or 2 R/W adapter,
; because the source and dest are always both addressable simultaneously then.
;-----------------------------------------------------------------------;

        align   4
copy_right_edge_via_buffer:

; First, copy all the bytes into the temporary buffer.

; Calculate the start source address from the source window's bitmap start
; address and the offset within the bitmap. (The dest address is calculated
; further down, after the dest bank has been mapped in.)

        mov     ecx,pdsurf
        mov     esi,ulRightEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge from the source to the temp buffer.

        call    copy_screen_to_buffered_edge

; Remember where we left off, for next time. ESI was advanced by the copy;
; convert it back to an offset within the bitmap.

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        mov     ulRightEdgeSrc,esi

; Now copy the temp buffer to the screen.

; Map in the source bank to match the destination, so we can read/write to it
; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
; mapped by this call, which is fine.
; EBX (pdsurf) is used again after this call, so the bank routine is relied
; upon to preserve it.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>

; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
; until now to calculate this, because the dest bank wasn't mapped earlier).

        mov     edi,ulRightEdgeDest
        add     edi,[ebx].dsurf_pvBitmapStart2WindowD

        mov     ah,byte ptr jRightMask          ;clip mask for this edge
        call    copy_buffered_edge_to_screen    ;do the copy

; Remember where we left off, for next time. EDI was advanced by the copy;
; convert it back to an offset within the bitmap.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ulRightEdgeDest,edi

; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
; will be mapped by this call, which is fine.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>

        PLAIN_RET


;-----------------------------------------------------------------------;
; Copies an edge from the temp buffer to the screen.
;
; The temp buffer holds ulBlockHeight bytes per plane, with the bytes for
; the first plane copied (plane 3) at pTempPlane and the other planes
; following consecutively. The edge is written in chunks of at most
; EDGE_CHUNK_SIZE scans, doing every plane of a chunk before moving on to
; the next chunk, to avoid flicker.
;
; Entry:
;       AH = bit mask setting for edge
;       EDI = destination address
;       pTempPlane = temp buffer from which to copy
;       ulBlockHeight = # of bytes to copy per plane (must be at least 1; a
;               count of 0 would index the entry table below its first
;               entry)
;       ulNextScan = scan width
;       Source and dest banks both pointing to destination
;       NOTE(review): only the Sequencer Data register is written here, so
;               the Sequencer Index presumably already selects the Map Mask
;               - confirm against the callers
;
;       DH does not have to be set up by the caller. The callers perform a
;       bank-mapping call between copy_screen_to_buffered_edge (which leaves
;       DH = VGA_BASE SHR 8) and this routine, and that call is not known to
;       preserve EDX, so the full port address is loaded here.
; Exit:
;       EDI = next destination address
;       DH = VGA_BASE SHR 8, DL = SEQ_DATA
;       GC Index left pointing to the Bit Mask
;
; Uses: EAX, EBX, ECX, EDX, ESI, pDestAddr, pTempEntry
; Preserved: EBP
;-----------------------------------------------------------------------;

        align   4
copy_buffered_edge_to_screen:

        mov     pDestAddr,edi   ;each plane of a chunk restarts from this
                                ; saved chunk start address

; Set the clip mask for this edge (AL = GC index, AH = mask, written as one
; word). Load all of EDX rather than just DL so that we don't depend on DH
; having survived whatever the caller did since it was last set.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK
        out     dx,ax

        mov     pTempEntry,offset copy_edge_from_buf_full_chunk
                                ;entry point into unrolled loop to copy first
                                ; chunk, assuming it's a full chunk
        mov     ecx,pTempPlane  ;temp buffer start (copy from here)
        mov     ebx,ulBlockHeight ;total # of scans to copy

; Copy the edge in a series of chunks, to avoid flicker.
; Here EBX = # of scans still to copy, ECX = temp buffer address of this
; chunk's first byte in the first plane.

copy_from_buffer_chunk_loop:

        sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                                    ; a full chunk
        jge     short @F            ;do a full chunk
        add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                                    ; scans
        mov     ebx,pfnCopyEdgesFromBufferEntry[-4][ebx*4]
                                ;table entry 0 is for 1 byte, hence the -4 bias
        mov     pTempEntry,ebx  ;entry point into unrolled loop to copy desired
                                ; chunk size
        sub     ebx,ebx         ;no scans after this
@@:
        push    ebx             ;remember remaining scan count

        mov     al,MM_C3        ;start by copying plane 3
        mov     ebx,ulNextScan  ;EBX = scan width for the unrolled loop

        push    ecx             ;remember current temp buffer start

        mov     dl,SEQ_DATA     ;leave DX pointing to Sequencer Data reg

; Here AL = Map Mask bit for the plane to copy, EBX = scan width,
; ECX = temp buffer address of this plane's bytes for this chunk.

copy_from_buffer_plane_loop:

; Set Map Mask to enable writes to plane we're copying.

        out     dx,al

; Select this plane's bytes in the temp buffer. Each plane occupies
; ulBlockHeight consecutive bytes, so the next plane's bytes for the same
; scans are ulBlockHeight further on.

        mov     esi,ecx                 ;point to current plane's source byte
        add     ecx,ulBlockHeight       ;point to next plane's source byte

        mov     edi,pDestAddr           ;restart at this chunk's first scan

        jmp     pTempEntry              ;enter the unrolled loop to copy this
                                        ; plane's bytes for this chunk


;-----------------------------------------------------------------------;
; Table of unrolled edge copy-from-buffer loop entry points. First entry
; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
; bytes.
; (DEFINE_DD is defined outside this section; it presumably emits a dword
; holding the address of label EDGE_FROM_BUFFER<INDEX>.)
;-----------------------------------------------------------------------;

pfnCopyEdgesFromBufferEntry label dword
INDEX = 1
        rept    EDGE_CHUNK_SIZE
        DEFINE_DD       EDGE_FROM_BUFFER,%INDEX
INDEX = INDEX+1
        endm


;-----------------------------------------------------------------------;
; Unrolled loop for copying a strip of edge bytes from the temp buffer.
; Each expansion copies one byte, steps the temp buffer pointer by one byte
; and the screen pointer by one scan. AH is used for the data because AL
; holds the Map Mask bit for the current plane.
;-----------------------------------------------------------------------;

COPY_EDGE_FROM_BUFFER macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ah,[esi]        ;get byte to copy
        inc     esi             ;point to next source (temp buffer) byte
        xchg    [edi],ah        ;read before write so Bit Mask can operate
        add     edi,ebx         ;point to next dest (screen) scan
        endm    ;-----------------------------------;

;  EBX = scan line width
;  ESI = source address to copy from (temp buffer)
;  EDI = target address to copy to (screen)
;  Bit Mask set to desired clipping
;  Map Mask set to enable the desired plane for write
;  (UNROLL_LOOP is defined outside this section; it presumably expands
;  COPY_EDGE_FROM_BUFFER EDGE_CHUNK_SIZE times, labelling each expansion so
;  the table above can enter part way through.)

        align   4
copy_edge_from_buf_full_chunk:
        UNROLL_LOOP     COPY_EDGE_FROM_BUFFER,EDGE_FROM_BUFFER,EDGE_CHUNK_SIZE

; Do next plane within this chunk, if any.

        shr     al,1                    ;advance to next plane
        jnz     copy_from_buffer_plane_loop
                                        ;mask bit shifted out => all planes done

; Remember where we left off, for next chunk.

        mov     pDestAddr,edi
        pop     ecx             ;get back current temp buffer start
        add     ecx,EDGE_CHUNK_SIZE ;point to next chunk's start

; Do next chunk within this bank block, if any.

        pop     ebx                     ;retrieve remaining scan count
        and     ebx,ebx                 ;any scans left?
        jnz     copy_from_buffer_chunk_loop    ;more scans to do

        PLAIN_RET


;-----------------------------------------------------------------------;
; Copies an edge from the screen to the temp buffer.
;
; The planes are read one at a time, plane 3 first and plane 0 last. Each
; plane's bytes are stored consecutively in the temp buffer, one plane
; directly after another, starting at pTempPlane.
;
; Entry:
;       ESI = source address
;       pTempPlane = temp buffer to which to copy
;       ulBlockHeight = # of bytes to copy per plane
;       ulNextScan = scan width
;       Source bank pointing to source
; Exit:
;       DH = VGA_BASE SHR 8
;       ESI = next source address
;       GC Index left pointing to the Read Map
;
; Uses: EAX, EBX, ECX, EDX, EDI, pSrcAddr, culTempCount, pTempEntry
; Preserved: EBP
;-----------------------------------------------------------------------;

        align   4
copy_screen_to_buffered_edge:

        mov     pSrcAddr,esi    ;every plane restarts from this address

; Leave the GC Index pointing to the Read Map.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_READ_MAP
        out     dx,al

; Work out how to run the unrolled loop for ulBlockHeight bytes.
; (SET_UP_UNROLL_VARS is defined outside this section; judging by how its
; results are used below, it leaves the iteration count in EBX and the
; unrolled loop entry point in ECX.)

        mov     ebx,ulBlockHeight
        SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry, \
                                LOOP_UNROLL_SHIFT
        mov     culTempCount,ebx ;remember # of unrolled loop iterations
        mov     pTempEntry,ecx   ;ditto for entry point

        mov     ecx,ulNextScan  ;ECX = scan width for the unrolled loop
        mov     edi,pTempPlane  ;dest offset in temp buffer for plane 3 bytes.
                                ;The rest of the planes are stored
                                ; consecutively
        mov     al,3            ;start by copying plane 3
        mov     dl,GRAF_DATA    ;leave DX pointing to GC Data reg

; Here AL = plane to read, ECX = scan width, EDI = next free temp buffer
; byte (carried over from one plane to the next).

copy_edge_to_buffer_plane_loop:
        mov     esi,pSrcAddr ;source pointer

        out     dx,al            ;set Read Map to plane we're copying from.

        mov     ebx,culTempCount ;# of unrolled loop iterations
        jmp     pTempEntry       ;copy the edge bytes for this plane to the
                                 ; temp buffer

;-----------------------------------------------------------------------;
; Table of unrolled edge copy to temp buffer loop entry points.
; (UNROLL_LOOP_ENTRY_TABLE is defined outside this section.)
;-----------------------------------------------------------------------;

        UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry,EDGE_TO_TEMP, \
                                LOOP_UNROLL_COUNT

;-----------------------------------------------------------------------;
; Unrolled loop for copying edge bytes to the temp buffer.
; Each expansion copies one byte, steps the screen pointer by one scan and
; the temp buffer pointer by one byte. AH is used for the data because AL
; holds the current plane number.
;-----------------------------------------------------------------------;

COPY_EDGE_TO_TEMP macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ah,[esi]        ;get byte to copy
        add     esi,ecx         ;point to next source scan
        mov     [edi],ah        ;copy byte to temp buffer
        inc     edi             ;point to next temp buffer byte
        endm    ;-----------------------------------;

;  EBX = count of unrolled loop iterations
;  ECX = scan line width (distance from one source byte to the next)
;  ESI = source address to copy from (screen)
;  EDI = target address to copy to (temp buffer)
;  Read Map set to enable the desired plane for read

        align   4
edge_to_buffer_loop:
        UNROLL_LOOP     COPY_EDGE_TO_TEMP,EDGE_TO_TEMP,LOOP_UNROLL_COUNT
        dec     ebx
        jnz     edge_to_buffer_loop

        dec     al               ;count down planes
        jns     copy_edge_to_buffer_plane_loop
                                 ;loop until the plane number goes below 0

        PLAIN_RET


;-----------------------------------------------------------------------;

endProc vAlignedSrcCopy

_TEXT$03   ends

        end


unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.