Annotation of ntddk/src/video/displays/vga/i386/alignblt.asm, revision 1.1.1.1

1.1       root        1: ;---------------------------Module-Header------------------------------;
                      2: ; Module Name: alignblt.asm
                      3: ;
                      4: ; Copyright (c) 1992 Microsoft Corporation
                      5: ;-----------------------------------------------------------------------;
                      6: 
                      7: ;-----------------------------------------------------------------------;
                      8: ; VOID vAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
                      9: ;                      INT icopydir);
                     10: ; Input:
                     11: ;  pdsurf - surface on which to copy
                     12: ;  prcldest - pointer to destination rectangle
                     13: ;  pptlsrc - pointer to source upper left corner
                     14: ;  icopydir - direction in which copy must proceed to avoid overlap problems
                     15: ;             and synchronize with the clip enumeration visually, according to
                     16: ;             constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and CD_LEFTUP in
                     17: ;             WINDDI.H
                     18: ;
                     19: ; Performs accelerated aligned SRCCOPY VGA-to-VGA blts.
                     20: ;
                     21: ;-----------------------------------------------------------------------;
                     22: ;
                     23: ; Note: Assumes all rectangles have positive heights and widths. Will not
                     24: ; work properly if this is not the case.
                     25: ;
                     26: ;-----------------------------------------------------------------------;
                     27: 
                     28:         comment $
                     29: 
                     30: The overall approach of this module for each rectangle to copy is:
                     31: 
                     32: 1) Precalculate the masks and whole byte widths, and determine which of
                     33: partial left edge, partial right edge, and whole middle bytes are required
                     34: for this copy.
                     35: 
                     36: 2) Set up the starting pointers for each of the areas (left, whole middle,
                     37: right), the start and stop scan lines, the copying direction (left-to-right
                     38: or right-to-left, and top-to-bottom or bottom-to-top), and the threading
                     39: (sequence of calls required to do the left/whole/right components in the
                     40: proper sequence), based on the passed-in copy direction, which in turn is
                     41: dictated by the nature of the overlap between the source and destination.
                     42: 
                     43: 3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
                     44: 1 R/W window, unbanked), that sequences through the intersection of each
                     45: bank with the source and destination rectangles in the proper direction
                     46: (top-to-bottom or bottom-to-top, based on the passed-in copy direction),
                     47: and performs the copy in each such rectangle. The threading vector is used
                     48: to call the required routines (copy left/whole/right bytes). For 1 R/W and
                     49: 1R/1W adapters, there is a second threading vector that is called when the
                     50: source and the destination are both adequately (for the copy purposes)
                     51: addressable simultaneously (because they're in the same bank), so there's
                     52: no need to copy through a temp buffer. Obviously, we want to avoid the temp
                     53: buffer whenever we can, because it's much slower and doesn't let us take
                     54: advantage of the VGA's hardware.
                     55: 
                     56: Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
                     57: each plane's bytes are not stored in the corresponding plane's temp buffer, but
                     58: rather consecutively in the plane 0 temp buffer. This is to reduce page
                     59: faulting, and also so that 1R/1W adapters only need a temp buffer large enough
                     60: to hold 4*tallest bank bytes (2K will do here, but nalgnblt.asm needs 4K).
                     61: 1 R/W adapters still copy whole bytes through the full temp buffer, using all
                     62: four planes' temp buffers, so they require a temp buffer big enough to hold a
                     63: full bank (256K will do).
                     64: 
                     65:         commend $
                     66: 
                     67: ;-----------------------------------------------------------------------;
                     68: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
                     69: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
                     70: ; times unrolling. This is the only thing you need to change to control
                     71: ; unrolling. Note: does not affect loops that process in chunks, like edge
                     72: ; loops.
                     73: 
                     74: LOOP_UNROLL_SHIFT equ 2         ;2**2 = 4 times unrolling
                     75: 
                     76: ;-----------------------------------------------------------------------;
                     77: ; Maximum # of edge bytes to process before switching to next plane. Larger
                     78: ; means faster, but there's more potential for flicker, since the raster scan
                     79: ; has a better chance of catching bytes that have changed in some planes but
                     80: ; not all planes.
                     81: 
                     82: EDGE_CHUNK_SIZE equ     16      ;max edge bytes done in one plane per chunk
                     83: 
                     84: ;-----------------------------------------------------------------------;
                     85: ; Macro to push the current threading sequence (string of routine calls) on the
                     86: ; stack, then jump to the first threading entry. THREADING is an operand that
                     87: ; yields the thread data pointer (default: pCurrentThread). RETURN_ADDR is where
                     88: ; the last routine returns (default: right after the macro's JMP, aligned to 4).
                     89: ; The macro itself overwrites EAX, ECX, and EDX before the jump.
                     90: THREAD_AND_START macro THREADING,RETURN_ADDR
                     91:         local   push_base, return_address
                     92: 
                     93: ifb <&RETURN_ADDR&>
                     94:         push    offset return_address   ;after all the threaded routines, we
                     95:                                         ; return here
                     96: else
                     97:         push    offset &RETURN_ADDR&    ;return here
                     98: endif
                     99: 
                    100: ifb <&THREADING&>
                    101:         mov     eax,pCurrentThread      ;EAX -> thread data (default thread)
                    102: else
                    103:         mov     eax,&THREADING&         ;EAX -> thread data (caller-specified)
                    104: endif
                    105: 
                    106:         mov     ecx,[eax]               ;# of routines to thread (at least 1)
                    107:         lea     ecx,[ecx*2+ecx]         ;ECX = count*3; pushes below are 3
                                        ; bytes each
                    108:         mov     edx,offset push_base+3  ;count of 1 lands exactly on push_base
                    109:         sub     edx,ecx                 ;back up 3 bytes (one push) per routine
                    110:         jmp     edx                     ;branch to push or jmp below
                    111: 
                    112: ; Push the threading addresses on to the stack, last routine first, so routines
                    113: ; perform the threading as they return (each return enters the next routine).
                    114: 
                    115:         push    dword ptr [eax+12]       ;third routine (3 byte instruction)
                    116:         push    dword ptr [eax+8]        ;second routine (3 byte instruction)
                    117: push_base:
                    118:         jmp     dword ptr [eax+4]        ;jump to the first threaded routine
                    119: 
                    120:         align   4
                    121: return_address:
                    122:         endm
                    123: 
                    124: ;-----------------------------------------------------------------------;
                    125: 
                    126:                 .386
                    127: 
                    128: ifndef  DOS_PLATFORM
                    129:         .model  small,c
                    130: else
                    131: ifdef   STD_CALL
                    132:         .model  small,c
                    133: else
                    134:         .model  small,pascal
                    135: endif;  STD_CALL
                    136: endif;  DOS_PLATFORM
                    137: 
                    138:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
                    139:         assume fs:nothing,gs:nothing
                    140: 
                    141:         .xlist
                    142:         include stdcall.inc             ;calling convention cmacros
                    143:         include i386\egavga.inc
                    144:         include i386\strucs.inc
                    145:         include i386\unroll.inc
                    146:         include i386\ropdefs.inc
                    147: 
                    148:         .list
                    149: 
                    150: ;-----------------------------------------------------------------------;
                    151: 
                    152:         .data
                    153: 
                    154: ; Threads for stringing together left (L), whole byte (W), and right (R)
                    155: ; operations in various orders. A "b" suffix marks a step that goes through
                    156: ; the temp buffer. THREAD_AND_START reads this data format:
                    157: ; DWORD +0 = # of calls in thread (1, 2, or 3)
                    158: ;       +4 = first call (required)
                    159: ;       +8 = second call (optional)
                    160: ;      +12 = third call (optional)
                    161: 
                    162:         align   4
                    163: 
                    164: ; Copies not involving the temp buffer.
                    165: 
                    166: Thread_L        dd      1
                    167:                 dd      copy_left_edge
                    168: 
                    169: Thread_W        dd      1
                    170:                 dd      copy_whole_bytes
                    171: 
                    172: Thread_R        dd      1
                    173:                 dd      copy_right_edge
                    174: 
                    175: Thread_LR       dd      2
                    176:                 dd      copy_left_edge
                    177:                 dd      copy_right_edge
                    178: 
                    179: Thread_RL       dd      2
                    180:                 dd      copy_right_edge
                    181:                 dd      copy_left_edge
                    182: 
                    183: Thread_LW       dd      2
                    184:                 dd      copy_left_edge
                    185:                 dd      copy_whole_bytes
                    186: 
                    187: Thread_WL       dd      2
                    188:                 dd      copy_whole_bytes
                    189:                 dd      copy_left_edge
                    190: 
                    191: Thread_WR       dd      2
                    192:                 dd      copy_whole_bytes
                    193:                 dd      copy_right_edge
                    194: 
                    195: Thread_RW       dd      2
                    196:                 dd      copy_right_edge
                    197:                 dd      copy_whole_bytes
                    198: 
                    199: Thread_LWR      dd      3
                    200:                 dd      copy_left_edge
                    201:                 dd      copy_whole_bytes
                    202:                 dd      copy_right_edge
                    203: 
                    204: Thread_RWL      dd      3
                    205:                 dd      copy_right_edge
                    206:                 dd      copy_whole_bytes
                    207:                 dd      copy_left_edge
                    208: 
                    209: ; Copies involving the temp buffer for at least one step.
                    210: 
                    211: Thread_Lb       dd      1
                    212:                 dd      copy_left_edge_via_buffer
                    213: 
                    214: Thread_Wb       dd      1
                    215:                 dd      copy_whole_bytes_via_buffer
                    216: 
                    217: Thread_Rb       dd      1
                    218:                 dd      copy_right_edge_via_buffer
                    219: 
                    220: Thread_LbRb     dd      2
                    221:                 dd      copy_left_edge_via_buffer
                    222:                 dd      copy_right_edge_via_buffer
                    223: 
                    224: Thread_RbLb     dd      2
                    225:                 dd      copy_right_edge_via_buffer
                    226:                 dd      copy_left_edge_via_buffer
                    227: 
                    228: Thread_LbW      dd      2
                    229:                 dd      copy_left_edge_via_buffer
                    230:                 dd      copy_whole_bytes
                    231: 
                    232: Thread_LbWb     dd      2
                    233:                 dd      copy_left_edge_via_buffer
                    234:                 dd      copy_whole_bytes_via_buffer
                    235: 
                    236: Thread_WLb      dd      2
                    237:                 dd      copy_whole_bytes
                    238:                 dd      copy_left_edge_via_buffer
                    239: 
                    240: Thread_WbLb     dd      2
                    241:                 dd      copy_whole_bytes_via_buffer
                    242:                 dd      copy_left_edge_via_buffer
                    243: 
                    244: Thread_WRb      dd      2
                    245:                 dd      copy_whole_bytes
                    246:                 dd      copy_right_edge_via_buffer
                    247: 
                    248: Thread_WbRb     dd      2
                    249:                 dd      copy_whole_bytes_via_buffer
                    250:                 dd      copy_right_edge_via_buffer
                    251: 
                    252: Thread_RbW      dd      2
                    253:                 dd      copy_right_edge_via_buffer
                    254:                 dd      copy_whole_bytes
                    255: 
                    256: Thread_RbWb     dd      2
                    257:                 dd      copy_right_edge_via_buffer
                    258:                 dd      copy_whole_bytes_via_buffer
                    259: 
                    260: Thread_LbWRb    dd      3
                    261:                 dd      copy_left_edge_via_buffer
                    262:                 dd      copy_whole_bytes
                    263:                 dd      copy_right_edge_via_buffer
                    264: 
                    265: Thread_LbWbRb   dd      3
                    266:                 dd      copy_left_edge_via_buffer
                    267:                 dd      copy_whole_bytes_via_buffer
                    268:                 dd      copy_right_edge_via_buffer
                    269: 
                    270: Thread_RbWLb    dd      3
                    271:                 dd      copy_right_edge_via_buffer
                    272:                 dd      copy_whole_bytes
                    273:                 dd      copy_left_edge_via_buffer
                    274: 
                    275: Thread_RbWbLb   dd      3
                    276:                 dd      copy_right_edge_via_buffer
                    277:                 dd      copy_whole_bytes_via_buffer
                    278:                 dd      copy_left_edge_via_buffer
                    279: 
                    280: ;-----------------------------------------------------------------------;
                    281: ; Table of thread selection for various horizontal copy directions, with
                    282: ; the look-up index a 4-bit field as follows:
                    283: ;
                    284: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
                    285: ; Bit 2 = 1 if left edge must be copied
                    286: ; Bit 1 = 1 if whole bytes must be copied
                    287: ; Bit 0 = 1 if right edge must be copied
                    288: ; Each dword entry points to a Thread_xxx descriptor; 0 marks an unused slot.
                    289: ; This is used for all cases where both the source and destination are
                    290: ; simultaneously addressable for our purposes, so there's no need to go
                    291: ; through the temp buffer (unbanked, 2 R/W, and sometimes for 1 R/W and 1R/1W).
                    292: 
                    293: MasterThreadTable label dword
                    294:                                 ;right-to-left (bit 3 = 0)
                    295:         dd      0               ;<not used> (no L, W, or R bit set)
                    296:         dd      Thread_R        ;R->L, R
                    297:         dd      Thread_W        ;R->L, W
                    298:         dd      Thread_RW       ;R->L, RW
                    299:         dd      Thread_L        ;R->L, L
                    300:         dd      Thread_RL       ;R->L, RL
                    301:         dd      Thread_WL       ;R->L, WL
                    302:         dd      Thread_RWL      ;R->L, RWL
                    303:                                 ;left-to-right (bit 3 = 1)
                    304:         dd      0               ;<not used> (no L, W, or R bit set)
                    305:         dd      Thread_R        ;L->R, R
                    306:         dd      Thread_W        ;L->R, W
                    307:         dd      Thread_WR       ;L->R, WR
                    308:         dd      Thread_L        ;L->R, L
                    309:         dd      Thread_LR       ;L->R, LR
                    310:         dd      Thread_LW       ;L->R, LW
                    311:         dd      Thread_LWR      ;L->R, LWR
                    312: 
                    313: 
                    314: ; Table of thread selection for various adapter types and horizontal
                    315: ; copy directions, with the look-up index a 6-bit field as follows:
                    316: ;
                    317: ; Bit 5 = adapter type high bit
                    318: ; Bit 4 = adapter type low bit
                    319: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
                    320: ; Bit 2 = 1 if left edge must be copied
                    321: ; Bit 1 = 1 if whole bytes must be copied
                    322: ; Bit 0 = 1 if right edge must be copied
                    323: ; Each adapter type value (0-3) therefore selects one group of 16 entries.
                    324: ; This is used for all cases where the source and destination are not both
                    325: ; simultaneously addressable for our purposes, so we need to go through the
                    326: ; temp buffer (only for 1 R/W and 1R/1W, and only sometimes).
                    327: 
                    328: MasterThreadTableViaBuffer label dword
                    329:                                 ;type 0: unbanked (no need for buffer)
                    330:                                 ;right-to-left
                    331:         dd      0               ;<not used>
                    332:         dd      Thread_R        ;R->L, R
                    333:         dd      Thread_W        ;R->L, W
                    334:         dd      Thread_RW       ;R->L, RW
                    335:         dd      Thread_L        ;R->L, L
                    336:         dd      Thread_RL       ;R->L, RL
                    337:         dd      Thread_WL       ;R->L, WL
                    338:         dd      Thread_RWL      ;R->L, RWL
                    339:                                 ;left-to-right
                    340:         dd      0               ;<not used>
                    341:         dd      Thread_R        ;L->R, R
                    342:         dd      Thread_W        ;L->R, W
                    343:         dd      Thread_WR       ;L->R, WR
                    344:         dd      Thread_L        ;L->R, L
                    345:         dd      Thread_LR       ;L->R, LR
                    346:         dd      Thread_LW       ;L->R, LW
                    347:         dd      Thread_LWR      ;L->R, LWR
                    348: 
                    349:                                 ;type 1: 1 R/W banking window (everything
                    350:                                 ;        goes through buffer)
                    351:                                 ;right-to-left
                    352:         dd      0               ;<not used>
                    353:         dd      Thread_Rb       ;R->L, R
                    354:         dd      Thread_Wb       ;R->L, W
                    355:         dd      Thread_RbWb     ;R->L, RW
                    356:         dd      Thread_Lb       ;R->L, L
                    357:         dd      Thread_RbLb     ;R->L, RL
                    358:         dd      Thread_WbLb     ;R->L, WL
                    359:         dd      Thread_RbWbLb   ;R->L, RWL
                    360:                                 ;left-to-right
                    361:         dd      0               ;<not used>
                    362:         dd      Thread_Rb       ;L->R, R
                    363:         dd      Thread_Wb       ;L->R, W
                    364:         dd      Thread_WbRb     ;L->R, WR
                    365:         dd      Thread_Lb       ;L->R, L
                    366:         dd      Thread_LbRb     ;L->R, LR
                    367:         dd      Thread_LbWb     ;L->R, LW
                    368:         dd      Thread_LbWbRb   ;L->R, LWR
                    369: 
                    370:                                 ;type 2: 1R/1W banking window (only edges go
                                ;        through buffer; whole bytes do not)
                    371:                                 ;right-to-left
                    372:         dd      0               ;<not used>
                    373:         dd      Thread_Rb       ;R->L, R
                    374:         dd      Thread_W        ;R->L, W
                    375:         dd      Thread_RbW      ;R->L, RW
                    376:         dd      Thread_Lb       ;R->L, L
                    377:         dd      Thread_RbLb     ;R->L, RL
                    378:         dd      Thread_WLb      ;R->L, WL
                    379:         dd      Thread_RbWLb    ;R->L, RWL
                    380:                                 ;left-to-right
                    381:         dd      0               ;<not used>
                    382:         dd      Thread_Rb       ;L->R, R
                    383:         dd      Thread_W        ;L->R, W
                    384:         dd      Thread_WRb      ;L->R, WR
                    385:         dd      Thread_Lb       ;L->R, L
                    386:         dd      Thread_LbRb     ;L->R, LR
                    387:         dd      Thread_LbW      ;L->R, LW
                    388:         dd      Thread_LbWRb    ;L->R, LWR
                    389: 
                    390:                                 ;type 3: 2 R/W banking window (no need for
                                ;        buffer)
                    391:                                 ;right-to-left
                    392:         dd      0               ;<not used>
                    393:         dd      Thread_R        ;R->L, R
                    394:         dd      Thread_W        ;R->L, W
                    395:         dd      Thread_RW       ;R->L, RW
                    396:         dd      Thread_L        ;R->L, L
                    397:         dd      Thread_RL       ;R->L, RL
                    398:         dd      Thread_WL       ;R->L, WL
                    399:         dd      Thread_RWL      ;R->L, RWL
                    400:                                 ;left-to-right
                    401:         dd      0               ;<not used>
                    402:         dd      Thread_R        ;L->R, R
                    403:         dd      Thread_W        ;L->R, W
                    404:         dd      Thread_WR       ;L->R, WR
                    405:         dd      Thread_L        ;L->R, L
                    406:         dd      Thread_LR       ;L->R, LR
                    407:         dd      Thread_LW       ;L->R, LW
                    408:         dd      Thread_LWR      ;L->R, LWR
                    409: 
                    410: 
                    411: ; Amount to shift adapter type field left for use in MasterThreadTableViaBuffer.
                    412: 
                    413: ADAPTER_FIELD_SHIFT     equ     4       ;puts adapter type in index bits 5-4
                    414: 
                    415: ; Mask for setting left-to-right bit to "left-to-right true" for use in both
                    416: ; MasterThread tables.
                    417: 
                    418: LEFT_TO_RIGHT_FIELD_SET equ     1000b   ;index bit 3
                    419: 
                    420: 
                    421: ; Table of top-to-bottom loops for adapter types, one dword entry per type.
                    422: ; Entry order: unbanked, 1 R/W, 1R/1W, 2 R/W.
                    423:         align   4
                    424: TopToBottomLoopTable label dword
                    425:         dd      top_to_bottom_2RW       ;unbanked is same as 2RW
                    426:         dd      top_to_bottom_1RW       ;1 R/W banking window
                    427:         dd      top_to_bottom_1R1W      ;1R/1W banking window
                    428:         dd      top_to_bottom_2RW       ;2 R/W banking window
                    429: 
                    430: 
                    431: ; Table of bottom-to-top loops for adapter types, one dword entry per type.
                    432: ; Entry order: unbanked, 1 R/W, 1R/1W, 2 R/W.
                    433:         align   4
                    434: BottomToTopLoopTable label dword
                    435:         dd      bottom_to_top_2RW       ;unbanked is same as 2RW
                    436:         dd      bottom_to_top_1RW       ;1 R/W banking window
                    437:         dd      bottom_to_top_1R1W      ;1R/1W banking window
                    438:         dd      bottom_to_top_2RW       ;2 R/W banking window
                    439: 
                    440: 
                    441: ; Table of routines for setting up to copy in various directions, one dword
                    442: ; entry per CD_* copy direction (presumably indexed by icopydir - confirm).
                    443:         align   4
                    444: SetUpForCopyDirection   label   dword
                    445:         dd      left_to_right_top_to_bottom     ;CD_RIGHTDOWN
                    446:         dd      right_to_left_top_to_bottom     ;CD_LEFTDOWN
                    447:         dd      left_to_right_bottom_to_top     ;CD_RIGHTUP
                    448:         dd      right_to_left_bottom_to_top     ;CD_LEFTUP
                    449: 
                    450: ;-----------------------------------------------------------------------;
                    451: ; Left edge clip masks for intrabyte start addresses 0 through 7.
                    452: ; Entry n keeps the low 8-n bits; entry 0 (0ffh) flags the whole byte case.
                    453: 
                    454: jLeftMaskTable  label   byte
                    455:         db      0ffh,07fh,03fh,01fh,00fh,007h,003h,001h
                    456: 
                    457: ;-----------------------------------------------------------------------;
                    458: ; Right edge clip masks for intrabyte end addresses (non-inclusive)
                    459: ; 0 through 7. Entry n (1-7) keeps the high n bits; entry 0 (0ffh) flags the
                    460: ; whole byte case.
                    461: jRightMaskTable label   byte
                    462:         db      0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh
                    463: 
                    464: ;-----------------------------------------------------------------------;
                    465: 
                    466:         .code
                    467: 
                    468: _TEXT$03   SEGMENT DWORD USE32 PUBLIC 'CODE'
                    469:            ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
                    470: 
                    471: ;-----------------------------------------------------------------------;
                    472: 
                    473: cProc   vAlignedSrcCopy,16,<        \
                    474:         uses    esi edi ebx,    \
                    475:         pdsurf: ptr DEVSURF,    \
                    476:         prcldest : ptr RECTL,   \
                    477:         pptlsrc : ptr POINTL,   \
                    478:         icopydir : dword
                    479: 
                    480:         local   culWholeBytesWidth : dword ;# of bytes to copy across each scan
                    481:         local   ulBlockHeight : dword   ;# of scans to copy per bank block
                    482:         local   ulWholeScanDelta : dword;offset from end of one whole bytes
                    483:                                         ; scan to start of next
                    484:         local   ulWholeBytesSrc : dword ;offset in bitmap of first source whole
                    485:                                         ; byte to copy from
                    486:         local   ulWholeBytesDest : dword;offset in bitmap of first dest whole
                    487:                                         ; byte to copy to
                    488:         local   ulLeftEdgeSrc : dword   ;offset in bitmap of first source left
                    489:                                         ; edge byte to copy from
                    490:         local   ulLeftEdgeDest : dword  ;offset in bitmap of first dest left
                    491:                                         ; edge byte to copy to
                    492:         local   ulRightEdgeSrc : dword  ;offset in bitmap of first source right
                    493:                                         ; edge byte to copy from
                    494:         local   ulRightEdgeDest : dword ;offset in bitmap of first dest right
                    495:                                         ; edge byte to copy to
                    496:         local   ulNextScan : dword      ;width of scan, in bytes
                    497:         local   jLeftMask : dword       ;left edge clip mask
                    498:         local   jRightMask : dword      ;right edge clip mask
                    499:         local   culTempCount : dword    ;handy temporary counter
                    500:         local   pTempEntry : dword      ;temporary storage for vector into
                    501:                                         ; unrolled loop
                    502:         local   pTempPlane : dword      ;pointer to storage in temp buffer for
                    503:                                         ; edge bytes (which are stored
                    504:                                         ; consecutively, not in each plane's
                    505:                                         ; temp buffer, to reduce possible page
                    506:                                         ; faulting
                    507:         local   ppTempPlane0 : dword    ;pointer to pointer to storage in temp
                    508:                                         ; buffer for plane 0, immediately
                    509:                                         ; preceded by storage for planes 1, 2,
                    510:                                         ; and 3
                    511:         local   ppTempPlane3 : dword    ;like above, but for plane 3
                    512:         local   ulOffsetInBank : dword  ;offset relative to bank start
                    513:         local   pSrcAddr : dword        ;working pointer to first source
                    514:                                         ; byte to copy from
                    515:         local   pDestAddr : dword       ;working pointer to first dest
                    516:                                         ; byte to copy to
                    517:         local   ulCurrentJustification:dword ;justification used to map in
                    518:                                              ; banks; top for top to bottom
                    519:                                              ; copies, bottom for bottom to top
                    520:         local   ulCurrentSrcScan :dword ;scan line used to map in current
                    521:                                         ; source bank
                    522:         local   ulCurrentDestScan:dword ;scan line used to map in current dest
                    523:                                         ; bank
                    524:         local   ulLastDestScan :dword   ;scan in target rect at which we stop
                    525:                                         ; advancing through banks
                    526:         local   pCurrentThread : dword  ;pointer to data describing the
                    527:                                         ; threaded calls to be performed to
                    528:                                         ; perform the current copy
                    529:         local   pCurrentThreadViaBuffer:dword
                    530:                                         ;pointer to data describing the
                    531:                                         ; threaded calls to be performed to
                    532:                                         ; perform the current copy in the case
                    533:                                         ; where the source and destination are
                    534:                                         ; not simultaneously adequately
                    535:                                         ; accessible, so the copy has to go
                    536:                                         ; through a temp buffer (used only for
                    537:                                         ; 1 R/W and 1R/1W banking)
                    538:         local   ulAdapterType : dword   ;adapter type code, per VIDEO_BANK_TYPE
                    539:         local   ulLWRType : dword       ;whether left edge, whole bytes, and
                    540:                                         ; right edge are involved in the
                    541:                                         ; current operation;
                    542:                                         ; bit 2 = 1 if left edge involved
                    543:                                         ; bit 1 = 1 if whole bytes involved
                    544:                                         ; bit 0 = 1 if right edge involved
                    545:         local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
                    546:                                         ; address past the left edge when the
                    547:                                         ; left edge is partial
                    548: 
                    549: ;-----------------------------------------------------------------------;
                    550: 
                    551: ; Set pointers to temp buffer plane pointers (used only by 1 R/W and 1R/1W
                    552: ; adapters), and other rectangle-independent variables (the banking type).
                    553: 
                    554:         mov     esi,pdsurf              ;point to the surface
                    555:         mov     eax,[esi].dsurf_pvBankBufferPlane0 ;plane 0 buffer pointer itself
                    556:         mov     pTempPlane,eax          ;temp storage used for the edge bytes
                    557:         lea     eax,[esi].dsurf_pvBankBufferPlane0 ;address of the plane 0 pointer
                    558:         mov     ppTempPlane0,eax        ;pointer to the plane 0 buffer pointer
                    559:         lea     eax,[esi].dsurf_pvBankBufferPlane3 ;address of the plane 3 pointer
                    560:         mov     ppTempPlane3,eax        ;pointer to the plane 3 buffer pointer
                    561: 
                    562:         mov     eax,[esi].dsurf_vbtBankingType ;banking type of this surface
                    563:         mov     ulAdapterType,eax       ;later used to index the bank loop and
                                                                    ; via-buffer thread tables
                    564: 
                    565: ; Copy the rectangle. copy_rect ends by jumping into the direction set-up
                                 ; code rather than returning itself; NOTE(review): the RET that brings
                                 ; control back here is presumably in the bank loops - confirm.
                    566: 
                    567:         call    copy_rect
                    568: 
                    569: ;-----------------------------------------------------------------------;
                    570: ; Set the VGA registers back to their default state.
                    571: ;-----------------------------------------------------------------------;
                    572: 
                    573:         mov     edx,VGA_BASE + GRAF_ADDR ;Graphics Controller index port
                    574:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK ;AL = Bit Mask index, AH = 0ffh
                    575:         out     dx,ax           ;enable bit mask for all bits
                    576: ; NOTE(review): no Sequencer index write is visible here; presumably the
                                 ; Sequencer index is already left selecting the Map Mask - confirm.
                    577:         mov     dl,SEQ_DATA             ;only DL changes; DH is kept from above
                    578:         mov     al,MM_ALL               ;all planes
                    579:         out     dx,al           ;enable writes to all planes
                    580: 
                    581:         cld                     ;restore default direction flag
                    582: 
                    583:         cRet    vAlignedSrcCopy ;done
                    584: 
                    585: 
                    586: ;***********************************************************************;
                    587: ; Copies the specified rectangle. Entered via CALL. Derives the edge clip
                    588: ; masks, the whole byte count and the left/whole/right flags from prcldest,
                    589: ; then jumps to the set-up code for the direction selected by icopydir.
                    590: ;***********************************************************************;
                    591: 
                    592:         align   4
                    593: copy_rect:
                    594: 
                    595: ; Set up masks and whole bytes count, and build left/whole/right index
                    596: ; indicating which of those parts are involved in the copy.
                    597: 
                    598:         mov     edi,prcldest            ;point to rectangle to copy
                    599: 
                    600:         mov     ebx,[edi].xRight        ;right edge of copy (non-inclusive)
                    601:         mov     ecx,ebx
                    602:         and     ecx,0111b               ;intrabyte address of right edge
                    603:         mov     ah,jRightMaskTable[ecx] ;right edge mask
                    604: 
                    605:         mov     esi,[edi].xLeft         ;left edge of copy (inclusive)
                    609:         sub     ebx,esi                 ;width in pixels of copy
                    610: 
                    611:         and     esi,0111b               ;intrabyte address of left edge
                    612:         mov     al,jLeftMaskTable[esi]  ;left edge mask
                    613: 
                    614:         dec     ebx                     ;make inclusive on right
                    615:         add     ebx,esi                 ;inclusive width, starting counting at
                    616:                                         ; the beginning of the left edge byte
                    617:         shr     ebx,3                   ;width of copy in bytes touched - 1
                    618:         jnz     short more_than_1_byte  ;more than 1 byte is involved
                    619: 
                    620: ; Only one byte will be affected. Combine first/last masks.
                    621: 
                    622:         and     al,ah                   ;we'll use first byte mask only
                    623:         xor     ah,ah                   ;want last byte mask to be 0 to
                    624:                                         ; indicate right edge not involved
                    625:         inc     ebx                     ;so there's one count to subtract below
                    626:                                         ; if this isn't a whole edge byte
                    627: more_than_1_byte:
                    628: 
                    629: ; If all pixels in the left edge are altered, combine the first byte into the
                    630: ; whole byte count, because we can handle solid edge bytes faster as part of
                    631: ; the whole bytes. Ditto for the right edge.
                    632: 
                    633:         sub     ecx,ecx                 ;edge whole-status accumulator
                    634:         cmp     al,-1                   ;CF=1 if left mask is below 0ffh, that
                                                                    ; is, if the left edge is partial
                    635:         adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
                    636:         sub     ebx,ecx                 ;if left edge partial, deduct it from
                    637:                                         ; the whole bytes count
                    638:         mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
                    639:                                         ; it's partial when pointing to the
                    640:                                         ; whole bytes
                    641:         and     ah,ah                   ;is right edge mask 0, meaning this
                    642:                                         ; copy is only 1 byte wide?
                    643:         jz      short save_masks        ;yes, no need to do anything
                    644:         or      ecx,40h                 ;assume there's a partial right edge;
                                                                    ; bit 6 ends up in bit 0 after the
                                                                    ; ADC and ROL below
                    645:         cmp     ah,-1                   ;is right edge a whole byte or partial?
                    646:         jnz     short save_masks        ;partial
                    647:                                         ;right edge is a whole byte
                    648:         inc     ebx                     ;if right edge whole, include it in the
                    649:                                         ; whole bytes count
                    650:         and     ecx,not 40h             ;there's no partial right edge
                    651: save_masks:
                    652:         cmp     ebx,1                   ;do we have any whole bytes?
                    653:         cmc                             ;CF set if whole byte count > 0
                    654:         adc     ecx,ecx                 ;ECX = ECX*2 + CF: left flag moves to
                    655:                                         ; bit 1, right flag to bit 7, and the
                                                                    ; whole bytes flag enters at bit 0
                    656:         rol     cl,1                    ;align the left/whole/right bits:
                                                                    ; bit 2 = left, bit 1 = whole,
                                                                    ; bit 0 = right (wrapped from bit 7)
                    657:         mov     ulLWRType,ecx           ;save left/whole/right status
                    658: 
                    659:         mov     byte ptr jLeftMask,al   ;save left and right clip masks (only
                    660:         mov     byte ptr jRightMask,ah  ; the low byte of each dword is written)
                    661:         mov     culWholeBytesWidth,ebx  ;save # of whole bytes
                    662: 
                    663: ; Copy the rectangle in the specified direction.
                    664: 
                    665:         mov     eax,icopydir            ;copy direction index
                    666:         jmp     SetUpForCopyDirection[eax*4] ;dword table of set-up routines
                    667: 
                    668: 
                    669: ;***********************************************************************;
                    670: ;
                    671: ; The following routines set up to handle the four possible copy
                    672: ; directions.
                    673: ;
                    674: ;***********************************************************************;
                    675: 
                    676: 
                    677: ;-----------------------------------------------------------------------;
                    678: ; Set-up code for left-to-right, top-to-bottom copies: sets the direction
                    679: ; flag, scan stride, thread pointers, and starting dest/source offsets.
                    680: ;-----------------------------------------------------------------------;
                    681:         align   4
                    682: left_to_right_top_to_bottom:
                    683: 
                    684:         cld                             ;we'll copy left to right
                    685: 
                    686:         mov     esi,pdsurf              ;point to the surface
                    687:         mov     eax,[esi].dsurf_lNextScan ;scan line width from the surface
                    688:         mov     ulNextScan,eax          ;copy top to bottom
                    689:         sub     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
                    690:         mov     ulWholeScanDelta,eax    ; to start of next
                    691: 
                    692:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                    693:                                         ; right involvement in operation
                    694:         or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
                    695:         mov     eax,MasterThreadTable[esi*4] ;dword table entry for this index
                    696:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                    697:         mov     edx,ulAdapterType
                    698:         shl     edx,ADAPTER_FIELD_SHIFT ;move adapter type to its index field
                    699:         or      esi,edx                 ;factor adapter type into the index
                    700:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                    701:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                    702: 
                    703:         mov     ulCurrentJustification,JustifyTop ;copy top to bottom
                    704: 
                    705:         mov     esi,prcldest            ;point to the dest rectangle
                    706:         mov     eax,[esi].yBottom
                    707:         mov     ulLastDestScan,eax      ;end at bottom of dest copy rect
                    708:         mov     eax,[esi].yTop
                    709:         mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
                    710:         mul     ulNextScan              ;offset in bitmap of top dest rect scan
                                                                    ; (EAX = low dword of the product)
                    711:         mov     edx,[esi].xLeft
                    712:         shr     edx,3                   ;byte X address
                    713:         add     eax,edx                 ;offset in bitmap of first dest byte
                    714:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                    715:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    716:                                         ; byte, unless the left edge is a whole
                    717:                                         ; byte and is thus part of the whole
                    718:                                         ; bytes already
                    719:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
                    720:         add     eax,culWholeBytesWidth  ;point to the right edge
                    721:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                    722: 
                    723:         mov     esi,pptlsrc             ;point to the source upper left corner
                    724:         mov     eax,[esi].ptl_y
                    725:         mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
                    726:         mul     ulNextScan              ;offset in bitmap of top source rect scan
                    727:         mov     edx,[esi].ptl_x
                    728:         shr     edx,3                   ;byte X address
                    729:         add     eax,edx                 ;offset in bitmap of first source byte
                    730:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                    731:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    732:                                         ; byte, unless the left edge is a whole
                    733:                                         ; byte and is thus part of the whole
                    734:                                         ; bytes already
                    735:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
                    736:         add     eax,culWholeBytesWidth  ;point to the right edge
                    737:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts
                    738: 
                    739: ; Branch to the appropriate top-to-bottom bank enumeration loop.
                    740: 
                    741:         mov     eax,ulAdapterType       ;adapter type selects the loop
                    742:         jmp     TopToBottomLoopTable[eax*4]
                    743: 
                    744: 
                    745: ;-----------------------------------------------------------------------;
                    746: ; Set-up code for right-to-left, top-to-bottom copies: sets the direction
                    747: ; flag, scan stride, thread pointers, and starting dest/source offsets.
                    748: ;-----------------------------------------------------------------------;
                    749:         align   4
                    750: right_to_left_top_to_bottom:
                    751: 
                    752:         std                             ;we'll copy right to left
                    753: 
                    754:         mov     esi,pdsurf              ;point to the surface
                    755:         mov     eax,[esi].dsurf_lNextScan ;scan line width from the surface
                    756:         mov     ulNextScan,eax          ;copy top to bottom
                    757:         add     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
                    758:         mov     ulWholeScanDelta,eax    ; to start of next, given that we're
                    759:                                         ; copying one way and going scan-to-
                    760:                                         ; scan the other way
                    761:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                    762:                                         ; right involvement in operation
                    763:                                         ;leave left-to-right field cleared, so
                    764:                                         ; we look up right-to-left entries
                    765:         mov     eax,MasterThreadTable[esi*4] ;dword table entry for this index
                    766:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                    767:         mov     edx,ulAdapterType
                    768:         shl     edx,ADAPTER_FIELD_SHIFT ;move adapter type to its index field
                    769:         or      esi,edx                 ;factor adapter type into the index
                    770:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                    771:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                    772: 
                    773:         mov     ulCurrentJustification,JustifyTop ;copy top to bottom
                    774: 
                    775:         mov     esi,prcldest            ;point to the dest rectangle
                    776:         mov     eax,[esi].yBottom
                    777:         mov     ulLastDestScan,eax      ;end at bottom of dest copy rect
                    778:         mov     eax,[esi].yTop
                    779:         mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
                    780:         mul     ulNextScan              ;offset in bitmap of top dest rect scan
                                                                    ; (EAX = low dword of the product)
                    781:         mov     edx,[esi].xLeft
                    782:         shr     edx,3                   ;byte X address
                    783:         add     eax,edx                 ;offset in bitmap of first dest byte
                    784:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                    785:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    786:                                         ; byte, unless the left edge is a whole
                    787:                                         ; byte and is thus part of the whole
                    788:                                         ; bytes already
                    789:         add     eax,culWholeBytesWidth  ;point to the right edge
                    790:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                    791:         dec     eax                     ;back up to the last whole byte
                    792:         mov     ulWholeBytesDest,eax    ;rightmost whole dest byte, which is
                                                                    ; where a right-to-left copy starts
                    793: 
                    794:         mov     esi,pptlsrc             ;point to the source upper left corner
                    795:         mov     eax,[esi].ptl_y
                    796:         mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
                    797:         mul     ulNextScan              ;offset in bitmap of top source rect scan
                    798:         mov     edx,[esi].ptl_x
                    799:         shr     edx,3                   ;byte X address
                    800:         add     eax,edx                 ;offset in bitmap of first source byte
                    801:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                    802:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    803:                                         ; byte, unless the left edge is a whole
                    804:                                         ; byte and is thus part of the whole
                    805:                                         ; bytes already
                    806:         add     eax,culWholeBytesWidth  ;point to the right edge
                    807:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts
                    808:         dec     eax                     ;back up to the last whole byte
                    809:         mov     ulWholeBytesSrc,eax     ;rightmost whole src byte, which is
                                                                    ; where a right-to-left copy starts
                    810: 
                    811: ; Branch to the appropriate top-to-bottom bank enumeration loop.
                    812: 
                    813:         mov     eax,ulAdapterType       ;adapter type selects the loop
                    814:         jmp     TopToBottomLoopTable[eax*4]
                    815: 
                    816: 
                    817: 
                    818: ;-----------------------------------------------------------------------;
                    819: ; Set-up code for left-to-right, bottom-to-top copies: sets the direction
                    820: ; flag, negated scan stride, thread pointers, and starting offsets.
                    821: ;-----------------------------------------------------------------------;
                    822:         align   4
                    823: left_to_right_bottom_to_top:
                    824: 
                    825:         cld                             ;we'll copy left to right
                    826: 
                    827:         mov     edi,pdsurf              ;point to the surface (EDI is kept
                                                                    ; for the two MULs below)
                    828:         mov     eax,[edi].dsurf_lNextScan ;scan line width from the surface
                    829:         neg     eax                     ;negative stride steps up the bitmap
                    830:         mov     ulNextScan,eax          ;copy bottom to top
                    831:         sub     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
                    832:         mov     ulWholeScanDelta,eax    ; to start of next, given that we're
                    833:                                         ; copying one way and going scan-to-
                    834:                                         ; scan the other way
                    835:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                    836:                                         ; right involvement in operation
                    837:         or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
                    838:         mov     eax,MasterThreadTable[esi*4] ;dword table entry for this index
                    839:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                    840:         mov     edx,ulAdapterType
                    841:         shl     edx,ADAPTER_FIELD_SHIFT ;move adapter type to its index field
                    842:         or      esi,edx                 ;factor adapter type into the index
                    843:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                    844:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                    845: 
                    846:         mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
                    847: 
                    848:         mov     esi,prcldest            ;point to the dest rectangle
                    849:         mov     edx,[esi].yTop
                    850:         mov     ulLastDestScan,edx      ;end at top of dest copy rect
                    851:         mov     eax,[esi].yBottom
                    852:         dec     eax                     ;rectangle definition is non-inclusive,
                    853:                                         ; so back up to the first scan we'll copy
                    854:         sub     edx,eax                 ;-(offset from rect top to bottom)
                    855:         push    edx                     ;remember for use with source (the MUL
                                                                    ; below overwrites EDX)
                    856:         mov     ulCurrentDestScan,eax   ;start at bottom of dest copy rect
                    857:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
                    858:                                         ; scan (first scan to which to copy);
                                                                    ; uses the positive surface stride
                    859:         mov     edx,[esi].xLeft
                    860:         shr     edx,3                   ;byte X address
                    861:         add     eax,edx                 ;offset in bitmap of first dest byte
                    862:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                    863:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    864:                                         ; byte, unless the left edge is a whole
                    865:                                         ; byte and is thus part of the whole
                    866:                                         ; bytes already
                    867:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
                    868:         add     eax,culWholeBytesWidth  ;point to the right edge
                    869:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                    870: 
                    871:         mov     esi,pptlsrc             ;point to the source upper left corner
                    872:         mov     eax,[esi].ptl_y
                    873:         pop     edx                     ;retrieve -(offset from top to bottom)
                    874:         sub     eax,edx                 ;advance to bottom of source rect
                    875:                                         ; (inclusive; this is first scan from
                    876:                                         ; which to copy)
                    877:         mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
                    878:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source rect
                    879:                                         ; scan
                    880:         mov     edx,[esi].ptl_x
                    881:         shr     edx,3                   ;byte X address
                    882:         add     eax,edx                 ;offset in bitmap of first source byte
                    883:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                    884:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    885:                                         ; byte, unless the left edge is a whole
                    886:                                         ; byte and is thus part of the whole
                    887:                                         ; bytes already
                    888:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
                    889:         add     eax,culWholeBytesWidth  ;point to the right edge
                    890:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts
                    891: 
                    892: ; Branch to the appropriate bottom-to-top bank enumeration loop.
                    893: 
                    894:         mov     eax,ulAdapterType       ;adapter type selects the loop
                    895:         jmp     BottomToTopLoopTable[eax*4]
                    896: 
                    897: 
                    898: ;-----------------------------------------------------------------------;
                    899: ; Set-up code for right-to-left, bottom-to-top copies.
                         ;
                         ; Sets the direction flag (string instructions then step downward in
                         ; memory), stores the negated scan width in ulNextScan, picks the two
                         ; threading entries for this copy, and computes the starting dest and
                         ; source scan numbers and edge offsets from prcldest and pptlsrc. The
                         ; starting scan is the bottom scan of each rectangle, and the whole-byte
                         ; pointers are placed on the last (rightmost) whole byte. Control leaves
                         ; through BottomToTopLoopTable, indexed by ulAdapterType.
                         ;
                         ; Reads ulLWRType, ulAdapterType, culWholeBytesWidth, and
                         ; ulLeftEdgeAdjust, which must already hold valid values (presumably set
                         ; by code earlier in the routine -- not visible from here, confirm).
                    900: ;-----------------------------------------------------------------------;
                    901: 
                    902:         align   4
                    903: right_to_left_bottom_to_top:
                    904: 
                    905:         std                             ;we'll copy right to left
                    906: 
                    907:         mov     edi,pdsurf
                    908:         mov     eax,[edi].dsurf_lNextScan
                    909:         neg     eax                     ;negative scan width, to step upward
                    910:         mov     ulNextScan,eax          ;copy bottom to top
                    911:         add     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
                    912:         mov     ulWholeScanDelta,eax    ; to start of next
                    913:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                    914:                                         ; right involvement in operation
                    915:                                         ;leave left-to-right field cleared, so
                    916:                                         ; we look up right-to-left entries
                    917:         mov     eax,MasterThreadTable[esi*4]
                    918:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                    919:         mov     edx,ulAdapterType
                    920:         shl     edx,ADAPTER_FIELD_SHIFT
                    921:         or      esi,edx                 ;factor adapter type into the index
                    922:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                    923:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                    924: 
                    925:         mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
                    926: 
                         ; Destination: starting scan, stopping scan, and byte offsets of the
                         ; left edge, right edge, and last whole byte on the starting scan.
                         
                    927:         mov     esi,prcldest
                    928:         mov     edx,[esi].yTop
                    929:         mov     ulLastDestScan,edx      ;end at top of dest copy rect
                    930:         mov     eax,[esi].yBottom
                    931:         dec     eax                     ;rectangle definition is non-inclusive,
                    932:                                         ; so advance to first scan we'll copy
                    933:         sub     edx,eax                 ;-(offset from rect top to bottom)
                    934:         push    edx                     ;remember for use with source
                    935:         mov     ulCurrentDestScan,eax   ;start at bottom of dest copy rect
                    936:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
                    937:                                         ; scan (first scan to which to copy)
                                                                 ;(mul overwrites EDX with the high
                                                                 ; half of the product; the value
                                                                 ; pushed above is unaffected)
                    938:         mov     edx,[esi].xLeft
                    939:         shr     edx,3                   ;byte X address
                    940:         add     eax,edx
                    941:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                    942:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    943:                                         ; byte, unless the left edge is a whole
                    944:                                         ; byte and is thus part of the whole
                    945:                                         ; bytes already
                    946:         add     eax,culWholeBytesWidth  ;point to the right edge
                    947:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                    948:         dec     eax                     ;back up to the last whole byte
                    949:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
                    950: 
                         ; Source: same calculation, starting from the source upper left corner.
                         
                    951:         mov     esi,pptlsrc
                    952:         mov     eax,[esi].ptl_y
                    953:         pop     edx                     ;retrieve -(offset from top to bottom)
                    954:         sub     eax,edx                 ;advance to bottom of source rect
                    955:                                         ; (inclusive; this is first scan from
                    956:                                         ; which to copy)
                                                                 ;(EDX is yTop-(yBottom-1) of the dest
                                                                 ; rect, so subtracting it moves down)
                    957:         mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
                    958:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source rect
                    959:                                         ; scan
                    960:         mov     edx,[esi].ptl_x
                    961:         shr     edx,3                   ;byte X address
                    962:         add     eax,edx                 ;offset in bitmap of first source byte
                    963:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                    964:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    965:                                         ; byte, unless the left edge is a whole
                    966:                                         ; byte and is thus part of the whole
                    967:                                         ; bytes already
                    968:         add     eax,culWholeBytesWidth  ;point to the right edge
                    969:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts
                    970:         dec     eax                     ;back up to the last whole byte
                    971:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
                    972: 
                    973: ; Branch to the appropriate bottom-to-top bank enumeration loop.
                    974: 
                    975:         mov     eax,ulAdapterType
                    976:         jmp     BottomToTopLoopTable[eax*4]
                    977: 
                    978: 
                    979: ;***********************************************************************;
                    980: ;
                    981: ; The following routines are the banking loops.
                    982: ;
                    983: ;***********************************************************************;
                    984: 
                    985: 
                    986: ;-----------------------------------------------------------------------;
                    987: ; Banking for 2 R/W and unbanked adapters, top to bottom.
                         ;
                         ; Copies the rectangle in blocks, working downward. Each block is no
                         ; taller than what remains of the currently mapped dest bank, the
                         ; currently mapped source bank, or the rectangle itself.
                         ;
                         ; Reads ulCurrentSrcScan, ulCurrentDestScan, and ulLastDestScan, plus the
                         ; bank clip rectangles dsurf_rcl2WindowClipS and dsurf_rcl2WindowClipD in
                         ; the surface at pdsurf. Updates ulCurrentSrcScan, ulCurrentDestScan,
                         ; and ulBlockHeight. Banks are changed by calling through
                         ; dsurf_pfnBankControl2Window. The loop exits through PLAIN_RET when the
                         ; dest scan reached after a block equals ulLastDestScan.
                         ;
                         ; THREAD_AND_START and PLAIN_RET are macros defined elsewhere;
                         ; presumably THREAD_AND_START copies ulBlockHeight scans and then
                         ; continues with the code that follows it -- confirm against the macro.
                    988: ;-----------------------------------------------------------------------;
                    989:         align   4
                    990: top_to_bottom_2RW:
                    991: 
                    992: ; We're going top to bottom. Map in the source and dest, top-justified.
                    993: 
                    994:         mov     ebx,pdsurf
                    995:         mov     edx,ulCurrentSrcScan
                    996:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
                    997:                                                      ; current source bank?
                    998:         jl      short top_2RW_map_init_src_bank      ;yes, map in proper bank
                    999:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
                   1000:                                                         ; current source bank?
                   1001:         jl      short top_2RW_init_src_bank_mapped
                   1002:                                                 ;no, proper bank already mapped
                   1003: top_2RW_map_init_src_bank:
                   1004: 
                   1005: ; Map bank containing the top source scan line into source window.
                   1006: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1007: 
                   1008:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1009:                 <ebx,edx,JustifyTop,MapSourceBank>
                   1010: 
                   1011: top_2RW_init_src_bank_mapped:
                   1012: 
                   1013:         mov     edx,ulCurrentDestScan
                   1014:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
                   1015:                                                      ; current dest bank?
                   1016:         jl      short top_2RW_map_init_dest_bank     ;yes, map in proper bank
                   1017:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
                   1018:                                                         ; current dest bank?
                   1019:         jl      short top_2RW_init_dest_bank_mapped
                   1020:                                                 ;no, proper bank already mapped
                   1021: top_2RW_map_init_dest_bank:
                   1022: 
                   1023: ; Map bank containing the top dest scan line into dest window.
                   1024: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1025: 
                   1026:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1027:                 <ebx,edx,JustifyTop,MapDestBank>
                   1028: 
                   1029: top_2RW_init_dest_bank_mapped:
                   1030: 
                   1031: ; Bank-by-bank top-to-bottom copy loop.
                   1032: 
                   1033: top_2RW_bank_loop:
                   1034: 
                   1035: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1036: 
                   1037:         mov     edx,ulLastDestScan
                   1038:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
                   1039:         jl      short @F        ;copy rectangle bottom is in this bank
                   1040:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
                   1041:                                                         ; of bank, at least
                   1042: @@:
                   1043:         sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1044:                                         ; the dest bank
                   1045:         mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
                   1046:         sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank
                   1047: 
                         ; Take the smaller of the two counts. The compare is unsigned
                         ; (NOTE(review): assumes neither count can be negative here -- confirm).
                         
                   1048:         cmp     edx,eax
                   1049:         jb      short @F        ;source bank isn't limiting
                   1050:         mov     edx,eax         ;source bank is limiting
                   1051: @@:
                   1052:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1053: 
                   1054: ; We're ready to copy this block.
                   1055: 
                   1056:         THREAD_AND_START
                   1057: 
                   1058: ; Any more scans to copy?
                   1059: 
                   1060:         mov     eax,ulCurrentDestScan
                   1061:         mov     esi,ulBlockHeight
                   1062:         add     eax,esi                 ;we've copied to dest up to here
                   1063:         cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
                   1064:         jz      short top_2RW_done      ;yes, we're done
                   1065:         mov     ulCurrentDestScan,eax
                   1066: 
                   1067: ; Now advance either or both banks, as needed.
                   1068: 
                   1069:         mov     ebx,pdsurf              ;reload surface pointer after the copy
                   1070:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
                   1071:                                                         ; current dest bank?
                   1072:         jl      short top_2RW_dest_bank_mapped    ;no, proper bank still mapped
                   1073: 
                   1074: ; Map bank containing the current dest scan line into dest window.
                   1075: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1076: 
                   1077:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1078:                 <ebx,eax,JustifyTop,MapDestBank>
                   1079: 
                   1080: top_2RW_dest_bank_mapped:
                   1081: 
                         ; ESI still holds ulBlockHeight, loaded above.
                         
                   1082:         add     esi,ulCurrentSrcScan    ;we've copied from source up to here
                   1083:         mov     ulCurrentSrcScan,esi
                   1084: 
                   1085:         cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
                   1086:                                                         ; current src bank?
                   1087:         jl      short top_2RW_src_bank_mapped     ;no, proper bank still mapped
                   1088: 
                   1089: ; Map bank containing the current source scan line into source window.
                   1090: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1091: 
                   1092:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1093:                 <ebx,esi,JustifyTop,MapSourceBank>
                   1094: 
                   1095: top_2RW_src_bank_mapped:
                   1096: 
                   1097:         jmp     top_2RW_bank_loop
                   1098: 
                   1099: top_2RW_done:
                   1100:         PLAIN_RET
                   1101: 
                   1102: 
                   1103: ;-----------------------------------------------------------------------;
                   1104: ; Banking for 2 R/W and unbanked adapters, bottom to top.
                         ;
                         ; Copies the rectangle in blocks, working upward. Scan counts here are
                         ; inclusive: a block runs from the current scan up to the top of the
                         ; mapped bank or to ulLastDestScan, whichever comes first, and is further
                         ; limited by what remains of the mapped source bank.
                         ;
                         ; Reads ulCurrentSrcScan, ulCurrentDestScan, and ulLastDestScan, plus the
                         ; bank clip rectangles dsurf_rcl2WindowClipS and dsurf_rcl2WindowClipD in
                         ; the surface at pdsurf. Updates ulCurrentSrcScan, ulCurrentDestScan,
                         ; and ulBlockHeight. Banks are changed by calling through
                         ; dsurf_pfnBankControl2Window, bottom-justified. The loop exits through
                         ; PLAIN_RET once the next dest scan would lie above ulLastDestScan.
                         ;
                         ; THREAD_AND_START and PLAIN_RET are macros defined elsewhere;
                         ; presumably THREAD_AND_START copies ulBlockHeight scans and then
                         ; continues with the code that follows it -- confirm against the macro.
                   1105: ;-----------------------------------------------------------------------;
                   1106:         align   4
                   1107: bottom_to_top_2RW:
                   1108: 
                   1109: ; We're going bottom to top. Map in the source and dest, bottom-justified.
                   1110: 
                   1111:         mov     ebx,pdsurf
                   1112:         mov     edx,ulCurrentSrcScan
                   1113:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
                   1114:                                                      ; current source bank?
                   1115:         jl      short bot_2RW_map_init_src_bank      ;yes, map in proper bank
                   1116:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
                   1117:                                                         ; than current src bank?
                   1118:         jl      short bot_2RW_init_src_bank_mapped
                   1119:                                                 ;no, proper bank already mapped
                   1120: bot_2RW_map_init_src_bank:
                   1121: 
                   1122: ; Map bank containing the bottom source scan line into source window.
                   1123: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1124: 
                   1125:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1126:                 <ebx,edx,JustifyBottom,MapSourceBank>
                   1127: 
                   1128: bot_2RW_init_src_bank_mapped:
                   1129: 
                   1130:         mov     edx,ulCurrentDestScan
                   1131:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
                   1132:                                                      ; current dest bank?
                   1133:         jl      short bot_2RW_map_init_dest_bank     ;yes, map in proper bank
                   1134:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
                   1135:                                                         ; than current dst bank?
                   1136:         jl      short bot_2RW_init_dest_bank_mapped
                   1137:                                                 ;no, proper bank already mapped
                   1138: bot_2RW_map_init_dest_bank:
                   1139: 
                   1140: ; Map bank containing the bottom dest scan line into dest window.
                   1141: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1142: 
                   1143:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1144:                 <ebx,edx,JustifyBottom,MapDestBank>
                   1145: 
                   1146: bot_2RW_init_dest_bank_mapped:
                   1147: 
                   1148: ; Bank-by-bank bottom-to-top copy loop.
                   1149: 
                   1150: bot_2RW_bank_loop:
                   1151: 
                   1152: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1153: 
                   1154:         mov     edx,ulLastDestScan
                   1155:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
                   1156:         jg      short @F        ;copy rectangle top is in this bank
                   1157:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
                   1158:                                                      ; of bank, at least
                   1159: @@:
                         ; EDX = ulCurrentDestScan - EDX + 1 (inclusive count).
                         
                   1160:         neg     edx
                   1161:         add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1162:         inc     edx                     ; the dest bank
                   1163: 
                   1164:         mov     eax,ulCurrentSrcScan
                   1165:         sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
                   1166:         inc     eax                     ;# of scans we can do in the src bank
                   1167: 
                         ; Take the smaller of the two counts. The compare is unsigned
                         ; (NOTE(review): assumes neither count can be negative here -- confirm).
                         
                   1168:         cmp     edx,eax
                   1169:         jb      short @F        ;source bank isn't limiting
                   1170:         mov     edx,eax         ;source bank is limiting
                   1171: @@:
                   1172:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1173: 
                   1174: ; We're ready to copy this block.
                   1175: 
                   1176:         THREAD_AND_START
                   1177: 
                   1178: ; Any more scans to copy?
                   1179: 
                   1180:         mov     eax,ulCurrentDestScan
                   1181:         mov     esi,ulBlockHeight
                   1182:         sub     eax,esi                 ;we've copied to dest up to here
                   1183:         cmp     ulLastDestScan,eax      ;are we past the dest rect top?
                   1184:         jg      short bot_2RW_done      ;yes, we're done
                   1185:         mov     ulCurrentDestScan,eax
                   1186: 
                   1187: ; Now advance either or both banks, as needed.
                   1188: 
                   1189:         mov     ebx,pdsurf              ;reload surface pointer after the copy
                   1190:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
                   1191:                                                      ; current dest bank?
                   1192:         jge     short bot_2RW_dest_bank_mapped    ;no, proper bank still mapped
                   1193: 
                   1194: ; Map bank containing the current dest scan line into dest window.
                   1195: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1196: 
                   1197:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1198:                 <ebx,eax,JustifyBottom,MapDestBank>
                   1199: 
                   1200: bot_2RW_dest_bank_mapped:
                   1201: 
                         ; ESI still holds ulBlockHeight, loaded above; EAX is reloaded here.
                         
                   1202:         mov     eax,ulCurrentSrcScan
                   1203:         sub     eax,esi         ;we've copied from source up to here
                   1204:         mov     ulCurrentSrcScan,eax
                   1205: 
                   1206:         cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
                   1207:                                                      ; current src bank?
                   1208:         jge     short bot_2RW_src_bank_mapped     ;no, proper bank still mapped
                   1209: 
                   1210: ; Map bank containing the current source scan line into source window.
                   1211: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1212: 
                   1213:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1214:                 <ebx,eax,JustifyBottom,MapSourceBank>
                   1215: 
                   1216: bot_2RW_src_bank_mapped:
                   1217: 
                   1218:         jmp     bot_2RW_bank_loop
                   1219: 
                   1220: bot_2RW_done:
                   1221:         PLAIN_RET
                   1222: 
                   1223: 
                   1224: ;-----------------------------------------------------------------------;
                   1225: ; Banking for 1R/1W adapters, top to bottom.
                         ;
                         ; Copies the rectangle in blocks, working downward. Each block is no
                         ; taller than what remains of the currently mapped dest bank, the
                         ; currently mapped source bank, or the rectangle itself. Before each
                         ; block, dsurf_ulWindowBank and dsurf_ulWindowBank[4] are compared: if
                         ; they are equal the block is copied with the default THREAD_AND_START
                         ; threading, otherwise with pCurrentThreadViaBuffer (through a temp
                         ; buffer).
                         ;
                         ; Reads ulCurrentSrcScan, ulCurrentDestScan, and ulLastDestScan, plus the
                         ; bank clip rectangles dsurf_rcl2WindowClipS and dsurf_rcl2WindowClipD in
                         ; the surface at pdsurf. Updates ulCurrentSrcScan, ulCurrentDestScan,
                         ; and ulBlockHeight. Banks are changed by calling through
                         ; dsurf_pfnBankControl2Window. The loop exits through PLAIN_RET when the
                         ; dest scan reached after a block equals ulLastDestScan.
                         ;
                         ; THREAD_AND_START and PLAIN_RET are macros defined elsewhere;
                         ; presumably THREAD_AND_START copies ulBlockHeight scans -- confirm
                         ; against the macro.
                   1226: ;-----------------------------------------------------------------------;
                   1227:         align   4
                   1228: top_to_bottom_1R1W:
                   1229: 
                   1230: ; We're going top to bottom. Map in the source and dest, top-justified.
                   1231: 
                   1232:         mov     ebx,pdsurf
                   1233:         mov     edx,ulCurrentSrcScan
                   1234:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
                   1235:                                                      ; current source bank?
                   1236:         jl      short top_1R1W_map_init_src_bank      ;yes, map in proper bank
                   1237:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
                   1238:                                                         ; current source bank?
                   1239:         jl      short top_1R1W_init_src_bank_mapped
                   1240:                                                 ;no, proper bank already mapped
                   1241: top_1R1W_map_init_src_bank:
                   1242: 
                   1243: ; Map bank containing the top source scan line into source window.
                   1244: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1245: 
                   1246:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1247:                 <ebx,edx,JustifyTop,MapSourceBank>
                   1248: 
                   1249: top_1R1W_init_src_bank_mapped:
                   1250: 
                   1251:         mov     edx,ulCurrentDestScan
                   1252:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
                   1253:                                                      ; current dest bank?
                   1254:         jl      short top_1R1W_map_init_dest_bank     ;yes, map in proper bank
                   1255:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
                   1256:                                                         ; current dest bank?
                   1257:         jl      short top_1R1W_init_dest_bank_mapped
                   1258:                                                 ;no, proper bank already mapped
                   1259: top_1R1W_map_init_dest_bank:
                   1260: 
                   1261: ; Map bank containing the top dest scan line into dest window.
                   1262: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1263: 
                   1264:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1265:                 <ebx,edx,JustifyTop,MapDestBank>
                   1266: 
                   1267: top_1R1W_init_dest_bank_mapped:
                   1268: 
                   1269: ; Bank-by-bank top-to-bottom copy loop.
                   1270: 
                   1271: top_1R1W_bank_loop:
                   1272: 
                   1273: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1274: 
                   1275:         mov     edx,ulLastDestScan
                   1276:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
                   1277:         jl      short @F        ;copy rectangle bottom is in this bank
                   1278:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
                   1279:                                                         ; of bank, at least
                   1280: @@:
                   1281:         sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1282:                                         ; the dest bank
                   1283:         mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
                   1284:         sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank
                   1285: 
                         ; Take the smaller of the two counts. The compare is unsigned
                         ; (NOTE(review): assumes neither count can be negative here -- confirm).
                         
                   1286:         cmp     edx,eax
                   1287:         jb      short @F        ;source bank isn't limiting
                   1288:         mov     edx,eax         ;source bank is limiting
                   1289: @@:
                   1290:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1291: 
                   1292: ; We're ready to copy this block.
                   1293: ; Select different threading, depending on whether the source and destination
                   1294: ; are currently in the same bank; we can do edges faster if they are.
                         ; The two dsurf_ulWindowBank entries are compared as full dwords here.
                         ; NOTE(review): bottom_to_top_1R1W makes the same test with byte-sized
                         ; compares -- confirm which operand size is intended.
                   1295: 
                   1296:         mov     eax,[ebx].dsurf_ulWindowBank
                   1297:         cmp     eax,[ebx].dsurf_ulWindowBank[4]
                   1298:         jz      short top_1R1W_copy_same_bank
                   1299: 
                   1300: ; Source and dest are currently in different banks, must go through temp buffer.
                         ; Presumably the macro's second argument names where execution resumes
                         ; after the copy, so the same-bank copy below is skipped -- confirm
                         ; against the macro definition.
                   1301: 
                   1302:         THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans
                   1303: 
                   1304: ; Source and dest are currently in the same bank.
                   1305: 
                   1306:         align   4
                   1307: top_1R1W_copy_same_bank:
                   1308:         THREAD_AND_START
                   1309: 
                   1310: ; Any more scans to copy?
                   1311: 
                   1312: top_1R1W_check_more_scans:
                   1313: 
                   1314:         mov     eax,ulCurrentDestScan
                   1315:         mov     esi,ulBlockHeight
                   1316:         add     eax,esi                 ;we've copied to dest up to here
                   1317:         cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
                   1318:         jz      short top_1R1W_done     ;yes, we're done
                   1319:         mov     ulCurrentDestScan,eax
                   1320: 
                   1321: ; Now advance either or both banks, as needed.
                   1322: 
                   1323:         mov     ebx,pdsurf              ;reload surface pointer after the copy
                   1324:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
                   1325:                                                         ; current dest bank?
                   1326:         jl      short top_1R1W_dest_bank_mapped   ;no, proper bank still mapped
                   1327: 
                   1328: ; Map bank containing the current dest scan line into dest window.
                   1329: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1330: 
                   1331:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1332:                 <ebx,eax,JustifyTop,MapDestBank>
                   1333: 
                   1334: top_1R1W_dest_bank_mapped:
                   1335: 
                         ; ESI still holds ulBlockHeight, loaded above.
                         
                   1336:         add     esi,ulCurrentSrcScan    ;we've copied from source up to here
                   1337:         mov     ulCurrentSrcScan,esi
                   1338: 
                   1339:         cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
                   1340:                                                         ; current src bank?
                   1341:         jl      short top_1R1W_src_bank_mapped     ;no, proper bank still mapped
                   1342: 
                   1343: ; Map bank containing the current source scan line into source window.
                   1344: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1345: 
                   1346:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1347:                 <ebx,esi,JustifyTop,MapSourceBank>
                   1348: 
                   1349: top_1R1W_src_bank_mapped:
                   1350: 
                   1351:         jmp     top_1R1W_bank_loop
                   1352: 
                   1353: top_1R1W_done:
                   1354:         PLAIN_RET
                   1355: 
                   1356: 
                   1357: ;-----------------------------------------------------------------------;
                   1358: ; Banking for 1R/1W adapters, bottom to top.
                   1359: ;-----------------------------------------------------------------------;
                   1360:         align   4
                   1361: bottom_to_top_1R1W:
                   1362: 
                   1363: ; We're going bottom to top. Map in the source and dest, bottom-justified.
                   1364: 
                   1365:         mov     ebx,pdsurf
                   1366:         mov     edx,ulCurrentSrcScan
                   1367:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
                   1368:                                                      ; current source bank?
                   1369:         jl      short bot_1R1W_map_init_src_bank      ;yes, map in proper bank
                   1370:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
                   1371:                                                         ; than current src bank?
                   1372:         jl      short bot_1R1W_init_src_bank_mapped
                   1373:                                                 ;no, proper bank already mapped
                   1374: bot_1R1W_map_init_src_bank:
                   1375: 
                   1376: ; Map bank containing the bottom source scan line into source window.
                   1377: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1378: 
                   1379:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1380:                 <ebx,edx,JustifyBottom,MapSourceBank>
                   1381: 
                   1382: bot_1R1W_init_src_bank_mapped:
                   1383: 
                   1384:         mov     edx,ulCurrentDestScan
                   1385:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
                   1386:                                                      ; current dest bank?
                   1387:         jl      short bot_1R1W_map_init_dest_bank     ;yes, map in proper bank
                   1388:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
                   1389:                                                         ; than current dst bank?
                   1390:         jl      short bot_1R1W_init_dest_bank_mapped
                   1391:                                                 ;no, proper bank already mapped
                   1392: bot_1R1W_map_init_dest_bank:
                   1393: 
                   1394: ; Map bank containing the bottom dest scan line into source window.
                   1395: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1396: 
                   1397:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1398:                 <ebx,edx,JustifyBottom,MapDestBank>
                   1399: 
                   1400: bot_1R1W_init_dest_bank_mapped:
                   1401: 
                   1402: ; Bank-by-bank bottom-to-top copy loop.
                   1403: 
                   1404: bot_1R1W_bank_loop:
                   1405: 
                   1406: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1407: 
                   1408:         mov     edx,ulLastDestScan
                   1409:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
                   1410:         jg      short @F        ;copy rectangle top is in this bank
                   1411:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
                   1412:                                                      ; of bank, at least
                   1413: @@:
                   1414:         neg     edx
                   1415:         add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1416:         inc     edx                     ; the dest bank
                   1417: 
                   1418:         mov     eax,ulCurrentSrcScan
                   1419:         sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
                   1420:         inc     eax                     ;# of scans we can do in the src bank
                   1421: 
                   1422:         cmp     edx,eax
                   1423:         jb      short @F        ;source bank isn't limiting
                   1424:         mov     edx,eax         ;source bank is limiting
                   1425: @@:
                   1426:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1427: 
                   1428: ; We're ready to copy this block.
                   1429: ; Select different threading, depending on whether the source and destination
                   1430: ; are currently in the same bank; we can do edges faster if they are.
                   1431: 
                   1432:         mov     al,byte ptr [ebx].dsurf_ulWindowBank
                   1433:         cmp     al,byte ptr [ebx].dsurf_ulWindowBank[4]
                   1434:         jz      short bot_1R1W_copy_same_bank
                   1435: 
                   1436: ; Source and dest are currently in different banks, must go through temp buffer.
                   1437: 
                   1438:         THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans
                   1439: 
                   1440: ; Source and dest are currently in the same bank.
                   1441: 
                   1442:         align   4
                   1443: bot_1R1W_copy_same_bank:
                   1444:         THREAD_AND_START
                   1445: 
                   1446: ; Any more scans to copy?
                   1447: 
                   1448:         align   4
                   1449: bot_1R1W_check_more_scans:
                   1450: 
                   1451:         mov     eax,ulCurrentDestScan
                   1452:         mov     esi,ulBlockHeight
                   1453:         sub     eax,esi                 ;we've copied to dest up to here
                   1454:         cmp     ulLastDestScan,eax      ;are we past the dest rect top?
                   1455:         jg      short bot_1R1W_done     ;yes, we're done
                   1456:         mov     ulCurrentDestScan,eax
                   1457: 
                   1458: ; Now advance either or both banks, as needed.
                   1459: 
                   1460:         mov     ebx,pdsurf
                   1461:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
                   1462:                                                      ; current dest bank?
                   1463:         jge     short bot_1R1W_dest_bank_mapped   ;no, proper bank still mapped
                   1464: 
                   1465: ; Map bank containing the current dest scan line into dest window.
                   1466: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1467: 
                   1468:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1469:                 <ebx,eax,JustifyBottom,MapDestBank>
                   1470: 
                   1471: bot_1R1W_dest_bank_mapped:
                   1472: 
                   1473:         mov     eax,ulCurrentSrcScan
                   1474:         sub     eax,esi         ;we've copied from source up to here
                   1475:         mov     ulCurrentSrcScan,eax
                   1476: 
                   1477:         cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
                   1478:                                                      ; current src bank?
                   1479:         jge     short bot_1R1W_src_bank_mapped    ;no, proper bank still mapped
                   1480: 
                   1481: ; Map bank containing the current source scan line into source window.
                   1482: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1483: 
                   1484:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1485:                 <ebx,eax,JustifyBottom,MapSourceBank>
                   1486: 
                   1487: bot_1R1W_src_bank_mapped:
                   1488: 
                   1489:         jmp     bot_1R1W_bank_loop
                   1490: 
                   1491: bot_1R1W_done:
                   1492:         PLAIN_RET
                   1493: 
                   1494: 
                   1495: ;-----------------------------------------------------------------------;
                   1496: ; Banking for 1 R/W (single-window) adapters, top to bottom.
                   1497: ;-----------------------------------------------------------------------;
                   1498:         align   4
                   1499: top_to_bottom_1RW:
                   1500: ; Copies in blocks sized to fit both the current dest bank and source bank.
                   1501: ; We're going top to bottom. Map in the dest, top-justified.
                   1502: 
                   1503:         mov     ebx,pdsurf
                   1504:         mov     esi,ulCurrentDestScan
                   1505:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest top less than
                   1506:                                                      ; current bank?
                   1507:         jl      short top_1RW_map_init_dest_bank     ;yes, map in proper bank
                   1508:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest top greater than
                   1509:                                                         ; current bank?
                   1510:         jl      short top_1RW_init_dest_bank_mapped
                   1511:                                                 ;no, proper bank already mapped
                   1512: top_1RW_map_init_dest_bank:
                   1513: 
                   1514: ; Map in the bank containing the top dest scan line, top-justified.
                   1515: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1516: 
                   1517:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
                   1518: 
                   1519: top_1RW_init_dest_bank_mapped:
                   1520: 
                   1521: ; Bank-by-bank top-to-bottom copy loop.
                   1522: 
                   1523: top_1RW_bank_loop:
                   1524: ; At this point EBX = pdsurf and ESI = ulCurrentDestScan.
                   1525: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1526: ; ulLastDestScan and the bank's yBottom are treated as exclusive limits here.
                   1527:         mov     edi,ulLastDestScan
                   1528:         cmp     edi,[ebx].dsurf_rcl1WindowClip.yBottom
                   1529:         jl      short @F        ;copy rectangle bottom is in this bank
                   1530:         mov     edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end
                   1531:                                                        ; of bank, at least
                   1532: @@:
                   1533:         sub     edi,esi   ;# of scans we can and want to do in the dest bank
                   1534: 
                   1535: ; Now make sure source is mapped in. This is the condition the copying routines
                   1536: ; expect, and we need to figure out how far we can go in the source.
                   1537: 
                   1538:         sub     edx,edx                 ;assume source and dest are in the same
                   1539:                                         ; bank
                   1540:         mov     esi,ulCurrentSrcScan
                   1541:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
                   1542:                                                     ; current bank?
                   1543:         jl      short top_1RW_map_src_Bank          ;yes, must map in
                   1544:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
                   1545:                                                        ; current bank?
                   1546:         jl      short top_1RW_src_bank_mapped     ;no, proper bank still mapped
                   1547: 
                   1548: top_1RW_map_src_Bank:
                   1549: 
                   1550: ; Map in the bank containing the current source scan line, top-justified.
                   1551: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1552: 
                   1553:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
                   1554: 
                   1555:         mov     edx,1                   ;mark that source and dest are not in
                   1556:                                         ; the same bank
                   1557: top_1RW_src_bank_mapped:
                   1558: 
                   1559:         mov     eax,[ebx].dsurf_rcl1WindowClip.yBottom
                   1560:         sub     eax,esi         ;# of scans we can do in the src bank
                   1561: ; Take the smaller (unsigned) of the dest-bank and source-bank scan counts.
                   1562:         cmp     edi,eax
                   1563:         jb      short @F        ;source bank isn't limiting
                   1564:         mov     edi,eax         ;source bank is limiting
                   1565: @@:
                   1566:         mov     ulBlockHeight,edi ;# of scans we'll do in this bank
                   1567: 
                   1568: ; We're ready to copy this block.
                   1569: ; Select different threading, depending on whether the source and destination
                   1570: ; are currently in the same bank; we can do edges faster if they are.
                   1571: 
                   1572:         and     edx,edx
                   1573:         jz      short top_1RW_copy_same_bank
                   1574: 
                   1575: ; Source and dest are currently in different banks, must go through temp buffer.
                   1576: 
                   1577:         THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans
                   1578: 
                   1579: ; Source and dest are currently in the same bank.
                   1580: 
                   1581:         align   4
                   1582: top_1RW_copy_same_bank:
                   1583:         THREAD_AND_START
                   1584: 
                   1585: ; Any more scans to copy?
                   1586: 
                   1587: top_1RW_check_more_scans:
                   1588: 
                   1589:         mov     esi,ulCurrentDestScan
                   1590:         mov     edi,ulBlockHeight
                   1591:         add     esi,edi                 ;we've copied to dest up to here
                   1592:         cmp     ulLastDestScan,esi      ;are we at the dest rect bottom?
                   1593:         jz      short top_1RW_done      ;yes, we're done
                   1594:         mov     ulCurrentDestScan,esi
                   1595: 
                   1596: ; Now make sure the dest bank is mapped in.
                   1597: 
                   1598:         mov     ebx,pdsurf
                   1599:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
                   1600:                                                     ; current bank?
                   1601:         jl      short top_1RW_map_dest_bank         ;yes, map in dest bank
                   1602:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
                   1603:                                                         ; current bank?
                   1604:         jl      short top_1RW_dest_bank_mapped   ;no, proper bank mapped
                   1605: 
                   1606: top_1RW_map_dest_bank:
                   1607: 
                   1608: ; Map in the bank containing the current dest scan line, top-justified.
                   1609: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1610: 
                   1611:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
                   1612: 
                   1613: top_1RW_dest_bank_mapped:
                   1614: ; ESI still holds ulCurrentDestScan, as the top of the bank loop expects.
                   1615:         add     ulCurrentSrcScan,edi    ;we've copied from source up to here
                   1616: 
                   1617:         jmp     top_1RW_bank_loop
                   1618: 
                   1619: top_1RW_done:
                   1620:         PLAIN_RET
                   1621: 
                   1622: 
                   1623: ;-----------------------------------------------------------------------;
                   1624: ; Banking for 1 R/W (single-window) adapters, bottom to top.
                   1625: ;-----------------------------------------------------------------------;
                   1626:         align   4
                   1627: bottom_to_top_1RW:
                   1628: ; Copies in blocks sized to fit both the current dest bank and source bank.
                   1629: ; We're going bottom to top. Map in the dest, bottom-justified.
                   1630: 
                   1631:         mov     ebx,pdsurf
                   1632:         mov     esi,ulCurrentDestScan
                   1633:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest bottom less than
                   1634:                                                      ; current dest bank?
                   1635:         jl      short bot_1RW_map_init_dest_bank     ;yes, map in proper bank
                   1636:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest bottom greater
                   1637:                                                        ; than current dst bank?
                   1638:         jl      short bot_1RW_init_dest_bank_mapped
                   1639:                                                 ;no, proper bank already mapped
                   1640: bot_1RW_map_init_dest_bank:
                   1641: 
                   1642: ; Map in the bank containing the bottom dest scan line, bottom-justified.
                   1643: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1644: 
                   1645:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
                   1646: 
                   1647: bot_1RW_init_dest_bank_mapped:
                   1648: 
                   1649: ; Bank-by-bank bottom-to-top copy loop.
                   1650: 
                   1651: bot_1RW_bank_loop:
                   1652: ; At this point EBX = pdsurf and ESI = ulCurrentDestScan.
                   1653: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1654: ; ulLastDestScan and the bank's yTop are treated as inclusive limits here.
                   1655:         mov     edi,ulLastDestScan
                   1656:         cmp     edi,[ebx].dsurf_rcl1WindowClip.yTop
                   1657:         jg      short @F        ;copy rectangle top is in this bank
                   1658:         mov     edi,[ebx].dsurf_rcl1WindowClip.yTop ;dest extends to end
                   1659:                                                     ; of bank, at least
                   1660: @@:
                   1661:         neg     edi
                   1662:         add     edi,esi                 ;# of scans we can and want to do in
                   1663:         inc     edi                     ; the dest bank
                   1664: 
                   1665: ; Now make sure source is mapped in. This is the condition the copying routines
                   1666: ; expect, and we need to figure out how far we can go in the source.
                   1667: 
                   1668:         sub     edx,edx                 ;assume source and dest are in the same
                   1669:                                         ; bank
                   1670:         mov     esi,ulCurrentSrcScan
                   1671:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
                   1672:                                                     ; current bank?
                   1673:         jl      short bot_1RW_map_src_Bank          ;yes, must map in
                   1674:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
                   1675:                                                        ; current bank?
                   1676:         jl      short bot_1RW_src_bank_mapped     ;no, proper bank still mapped
                   1677: 
                   1678: bot_1RW_map_src_Bank:
                   1679: 
                   1680: ; Map in the bank containing the current source scan line, bottom-justified.
                   1681: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1682: 
                   1683:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
                   1684: 
                   1685:         mov     edx,1                   ;mark that source and dest are not in
                   1686:                                         ; the same bank
                   1687: bot_1RW_src_bank_mapped:
                   1688: 
                   1689:         sub     esi,[ebx].dsurf_rcl1WindowClip.yTop
                   1690:         inc     esi                     ;# of scans we can do in the src bank
                   1691: ; Take the smaller (unsigned) of the dest-bank and source-bank scan counts.
                   1692:         cmp     edi,esi
                   1693:         jb      short @F        ;source bank isn't limiting
                   1694:         mov     edi,esi         ;source bank is limiting
                   1695: @@:
                   1696:         mov     ulBlockHeight,edi ;# of scans we'll do in this bank
                   1697: 
                   1698: ; We're ready to copy this block.
                   1699: ; Select different threading, depending on whether the source and destination
                   1700: ; are currently in the same bank; we can copy much faster if they are.
                   1701: 
                   1702:         and     edx,edx
                   1703:         jz      short bot_1RW_copy_same_bank
                   1704: 
                   1705: ; Source and dest are currently in different banks, must go through temp buffer.
                   1706: 
                   1707:         THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans
                   1708: 
                   1709: ; Source and dest are currently in the same bank.
                   1710: 
                   1711:         align   4
                   1712: bot_1RW_copy_same_bank:
                   1713:         THREAD_AND_START
                   1714: 
                   1715: ; Any more scans to copy?
                   1716: 
                   1717:         align   4
                   1718: bot_1RW_check_more_scans:
                   1719: 
                   1720:         mov     esi,ulCurrentDestScan
                   1721:         mov     edi,ulBlockHeight
                   1722:         sub     esi,edi                 ;we've copied to dest up to here
                   1723:         cmp     ulLastDestScan,esi      ;are we past the dest rect top?
                   1724:         jg      short bot_1RW_done      ;yes, we're done
                   1725:         mov     ulCurrentDestScan,esi
                   1726: 
                   1727: ; Now make sure the dest bank is mapped in.
                   1728: 
                   1729:         mov     ebx,pdsurf
                   1730:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
                   1731:                                                     ; current bank?
                   1732:         jl      short bot_1RW_map_dest_bank         ;yes, map in dest bank
                   1733:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
                   1734:                                                         ; current bank?
                   1735:         jl      short bot_1RW_dest_bank_mapped   ;no, proper bank mapped
                   1736: 
                   1737: bot_1RW_map_dest_bank:
                   1738: 
                   1739: ; Map in the bank containing the current dest scan line, bottom-justified.
                   1740: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1741: 
                   1742:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
                   1743: 
                   1744: bot_1RW_dest_bank_mapped:
                   1745: ; ESI still holds ulCurrentDestScan, as the top of the bank loop expects.
                   1746:         sub     ulCurrentSrcScan,edi    ;we've copied from source up to here
                   1747: 
                   1748:         jmp     bot_1RW_bank_loop
                   1749: 
                   1750: bot_1RW_done:
                   1751:         PLAIN_RET
                   1752: 
                   1753: 
                   1754: ;***********************************************************************;
                   1755: ;
                   1756: ; The following routines are the low-level copying routines. They know
                   1757: ; almost nothing about banks (the routines that copy through a temp
                   1758: ; buffer know how to switch banks after filling the temp buffer, but
                   1759: ; that's it). Banking should be taken care of at a higher level.
                   1760: ;
                   1761: ;***********************************************************************;
                   1762: 
                   1763: ;-----------------------------------------------------------------------;
                   1764: ; Copies a block of solid bytes from the source to the destination via the
                   1765: ; latches. Can only be used by 2 R/W or 1R/1W window banking, or by
                   1766: ; unbanked modes, or by 1 R/W adapters when the source and dest are in the
                   1767: ; same bank. 1 R/W adapters must go through an intermediate local buffer
                   1768: ; when the source and the destination aren't in the same bank.
                   1769: ;
                   1770: ; Input:
                   1771: ;       Direction Flag set for desired direction of copy
                   1772: ;       culWholeBytesWidth = # of bytes to copy across each scan line
                   1773: ;       ulWholeScanDelta = distance to start of next scan from end of current
                   1774: ;       ulBlockHeight = # of scans to copy
                   1775: ;       ulWholeBytesSrc = start source offset in bitmap
                   1776: ;       ulWholeBytesDest = start dest offset in bitmap
                   1777: ;
                   1778: ; Output:
                   1779: ;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
                   1780: ;               scan processed
                   1781: ;-----------------------------------------------------------------------;
                   1782: 
                   1783:         align   4
                   1784: copy_whole_bytes:
                   1785: 
                   1786: ; Set the bit mask to disable all bits, so we can copy through the latches.
                   1787: ; One 16-bit OUT: AL = GRAF_BIT_MASK index, AH = 00h mask value.
                   1788:         mov     edx,VGA_BASE + GRAF_ADDR
                   1789:         mov     eax,(000h shl 8) + GRAF_BIT_MASK
                   1790:         out     dx,ax
                   1791: 
                   1792: ; Set Map Mask to enable writes to all planes.
                   1793: ; NOTE(review): writes SEQ_DATA only; assumes Sequencer index = Map Mask.
                   1794:         mov     dl,SEQ_DATA
                   1795:         mov     al,MM_ALL
                   1796:         out     dx,al
                   1797: 
                   1798: ; Set up to copy the whole bytes via the latches.
                   1799: 
                   1800:         mov     eax,culWholeBytesWidth
                   1801:         mov     ebx,ulBlockHeight
                   1802:         mov     edx,ulWholeScanDelta
                   1803: 
                   1804: ; Calculate full start addresses.
                   1805: 
                   1806:         mov     ecx,pdsurf
                   1807:         mov     esi,ulWholeBytesSrc
                   1808:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   1809:         mov     edi,ulWholeBytesDest
                   1810:         add     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   1811: ; Macro (defined elsewhere) presumably leaves the loop entry point in ECX.
                   1812:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeLatchesEntry, \
                   1813:                                 LOOP_UNROLL_SHIFT
                   1814:         jmp    ecx      ;copy the whole bytes
                   1815: 
                   1816: ;-----------------------------------------------------------------------;
                   1817: ; Table of unrolled whole latched loop entry points.
                   1818: ;-----------------------------------------------------------------------;
                   1819: 
                   1820:         UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeLatchesEntry,WHOLE_LATCHES, \
                   1821:                                 LOOP_UNROLL_COUNT
                   1822: 
                   1823: ;-----------------------------------------------------------------------;
                   1824: ; Unrolled loop for copying a block of whole bytes via the latches.
                   1825: ;-----------------------------------------------------------------------;
                   1826: 
                   1827: COPY_WHOLE_LATCHES macro ENTRY_LABEL,ENTRY_INDEX
                   1828: &ENTRY_LABEL&ENTRY_INDEX&:
                   1829:         mov     ecx,eax         ;# of whole bytes to copy
                   1830:         rep     movsb           ;copy the bytes via the latches
                   1831:         add     esi,edx         ;point to next source scan
                   1832:         add     edi,edx         ;point to next dest scan
                   1833:         endm    ;-----------------------------------;
                   1834: 
                   1835: ;  EAX = # of bytes to copy
                   1836: ;  EBX = count of unrolled loop iterations
                   1837: ;  EDX = offset from end of one scan's copy to start of next
                   1838: ;  ESI = source address to copy from
                   1839: ;  EDI = target address to copy to
                   1840: 
                   1841:         align   4
                   1842: whole_latches_loop:
                   1843:         UNROLL_LOOP     COPY_WHOLE_LATCHES,WHOLE_LATCHES,LOOP_UNROLL_COUNT
                   1844:         dec     ebx
                   1845:         jnz     whole_latches_loop
                   1846: 
                   1847: ; Remember where we left off, for next time.
                   1848: ; Convert the addresses back to offsets within the bitmap before storing.
                   1849:         mov     ecx,pdsurf
                   1850:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   1851:         mov     ulWholeBytesSrc,esi
                   1852:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   1853:         mov     ulWholeBytesDest,edi
                   1854: 
                   1855:         PLAIN_RET
                   1856: 
                   1857: 
                   1858: ;-----------------------------------------------------------------------;
                   1859: ; Copies a block of solid bytes from the source to the destination via
                   1860: ; the temp buffer. This should only be used by 1 R/W adapters, and then
                   1861: ; only when the source and dest are in different banks.
                   1862: ;
                   1863: ; All relevant bytes are copied from the source to a temp buffer that's an
                   1864: ; image of the source first. Then, we copy each of the four planes for one scan
                   1865: ; line from the temp buffer to the screen before going on to the next scan line.
                   1866: ; It would be faster to do all scans in one plane, then all in the next, and so
                   1867: ; on, but that would give nasty color effects from pixels that were changed in
                   1868: ; some planes but not in others. A compromise would be to do several scans at a
                   1869: ; pop per plane, as is done with the edge bytes; however, given that there can
                   1870: ; be 128 (or more) bytes across a single whole-bytes scan, if we do 16 scan
                   1871: ; lines per chunk, we're going to be performing up to 128*4*16 accesses per
                   1872: ; chunk; at an assumed 1 microsecond per access, that's 8 milliseconds per
                   1873: ; chunk, or about 1/2 of a frame time. We're definitely going to see flicker or
                   1874: ; sparkles from partially updated bytes at that point, in my opinion. Another
                   1875: ; alternative would be to dynamically adjust the number of scans processed at a
                   1876: ; pop per plane, depending on the copy width, with more scans copied for
                   1877: ; narrower widths. For all but very narrow copies, though, it seems to me that
                   1878: ; the actual copy time would far outweigh the time for the OUTs to switch
                   1879: ; planes, and the return for some rather complex code would be marginal.
                   1880: ;
                   1881: ; It would be nice if we copied bytes a word or dword at a time. However, it
                   1882: ; becomes rather complex handling fractional words or dwords, especially when
                   1883: ; copying right-to-left, so this is left for LATER. I haven't unrolled these
                   1884: ; loops because of the possibility of this further word/dword optimization;
                   1885: ; no point in fine-tuning sub-optimal code.
                   1886: ;
                   1887: ; Input:
                   1888: ;       Direction Flag set for desired direction of copy
                   1889: ;       culWholeBytesWidth = # of bytes to copy across each scan line
                   1890: ;       ulWholeScanDelta = distance to start of next scan from end of current
                   1891: ;       ulBlockHeight = # of scans to copy
                   1892: ;       ulWholeBytesSrc = start source offset in bitmap
                   1893: ;       ulWholeBytesDest = start dest offset in bitmap
                   1894: ;       ppTempPlane0 = pointer to pointer to plane 0 storage in temp buffer
                   1895: ;       ppTempPlane3 = pointer to pointer to plane 3 storage in temp buffer
                   1896: ;       Expects the source bank to be mapped in; source bank is mapped in on
                   1897: ;               exit
                   1898: ;
                   1899: ; Output:
                   1900: ;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
                   1901: ;               scan processed
                   1902: ;-----------------------------------------------------------------------;
                   1903: 
                   1904:         align   4
                   1905: copy_whole_bytes_via_buffer:
                         ; Two passes: first read the block from the mapped-in source bank into
                         ; the per-plane temp buffers (all scans of plane 3, then 2, 1, 0); then
                         ; map in the destination bank and write the block out of the temp
                         ; buffers (planes 3..0 for each scan in turn). The source bank is mapped
                         ; back in before returning.
                   1906: 
                   1907: ; Calculate start source address from bitmap start address and offset within
                   1908: ; bitmap.
                   1909: 
                   1910:         mov     ecx,pdsurf
                   1911:         mov     eax,ulWholeBytesSrc
                   1912:         add     eax,[ecx].dsurf_pvBitmapStart
                   1913:         mov     pSrcAddr,eax
                   1914:         sub     eax,[ecx].dsurf_pvStart
                   1915:         mov     ulOffsetInBank,eax ;will come in handy because we treat the
                   1916:                                    ; temp buffer as an image of the current
                   1917:                                    ; bank
                   1918: 
                   1919: ; First, copy all the bytes into the temporary buffer.
                   1920: 
                   1921: ; Leave the GC Index pointing to the Read Map.
                   1922: 
                   1923:         mov     edx,VGA_BASE + GRAF_ADDR
                   1924:         mov     al,GRAF_READ_MAP
                   1925:         out     dx,al
                   1926: 
                   1927:         mov     eax,3           ;start by copying plane 3
                   1928: copy_whole_to_buffer_plane_loop:
                   1929:         mov     ebx,ulBlockHeight  ;# of scans to copy
                   1930:         mov     esi,pSrcAddr       ;source offset in screen
                   1931:         mov     edi,ppTempPlane0   ;table of per-plane temp buffer pointers
                   1932:         mov     edi,[edi+eax*4]    ;pointer to current plane in temp buffer
                   1933:         add     edi,ulOffsetInBank ;dest for plane in temp buffer
                   1934: 
                   1935:         mov     edx,VGA_BASE + GRAF_DATA
                   1936:         out     dx,al            ;set Read Map to plane we're copying from.
                   1937: 
                   1938:         push    eax             ;remember plane index (EAX is reused below)
                   1939:         mov     eax,ulWholeScanDelta ;offset to next scan
                   1940:         mov     edx,culWholeBytesWidth ;# of bytes per scan
                   1941: copy_whole_to_buffer_scan_loop:
                   1942:         mov     ecx,edx         ;# of bytes per scan
                   1943:         rep     movsb           ;copy the scan line to the temp buffer
                   1944:         add     esi,eax         ;point to next source scan
                   1945:         add     edi,eax         ;point to next dest scan (same delta, since
                                                         ; the buffer mirrors the bank layout)
                   1946: 
                   1947:         dec     ebx              ;count down scan lines
                   1948:         jnz     copy_whole_to_buffer_scan_loop
                   1949: 
                   1950:         pop     eax             ;get back plane index
                   1951:         dec     eax             ;count down planes
                   1952:         jns     copy_whole_to_buffer_plane_loop ;loop until index goes negative
                   1953: 
                   1954: ; Remember where we left off, for next time.
                   1955: 
                   1956:         mov     ebx,pdsurf
                   1957:         sub     esi,[ebx].dsurf_pvBitmapStart ;address -> offset within bitmap
                   1958:         mov     ulWholeBytesSrc,esi
                   1959: 
                   1960: 
                   1961: ; Now copy the temp buffer to the screen.
                   1962: 
                   1963: ; Map in the destination bank, so we can read/write to it  and let the Bit Mask
                   1964: ; work.
                   1965: 
                   1966:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
                   1967:                 <ebx,ulCurrentDestScan,ulCurrentJustification>
                   1968: 
                   1969: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
                   1970: ; until now to calculate this, because the dest bank wasn't mapped earlier).
                   1971: 
                   1972:         mov     eax,ulWholeBytesDest
                   1973:         add     eax,[ebx].dsurf_pvBitmapStart
                   1974:         mov     pDestAddr,eax
                   1975: 
                   1976: ; Set the bit mask to enable all bits.
                   1977: 
                   1978:         mov     edx,VGA_BASE + GRAF_ADDR
                   1979:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
                   1980:         out     dx,ax           ;AL = GC index, AH = 0ffh data, in one word OUT
                   1981: 
                         ; NOTE(review): no write to the SC Index is visible in this routine;
                         ; presumably it already selects the Map Mask on entry - confirm.
                   1982:         mov     dl,SEQ_DATA     ;leave DX pointing to the SC Data reg
                   1983: 
                   1984: ; Set up to copy the whole bytes from the buffer.
                   1985: 
                   1986:         mov     eax,ulBlockHeight ;# of scans to copy
                   1987:         mov     culTempCount,eax
                   1988: 
                   1989: copy_whole_from_buffer_scan_loop:
                   1990: 
                   1991:         mov     ebx,ppTempPlane3  ;point to plane 3's temp buffer offset
                   1992:         mov     al,MM_C3        ;start by copying plane 3
                   1993: 
                   1994: copy_whole_from_buffer_plane_loop:
                   1995: 
                   1996: ; Set Map Mask to enable writes to the plane we're copying.
                   1997: 
                   1998:         out     dx,al
                   1999: 
                   2000: ; Select the corresponding plane from the temp buffer.
                   2001: 
                   2002:         mov     esi,[ebx]          ;point to plane start in temp buffer
                   2003:         add     esi,ulOffsetInBank ;point to current scan start in temp buffer
                   2004:         mov     edi,pDestAddr      ;point to destination start
                   2005: 
                   2006:         mov     ecx,culWholeBytesWidth  ;# of whole bytes to copy
                   2007:         rep     movsb           ;copy the bytes from the buffer to the screen
                   2008: 
                   2009: ; Do next plane, if any.
                   2010: 
                   2011:         sub     ebx,4                   ;point to next temp buffer plane ptr
                   2012:         shr     al,1                    ;advance to next plane
                   2013:         jnz     copy_whole_from_buffer_plane_loop ;done once AL reaches zero
                   2014: 
                   2015: ; Remember where we left off, for next scan.
                         ; EDI is where the last plane's rep movsb left it, i.e. at the end of
                         ; the scan just written (every plane started from the same pDestAddr).
                   2016: 
                   2017:         add     edi,ulWholeScanDelta    ;point to next dest scan
                   2018:         mov     pDestAddr,edi
                   2019:         mov     eax,ulNextScan
                   2020:         add     ulOffsetInBank,eax      ;next scan's start in temp buffer,
                   2021:                                         ; relative to start of plane's storage
                   2022: 
                   2023: ; Count down scan lines.
                   2024: 
                   2025:         dec     culTempCount
                   2026:         jnz     copy_whole_from_buffer_scan_loop
                   2027: 
                   2028: ; Remember where we left off, for next time.
                   2029: 
                   2030:         mov     ebx,pdsurf
                   2031:         sub     edi,[ebx].dsurf_pvBitmapStart ;address -> offset within bitmap
                   2032:         mov     ulWholeBytesDest,edi
                   2033: 
                   2034: ; Put back the original source bank.
                   2035: 
                   2036:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
                   2037:                 <ebx,ulCurrentSrcScan,ulCurrentJustification>
                   2038: 
                   2039:         PLAIN_RET
                   2040: 
                   2041: 
                   2042: ;-----------------------------------------------------------------------;
                   2043: ; copy_left_edge: copies the left-edge strip of bytes straight from the
                   2044: ; source to the destination on the screen. Both must be readable and
                   2045: ; writable at the same time, so only 2 R/W window banking and unbanked
                   2046: ; modes may use this routine; 1 R/W and 1R/1W adapters have to stage
                   2047: ; through a local buffer whenever source and dest are in different banks.
                   2048: ; Up to EDGE_CHUNK_SIZE bytes are handled in each plane at a time; more
                   2049: ; bytes might cause flicker.
                   2050: ;
                   2051: ; Input:
                   2052: ;       ulNextScan = width of scan, in bytes
                   2053: ;       ulBlockHeight = # of scans to copy
                   2054: ;       ulLeftEdgeSrc = source start, as an offset in the bitmap
                   2055: ;       ulLeftEdgeDest = dest start, as an offset in the bitmap
                   2056: ;       jLeftMask = clip mask for the left edge
                   2057: ;
                   2058: ; Output:
                   2059: ;       ulLeftEdgeSrc and ulLeftEdgeDest are advanced to the scan that
                   2060: ;               follows the last scan processed
                   2061: ;-----------------------------------------------------------------------;
                   2062: 
                   2063:         align   4
                   2064: copy_left_edge:
                   2065: 
                   2066: ; Turn the bitmap-relative offsets into dest and source addresses by adding
                   2067: ; the corresponding bitmap start addresses.
                   2068: 
                   2069:         mov     ecx,pdsurf
                   2070:         mov     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2071:         add     edi,ulLeftEdgeDest      ;EDI = dest address
                   2072:         mov     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2073:         add     esi,ulLeftEdgeSrc       ;ESI = source address
                   2074: 
                   2075: ; Hand the strip to the common edge copier.
                   2076: 
                   2077:         mov     ah,byte ptr jLeftMask   ;AH = Bit Mask setting for this edge
                   2078:         call    copy_edge
                   2079: 
                   2080: ; Convert the advanced addresses back to offsets and save them for next time.
                   2081: 
                   2082:         mov     ecx,pdsurf
                   2083:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2084:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2085:         mov     ulLeftEdgeSrc,esi
                   2086:         mov     ulLeftEdgeDest,edi
                   2087: 
                   2088:         PLAIN_RET
                   2089: 
                   2090: 
                   2091: ;-----------------------------------------------------------------------;
                   2092: ; copy_right_edge: copies the right-edge strip of bytes straight from the
                   2093: ; source to the destination on the screen. Both must be readable and
                   2094: ; writable at the same time, so only 2 R/W window banking and unbanked
                   2095: ; modes may use this routine; 1 R/W and 1R/1W adapters have to stage
                   2096: ; through a local buffer whenever source and dest are in different banks.
                   2097: ; Up to EDGE_CHUNK_SIZE bytes are handled in each plane at a time; more
                   2098: ; bytes might cause flicker.
                   2099: ;
                   2100: ; Input:
                   2101: ;       ulNextScan = width of scan, in bytes
                   2102: ;       ulBlockHeight = # of scans to copy
                   2103: ;       ulRightEdgeSrc = source start, as an offset in the bitmap
                   2104: ;       ulRightEdgeDest = dest start, as an offset in the bitmap
                   2105: ;       jRightMask = clip mask for the right edge
                   2106: ;
                   2107: ; Output:
                   2108: ;       ulRightEdgeSrc and ulRightEdgeDest are advanced to the scan that
                   2109: ;               follows the last scan processed
                   2110: ;-----------------------------------------------------------------------;
                   2111: 
                   2112:         align   4
                   2113: copy_right_edge:
                   2114: 
                   2115: ; Turn the bitmap-relative offsets into dest and source addresses by adding
                   2116: ; the corresponding bitmap start addresses.
                   2117: 
                   2118:         mov     ecx,pdsurf
                   2119:         mov     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2120:         add     edi,ulRightEdgeDest     ;EDI = dest address
                   2121:         mov     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2122:         add     esi,ulRightEdgeSrc      ;ESI = source address
                   2123: 
                   2124: ; Hand the strip to the common edge copier.
                   2125: 
                   2126:         mov     ah,byte ptr jRightMask  ;AH = Bit Mask setting for this edge
                   2127:         call    copy_edge
                   2128: 
                   2129: ; Convert the advanced addresses back to offsets and save them for next time.
                   2130: 
                   2131:         mov     ecx,pdsurf
                   2132:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2133:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2134:         mov     ulRightEdgeSrc,esi
                   2135:         mov     ulRightEdgeDest,edi
                   2136: 
                   2137:         PLAIN_RET
                   2138: 
                   2139: 
                   2140: ;-----------------------------------------------------------------------;
                   2141: ; Copies an edge from the source to the destination on the screen.
                   2142: ; The edge is processed in chunks of up to EDGE_CHUNK_SIZE scans; within
                   2143: ; each chunk all four planes (3 down to 0) are copied before moving on.
                         ; Used for both the left and the right edge.
                         ; Entry:
                         ;       AH = bit mask setting for edge
                   2144: ;       ESI = source address
                   2145: ;       EDI = destination address
                   2146: ;       ulBlockHeight = # of bytes to copy per plane (one byte per scan);
                         ;               must be at least 1, because a count of 0 would index
                         ;               one dword before pfnCopyEdgeRWEntry
                   2147: ;       ulNextScan = scan width
                   2148: ;       Source readable, and destination readable and writable
                   2149: ; Exit:
                   2150: ;       ESI = next source address
                   2151: ;       EDI = next destination address
                   2152: ;
                   2153: ; Preserved: EBP
                         ; Written: EAX, EBX, ECX, EDX, pSrcAddr, pDestAddr
                   2154: ;-----------------------------------------------------------------------;
                   2155: 
                   2156:         align   4
                   2157: copy_edge:
                   2158:         mov     pSrcAddr,esi
                   2159:         mov     pDestAddr,edi
                   2160: 
                   2161: ; Set the clip mask for this edge.
                   2162: 
                   2163:         mov     edx,VGA_BASE + GRAF_ADDR
                   2164:         mov     al,GRAF_BIT_MASK
                   2165:         out     dx,ax           ;AL = GC index, AH = mask, in one word OUT
                   2166: 
                   2167: ; Leave the GC Index pointing to the Read Map.
                   2168: 
                   2169:         mov     al,GRAF_READ_MAP
                   2170:         out     dx,al
                   2171: 
                   2172:         mov     ecx,offset copy_edge_rw_full_chunk
                   2173:                                 ;entry point into unrolled loop to copy first
                   2174:                                 ; chunk, assuming it's a full chunk
                   2175:         mov     ebx,ulBlockHeight
                   2176: 
                   2177: ; Copy the edge in a series of chunks.
                   2178: 
                   2179: copy_edge_chunk_loop:
                   2180: 
                   2181:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                   2182:                                     ; a full chunk
                   2183:         jge     short @F            ;do a full chunk
                   2184:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                   2185:                                     ; scans
                   2186:         mov     ecx,pfnCopyEdgeRWEntry[-4][ebx*4]
                   2187:                                 ;entry point into unrolled loop to copy desired
                   2188:                                 ; chunk size (table is 1-based, hence the -4)
                   2189:         sub     ebx,ebx         ;no scans after this
                   2190: @@:
                   2191:         push    ebx             ;remember remaining scan count
                   2192: 
                   2193:         mov     ah,MM_C3        ;start by copying plane 3
                   2194:         mov     ebx,ulNextScan  ;EBX = scan width, used by the unrolled loop
                   2195: 
                   2196: copy_edge_plane_loop:
                   2197: 
                   2198: ; Set Map Mask to enable writes to plane we're copying.
                         ; NOTE(review): no write to the SC Index is visible in this routine;
                         ; presumably it already selects the Map Mask on entry - confirm.
                   2199: 
                   2200:         mov     al,ah
                   2201:         mov     dl,SEQ_DATA
                   2202:         out     dx,al
                   2203: 
                   2204: ; Set Read Map to same plane.
                         ; Turns the one-bit Map Mask value into a plane index: the shift gives
                         ; the right index for the three lower bits, and 100b (plane 3) is then
                         ; corrected to 3.
                   2205: 
                   2206:         shr     al,1                    ;map plane into ReadMask
                   2207:         cmp     al,100b                 ;set Carry if not C3 (plane 3)
                   2208:         adc     al,-1                   ;sub 1 only if C3
                   2209:         mov     dl,GRAF_DATA
                   2210:         out     dx,al
                   2211: 
                         ; Every plane restarts from the chunk's start addresses; pSrcAddr and
                         ; pDestAddr are only advanced after the last plane of the chunk.
                   2212:         mov     esi,pSrcAddr
                   2213:         mov     edi,pDestAddr
                   2214: 
                   2215:         jmp     ecx                     ;enter the unrolled loop to copy the edge
                   2216: 
                   2217: 
                   2218: ;-----------------------------------------------------------------------;
                   2219: ; Table of unrolled edge loop entry points. First entry point is to copy
                   2220: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
                         ; NOTE(review): DEFINE_DD is defined elsewhere; presumably it emits a
                         ; dword holding the address of label EDGE_RW<index> - confirm.
                   2221: ;-----------------------------------------------------------------------;
                   2222: 
                   2223: pfnCopyEdgeRWEntry label dword
                   2224: INDEX = 1
                   2225:         rept    EDGE_CHUNK_SIZE
                   2226:         DEFINE_DD       EDGE_RW,%INDEX
                   2227: INDEX = INDEX+1
                   2228:         endm
                   2229: 
                   2230: 
                   2231: ;-----------------------------------------------------------------------;
                   2232: ; Unrolled loop for copying a strip of edge bytes, with source and
                   2233: ; destination both readable and writable.
                         ; Each COPY_EDGE_RW expansion copies one byte and steps both pointers
                         ; down by one scan (EBX).
                   2234: ;-----------------------------------------------------------------------;
                   2235: 
                   2236: COPY_EDGE_RW macro ENTRY_LABEL,ENTRY_INDEX
                   2237: &ENTRY_LABEL&ENTRY_INDEX&:
                   2238:         mov     al,[esi]        ;get byte to copy
                   2239:         add     esi,ebx         ;point to next source scan
                   2240:         xchg    [edi],al        ;read before write so Bit Mask can operate
                   2241:         add     edi,ebx         ;point to next dest scan
                   2242:         endm    ;-----------------------------------;
                   2243: 
                   2244: ;  EBX = scan line width
                   2245: ;  ESI = source address to copy from
                   2246: ;  EDI = target address to copy to
                   2247: ;  Bit Mask set to desired clipping
                   2248: ;  Read Map and Map Mask set to enable the desired plane for read and write
                         ;  NOTE(review): UNROLL_LOOP is defined elsewhere; presumably it expands
                         ;  COPY_EDGE_RW EDGE_CHUNK_SIZE times with the entry labels the table
                         ;  above refers to - confirm against the macro definition.
                   2249: 
                   2250:         align   4
                   2251: copy_edge_rw_full_chunk:
                   2252:         UNROLL_LOOP     COPY_EDGE_RW,EDGE_RW,EDGE_CHUNK_SIZE
                   2253: 
                   2254: ; Do next plane within this chunk, if any.
                   2255: 
                   2256:         shr     ah,1                    ;advance to next plane
                   2257:         jnz     copy_edge_plane_loop    ;done once AH reaches zero
                   2258: 
                   2259: ; Remember where we left off, for the next chunk.
                   2260: 
                   2261:         mov     pSrcAddr,esi
                   2262:         mov     pDestAddr,edi
                   2263: 
                   2264: ; Do next chunk within this bank block, if any.
                         ; ECX still holds the full-chunk entry point here; it is only replaced
                         ; for a final partial chunk, after which no scans remain.
                   2265: 
                   2266:         pop     ebx                     ;retrieve remaining scan count
                   2267:         and     ebx,ebx                 ;any scans left?
                   2268:         jnz     copy_edge_chunk_loop    ;more scans to do
                   2269: 
                   2270:         PLAIN_RET
                   2271: 
                   2272: 
                   2273: ;-----------------------------------------------------------------------;
                   2274: ; Copies a strip of left edge bytes from the source to the destination
                   2275: ; through an intermediate RAM buffer. This is the approach required by
                   2276: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
                   2277: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
                   2278: ; cause flicker.
                   2279: ;
                   2280: ; Input:
                   2281: ;       ulNextScan = width of scan, in bytes
                   2282: ;       ulBlockHeight = # of scans to copy
                   2283: ;       ulLeftEdgeSrc = start source offset in bitmap
                   2284: ;       ulLeftEdgeDest = start dest offset in bitmap
                   2285: ;       jLeftMask = left edge clip mask
                   2286: ;       pTempPlane = pointer to temp storage buffer
                   2287: ;       ulCurrentSrcScan = scan used to map in source bank
                   2288: ;       ulCurrentDestScan = scan used to map in dest bank
                   2289: ;       ulCurrentJustification = justification used to map in current bank
                   2290: ;       For 1 R/W adapters, expects the source bank to be mapped in; banking
                   2291: ;               is the same at exit as it was at entry
                   2292: ;
                   2293: ; Output:
                   2294: ;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
                   2295: ;               scan processed
                   2296: ;
                   2297: ; Note that this should never be called for an unbanked or 2 R/W adapter,
                   2298: ; because the source and dest are always both addressable simultaneously then.
                   2299: ;-----------------------------------------------------------------------;
                   2300: 
                   2301:         align   4
                   2302: copy_left_edge_via_buffer:
                   2303: 
                   2304: ; First, copy all the bytes into the temporary buffer.
                   2305: 
                   2306: ; Calculate start source address from bitmap start address and
                   2307: ; offset within bitmap.
                   2308: 
                   2309:         mov     ecx,pdsurf
                   2310:         mov     esi,ulLeftEdgeSrc
                   2311:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2312: 
                   2313: ; Copy the edge from the source to the temp buffer.
                   2314: 
                   2315:         call    copy_screen_to_buffered_edge
                   2316: 
                   2317: ; Remember where we left off, for next time
                   2318: 
                   2319:         mov     ebx,pdsurf
                   2320:         sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
                   2321:         mov     ulLeftEdgeSrc,esi
                   2322: 
                   2323: ; Now copy the temp buffer to the screen.
                   2324: 
                   2325: ; Map in the source bank to match the destination, so we can read/write to it
                   2326: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
                   2327: ; mapped by this call, which is fine.
                   2328: 
                   2329:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2330:                 <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
                   2331: 
                   2332: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
                   2333: ; until now to calculate this, because the dest bank wasn't mapped earlier).
                   2334: 
                   2335:         mov     edi,ulLeftEdgeDest
                   2336:         add     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2337: 
                         ; copy_buffered_edge_to_screen requires DH = VGA_BASE SHR 8 on entry.
                         ; Nothing visible here guarantees that EDX survives the indirect
                         ; bank-control call above, so establish it explicitly rather than rely
                         ; on whatever the bank-switch code left in EDX.
                                 mov     edx,VGA_BASE + GRAF_ADDR
                   2338:         mov     ah,byte ptr jLeftMask           ;clip mask for this edge
                   2339:         call    copy_buffered_edge_to_screen    ;do the copy
                   2340: 
                   2341: ; Remember where we left off, for next time.
                   2342: 
                   2343:         mov     ebx,pdsurf
                   2344:         sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2345:         mov     ulLeftEdgeDest,edi
                   2346: 
                   2347: ; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
                   2348: ; will be mapped by this call, which is fine.
                   2349: 
                   2350:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2351:                 <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
                   2352: 
                   2353:         PLAIN_RET
                   2354: 
                   2355: 
                   2356: ;-----------------------------------------------------------------------;
                   2357: ; Copies a strip of right edge bytes from the source to the destination
                   2358: ; through an intermediate RAM buffer. This is the approach required by
                   2359: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
                   2360: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
                   2361: ; cause flicker.
                   2362: ;
                   2363: ; Input:
                   2364: ;       ulNextScan = width of scan, in bytes
                   2365: ;       ulBlockHeight = # of scans to copy
                   2366: ;       ulRightEdgeSrc = start source offset in bitmap
                   2367: ;       ulRightEdgeDest = start dest offset in bitmap
                   2368: ;       jRightMask = right edge clip mask
                   2369: ;       pTempPlane = pointer to temp storage buffer
                   2370: ;       ulCurrentSrcScan = scan used to map in source bank
                   2371: ;       ulCurrentDestScan = scan used to map in dest bank
                   2372: ;       ulCurrentJustification = justification used to map in current bank
                   2373: ;       For 1 R/W adapters, expects the source bank to be mapped in; banking
                   2374: ;               is the same at exit as it was at entry
                   2375: ;
                   2376: ; Output:
                   2377: ;       Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
                   2378: ;               scan processed
                   2379: ;
                   2380: ; Note that this should never be called for an unbanked or 2 R/W adapter,
                   2381: ; because the source and dest are always both addressable simultaneously then.
                   2382: ;-----------------------------------------------------------------------;
                   2383: 
                   2384:         align   4
                   2385: copy_right_edge_via_buffer:
                   2386: 
                   2387: ; First, copy all the bytes into the temporary buffer.
                   2388: 
                   2389: ; Calculate start source address from bitmap start address and
                   2390: ; offset within bitmap.
                   2391: 
                   2392:         mov     ecx,pdsurf
                   2393:         mov     esi,ulRightEdgeSrc
                   2394:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2395: 
                   2396: ; Copy the edge from the source to the temp buffer.
                   2397: 
                   2398:         call    copy_screen_to_buffered_edge
                   2399: 
                   2400: ; Remember where we left off, for next time
                   2401: 
                   2402:         mov     ebx,pdsurf
                   2403:         sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
                   2404:         mov     ulRightEdgeSrc,esi
                   2405: 
                   2406: ; Now copy the temp buffer to the screen.
                   2407: 
                   2408: ; Map in the source bank to match the destination, so we can read/write to it
                   2409: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
                   2410: ; mapped by this call, which is correct.
                   2411: 
                   2412:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2413:                 <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
                   2414: 
                   2415: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
                   2416: ; until now to calculate this, because the dest bank wasn't mapped earlier).
                   2417: 
                   2418:         mov     edi,ulRightEdgeDest
                   2419:         add     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2420: 
                         ; copy_buffered_edge_to_screen requires DH = VGA_BASE SHR 8 on entry.
                         ; Nothing visible here guarantees that EDX survives the indirect
                         ; bank-control call above, so establish it explicitly rather than rely
                         ; on whatever the bank-switch code left in EDX.
                                 mov     edx,VGA_BASE + GRAF_ADDR
                   2421:         mov     ah,byte ptr jRightMask          ;clip mask for this edge
                   2422:         call    copy_buffered_edge_to_screen    ;do the copy
                   2423: 
                   2424: ; Remember where we left off, for next time.
                   2425: 
                   2426:         mov     ebx,pdsurf
                   2427:         sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2428:         mov     ulRightEdgeDest,edi
                   2429: 
                   2430: ; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
                   2431: ; will be mapped by this call, which is fine.
                   2432: 
                   2433:         ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2434:                 <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
                   2435: 
                   2436:         PLAIN_RET
                   2437: 
                   2438: 
                   2439: ;-----------------------------------------------------------------------;
                   2440: ; Copies an edge from the temp buffer to the screen.
                   2441: ; Entry:
                   2442: ;       AH = bit mask setting for edge
                   2443: ;       DH = VGA_BASE SHR 8
                   2444: ;       EDI = destination address
                   2445: ;       pTempPlane = temp buffer from which to copy
                   2446: ;       ulBlockHeight = # of bytes to copy per plane
                   2447: ;       ulNextScan = scan width
                   2448: ;       Source and dest banks both pointing to destination
                   2449: ; Exit:
                   2450: ;       EDI = next destination address
                   2451: ; Modified: AX, EBX, ECX, DL, ESI, pDestAddr, pTempEntry
                   2452: ; Preserved: EBP
                   2453: ;-----------------------------------------------------------------------;
                   2454: 
                   2455:         align   4
                   2456: copy_buffered_edge_to_screen:
                   2457: 
                   2458:         mov     pDestAddr,edi
                   2459: 
                   2460:         mov     dl,GRAF_ADDR
                   2461:         mov     al,GRAF_BIT_MASK
                   2462:         out     dx,ax
                   2463: 
                   2464:         mov     pTempEntry,offset copy_edge_from_buf_full_chunk
                   2465:                                 ;entry point into unrolled loop to copy first
                   2466:                                 ; chunk, assuming it's a full chunk
                   2467:         mov     ecx,pTempPlane  ;temp buffer start (copy from here)
                   2468:         mov     ebx,ulBlockHeight ;total # of scans to copy
                   2469: 
                   2470: ; Copy the edge in a series of chunks, to avoid flicker.
                   2471: 
                   2472: copy_from_buffer_chunk_loop:
                   2473: 
                   2474:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                   2475:                                     ; a full chunk
                   2476:         jge     short @F            ;do a full chunk
                   2477:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                   2478:                                     ; scans
                   2479:         mov     ebx,pfnCopyEdgesFromBufferEntry[-4][ebx*4] ;-4: 1st entry = 1 scan
                   2480:         mov     pTempEntry,ebx  ;entry point into unrolled loop to copy desired
                   2481:                                 ; chunk size
                   2482:         sub     ebx,ebx         ;no scans after this
                   2483: @@:
                   2484:         push    ebx             ;remember remaining scan count
                   2485: 
                   2486:         mov     al,MM_C3        ;start by copying plane 3
                   2487:         mov     ebx,ulNextScan
                   2488: 
                   2489:         push    ecx             ;remember current temp buffer start
                   2490: 
                   2491:         mov     dl,SEQ_DATA     ;leave DX pointing to Sequencer Data reg
                   2492: copy_from_buffer_plane_loop:
                   2493: 
                   2494: ; Set Map Mask to enable writes to plane we're copying.
                   2495: 
                   2496:         out     dx,al
                   2497: 
                   2498: ; Point ESI to the current plane's bytes in the temp buffer, and advance ECX to
                   2499: ; the next plane's bytes, which start ulBlockHeight bytes later.
                   2500: 
                   2501:         mov     esi,ecx                 ;point to current plane's source byte
                   2502:         add     ecx,ulBlockHeight       ;point to next plane's source byte
                   2503: 
                   2504:         mov     edi,pDestAddr
                   2505: 
                   2506:         jmp     pTempEntry              ;copy this plane's edge bytes
                   2507: 
                   2508: 
                   2509: ;-----------------------------------------------------------------------;
                   2510: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
                   2511: ; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
                   2512: ; bytes.
                   2513: ;-----------------------------------------------------------------------;
                   2514: 
                   2515: pfnCopyEdgesFromBufferEntry label dword
                   2516: INDEX = 1
                   2517:         rept    EDGE_CHUNK_SIZE
                   2518:         DEFINE_DD       EDGE_FROM_BUFFER,%INDEX
                   2519: INDEX = INDEX+1
                   2520:         endm
                   2521: 
                   2522: 
                   2523: ;-----------------------------------------------------------------------;
                   2524: ; Unrolled loop for copying a strip of edge bytes from the temp buffer.
                   2525: ;-----------------------------------------------------------------------;
                   2526: 
                   2527: COPY_EDGE_FROM_BUFFER macro ENTRY_LABEL,ENTRY_INDEX
                   2528: &ENTRY_LABEL&ENTRY_INDEX&:
                   2529:         mov     ah,[esi]        ;get byte to copy
                   2530:         inc     esi             ;point to next source (temp buffer) byte
                   2531:         xchg    [edi],ah        ;read before write so Bit Mask can operate
                   2532:         add     edi,ebx         ;point to next dest (screen) scan
                   2533:         endm    ;-----------------------------------;
                   2534: 
                   2535: ;  EBX = scan line width
                   2536: ;  ESI = source address to copy from (temp buffer)
                   2537: ;  EDI = target address to copy to (screen)
                   2538: ;  Bit Mask set to desired clipping
                   2539: ;  Map Mask set to enable the desired plane for write
                   2540: 
                   2541:         align   4
                   2542: copy_edge_from_buf_full_chunk:
                   2543:         UNROLL_LOOP     COPY_EDGE_FROM_BUFFER,EDGE_FROM_BUFFER,EDGE_CHUNK_SIZE
                   2544: 
                   2545: ; Do next plane within this chunk, if any.
                   2546: 
                   2547:         shr     al,1                    ;advance to next plane
                   2548:         jnz     copy_from_buffer_plane_loop
                   2549: 
                   2550: ; Remember where we left off, for next chunk.
                   2551: 
                   2552:         mov     pDestAddr,edi
                   2553:         pop     ecx             ;get back current temp buffer start
                   2554:         add     ecx,EDGE_CHUNK_SIZE ;point to next chunk's start
                   2555: 
                   2556: ; Do next chunk within this bank block, if any.
                   2557: 
                   2558:         pop     ebx                     ;retrieve remaining scan count
                   2559:         and     ebx,ebx                 ;any scans left?
                   2560:         jnz     copy_from_buffer_chunk_loop    ;more scans to do
                   2561: 
                   2562:         PLAIN_RET
                   2563: 
                   2564: 
                   2565: ;-----------------------------------------------------------------------;
                   2566: ; Copies an edge from the screen to the temp buffer.
                   2567: ; Entry:
                   2568: ;       ESI = source address
                   2569: ;       pTempPlane = temp buffer to which to copy
                   2570: ;       ulBlockHeight = # of bytes to copy per plane
                   2571: ;       ulNextScan = scan width
                   2572: ;       Source bank pointing to source
                   2573: ; Exit:
                   2574: ;       DH = VGA_BASE SHR 8
                   2575: ;       ESI = next source address
                   2576: ; Modified: AX, EBX, ECX, EDX, EDI, pSrcAddr, culTempCount, pTempEntry
                   2577: ; Preserved: EBP
                   2578: ;-----------------------------------------------------------------------;
                   2579: 
                   2580:         align   4
                   2581: copy_screen_to_buffered_edge:
                   2582: 
                   2583:         mov     pSrcAddr,esi
                   2584: 
                   2585: ; Leave the GC Index pointing to the Read Map.
                   2586: 
                   2587:         mov     edx,VGA_BASE + GRAF_ADDR
                   2588:         mov     al,GRAF_READ_MAP
                   2589:         out     dx,al
                   2590: 
                   2591:         mov     ebx,ulBlockHeight
                   2592:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry, \
                   2593:                                 LOOP_UNROLL_SHIFT
                   2594:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
                   2595:         mov     pTempEntry,ecx   ;ditto for entry point
                   2596: 
                   2597:         mov     ecx,ulNextScan
                   2598:         mov     edi,pTempPlane  ;dest offset in temp buffer for plane 3 bytes.
                   2599:                                 ;The rest of the planes are stored
                   2600:                                 ; consecutively
                   2601:         mov     al,3            ;start by copying plane 3
                   2602:         mov     dl,GRAF_DATA    ;leave DX pointing to GC Data reg
                   2603: copy_edge_to_buffer_plane_loop:
                   2604:         mov     esi,pSrcAddr ;source pointer
                   2605: 
                   2606:         out     dx,al            ;set Read Map to plane we're copying from.
                   2607: 
                   2608:         mov     ebx,culTempCount ;# of unrolled loop iterations
                   2609:         jmp     pTempEntry       ;copy the edge bytes for this plane to the
                   2610:                                  ; temp buffer
                   2611: 
                   2612: ;-----------------------------------------------------------------------;
                   2613: ; Table of unrolled edge copy to temp buffer loop entry points.
                   2614: ;-----------------------------------------------------------------------;
                   2615: 
                   2616:         UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry,EDGE_TO_TEMP, \
                   2617:                                 LOOP_UNROLL_COUNT
                   2618: 
                   2619: ;-----------------------------------------------------------------------;
                   2620: ; Unrolled loop for copying edge bytes to the temp buffer.
                   2621: ;-----------------------------------------------------------------------;
                   2622: 
                   2623: COPY_EDGE_TO_TEMP macro ENTRY_LABEL,ENTRY_INDEX
                   2624: &ENTRY_LABEL&ENTRY_INDEX&:
                   2625:         mov     ah,[esi]        ;get byte to copy
                   2626:         add     esi,ecx         ;point to next source scan
                   2627:         mov     [edi],ah        ;copy byte to temp buffer
                   2628:         inc     edi             ;point to next temp buffer byte
                   2629:         endm    ;-----------------------------------;
                   2630: 
                   2631: ;  EBX = count of unrolled loop iterations
                   2632: ;  ECX = scan line width (offset from one scan's edge byte to the next)
                   2633: ;  ESI = source address to copy from (screen)
                   2634: ;  EDI = target address to copy to (temp buffer)
                   2635: ;  Read Map set to enable the desired plane for read
                   2636: 
                   2637:         align   4
                   2638: edge_to_buffer_loop:
                   2639:         UNROLL_LOOP     COPY_EDGE_TO_TEMP,EDGE_TO_TEMP,LOOP_UNROLL_COUNT
                   2640:         dec     ebx
                   2641:         jnz     edge_to_buffer_loop
                   2642: 
                   2643:         dec     al               ;count down planes
                   2644:         jns     copy_edge_to_buffer_plane_loop
                   2645: 
                   2646:         PLAIN_RET
                   2647: 
                   2648: 
                   2649: ;-----------------------------------------------------------------------;
                   2650: 
                   2651: endProc vAlignedSrcCopy
                   2652: 
                   2653: _TEXT$03   ends
                   2654: 
                   2655:         end
                   2656: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.