Annotation of ntddk/src/video/displays/vga/i386/nalgnblt.asm, revision 1.1.1.1

1.1       root        1: ;******************************Module*Header*******************************\
                      2: ; Module Name: nalgnblt.asm
                      3: ;
                      4: ; driver prototypes
                      5: ;
                      6: ; Copyright (c) 1992 Microsoft Corporation
                      7: ;**************************************************************************/
                      8: 
                      9: ;-----------------------------------------------------------------------;
                     10: ; VOID vNonAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
                     11: ;                      INT icopydir);
                     12: ; Input:
                     13: ;  pdsurf - surface on which to copy
                     14: ;  prcldest - pointer to destination rectangle
                     15: ;  pptlsrc - pointer to source upper left corner
                     16: ;  icopydir - direction in which copy must proceed to avoid overlap problems
                     17: ;             and synchronize with the clip enumeration visually, according to
                     18: ;             constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and CD_LEFTUP in
                     19: ;             WINDDI.H
                     20: ;
                     21: ; Performs accelerated non-aligned SRCCOPY VGA-to-VGA blts.
                     22: ;
                     23: ;-----------------------------------------------------------------------;
                     24: ;
                     25: ; Note: The source and dest *must* be non-aligned (not have the same
                     26: ; left-edge intrabyte pixel alignment). Will not work properly if they are
                     27: ; in fact aligned.
                     28: ;
                     29: ; Note: Assumes all rectangles have positive heights and widths. Will not
                     30: ; work properly if this is not the case.
                     31: ;
                     32: ;-----------------------------------------------------------------------;
                     33: 
                     34:         comment $
                     35: 
                     36: The overall approach of this module for each rectangle to copy is:
                     37: 
                     38: 1) Precalculate the masks and whole byte widths, and determine which of
                     39: partial left edge, partial right edge, and whole middle bytes are required
                     40: for this copy.
                     41: 
                     42: 2) Set up the starting pointers for each of the areas (left, whole middle,
                     43: right), the start and stop scan lines, the copying direction (left-to-right
                     44: or right-to-left, and top-to-bottom or bottom-to-top), the threading
                     45: (sequence of calls required to do the left/whole/right components in the
                     46: proper sequence), based on the passed-in copy direction, which in turn is
                     47: dictated by the nature of the overlap between the source and destination.
                     48: 
                     49: 3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
                     50: 1 R/W window, unbanked), that sequences through the intersection of each
                     51: bank with the source and destination rectangles in the proper direction
                     52: (top-to-bottom or bottom-to-top, based on the passed-in copy direction),
                     53: and performs the copy in each such rectangle. The threading vector is used
                     54: to call the required routines (copy left/whole/right bytes). For 1 R/W and
                     55: 1R/1W adapters, there is a second threading vector that is called when the
                     56: source and the destination are both adequately (for the copy purposes)
                     57: addressable simultaneously (because they're in the same bank), so there's
                     58: no need to copy through a temp buffer. We want to avoid the temp
                     59: buffer whenever we can, because it's slower.
                     60: 
                     61: Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
                     62: each plane's bytes are not stored in the corresponding plane's temp buffer, but
                     63: rather consecutively in the plane 0 temp buffer. This is to reduce page
                     64: faulting, and also so that 1R/1W adapters only need a temp buffer large enough
                     65: to hold 4*tallest bank words (4K will do). 1 R/W adapters still copy whole
                     66: bytes through the full temp buffer, using all four planes' temp buffers, so
                     67: they require a temp buffer big enough to hold a full bank (256K will do).
                     68: 
                     69: Note: The VGA's rotator is used to perform all rotation in this module. The
                     70: two source bytes relevant to this operation are masked to preserve the desired
                     71: bits, then combined and fed to the VGA's rotator, which performs the rotation.
                     72: This is better than letting the 386/486 do the rotation because even with the
                     73: barrel shifter, those processors take 3 cycles per rotate, where the masking
                     74: and combining take only 2 cycles (or no cycles, for edges with 1-wide
                     75: sources). We also get to avoid 16-bit instructions like ROL AX,CL; the 16-bit
                     76: size prefix costs a cycle on a 486.
                     77: 
                     78:         comment $
                     79: 
                     80: ;-----------------------------------------------------------------------;
                     81: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
                     82: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
                     83: ; times unrolling. This is the only thing you need to change to control
                     84: ; unrolling. Note: does not affect loops that process in chunks, like edge
                     85: ; loops.
                     86: 
                     87: LOOP_UNROLL_SHIFT equ 2                 ;2**2 = 4 times unrolling
                     88: 
                     89: ;-----------------------------------------------------------------------;
                     90: ; Maximum # of edge bytes to process before switching to next plane. Larger
                     91: ; means faster, but there's more potential for flicker, since the raster scan
                     92: ; has a better chance of catching bytes that have changed in some planes but
                     93: ; not all planes.
                     94: 
                     95: EDGE_CHUNK_SIZE equ     16              ;max edge bytes per plane visit
                     96: 
                     97: ;-----------------------------------------------------------------------;
                     98: ; Macro to push the current threading sequence (string of routine calls) on the
                     99: ; stack, then jump to the first threading entry. THREADING (optional) is the
                    100: ; threading pointer and defaults to pCurrentThread. RETURN_ADDR (optional) is
                    101: ; where the last routine returns to; it defaults to the end of the macro
                    102: ; expansion. Overwrites EAX, ECX, and EDX.
                    103: THREAD_AND_START macro THREADING,RETURN_ADDR
                    104:         local   push_base, return_address
                    105: 
                    106: ifb <&RETURN_ADDR&>
                    107:         push    offset return_address   ;after all the threaded routines, we
                    108:                                         ; return here
                    109: else
                    110:         push    offset &RETURN_ADDR&    ;return here
                    111: endif
                    112: 
                    113: ifb <&THREADING&>
                    114:         mov     eax,pCurrentThread      ;EAX = default threading pointer
                    115: else
                    116:         mov     eax,&THREADING&         ;EAX = specified threading pointer
                    117: endif
                    118: 
                    119:         mov     ecx,[eax]               ;# of routines to thread (at least 1)
                    120:         lea     ecx,[ecx*2+ecx]         ;count*3 (pushes below are 3 bytes each)
                    121:         mov     edx,offset push_base+3  ;count 1 lands exactly on push_base
                    122:         sub     edx,ecx                 ;each extra routine backs up one push
                    123:         jmp     edx                     ;branch to push or jmp below
                    124: 
                    125: ; Push the second and third threading addresses (third first, so the second
                    126: ; ends up on top of the stack), so routines perform the threading as they
                    127: ; return. Entered part-way through, depending on the routine count.
                    128:         push    dword ptr [eax+12]       ;3 byte instruction
                    129:         push    dword ptr [eax+8]        ;also 3 bytes; entry point for count 2
                    130: push_base:
                    131:         jmp     dword ptr [eax+4]        ;jump to the first threaded routine
                    132: 
                    133:         align   4
                    134: return_address:                         ;default return point (RETURN_ADDR blank)
                    135:         endm
                    136: 
                    137: ;-----------------------------------------------------------------------;
                    138: 
                    139:                 .386
                    140: 
                    141: ifndef  DOS_PLATFORM
                    142:         .model  small,c
                    143: else
                    144: ifdef   STD_CALL
                    145:         .model  small,c
                    146: else
                    147:         .model  small,pascal
                    148: endif;  STD_CALL
                    149: endif;  DOS_PLATFORM
                    150: 
                    151:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
                    152:         assume fs:nothing,gs:nothing
                    153: 
                    154:         .xlist
                    155:         include stdcall.inc             ;calling convention cmacros
                    156:         include i386\egavga.inc
                    157:         include i386\strucs.inc
                    158:         include i386\unroll.inc
                    159:         include i386\ropdefs.inc
                    160: 
                    161:         .list
                    162: 
                    163: ;-----------------------------------------------------------------------;
                    164: 
                    165:         .data
                    166: 
                    167: ; Threads for stringing together left, whole byte, and right operations
                    168: ; in various orders, both using a temp buffer and not. Data format is:
                    169: ;
                    170: ; DWORD +0 = # of calls in thread (1, 2, or 3)
                    171: ;       +4 = first call (required)
                    172: ;       +8 = second call (optional)
                    173: ;      +12 = third call (optional)
                    174: 
                    175:         align   4
                    176: ; Thread names list the calls in order: L = copy_left_edge,
                    177: ; W = copy_whole_bytes, R = copy_right_edge. Copies not involving the temp buffer.
                    178: 
                    179: Thread_L        dd      1
                    180:                 dd      copy_left_edge
                    181: 
                    182: Thread_W        dd      1
                    183:                 dd      copy_whole_bytes
                    184: 
                    185: Thread_R        dd      1
                    186:                 dd      copy_right_edge
                    187: 
                    188: Thread_LR       dd      2
                    189:                 dd      copy_left_edge
                    190:                 dd      copy_right_edge
                    191: 
                    192: Thread_RL       dd      2
                    193:                 dd      copy_right_edge
                    194:                 dd      copy_left_edge
                    195: 
                    196: Thread_LW       dd      2
                    197:                 dd      copy_left_edge
                    198:                 dd      copy_whole_bytes
                    199: 
                    200: Thread_WL       dd      2
                    201:                 dd      copy_whole_bytes
                    202:                 dd      copy_left_edge
                    203: 
                    204: Thread_WR       dd      2
                    205:                 dd      copy_whole_bytes
                    206:                 dd      copy_right_edge
                    207: 
                    208: Thread_RW       dd      2
                    209:                 dd      copy_right_edge
                    210:                 dd      copy_whole_bytes
                    211: 
                    212: Thread_LWR      dd      3
                    213:                 dd      copy_left_edge
                    214:                 dd      copy_whole_bytes
                    215:                 dd      copy_right_edge
                    216: 
                    217: Thread_RWL      dd      3
                    218:                 dd      copy_right_edge
                    219:                 dd      copy_whole_bytes
                    220:                 dd      copy_left_edge
                    221: 
                    222: ; Copies involving the temp buffer. In a thread name, a "b" after a letter means
                    223: ; that call is the _via_buffer routine; a letter without "b" is the direct one.
                    224: Thread_Lb       dd      1
                    225:                 dd      copy_left_edge_via_buffer
                    226: 
                    227: Thread_Wb       dd      1
                    228:                 dd      copy_whole_bytes_via_buffer
                    229: 
                    230: Thread_Rb       dd      1
                    231:                 dd      copy_right_edge_via_buffer
                    232: 
                    233: Thread_LbRb     dd      2
                    234:                 dd      copy_left_edge_via_buffer
                    235:                 dd      copy_right_edge_via_buffer
                    236: 
                    237: Thread_RbLb     dd      2
                    238:                 dd      copy_right_edge_via_buffer
                    239:                 dd      copy_left_edge_via_buffer
                    240: 
                    241: Thread_LbW      dd      2
                    242:                 dd      copy_left_edge_via_buffer
                    243:                 dd      copy_whole_bytes
                    244: 
                    245: Thread_LbWb     dd      2
                    246:                 dd      copy_left_edge_via_buffer
                    247:                 dd      copy_whole_bytes_via_buffer
                    248: 
                    249: Thread_WLb      dd      2
                    250:                 dd      copy_whole_bytes
                    251:                 dd      copy_left_edge_via_buffer
                    252: 
                    253: Thread_WbLb     dd      2
                    254:                 dd      copy_whole_bytes_via_buffer
                    255:                 dd      copy_left_edge_via_buffer
                    256: 
                    257: Thread_WRb      dd      2
                    258:                 dd      copy_whole_bytes
                    259:                 dd      copy_right_edge_via_buffer
                    260: 
                    261: Thread_WbRb     dd      2
                    262:                 dd      copy_whole_bytes_via_buffer
                    263:                 dd      copy_right_edge_via_buffer
                    264: 
                    265: Thread_RbW      dd      2
                    266:                 dd      copy_right_edge_via_buffer
                    267:                 dd      copy_whole_bytes
                    268: 
                    269: Thread_RbWb     dd      2
                    270:                 dd      copy_right_edge_via_buffer
                    271:                 dd      copy_whole_bytes_via_buffer
                    272: 
                    273: Thread_LbWRb    dd      3
                    274:                 dd      copy_left_edge_via_buffer
                    275:                 dd      copy_whole_bytes
                    276:                 dd      copy_right_edge_via_buffer
                    277: 
                    278: Thread_LbWbRb   dd      3
                    279:                 dd      copy_left_edge_via_buffer
                    280:                 dd      copy_whole_bytes_via_buffer
                    281:                 dd      copy_right_edge_via_buffer
                    282: 
                    283: Thread_RbWLb    dd      3
                    284:                 dd      copy_right_edge_via_buffer
                    285:                 dd      copy_whole_bytes
                    286:                 dd      copy_left_edge_via_buffer
                    287: 
                    288: Thread_RbWbLb   dd      3
                    289:                 dd      copy_right_edge_via_buffer
                    290:                 dd      copy_whole_bytes_via_buffer
                    291:                 dd      copy_left_edge_via_buffer
                    292: 
                    293: ;-----------------------------------------------------------------------;
                    294: ; Table of thread selection for various horizontal copy directions, with
                    295: ; the look-up index a 4-bit field as follows:
                    296: ;
                    297: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
                    298: ; Bit 2 = 1 if left edge must be copied
                    299: ; Bit 1 = 1 if whole bytes must be copied
                    300: ; Bit 0 = 1 if right edge must be copied
                    301: ;
                    302: ; This is used for all cases where both the source and destination are
                    303: ; simultaneously addressable for our purposes, so there's no need to go
                    304: ; through the temp buffer (unbanked, 2 R/W, and sometimes for 1 R/W and 1R/1W).
                    305: 
                    306: MasterThreadTable label dword
                    307:                                 ;right-to-left (index bit 3 = 0)
                    308:         dd      0               ;<not used> (no L, W, or R bit set)
                    309:         dd      Thread_R        ;R->L, R
                    310:         dd      Thread_W        ;R->L, W
                    311:         dd      Thread_RW       ;R->L, RW
                    312:         dd      Thread_L        ;R->L, L
                    313:         dd      Thread_RL       ;R->L, RL
                    314:         dd      Thread_WL       ;R->L, WL
                    315:         dd      Thread_RWL      ;R->L, RWL
                    316:                                 ;left-to-right (index bit 3 = 1)
                    317:         dd      0               ;<not used> (no L, W, or R bit set)
                    318:         dd      Thread_R        ;L->R, R
                    319:         dd      Thread_W        ;L->R, W
                    320:         dd      Thread_WR       ;L->R, WR
                    321:         dd      Thread_L        ;L->R, L
                    322:         dd      Thread_LR       ;L->R, LR
                    323:         dd      Thread_LW       ;L->R, LW
                    324:         dd      Thread_LWR      ;L->R, LWR
                    325: 
                    326: 
                    327: ; Table of thread selection for various adapter types and horizontal
                    328: ; copy directions, with the look-up index a 6-bit field as follows:
                    329: ;
                    330: ; Bit 5 = adapter type high bit
                    331: ; Bit 4 = adapter type low bit
                    332: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
                    333: ; Bit 2 = 1 if left edge must be copied
                    334: ; Bit 1 = 1 if whole bytes must be copied
                    335: ; Bit 0 = 1 if right edge must be copied
                    336: ;
                    337: ; This is used for all cases where the source and destination are not both
                    338: ; simultaneously addressable for our purposes, so we need to go through the
                    339: ; temp buffer (only for 1 R/W and 1R/1W, and only sometimes).
                    340: 
                    341: MasterThreadTableViaBuffer label dword
                    342:                                 ;adapter type 0: unbanked (no need for buffer)
                    343:                                 ;right-to-left
                    344:         dd      0               ;<not used>
                    345:         dd      Thread_R        ;R->L, R
                    346:         dd      Thread_W        ;R->L, W
                    347:         dd      Thread_RW       ;R->L, RW
                    348:         dd      Thread_L        ;R->L, L
                    349:         dd      Thread_RL       ;R->L, RL
                    350:         dd      Thread_WL       ;R->L, WL
                    351:         dd      Thread_RWL      ;R->L, RWL
                    352:                                 ;left-to-right
                    353:         dd      0               ;<not used>
                    354:         dd      Thread_R        ;L->R, R
                    355:         dd      Thread_W        ;L->R, W
                    356:         dd      Thread_WR       ;L->R, WR
                    357:         dd      Thread_L        ;L->R, L
                    358:         dd      Thread_LR       ;L->R, LR
                    359:         dd      Thread_LW       ;L->R, LW
                    360:         dd      Thread_LWR      ;L->R, LWR
                    361: 
                    362:                                 ;adapter type 1: 1 R/W banking window
                    363:                                 ; (everything goes through buffer)
                    364:                                 ;right-to-left
                    365:         dd      0               ;<not used>
                    366:         dd      Thread_Rb       ;R->L, R
                    367:         dd      Thread_Wb       ;R->L, W
                    368:         dd      Thread_RbWb     ;R->L, RW
                    369:         dd      Thread_Lb       ;R->L, L
                    370:         dd      Thread_RbLb     ;R->L, RL
                    371:         dd      Thread_WbLb     ;R->L, WL
                    372:         dd      Thread_RbWbLb   ;R->L, RWL
                    373:                                 ;left-to-right
                    374:         dd      0               ;<not used>
                    375:         dd      Thread_Rb       ;L->R, R
                    376:         dd      Thread_Wb       ;L->R, W
                    377:         dd      Thread_WbRb     ;L->R, WR
                    378:         dd      Thread_Lb       ;L->R, L
                    379:         dd      Thread_LbRb     ;L->R, LR
                    380:         dd      Thread_LbWb     ;L->R, LW
                    381:         dd      Thread_LbWbRb   ;L->R, LWR
                    382: 
                    383:                                 ;adapter type 2: 1R/1W banking window (only edges go through buffer)
                    384:                                 ;right-to-left
                    385:         dd      0               ;<not used>
                    386:         dd      Thread_Rb       ;R->L, R
                    387:         dd      Thread_W        ;R->L, W
                    388:         dd      Thread_RbW      ;R->L, RW
                    389:         dd      Thread_Lb       ;R->L, L
                    390:         dd      Thread_RbLb     ;R->L, RL
                    391:         dd      Thread_WLb      ;R->L, WL
                    392:         dd      Thread_RbWLb    ;R->L, RWL
                    393:                                 ;left-to-right
                    394:         dd      0               ;<not used>
                    395:         dd      Thread_Rb       ;L->R, R
                    396:         dd      Thread_W        ;L->R, W
                    397:         dd      Thread_WRb      ;L->R, WR
                    398:         dd      Thread_Lb       ;L->R, L
                    399:         dd      Thread_LbRb     ;L->R, LR
                    400:         dd      Thread_LbW      ;L->R, LW
                    401:         dd      Thread_LbWRb    ;L->R, LWR
                    402: 
                    403:                                 ;adapter type 3: 2 R/W banking window (no need for buffer)
                    404:                                 ;right-to-left
                    405:         dd      0               ;<not used>
                    406:         dd      Thread_R        ;R->L, R
                    407:         dd      Thread_W        ;R->L, W
                    408:         dd      Thread_RW       ;R->L, RW
                    409:         dd      Thread_L        ;R->L, L
                    410:         dd      Thread_RL       ;R->L, RL
                    411:         dd      Thread_WL       ;R->L, WL
                    412:         dd      Thread_RWL      ;R->L, RWL
                    413:                                 ;left-to-right
                    414:         dd      0               ;<not used>
                    415:         dd      Thread_R        ;L->R, R
                    416:         dd      Thread_W        ;L->R, W
                    417:         dd      Thread_WR       ;L->R, WR
                    418:         dd      Thread_L        ;L->R, L
                    419:         dd      Thread_LR       ;L->R, LR
                    420:         dd      Thread_LW       ;L->R, LW
                    421:         dd      Thread_LWR      ;L->R, LWR
                    422: 
                    423: 
                    424: ; Amount to shift adapter type field left for use in MasterThreadTableViaBuffer.
                    425: 
                    426: ADAPTER_FIELD_SHIFT     equ     4       ;puts adapter type in index bits 5-4
                    427: 
                    428: ; Mask for setting left-to-right bit to "left-to-right true" for use in both
                    429: ; MasterThread tables.
                    430: 
                    431: LEFT_TO_RIGHT_FIELD_SET equ     1000b   ;bit 3 of the table index
                    432: 
                    433: 
                    434: ; Table of top-to-bottom loops, one entry per adapter type, in the order
                    435: ; unbanked, 1 R/W, 1R/1W, 2 R/W.
                    436:         align   4
                    437: TopToBottomLoopTable label dword
                    438:         dd      top_to_bottom_2RW       ;unbanked is same as 2RW
                    439:         dd      top_to_bottom_1RW       ;1 R/W banking window
                    440:         dd      top_to_bottom_1R1W      ;1R/1W banking window
                    441:         dd      top_to_bottom_2RW       ;2 R/W banking window
                    442: 
                    443: 
                    444: ; Table of bottom-to-top loops, one entry per adapter type, in the order
                    445: ; unbanked, 1 R/W, 1R/1W, 2 R/W.
                    446:         align   4
                    447: BottomToTopLoopTable label dword
                    448:         dd      bottom_to_top_2RW       ;unbanked is same as 2RW
                    449:         dd      bottom_to_top_1RW       ;1 R/W banking window
                    450:         dd      bottom_to_top_1R1W      ;1R/1W banking window
                    451:         dd      bottom_to_top_2RW       ;2 R/W banking window
                    452: 
                    453: 
                    454: ; Table of routines for setting up to copy in various directions, one per
                    455: ; CD_* copy direction constant (presumably indexed by icopydir - TODO confirm).
                    456:         align   4
                    457: SetUpForCopyDirection   label   dword
                    458:         dd      left_to_right_top_to_bottom     ;CD_RIGHTDOWN
                    459:         dd      right_to_left_top_to_bottom     ;CD_LEFTDOWN
                    460:         dd      left_to_right_bottom_to_top     ;CD_RIGHTUP
                    461:         dd      right_to_left_bottom_to_top     ;CD_LEFTUP
                    462: 
                    463: ;-----------------------------------------------------------------------;
                    464: ; Left edge clip masks for intrabyte start addresses 0 through 7.
                    465: ; Whole byte cases are flagged as 0ffh.
                    466: ; Entry n keeps the low 8-n bits, so bit 7 is the leftmost pixel of a byte.
                    467: jLeftMaskTable  label   byte
                    468:         db      0ffh,07fh,03fh,01fh,00fh,007h,003h,001h
                    469: 
                    470: ;-----------------------------------------------------------------------;
                    471: ; Right edge clip masks for intrabyte end addresses (non-inclusive)
                    472: ; 0 through 7. Whole byte cases are flagged as 0ffh.
                    473: ; Entry n (n > 0) keeps the high n bits; entry 0 is the whole-byte flag.
                    474: jRightMaskTable label   byte
                    475:         db      0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh
                    476: 
                    477: ;-----------------------------------------------------------------------;
                    478: ; Table of width-based source-edge-to-buffer copy routines.
                    479: ; Entry 0 = _1ws, entry 1 = _2ws (presumably 1- and 2-byte-wide sources - confirm).
                    480:         align   4
                    481: copy_edge_from_screen_to_buffer label   dword
                    482:         dd      copy_screen_to_buffered_edge_1ws
                    483:         dd      copy_screen_to_buffered_edge_2ws
                    484: 
                    485: ;-----------------------------------------------------------------------;
                    486: ; Table of width-based buffer-to-dest-edge copy routines.
                    487: ; Entry 0 = _1ws, entry 1 = _2ws (presumably 1- and 2-byte-wide sources - confirm).
                    488:         align   4
                    489: copy_edge_from_buffer_to_screen label   dword
                    490:         dd      copy_buffered_edge_to_screen_1ws
                    491:         dd      copy_buffered_edge_to_screen_2ws
                    492: 
                    493: ;-----------------------------------------------------------------------;
                    494: ; Table of width-based edge copy routines (no intermediate buffer).
                    495: ; Entry 0 = _1ws, entry 1 = _2ws (presumably 1- and 2-byte-wide sources - confirm).
                    496:         align   4
                    497: copy_edge_table label   dword
                    498:         dd      copy_edge_1ws
                    499:         dd      copy_edge_2ws
                    500: 
                    501: ;-----------------------------------------------------------------------;
                    502: 
                    503:         .code
                    504: 
                    505: _TEXT$04   SEGMENT DWORD USE32 PUBLIC 'CODE'
                    506:            ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
                    507: 
                    508: ;-----------------------------------------------------------------------;
                    509: ; Entry point: caches rectangle-independent state from pdsurf, runs copy_rect, then resets the VGA registers and direction flag.
                    510: cProc   vNonAlignedSrcCopy,16,<        \
                    511:         uses    esi edi ebx,    \
                    512:         pdsurf: ptr DEVSURF,    \
                    513:         prcldest : ptr RECTL,   \
                    514:         pptlsrc : ptr POINTL,   \
                    515:         icopydir : dword
                    516: ; NOTE(review): the '>' that closes the cProc argument list is not visible in this listing; confirm against the original source.
                    517:         local   culWholeBytesWidth : dword ;# of bytes to copy across each scan
                    518:         local   ulBlockHeight : dword   ;# of scans to copy per bank block
                    519:         local   ulWholeScanDelta : dword;offset from end of one whole bytes
                    520:                                         ; scan to start of next
                    521:         local   ulWholeBytesSrc : dword ;offset in bitmap of first source whole
                    522:                                         ; byte to copy from
                    523:         local   ulWholeBytesDest : dword;offset in bitmap of first dest whole
                    524:                                         ; byte to copy to
                    525:         local   ulLeftEdgeSrc : dword   ;offset in bitmap of first source left
                    526:                                         ; edge byte to copy from
                    527:         local   ulLeftEdgeDest : dword  ;offset in bitmap of first dest left
                    528:                                         ; edge byte to copy to
                    529:         local   ulRightEdgeSrc : dword  ;offset in bitmap of first source right
                    530:                                         ; edge byte to copy from
                    531:         local   ulRightEdgeDest : dword ;offset in bitmap of first dest right
                    532:                                         ; edge byte to copy to
                    533:         local   ulNextScan : dword      ;width of scan, in bytes
                    534:         local   jLeftMask : dword       ;left edge clip mask
                    535:         local   jRightMask : dword      ;right edge clip mask
                    536:         local   culTempCount : dword    ;handy temporary counter
                    537:         local   pTempEntry : dword      ;temporary storage for vector into
                    538:                                         ; unrolled loop
                    539:         local   pTempPlane : dword      ;pointer to storage in temp buffer for
                    540:                                         ; edge bytes (which are stored
                    541:                                         ; consecutively, not in each plane's
                    542:                                         ; temp buffer, to reduce possible page
                    543:                                         ; faulting)
                    544:         local   ppTempPlane0 : dword    ;pointer to pointer to storage in temp
                    545:                                         ; buffer for plane 0, immediately
                    546:                                         ; preceded by storage for planes 1, 2,
                    547:                                         ; and 3
                    548:         local   ppTempPlane3 : dword    ;like above, but for plane 3
                    549:         local   ulOffsetInBank : dword  ;offset relative to bank start
                    550:         local   pSrcAddr : dword        ;working pointer to first source
                    551:                                         ; byte to copy from
                    552:         local   pDestAddr : dword       ;working pointer to first dest
                    553:                                         ; byte to copy to
                    554:         local   ulCurrentJustification:dword ;justification used to map in
                    555:                                              ; banks; top for top to bottom
                    556:                                              ; copies, bottom for bottom to top
                    557:         local   ulCurrentSrcScan :dword ;scan line used to map in current
                    558:                                         ; source bank
                    559:         local   ulCurrentDestScan:dword ;scan line used to map in current dest
                    560:                                         ; bank
                    561:         local   ulLastDestScan :dword   ;scan in target rect at which we stop
                    562:                                         ; advancing through banks
                    563:         local   pCurrentThread : dword  ;pointer to data describing the
                    564:                                         ; threaded calls to be performed to
                    565:                                         ; perform the current copy
                    566:         local   pCurrentThreadViaBuffer:dword
                    567:                                         ;pointer to data describing the
                    568:                                         ; threaded calls to be performed to
                    569:                                         ; perform the current copy in the case
                    570:                                         ; where the source and destination are
                    571:                                         ; not simultaneously adequately
                    572:                                         ; accessible, so the copy has to go
                    573:                                         ; through a temp buffer (used only for
                    574:                                         ; 1 R/W and 1R/1W banking)
                    575:         local   ulAdapterType : dword   ;adapter type code, per VIDEO_BANK_TYPE
                    576:         local   ulLWRType : dword       ;whether left edge, whole bytes, and
                    577:                                         ; right edge are involved in the
                    578:                                         ; current operation;
                    579:                                         ; bit 2 = 1 if left edge involved
                    580:                                         ; bit 1 = 1 if whole bytes involved
                    581:                                         ; bit 0 = 1 if right edge involved
                    582:         local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
                    583:                                         ; address past the left edge when the
                    584:                                         ; left edge is partial
                    585:         local   ulCombineMask : dword   ;mask for combining desired portions
                    586:                                         ; of AL and AH before ORing to make a
                    587:                                         ; single byte; used to combine before
                    588:                                         ; letting VGA rotate byte as it's
                    589:                                         ; written. Used for all cases except
                    590:                                         ; whole bytes copied left-to-right
                    591:         local   ulCombineMaskWhole : dword
                    592:                                         ;mask for combining desired portions of
                    593:                                         ; AL and AH when copying whole bytes
                    594:                                         ; (different from ulCombineMask in the
                    595:                                         ; case of whole bytes left-to-right
                    596:                                         ; copies, because then AH is the lsb
                    597:                                         ; and AL is the MSB; then, this is
                    598:                                         ; ulCombineMask with the bytes swapped.
                    599:                                         ; For right-to-left whole byte copies,
                    600:                                         ; this is the same as ulCombineMask)
                    601:         local   ulTempScanCount : dword ;temp scan line countdown variable
                    602:         local   ulWholeScanSrcDelta : dword
                    603:                                         ;offset from end of one source whole
                    604:                                         ; bytes scan line to start of next.
                    605:                                         ; Differs from ulWholeScanDelta because
                    606:                                         ; of source rotation pipeline priming
                    607:         local   ulLeftSrcWidthMinus1 : dword ;# of bytes in left src edge minus
                    608:                                              ; one (0 or 1)
                    609:         local   ulRightSrcWidthMinus1 : dword ;# of bytes in right src edge
                    610:                                              ; minus one (0 or 1)
                    611: 
                    612: ;-----------------------------------------------------------------------;
                    613: 
                    614: ; Set pointers to temp buffer plane pointers (used only by 1 R/W and 1R/1W
                    615: ; adapters), and other rectangle-independent variables.
                    616: 
                    617:         mov     esi,pdsurf              ;ESI -> DEVSURF for the field reads below
                    618:         mov     eax,[esi].dsurf_pvBankBufferPlane0 ;value of the plane 0 temp buffer pointer
                    619:         mov     pTempPlane,eax          ;edge-byte storage starts at the plane 0 buffer
                    620:         lea     eax,[esi].dsurf_pvBankBufferPlane0 ;address of the pointer field itself
                    621:         mov     ppTempPlane0,eax
                    622:         lea     eax,[esi].dsurf_pvBankBufferPlane3 ;likewise for the plane 3 pointer field
                    623:         mov     ppTempPlane3,eax
                    624: 
                    625:         mov     eax,[esi].dsurf_vbtBankingType
                    626:         mov     ulAdapterType,eax       ;cached; used later as a table index
                    627: 
                    628: ; Copy the rectangle.
                    629: 
                    630:         call    copy_rect               ;near call to the label below, which uses this procedure's arguments and locals
                    631: 
                    632: ;-----------------------------------------------------------------------;
                    633: ; Set the VGA registers back to their default state.
                    634: ;-----------------------------------------------------------------------;
                    635: 
                    636:         mov     edx,VGA_BASE + GRAF_ADDR
                    637:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK ;AL = register index, AH = data
                    638:         out     dx,ax           ;enable bit mask for all bits
                    639: 
                    640:         mov     eax,(DR_SET shl 8) + GRAF_DATA_ROT ;undo the rotation copy_rect programmed
                    641:         out     dx,ax           ;restore default of no rotation
                    642: 
                    643:         mov     dl,SEQ_DATA     ;only DL changes: EDX keeps the upper bits of VGA_BASE + GRAF_ADDR
                    644:         mov     al,MM_ALL       ;NOTE(review): no index write here; assumes the Sequencer index already selects Map Mask - confirm
                    645:         out     dx,al           ;enable writes to all planes
                    646: 
                    647:         cld                     ;restore default direction flag (right-to-left set-up code executes STD)
                    648: 
                    649:         cRet    vNonAlignedSrcCopy ;done
                    650: 
                    651: 
                    652: ;***********************************************************************;
                    653: ; copy_rect
                    654: ; Copies the specified rectangle.
                    655: ; From prcldest/pptlsrc: programs the VGA rotator and derives ulCombineMask, the source edge widths, the clip masks, the whole-byte count and ulLWRType; then jumps through SetUpForCopyDirection[icopydir].
                    656: ;***********************************************************************;
                    657: 
                    658:         align   4
                    659: copy_rect:
                    660: 
                    661: ; Calculate the rotation, set up the VGA's rotator, and set the byte-combining
                    662: ; masks.
                    663: 
                    664:         mov     edi,prcldest            ;EDI -> destination rectangle
                    665:         mov     esi,pptlsrc             ;ESI -> source upper left point
                    666:         mov     ah,byte ptr [edi].xLeft ;low byte of dest left edge
                    667:         sub     ah,byte ptr [esi].ptl_x ;minus low byte of source left edge
                    668:         and     ah,07h                  ;rotation = (dest - source) % 8
                    669:         mov     edx,VGA_BASE + GRAF_ADDR
                    670:         mov     al,GRAF_DATA_ROT        ;AL = register index, AH = rotation count
                    671:         out     dx,ax                   ;set the VGA's rotator for the rotation
                    672: 
                    673: ; Set up byte-combining mask, in preparation for ORing and letting the VGA's
                    674: ; rotator rotate, assuming the left-hand source byte is in AL and the
                    675: ; right-hand source byte is in AH (true for all cases except left-to-right
                    676: ; whole bytes).
                    677: 
                    678:         mov     cl,ah                   ;CL = rotation (0-7)
                    679:         mov     eax,0000ff00h
                    680:         rol     ax,cl                   ;rotates AX only; upper word of EAX stays 0
                    681:         mov     ulCombineMask,eax
                    682: 
                    683: ; Calculate source edge widths (1 or 2 bytes).
                    684: 
                    685:         sub     edx,edx         ;assume left source width is 1
                    686:         mov     ebx,[edi].xLeft ;dest left edge
                    687:         mov     ecx,[edi].xRight ;dest right edge (non-inclusive)
                    688:         dec     ecx             ;make it inclusive
                    689:         sub     ecx,ebx         ;dest width - 1 = inclusive dest right - dest left
                    690:         mov     eax,[esi].ptl_x ;source left edge
                    691:         add     ecx,eax         ;ECX = right edge of source (inclusive)
                    692:         xor     eax,ecx
                    693:         and     eax,not 07h     ;do the src start and end differ in byte
                    694:                                 ; address bits? (as opposed to intrabyte)
                    695:         jz      short @F        ;no, force 1-wide source
                    696: 
                    697:         mov     al,byte ptr [edi].xLeft
                    698:         mov     ah,byte ptr [esi].ptl_x
                    699:         and     eax,00000707h   ;AL = dest intrabyte start, AH = src intrabyte start
                    700:         cmp     ah,al
                    701:         jb      short @F        ;src intrabyte start below dest's: width stays 1
                    702:         inc     edx             ;left source width is 2
                    703: @@:
                    704:         mov     ulLeftSrcWidthMinus1,edx
                    705: 
                    706:         sub     edx,edx         ;assume right source width is 1
                    707:         mov     eax,[edi].xRight ;dest right edge (non-inclusive)
                    708:         dec     eax             ;make it inclusive
                    709:         and     cl,07h          ;intrabyte address of inclusive source right edge (ECX from above)
                    710:         and     al,07h          ;intrabyte address of inclusive dest right edge
                    711:         cmp     cl,al
                    712:         ja      short @F        ;src intrabyte end above dest's: width stays 1
                    713:         inc     edx             ;right source width is 2
                    714: @@:
                    715:         mov     ulRightSrcWidthMinus1,edx
                    716: 
                    717: ; Set up masks and whole bytes count, and build left/whole/right index
                    718: ; indicating which of those parts are involved in the copy.
                    719: 
                    720:         mov     ebx,[edi].xRight        ;right edge of dest rect (non-inclusive)
                    721:         mov     ecx,ebx
                    722:         and     ecx,0111b               ;intrabyte address of right edge
                    723:         mov     ah,jRightMaskTable[ecx] ;right edge mask
                    724: 
                    725:         mov     esi,[edi].xLeft         ;left edge of dest rect (inclusive)
                    726:         mov     ecx,esi
                    727:         shr     ecx,3                   ;/8 for start offset from left edge
                    728:                                         ; of scan line
                    729:         sub     ebx,esi                 ;width in pixels of dest rect
                    730: 
                    731:         and     esi,0111b               ;intrabyte address of left edge
                    732:         mov     al,jLeftMaskTable[esi]  ;left edge mask
                    733: 
                    734:         dec     ebx                     ;make inclusive on right
                    735:         add     ebx,esi                 ;inclusive width, starting counting at
                    736:                                         ; the beginning of the left edge byte
                    737:         shr     ebx,3                   ;width of copy in bytes touched - 1
                    738:         jnz     short more_than_1_byte  ;more than 1 byte is involved
                    739: 
                    740: ; Only one byte will be affected. Combine first/last masks.
                    741: 
                    742:         and     al,ah                   ;we'll use first byte mask only
                    743:         xor     ah,ah                   ;want last byte mask to be 0 to
                    744:                                         ; indicate right edge not involved
                    745:         inc     ebx                     ;so there's one count to subtract below
                    746:                                         ; if this isn't a whole edge byte
                    747: more_than_1_byte:
                    748: 
                    749: ; If all pixels in the left edge are altered, combine the first byte into the
                    750: ; whole byte count, because we can handle solid edge bytes faster as part of
                    751: ; the whole bytes. Ditto for the right edge.
                    752: 
                    753:         sub     ecx,ecx                 ;edge whole-status accumulator
                    754:         cmp     al,-1                   ;is left edge a whole byte or partial? (CF=1 iff AL != 0ffh)
                    755:         adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
                    756:         sub     ebx,ecx                 ;if left edge partial, deduct it from
                    757:                                         ; the whole bytes count
                    758:         mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
                    759:                                         ; it's partial when pointing to the
                    760:                                         ; whole bytes
                    761:         and     ah,ah                   ;is right edge mask 0, meaning this
                    762:                                         ; copy is only 1 byte wide?
                    763:         jz      short save_masks        ;yes, no need to do anything
                    764:         or      ecx,40h                 ;assume there's a partial right edge (bit 6 -> bit 7 after ADC -> bit 0 after ROL below)
                    765:         cmp     ah,-1                   ;is right edge a whole byte or partial?
                    766:         jnz     short save_masks        ;partial
                    767:                                         ;right edge is whole (AH = 0ffh)
                    768:         inc     ebx                     ;if right edge whole, include it in the
                    769:                                         ; whole bytes count
                    770:         and     ecx,not 40h             ;there's no partial right edge
                    771: save_masks:
                    772:         cmp     ebx,1                   ;do we have any whole bytes? (CF=1 iff EBX = 0)
                    773:         cmc                             ;CF set if whole byte count > 0
                    774:         adc     ecx,ecx                 ;if any whole bytes, set whole bytes
                    775:                                         ; bit in left/whole/right accumulator
                    776:         rol     cl,1                    ;align the left/whole/right bits: bit 2 = left, bit 1 = whole, bit 0 = right
                    777:         mov     ulLWRType,ecx           ;save left/whole/right status
                    778: 
                    779:         mov     byte ptr jLeftMask,al   ;save left and right clip masks
                    780:         mov     byte ptr jRightMask,ah  ;only the low byte of each dword local is written here
                    781:         mov     culWholeBytesWidth,ebx  ;save # of whole bytes
                    782: 
                    783: ; Copy the rectangle in the specified direction.
                    784: 
                    785:         mov     eax,icopydir            ;used unchecked as a dword-table index
                    786:         jmp     SetUpForCopyDirection[eax*4] ;jump (not call): the return address pushed by the caller of copy_rect stays on the stack
                    787: 
                    788: 
                    789: ;***********************************************************************;
                    790: ;
                    791: ; The following routines set up to handle the four possible copy
                    792: ; directions.
                    793: ;
                    794: ;***********************************************************************;
                    795: 
                    796: 
                    797: ;-----------------------------------------------------------------------;
                    798: ; Set-up code for left-to-right, top-to-bottom copies.
                    799: ;-----------------------------------------------------------------------;
                    800: ; Sets scan deltas, thread table pointers and initial dest/source offsets, then jumps through TopToBottomLoopTable.
                    801:         align   4
                    802: left_to_right_top_to_bottom:
                    803: 
                    804:         cld                             ;we'll copy left to right
                    805: 
                    806: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
                    807: ; rotate, assuming the left-hand source byte is in AH and the right-hand source
                    808: ; byte is in AL (true only for left-to-right whole bytes).
                    809: 
                    810:         mov     eax,ulCombineMask       ;0ff00h rotated within 16 bits by copy_rect
                    811:         not     eax                     ;low word = ulCombineMask with its bytes swapped; upper word becomes 0ffffh
                    812:         mov     ulCombineMaskWhole,eax
                    813: 
                    814:         mov     esi,pdsurf
                    815:         mov     eax,[esi].dsurf_lNextScan
                    816:         mov     ulNextScan,eax          ;copy top to bottom
                    817:         sub     eax,culWholeBytesWidth  ;offset from end of one dest whole byte
                    818:         mov     ulWholeScanDelta,eax    ; scan to start of next
                    819:         dec     eax                     ;offset from end of one src whole byte
                    820:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
                    821:                                         ; leading byte used to prime the
                    822:                                         ; rotation pipeline
                    823: 
                    824:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                    825:                                         ; right involvement in operation
                    826:         or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
                    827:         mov     eax,MasterThreadTable[esi*4]
                    828:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                    829:         mov     edx,ulAdapterType
                    830:         shl     edx,ADAPTER_FIELD_SHIFT
                    831:         or      esi,edx                 ;factor adapter type into the index
                    832:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                    833:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                    834: 
                    835:         mov     ulCurrentJustification,JustifyTop ;copy top to bottom
                    836: 
                    837:         mov     esi,prcldest
                    838:         mov     eax,[esi].yBottom
                    839:         mov     ulLastDestScan,eax      ;end at bottom of dest copy rect
                    840:         mov     eax,[esi].yTop
                    841:         mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
                    842:         mul     ulNextScan              ;offset in bitmap of top dest rect scan (MUL overwrites EDX; it is reloaded next)
                    843:         mov     edx,[esi].xLeft
                    844:         shr     edx,3                   ;byte X address
                    845:         add     eax,edx                 ;offset in bitmap of first dest byte
                    846:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                    847:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    848:                                         ; byte, unless the left edge is a whole
                    849:                                         ; byte and is thus part of the whole
                    850:                                         ; bytes already
                    851:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
                    852:         add     eax,culWholeBytesWidth  ;point to the right edge
                    853:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                    854: 
                    855:         mov     esi,pptlsrc
                    856:         mov     eax,[esi].ptl_y
                    857:         mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
                    858:         mul     ulNextScan              ;offset in bitmap of top source rect scan (MUL overwrites EDX; it is reloaded next)
                    859:         mov     edx,[esi].ptl_x
                    860:         shr     edx,3                   ;byte X address
                    861:         add     eax,edx                 ;offset in bitmap of first source byte
                    862:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                    863:         add     eax,ulLeftSrcWidthMinus1 ;whole bytes src start = left edge src
                    864:         dec     eax                      ; + (left src width - 1) - 1
                    865:         add     eax,ulLeftEdgeAdjust     ; + ulLeftEdgeAdjust: the last
                    866:                                          ; (rightmost) left edge src byte when
                    867:                                          ; the left dest byte is partial, one
                    868:                                          ; byte before that when it is solid
                    869:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
                    870:         add     eax,culWholeBytesWidth  ;point to the right edge
                    871:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts,
                    872:                                         ; because the whole bytes and the right
                    873:                                         ; source edge share a byte, and we
                    874:                                         ; always point to the leftmost byte in
                    875:                                         ; the right source edge
                    876: 
                    877: ; Branch to the appropriate top-to-bottom bank enumeration loop.
                    878: 
                    879:         mov     eax,ulAdapterType       ;banking type cached by the entry code
                    880:         jmp     TopToBottomLoopTable[eax*4]
                    881: 
                    882: 
                    883: ;-----------------------------------------------------------------------;
                    884: ; Set-up code for right-to-left, top-to-bottom copies.
                    885: ;-----------------------------------------------------------------------;
                    886: 
                    887:         align   4
                    888: right_to_left_top_to_bottom:
                    889: 
                    890:         std                             ;we'll copy right to left
                    891: 
                    892: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
                    893: ; rotate, assuming the left-hand source byte is in AL and the right-hand source
                    894: ; byte is in AH (always true except for left-to-right whole bytes).
                    895: 
                    896:         mov     eax,ulCombineMask
                    897:         mov     ulCombineMaskWhole,eax
                    898: 
                    899:         mov     esi,pdsurf
                    900:         mov     eax,[esi].dsurf_lNextScan
                    901:         mov     ulNextScan,eax          ;copy top to bottom
                    902:         add     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
                    903:         mov     ulWholeScanDelta,eax    ; to start of next, given that we're
                    904:                                         ; copying one way and going scan-to-
                    905:                                         ; scan the other way
                    906:         inc     eax                     ;offset from end of one src whole byte
                    907:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
                    908:                                         ; leading byte used to prime the
                    909:                                         ; rotation pipeline
                    910: 
                    911:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                    912:                                         ; right involvement in operation
                    913:                                         ;leave left-to-right field cleared, so
                    914:                                         ; we look up right-to-left entries
                    915:         mov     eax,MasterThreadTable[esi*4]
                    916:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                    917:         mov     edx,ulAdapterType
                    918:         shl     edx,ADAPTER_FIELD_SHIFT
                    919:         or      esi,edx                 ;factor adapter type into the index
                    920:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                    921:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                    922: 
                    923:         mov     ulCurrentJustification,JustifyTop ;copy top to bottom
                    924: 
                    925:         mov     esi,prcldest
                    926:         mov     eax,[esi].yBottom
                    927:         mov     ulLastDestScan,eax      ;end at bottom of dest copy rect
                    928:         mov     eax,[esi].yTop
                    929:         mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
                    930:         mul     ulNextScan              ;offset in bitmap of top dest rect scan
                    931:         mov     edx,[esi].xLeft
                    932:         shr     edx,3                   ;byte X address
                    933:         add     eax,edx                 ;offset in bitmap of first dest byte
                    934:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                    935:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                    936:                                         ; byte, unless the left edge is a whole
                    937:                                         ; byte and is thus part of the whole
                    938:                                         ; bytes already
                    939:         add     eax,culWholeBytesWidth  ;point to the right edge
                    940:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                    941:         dec     eax                     ;back up to the last whole byte
                    942:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
                    943: 
                    944:         mov     esi,pptlsrc
                    945:         mov     eax,[esi].ptl_y
                    946:         mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
                    947:         mul     ulNextScan              ;offset in bitmap of top dest rect scan
                    948:         mov     edx,[esi].ptl_x
                    949:         shr     edx,3                   ;byte X address
                    950:         add     eax,edx                 ;offset in bitmap of first source byte
                    951:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                    952:         add     eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
                    953:         dec     eax                      ; last (leftmost) left edge byte, so
                    954:         add     eax,ulLeftEdgeAdjust     ; add a byte if the left edge is 2
                    955:                                          ; wide, except when the left dest byte
                    956:                                          ; is solid so the left edge is part of
                    957:                                          ; the whole bytes
                    958:         add     eax,culWholeBytesWidth  ;point to the right edge of the whole
                    959:                                         ; src bytes, accounting for the extra
                    960:                                         ; source byte needed to prime the
                    961:                                         ; rotation pipeline
                    962:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
                    963:         mov     ulRightEdgeSrc,eax      ;that's also where the right src edge
                    964:                                         ; starts, because the whole bytes and
                    965:                                         ; the right source edge share a byte,
                    966:                                         ; and we always point to the leftmost
                    967:                                         ; byte in the right source edge
                    968: 
                    969: ; Branch to the appropriate top-to-bottom bank enumeration loop.
                    970: 
                    971:         mov     eax,ulAdapterType
                    972:         jmp     TopToBottomLoopTable[eax*4]
                    973: 
                    974: 
                    975: ;-----------------------------------------------------------------------;
                    976: ; Set-up code for left-to-right, bottom-to-top copies.
                    977: ;-----------------------------------------------------------------------;
                    978: 
                    979:         align   4
                    980: left_to_right_bottom_to_top:
                    981: ; Clears DF, sets up scan deltas, thread pointers, and start offsets for a bottom-up copy, then jumps via BottomToTopLoopTable.
                    982:         cld                             ;we'll copy left to right
                    983: 
                    984: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
                    985: ; rotate, assuming the left-hand source byte is in AH and the right-hand source
                    986: ; byte is in AL (true only for left-to-right whole bytes).
                    987: 
                    988:         mov     eax,ulCombineMask
                    989:         not     eax                     ;whole-byte mask is the complement of
                    990:         mov     ulCombineMaskWhole,eax  ; ulCombineMask in this direction
                    991: 
                    992:         mov     edi,pdsurf
                    993:         mov     eax,[edi].dsurf_lNextScan
                    994:         neg     eax                     ;negate the surface scan width, because
                    995:         mov     ulNextScan,eax          ; we copy bottom to top
                    996:         sub     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
                    997:         mov     ulWholeScanDelta,eax    ; to start of next, given that we're
                    998:                                         ; copying one way and going scan-to-
                    999:                                         ; scan the other way
                   1000:         dec     eax                     ;offset from end of one src whole byte
                   1001:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
                   1002:                                         ; leading byte used to prime the
                   1003:                                         ; rotation pipeline
                   1004: 
                   1005:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                   1006:                                         ; right involvement in operation
                   1007:         or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
                   1008:         mov     eax,MasterThreadTable[esi*4] ;fetch the 4-byte table entry
                   1009:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                   1010:         mov     edx,ulAdapterType
                   1011:         shl     edx,ADAPTER_FIELD_SHIFT ;move adapter type to its index field
                   1012:         or      esi,edx                 ;factor adapter type into the index
                   1013:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                   1014:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                   1015: 
                   1016:         mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
                   1017: 
                   1018:         mov     esi,prcldest
                   1019:         mov     edx,[esi].yTop
                   1020:         mov     ulLastDestScan,edx      ;end at top of dest copy rect
                   1021:         mov     eax,[esi].yBottom
                   1022:         dec     eax                     ;rectangle definition is non-inclusive,
                   1023:                                         ; so advance to first scan we'll copy
                   1024:         sub     edx,eax                 ;-(offset from rect top to bottom)
                   1025:         push    edx                     ;remember for use with source (the MUL
                   1026:         mov     ulCurrentDestScan,eax   ; below overwrites EDX); start at bottom of dest copy rect
                   1027:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
                   1028:                                         ; scan (first scan to which to copy); uses the surface's un-negated scan width, not ulNextScan
                   1029:         mov     edx,[esi].xLeft
                   1030:         shr     edx,3                   ;byte X address
                   1031:         add     eax,edx                 ;offset in bitmap of first dest byte
                   1032:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                   1033:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                   1034:                                         ; byte, unless the left edge is a whole
                   1035:                                         ; byte and is thus part of the whole
                   1036:                                         ; bytes already
                   1037:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
                   1038:         add     eax,culWholeBytesWidth  ;point to the right edge
                   1039:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                   1040: 
                   1041:         mov     esi,pptlsrc
                   1042:         mov     eax,[esi].ptl_y
                   1043:         pop     edx                     ;retrieve -(offset from top to bottom)
                   1044:         sub     eax,edx                 ;advance to bottom of source rect
                   1045:                                         ; (inclusive; this is first scan from
                   1046:                                         ; which to copy)
                   1047:         mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
                   1048:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source rect
                   1049:                                         ; scan
                   1050:         mov     edx,[esi].ptl_x
                   1051:         shr     edx,3                   ;byte X address
                   1052:         add     eax,edx                 ;offset in bitmap of first source byte
                   1053:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                   1054:         add     eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
                   1055:         dec     eax                      ; last (leftmost) left edge byte, so
                   1056:         add     eax,ulLeftEdgeAdjust     ; add a byte if the left edge is 2
                   1057:                                          ; wide, except when the left dest byte
                   1058:                                          ; is solid so the left edge is part of
                   1059:                                          ; the whole bytes
                   1060:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
                   1061:         add     eax,culWholeBytesWidth  ;point to the right edge
                   1062:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts,
                   1063:                                         ; because the whole bytes and the right
                   1064:                                         ; source edge share a byte, and we
                   1065:                                         ; always point to the leftmost byte in
                   1066:                                         ; the right source edge
                   1067: 
                   1068: ; Branch to the appropriate bottom-to-top bank enumeration loop.
                   1069: 
                   1070:         mov     eax,ulAdapterType       ;adapter type indexes the dword jump table
                   1071:         jmp     BottomToTopLoopTable[eax*4]
                   1072: 
                   1073: 
                   1074: ;-----------------------------------------------------------------------;
                   1075: ; Set-up code for right-to-left, bottom-to-top copies.
                   1076: ;-----------------------------------------------------------------------;
                   1077: 
                   1078:         align   4
                   1079: right_to_left_bottom_to_top:
                   1080: ; Sets DF, sets up scan deltas, thread pointers, and start offsets for a bottom-up copy, then jumps via BottomToTopLoopTable.
                   1081:         std                             ;we'll copy right to left
                   1082: 
                   1083: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
                   1084: ; rotate, assuming the left-hand source byte is in AL and the right-hand source
                   1085: ; byte is in AH (always true except for left-to-right whole bytes).
                   1086: 
                   1087:         mov     eax,ulCombineMask
                   1088:         mov     ulCombineMaskWhole,eax  ;whole-byte mask is ulCombineMask as-is
                   1089: 
                   1090:         mov     edi,pdsurf
                   1091:         mov     eax,[edi].dsurf_lNextScan
                   1092:         neg     eax                     ;negate the surface scan width, because
                   1093:         mov     ulNextScan,eax          ; we copy bottom to top
                   1094:         add     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
                   1095:         mov     ulWholeScanDelta,eax    ; to start of next
                   1096:         inc     eax                     ;offset from end of one src whole byte
                   1097:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
                   1098:                                         ; leading byte used to prime the
                   1099:                                         ; rotation pipeline
                   1100: 
                   1101:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
                   1102:                                         ; right involvement in operation
                   1103:                                         ;leave left-to-right field cleared, so
                   1104:                                         ; we look up right-to-left entries
                   1105:         mov     eax,MasterThreadTable[esi*4] ;fetch the 4-byte table entry
                   1106:         mov     pCurrentThread,eax      ;threading when no buffering is needed
                   1107:         mov     edx,ulAdapterType
                   1108:         shl     edx,ADAPTER_FIELD_SHIFT ;move adapter type to its index field
                   1109:         or      esi,edx                 ;factor adapter type into the index
                   1110:         mov     eax,MasterThreadTableViaBuffer[esi*4]
                   1111:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
                   1112: 
                   1113:         mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
                   1114: 
                   1115:         mov     esi,prcldest
                   1116:         mov     edx,[esi].yTop
                   1117:         mov     ulLastDestScan,edx      ;end at top of dest copy rect
                   1118:         mov     eax,[esi].yBottom
                   1119:         dec     eax                     ;rectangle definition is non-inclusive,
                   1120:                                         ; so advance to first scan we'll copy
                   1121:         sub     edx,eax                 ;-(offset from rect top to bottom)
                   1122:         push    edx                     ;remember for use with source (the MUL
                   1123:         mov     ulCurrentDestScan,eax   ; below overwrites EDX); start at bottom of dest copy rect
                   1124:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
                   1125:                                         ; scan (first scan to which to copy); uses the surface's un-negated scan width, not ulNextScan
                   1126:         mov     edx,[esi].xLeft
                   1127:         shr     edx,3                   ;byte X address
                   1128:         add     eax,edx                 ;offset in bitmap of first dest byte
                   1129:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
                   1130:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
                   1131:                                         ; byte, unless the left edge is a whole
                   1132:                                         ; byte and is thus part of the whole
                   1133:                                         ; bytes already
                   1134:         add     eax,culWholeBytesWidth  ;point to the right edge
                   1135:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
                   1136:         dec     eax                     ;back up to the last whole byte
                   1137:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
                   1138: 
                   1139:         mov     esi,pptlsrc
                   1140:         mov     eax,[esi].ptl_y
                   1141:         pop     edx                     ;retrieve -(offset from top to bottom)
                   1142:         sub     eax,edx                 ;advance to bottom of source rect
                   1143:                                         ; (inclusive; this is first scan from
                   1144:                                         ; which to copy)
                   1145:         mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
                   1146:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source rect
                   1147:                                         ; scan
                   1148:         mov     edx,[esi].ptl_x
                   1149:         shr     edx,3                   ;byte X address
                   1150:         add     eax,edx                 ;offset in bitmap of first source byte
                   1151:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
                   1152:         add     eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
                   1153:         dec     eax                      ; last (leftmost) left edge byte, so
                   1154:         add     eax,ulLeftEdgeAdjust     ; add a byte if the left edge is 2
                   1155:                                          ; wide, except when the left dest byte
                   1156:                                          ; is solid so the left edge is part of
                   1157:                                          ; the whole bytes
                   1158:         add     eax,culWholeBytesWidth  ;point to the right edge of the whole
                   1159:                                         ; src bytes, accounting for the extra
                   1160:                                         ; source byte needed to prime the
                   1161:                                         ; rotation pipeline
                   1162:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
                   1163:         mov     ulRightEdgeSrc,eax      ;that's also where the right src edge
                   1164:                                         ; starts, because the whole bytes and
                   1165:                                         ; the right source edge share a byte,
                   1166:                                         ; and we always point to the leftmost
                   1167:                                         ; byte in the right source edge
                   1168: 
                   1169: ; Branch to the appropriate bottom-to-top bank enumeration loop.
                   1170: 
                   1171:         mov     eax,ulAdapterType       ;adapter type indexes the dword jump table
                   1172:         jmp     BottomToTopLoopTable[eax*4]
                   1173: 
                   1174: 
                   1175: ;***********************************************************************;
                   1176: ;
                   1177: ; The following routines are the banking loops.
                   1178: ;
                   1179: ;***********************************************************************;
                   1180: 
                   1181: 
                   1182: ;-----------------------------------------------------------------------;
                   1183: ; Banking for 2 R/W and unbanked adapters, top to bottom.
                   1184: ;-----------------------------------------------------------------------;
                   1185:         align   4
                   1186: top_to_bottom_2RW:
                   1187: ; Maps the banks holding the current source and dest scans, then copies block by block until ulLastDestScan is reached.
                   1188: ; We're going top to bottom. Map in the source and dest, top-justified.
                   1189: 
                   1190:         mov     ebx,pdsurf
                   1191:         mov     edx,ulCurrentSrcScan
                   1192:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
                   1193:                                                      ; current source bank?
                   1194:         jl      short top_2RW_map_init_src_bank      ;yes, map in proper bank
                   1195:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
                   1196:                                                         ; current source bank?
                   1197:         jl      short top_2RW_init_src_bank_mapped
                   1198:                                                 ;no, proper bank already mapped
                   1199: top_2RW_map_init_src_bank:
                   1200: 
                   1201: ; Map bank containing the top source scan line into source window.
                   1202: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1203: 
                   1204:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1205:                 <ebx,edx,JustifyTop,MapSourceBank>
                   1206: 
                   1207: top_2RW_init_src_bank_mapped:
                   1208: 
                   1209:         mov     edx,ulCurrentDestScan
                   1210:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
                   1211:                                                      ; current dest bank?
                   1212:         jl      short top_2RW_map_init_dest_bank     ;yes, map in proper bank
                   1213:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
                   1214:                                                         ; current dest bank?
                   1215:         jl      short top_2RW_init_dest_bank_mapped
                   1216:                                                 ;no, proper bank already mapped
                   1217: top_2RW_map_init_dest_bank:
                   1218: 
                   1219: ; Map bank containing the top dest scan line into dest window (MapDestBank).
                   1220: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1221: 
                   1222:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1223:                 <ebx,edx,JustifyTop,MapDestBank>
                   1224: 
                   1225: top_2RW_init_dest_bank_mapped:
                   1226: 
                   1227: ; Bank-by-bank top-to-bottom copy loop.
                   1228: 
                   1229: top_2RW_bank_loop:
                   1230: 
                   1231: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1232: 
                   1233:         mov     edx,ulLastDestScan
                   1234:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
                   1235:         jl      short @F        ;copy rectangle bottom is in this bank
                   1236:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
                   1237:                                                         ; of bank, at least
                   1238: @@:
                   1239:         sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1240:                                         ; the dest bank
                   1241:         mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
                   1242:         sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank
                   1243: 
                   1244:         cmp     edx,eax         ;take the smaller of the two scan counts
                   1245:         jb      short @F        ;source bank isn't limiting (unsigned compare; counts presumed positive - TODO confirm)
                   1246:         mov     edx,eax         ;source bank is limiting
                   1247: @@:
                   1248:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1249: 
                   1250: ; We're ready to copy this block.
                   1251: 
                   1252:         THREAD_AND_START
                   1253: 
                   1254: ; Any more scans to copy?
                   1255: 
                   1256:         mov     eax,ulCurrentDestScan
                   1257:         mov     esi,ulBlockHeight       ;kept in ESI for the source advance below
                   1258:         add     eax,esi                 ;we've copied to dest up to here
                   1259:         cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
                   1260:         jz      short top_2RW_done      ;yes, we're done
                   1261:         mov     ulCurrentDestScan,eax
                   1262: 
                   1263: ; Now advance either or both banks, as needed.
                   1264: 
                   1265:         mov     ebx,pdsurf              ;reload surface pointer (presumably not preserved by THREAD_AND_START - verify)
                   1266:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
                   1267:                                                         ; current dest bank?
                   1268:         jl      short top_2RW_dest_bank_mapped    ;no, proper bank still mapped
                   1269: 
                   1270: ; Map bank containing the current dest scan line into dest window (MapDestBank).
                   1271: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1272: 
                   1273:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1274:                 <ebx,eax,JustifyTop,MapDestBank>
                   1275: 
                   1276: top_2RW_dest_bank_mapped:
                   1277: 
                   1278:         add     esi,ulCurrentSrcScan    ;we've copied from source up to here
                   1279:         mov     ulCurrentSrcScan,esi
                   1280: 
                   1281:         cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
                   1282:                                                         ; current src bank?
                   1283:         jl      short top_2RW_src_bank_mapped     ;no, proper bank still mapped
                   1284: 
                   1285: ; Map bank containing the current source scan line into source window.
                   1286: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1287: 
                   1288:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1289:                 <ebx,esi,JustifyTop,MapSourceBank>
                   1290: 
                   1291: top_2RW_src_bank_mapped:
                   1292: 
                   1293:         jmp     top_2RW_bank_loop       ;go copy the next block
                   1294: 
                   1295: top_2RW_done:
                   1296:         PLAIN_RET
                   1297: 
                   1298: 
                   1299: ;-----------------------------------------------------------------------;
                   1300: ; Banking for 2 R/W and unbanked adapters, bottom to top.
                   1301: ;-----------------------------------------------------------------------;
                   1302:         align   4
                   1303: bottom_to_top_2RW:
                   1304: 
                   1305: ; We're going bottom to top. Map in the source and dest, bottom-justified.
                   1306: 
                   1307:         mov     ebx,pdsurf
                   1308:         mov     edx,ulCurrentSrcScan
                   1309:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
                   1310:                                                      ; current source bank?
                   1311:         jl      short bot_2RW_map_init_src_bank      ;yes, map in proper bank
                   1312:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
                   1313:                                                         ; than current src bank?
                   1314:         jl      short bot_2RW_init_src_bank_mapped
                   1315:                                                 ;no, proper bank already mapped
                   1316: bot_2RW_map_init_src_bank:
                   1317: 
                   1318: ; Map bank containing the bottom source scan line into source window.
                   1319: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1320: 
                   1321:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1322:                 <ebx,edx,JustifyBottom,MapSourceBank>
                   1323: 
                   1324: bot_2RW_init_src_bank_mapped:
                   1325: 
                   1326:         mov     edx,ulCurrentDestScan
                   1327:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
                   1328:                                                      ; current dest bank?
                   1329:         jl      short bot_2RW_map_init_dest_bank     ;yes, map in proper bank
                   1330:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
                   1331:                                                         ; than current dst bank?
                   1332:         jl      short bot_2RW_init_dest_bank_mapped
                   1333:                                                 ;no, proper bank already mapped
                   1334: bot_2RW_map_init_dest_bank:
                   1335: 
                   1336: ; Map bank containing the bottom dest scan line into source window.
                   1337: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1338: 
                   1339:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1340:                 <ebx,edx,JustifyBottom,MapDestBank>
                   1341: 
                   1342: bot_2RW_init_dest_bank_mapped:
                   1343: 
                   1344: ; Bank-by-bank bottom-to-top copy loop.
                   1345: 
                   1346: bot_2RW_bank_loop:
                   1347: 
                   1348: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1349: 
                   1350:         mov     edx,ulLastDestScan
                   1351:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
                   1352:         jg      short @F        ;copy rectangle top is in this bank
                   1353:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
                   1354:                                                      ; of bank, at least
                   1355: @@:
                   1356:         neg     edx
                   1357:         add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1358:         inc     edx                     ; the dest bank
                   1359: 
                   1360:         mov     eax,ulCurrentSrcScan
                   1361:         sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
                   1362:         inc     eax                     ;# of scans we can do in the src bank
                   1363: 
                   1364:         cmp     edx,eax
                   1365:         jb      short @F        ;source bank isn't limiting
                   1366:         mov     edx,eax         ;source bank is limiting
                   1367: @@:
                   1368:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1369: 
                   1370: ; We're ready to copy this block.
                   1371: 
                   1372:         THREAD_AND_START
                   1373: 
                   1374: ; Any more scans to copy?
                   1375: 
                   1376:         mov     eax,ulCurrentDestScan
                   1377:         mov     esi,ulBlockHeight
                   1378:         sub     eax,esi                 ;we've copied to dest up to here
                   1379:         cmp     ulLastDestScan,eax      ;are we past the dest rect top?
                   1380:         jg      short bot_2RW_done      ;yes, we're done
                   1381:         mov     ulCurrentDestScan,eax
                   1382: 
                   1383: ; Now advance either or both banks, as needed.
                   1384: 
                   1385:         mov     ebx,pdsurf
                   1386:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
                   1387:                                                      ; current dest bank?
                   1388:         jge     short bot_2RW_dest_bank_mapped    ;no, proper bank still mapped
                   1389: 
                   1390: ; Map bank containing the current dest scan line into source window.
                   1391: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1392: 
                   1393:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1394:                 <ebx,eax,JustifyBottom,MapDestBank>
                   1395: 
                   1396: bot_2RW_dest_bank_mapped:
                   1397: 
                   1398:         mov     eax,ulCurrentSrcScan
                   1399:         sub     eax,esi         ;we've copied from source up to here
                   1400:         mov     ulCurrentSrcScan,eax
                   1401: 
                   1402:         cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
                   1403:                                                      ; current src bank?
                   1404:         jge     short bot_2RW_src_bank_mapped     ;no, proper bank still mapped
                   1405: 
                   1406: ; Map bank containing the current source scan line into source window.
                   1407: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1408: 
                   1409:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1410:                 <ebx,eax,JustifyBottom,MapSourceBank>
                   1411: 
                   1412: bot_2RW_src_bank_mapped:
                   1413: 
                   1414:         jmp     bot_2RW_bank_loop
                   1415: 
                   1416: bot_2RW_done:
                   1417:         PLAIN_RET
                   1418: 
                   1419: 
                   1420: ;-----------------------------------------------------------------------;
                   1421: ; Banking for 1R/1W adapters, top to bottom.
                         ;
                         ; Copies the rectangle in blocks, advancing ulCurrentSrcScan and
                         ; ulCurrentDestScan downward by ulBlockHeight after each block, until
                         ; the advanced dest scan equals ulLastDestScan. Each block is limited to
                         ; the scans remaining in the currently mapped source and dest banks
                         ; (dsurf_rcl2WindowClipS / dsurf_rcl2WindowClipD); banks are remapped,
                         ; top-justified, through dsurf_pfnBankControl2Window whenever the
                         ; current scan falls outside the mapped bank's clip rectangle.
                         ; A block whose source and dest bank numbers differ
                         ; (dsurf_ulWindowBank vs. dsurf_ulWindowBank[4]) is started with
                         ; pCurrentThreadViaBuffer; otherwise THREAD_AND_START is used with no
                         ; arguments.
                         ;
                         ; NOTE(review): ptrCall, THREAD_AND_START and PLAIN_RET are macros
                         ; defined elsewhere; their exact register usage is not visible here.
                   1422: ;-----------------------------------------------------------------------;
                   1423:         align   4
                   1424: top_to_bottom_1R1W:
                   1425: 
                   1426: ; We're going top to bottom. Map in the source and dest, top-justified.
                   1427: 
                   1428:         mov     ebx,pdsurf
                   1429:         mov     edx,ulCurrentSrcScan
                   1430:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
                   1431:                                                      ; current source bank?
                   1432:         jl      short top_1R1W_map_init_src_bank      ;yes, map in proper bank
                   1433:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
                   1434:                                                         ; current source bank?
                   1435:         jl      short top_1R1W_init_src_bank_mapped
                   1436:                                                 ;no, proper bank already mapped
                   1437: top_1R1W_map_init_src_bank:
                   1438: 
                   1439: ; Map bank containing the top source scan line into source window.
                   1440: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1441: 
                   1442:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1443:                 <ebx,edx,JustifyTop,MapSourceBank>
                   1444: 
                   1445: top_1R1W_init_src_bank_mapped:
                   1446: 
                   1447:         mov     edx,ulCurrentDestScan
                   1448:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
                   1449:                                                      ; current dest bank?
                   1450:         jl      short top_1R1W_map_init_dest_bank     ;yes, map in proper bank
                   1451:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
                   1452:                                                         ; current dest bank?
                   1453:         jl      short top_1R1W_init_dest_bank_mapped
                   1454:                                                 ;no, proper bank already mapped
                   1455: top_1R1W_map_init_dest_bank:
                   1456: 
                   1457: ; Map bank containing the top dest scan line into dest window.
                   1458: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1459: 
                   1460:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1461:                 <ebx,edx,JustifyTop,MapDestBank>
                   1462: 
                   1463: top_1R1W_init_dest_bank_mapped:
                   1464: 
                   1465: ; Bank-by-bank top-to-bottom copy loop.
                         ; EBX = pdsurf on every entry (loaded above, or reloaded before the
                         ; bank-advance code at the bottom of the loop).
                   1466: 
                   1467: top_1R1W_bank_loop:
                   1468: 
                   1469: ; Decide how far we can go before we run out of bank or rectangle to copy.
                         ; ulBlockHeight = min( min(ulLastDestScan, dest bank yBottom)
                         ;                          - ulCurrentDestScan,
                         ;                      source bank yBottom - ulCurrentSrcScan )
                   1470: 
                   1471:         mov     edx,ulLastDestScan
                   1472:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
                   1473:         jl      short @F        ;copy rectangle bottom is in this bank
                   1474:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
                   1475:                                                         ; of bank, at least
                   1476: @@:
                   1477:         sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1478:                                         ; the dest bank
                   1479:         mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
                   1480:         sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank
                   1481: 
                   1482:         cmp     edx,eax         ;unsigned compare of the two scan counts
                   1483:         jb      short @F        ;source bank isn't limiting
                   1484:         mov     edx,eax         ;source bank is limiting
                   1485: @@:
                   1486:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1487: 
                   1488: ; We're ready to copy this block.
                   1489: ; Select different threading, depending on whether the source and destination
                   1490: ; are currently in the same bank; we can do edges faster if they are.
                   1491: 
                   1492:         mov     eax,[ebx].dsurf_ulWindowBank    ;full dword compare of the
                   1493:         cmp     eax,[ebx].dsurf_ulWindowBank[4] ; two window bank numbers
                   1494:         jz      short top_1R1W_copy_same_bank
                   1495: 
                   1496: ; Source and dest are currently in different banks, must go through temp buffer.
                         ; NOTE(review): the second macro argument is presumably the label at
                         ; which execution continues after the block is copied - confirm against
                         ; the THREAD_AND_START definition.
                   1497: 
                   1498:         THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans
                   1499: 
                   1500: ; Source and dest are currently in the same bank.
                   1501: 
                   1502:         align   4
                   1503: top_1R1W_copy_same_bank:
                   1504:         THREAD_AND_START
                   1505: 
                   1506: ; Any more scans to copy?
                   1507: 
                   1508: top_1R1W_check_more_scans:
                   1509: 
                   1510:         mov     eax,ulCurrentDestScan
                   1511:         mov     esi,ulBlockHeight       ;ESI = scans just copied; reused below
                   1512:         add     eax,esi                 ;we've copied to dest up to here
                   1513:         cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
                   1514:         jz      short top_1R1W_done     ;yes, we're done
                   1515:         mov     ulCurrentDestScan,eax
                   1516: 
                   1517: ; Now advance either or both banks, as needed.
                   1518: 
                   1519:         mov     ebx,pdsurf
                   1520:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
                   1521:                                                         ; current dest bank?
                   1522:         jl      short top_1R1W_dest_bank_mapped   ;no, proper bank still mapped
                   1523: 
                   1524: ; Map bank containing the current dest scan line into dest window.
                   1525: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1526: 
                   1527:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1528:                 <ebx,eax,JustifyTop,MapDestBank>
                   1529: 
                   1530: top_1R1W_dest_bank_mapped:
                   1531: 
                   1532:         add     esi,ulCurrentSrcScan    ;we've copied from source up to here
                   1533:         mov     ulCurrentSrcScan,esi    ;ESI = new current source scan
                   1534: 
                   1535:         cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
                   1536:                                                         ; current src bank?
                   1537:         jl      short top_1R1W_src_bank_mapped     ;no, proper bank still mapped
                   1538: 
                   1539: ; Map bank containing the current source scan line into source window.
                   1540: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1541: 
                   1542:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1543:                 <ebx,esi,JustifyTop,MapSourceBank>
                   1544: 
                   1545: top_1R1W_src_bank_mapped:
                   1546: 
                   1547:         jmp     top_1R1W_bank_loop      ;go copy the next block
                   1548: 
                   1549: top_1R1W_done:
                   1550:         PLAIN_RET
                   1551: 
                   1552: 
                   1553: ;-----------------------------------------------------------------------;
                   1554: ; Banking for 1R/1W adapters, bottom to top.
                         ;
                         ; Copies the rectangle in blocks, moving ulCurrentSrcScan and
                         ; ulCurrentDestScan upward by ulBlockHeight after each block, until the
                         ; next dest scan would be above ulLastDestScan. Each block is limited to
                         ; the scans remaining in the currently mapped source and dest banks
                         ; (dsurf_rcl2WindowClipS / dsurf_rcl2WindowClipD); banks are remapped,
                         ; bottom-justified, through dsurf_pfnBankControl2Window whenever the
                         ; current scan falls outside the mapped bank's clip rectangle.
                         ; A block whose source and dest bank numbers differ
                         ; (dsurf_ulWindowBank vs. dsurf_ulWindowBank[4]) is started with
                         ; pCurrentThreadViaBuffer; otherwise THREAD_AND_START is used with no
                         ; arguments.
                         ;
                         ; NOTE(review): ptrCall, THREAD_AND_START and PLAIN_RET are macros
                         ; defined elsewhere; their exact register usage is not visible here.
                   1555: ;-----------------------------------------------------------------------;
                   1556:         align   4
                   1557: bottom_to_top_1R1W:
                   1558: 
                   1559: ; We're going bottom to top. Map in the source and dest, bottom-justified.
                   1560: 
                   1561:         mov     ebx,pdsurf
                   1562:         mov     edx,ulCurrentSrcScan
                   1563:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
                   1564:                                                      ; current source bank?
                   1565:         jl      short bot_1R1W_map_init_src_bank      ;yes, map in proper bank
                   1566:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
                   1567:                                                         ; than current src bank?
                   1568:         jl      short bot_1R1W_init_src_bank_mapped
                   1569:                                                 ;no, proper bank already mapped
                   1570: bot_1R1W_map_init_src_bank:
                   1571: 
                   1572: ; Map bank containing the bottom source scan line into source window.
                   1573: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1574: 
                   1575:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1576:                 <ebx,edx,JustifyBottom,MapSourceBank>
                   1577: 
                   1578: bot_1R1W_init_src_bank_mapped:
                   1579: 
                   1580:         mov     edx,ulCurrentDestScan
                   1581:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
                   1582:                                                      ; current dest bank?
                   1583:         jl      short bot_1R1W_map_init_dest_bank     ;yes, map in proper bank
                   1584:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
                   1585:                                                         ; than current dst bank?
                   1586:         jl      short bot_1R1W_init_dest_bank_mapped
                   1587:                                                 ;no, proper bank already mapped
                   1588: bot_1R1W_map_init_dest_bank:
                   1589: 
                   1590: ; Map bank containing the bottom dest scan line into dest window.
                   1591: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1592: 
                   1593:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1594:                 <ebx,edx,JustifyBottom,MapDestBank>
                   1595: 
                   1596: bot_1R1W_init_dest_bank_mapped:
                   1597: 
                   1598: ; Bank-by-bank bottom-to-top copy loop.
                         ; EBX = pdsurf on every entry (loaded above, or reloaded before the
                         ; bank-advance code at the bottom of the loop).
                   1599: 
                   1600: bot_1R1W_bank_loop:
                   1601: 
                   1602: ; Decide how far we can go before we run out of bank or rectangle to copy.
                         ; ulBlockHeight = min( ulCurrentDestScan
                         ;                          - max(ulLastDestScan, dest bank yTop) + 1,
                         ;                      ulCurrentSrcScan - source bank yTop + 1 )
                   1603: 
                   1604:         mov     edx,ulLastDestScan
                   1605:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
                   1606:         jg      short @F        ;copy rectangle top is in this bank
                   1607:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
                   1608:                                                      ; of bank, at least
                   1609: @@:
                   1610:         neg     edx
                   1611:         add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                   1612:         inc     edx                     ; the dest bank
                   1613: 
                   1614:         mov     eax,ulCurrentSrcScan
                   1615:         sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
                   1616:         inc     eax                     ;# of scans we can do in the src bank
                   1617: 
                   1618:         cmp     edx,eax         ;unsigned compare of the two scan counts
                   1619:         jb      short @F        ;source bank isn't limiting
                   1620:         mov     edx,eax         ;source bank is limiting
                   1621: @@:
                   1622:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
                   1623: 
                   1624: ; We're ready to copy this block.
                   1625: ; Select different threading, depending on whether the source and destination
                   1626: ; are currently in the same bank; we can do edges faster if they are.
                         ; The two window bank numbers are compared as full dwords, exactly as in
                         ; top_to_bottom_1R1W; comparing only the low byte would treat banks that
                         ; differ only in their upper bits as the same bank. EAX is free here
                         ; (its scan count has already been folded into ulBlockHeight).
                   1627: 
                   1628:         mov     eax,[ebx].dsurf_ulWindowBank
                   1629:         cmp     eax,[ebx].dsurf_ulWindowBank[4]
                   1630:         jz      short bot_1R1W_copy_same_bank
                   1631: 
                   1632: ; Source and dest are currently in different banks, must go through temp buffer.
                         ; NOTE(review): the second macro argument is presumably the label at
                         ; which execution continues after the block is copied - confirm against
                         ; the THREAD_AND_START definition.
                   1633: 
                   1634:         THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans
                   1635: 
                   1636: ; Source and dest are currently in the same bank.
                   1637: 
                   1638:         align   4
                   1639: bot_1R1W_copy_same_bank:
                   1640:         THREAD_AND_START
                   1641: 
                   1642: ; Any more scans to copy?
                   1643: 
                   1644:         align   4
                   1645: bot_1R1W_check_more_scans:
                   1646: 
                   1647:         mov     eax,ulCurrentDestScan
                   1648:         mov     esi,ulBlockHeight       ;ESI = scans just copied; reused below
                   1649:         sub     eax,esi                 ;we've copied to dest up to here
                   1650:         cmp     ulLastDestScan,eax      ;are we past the dest rect top?
                   1651:         jg      short bot_1R1W_done     ;yes, we're done
                   1652:         mov     ulCurrentDestScan,eax
                   1653: 
                   1654: ; Now advance either or both banks, as needed.
                   1655: 
                   1656:         mov     ebx,pdsurf
                   1657:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
                   1658:                                                      ; current dest bank?
                   1659:         jge     short bot_1R1W_dest_bank_mapped   ;no, proper bank still mapped
                   1660: 
                   1661: ; Map bank containing the current dest scan line into dest window.
                   1662: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1663: 
                   1664:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1665:                 <ebx,eax,JustifyBottom,MapDestBank>
                   1666: 
                   1667: bot_1R1W_dest_bank_mapped:
                   1668: 
                   1669:         mov     eax,ulCurrentSrcScan
                   1670:         sub     eax,esi         ;we've copied from source up to here
                   1671:         mov     ulCurrentSrcScan,eax
                   1672: 
                   1673:         cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
                   1674:                                                      ; current src bank?
                   1675:         jge     short bot_1R1W_src_bank_mapped    ;no, proper bank still mapped
                   1676: 
                   1677: ; Map bank containing the current source scan line into source window.
                   1678: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1679: 
                   1680:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   1681:                 <ebx,eax,JustifyBottom,MapSourceBank>
                   1682: 
                   1683: bot_1R1W_src_bank_mapped:
                   1684: 
                   1685:         jmp     bot_1R1W_bank_loop      ;go copy the next block
                   1686: 
                   1687: bot_1R1W_done:
                   1688:         PLAIN_RET
                   1689: 
                   1690: 
                   1691: ;-----------------------------------------------------------------------;
                   1692: ; Banking for 1 R/W adapters, top to bottom.
                         ;
                         ; Only one window is used here (dsurf_rcl1WindowClip, mapped through
                         ; dsurf_pfnBankControl). Each pass of the loop:
                         ;   1) starts with the dest bank mapped and ESI = ulCurrentDestScan,
                         ;   2) computes in EDI how many scans fit in the dest bank,
                         ;   3) maps the bank holding ulCurrentSrcScan if it is not the one
                         ;      currently mapped, recording in EDX (0/1) whether a remap was done,
                         ;   4) limits EDI to the scans left in the source bank and stores it in
                         ;      ulBlockHeight,
                         ;   5) starts the copy, via pCurrentThreadViaBuffer when EDX != 0,
                         ;   6) advances both scan counters and re-maps the dest bank if needed.
                         ; The loop ends when the advanced dest scan equals ulLastDestScan.
                         ;
                         ; NOTE(review): ptrCall, THREAD_AND_START and PLAIN_RET are macros
                         ; defined elsewhere; their exact register usage is not visible here.
                   1693: ;-----------------------------------------------------------------------;
                   1694:         align   4
                   1695: top_to_bottom_1RW:
                   1696: 
                   1697: ; We're going top to bottom. Map in the dest, top-justified.
                   1698: 
                   1699:         mov     ebx,pdsurf
                   1700:         mov     esi,ulCurrentDestScan
                   1701:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest top less than
                   1702:                                                      ; current bank?
                   1703:         jl      short top_1RW_map_init_dest_bank     ;yes, map in proper bank
                   1704:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest top greater than
                   1705:                                                         ; current bank?
                   1706:         jl      short top_1RW_init_dest_bank_mapped
                   1707:                                                 ;no, proper bank already mapped
                   1708: top_1RW_map_init_dest_bank:
                   1709: 
                   1710: ; Map bank containing the top dest scan line into the window.
                   1711: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1712: 
                   1713:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
                   1714: 
                   1715: top_1RW_init_dest_bank_mapped:
                   1716: 
                   1717: ; Bank-by-bank top-to-bottom copy loop.
                         ; On entry: EBX = pdsurf, ESI = ulCurrentDestScan.
                   1718: 
                   1719: top_1RW_bank_loop:
                   1720: 
                   1721: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1722: 
                   1723:         mov     edi,ulLastDestScan
                   1724:         cmp     edi,[ebx].dsurf_rcl1WindowClip.yBottom
                   1725:         jl      short @F        ;copy rectangle bottom is in this bank
                   1726:         mov     edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end
                   1727:                                                        ; of bank, at least
                   1728: @@:
                   1729:         sub     edi,esi   ;# of scans we can and want to do in the dest bank
                   1730: 
                   1731: ; Now make sure source is mapped in. This is the condition the copying routines
                   1732: ; expect, and we need to figure out how far we can go in the source.
                   1733: 
                   1734:         sub     edx,edx                 ;assume source and dest are in the same
                   1735:                                         ; bank (EDX = 0)
                   1736:         mov     esi,ulCurrentSrcScan    ;ESI now holds the source scan
                   1737:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
                   1738:                                                     ; current bank?
                   1739:         jl      short top_1RW_map_src_Bank          ;yes, must map in
                   1740:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
                   1741:                                                        ; current bank?
                   1742:         jl      short top_1RW_src_bank_mapped     ;no, proper bank still mapped
                   1743: 
                   1744: top_1RW_map_src_Bank:
                   1745: 
                   1746: ; Map bank containing the current source scan line into the window.
                   1747: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1748: 
                   1749:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
                   1750: 
                   1751:         mov     edx,1                   ;mark that source and dest are not in
                   1752:                                         ; the same bank (set after the call)
                   1753: top_1RW_src_bank_mapped:
                   1754: 
                   1755:         mov     eax,[ebx].dsurf_rcl1WindowClip.yBottom
                   1756:         sub     eax,esi         ;# of scans we can do in the src bank
                   1757: 
                   1758:         cmp     edi,eax         ;unsigned compare of the two scan counts
                   1759:         jb      short @F        ;source bank isn't limiting
                   1760:         mov     edi,eax         ;source bank is limiting
                   1761: @@:
                   1762:         mov     ulBlockHeight,edi ;# of scans we'll do in this bank
                   1763: 
                   1764: ; We're ready to copy this block.
                   1765: ; Select different threading, depending on whether the source and destination
                   1766: ; are currently in the same bank; we can do edges faster if they are.
                   1767: 
                   1768:         and     edx,edx         ;sets ZF when EDX = 0 (no source remap done)
                   1769:         jz      short top_1RW_copy_same_bank
                   1770: 
                   1771: ; Source and dest are currently in different banks, must go through temp buffer.
                         ; NOTE(review): the second macro argument is presumably the label at
                         ; which execution continues after the block is copied - confirm against
                         ; the THREAD_AND_START definition.
                   1772: 
                   1773:         THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans
                   1774: 
                   1775: ; Source and dest are currently in the same bank.
                   1776: 
                   1777:         align   4
                   1778: top_1RW_copy_same_bank:
                   1779:         THREAD_AND_START
                   1780: 
                   1781: ; Any more scans to copy?
                   1782: 
                   1783: top_1RW_check_more_scans:
                   1784: 
                   1785:         mov     esi,ulCurrentDestScan
                   1786:         mov     edi,ulBlockHeight       ;EDI = scans just copied; reused below
                   1787:         add     esi,edi                 ;we've copied to dest up to here
                   1788:         cmp     ulLastDestScan,esi      ;are we at the dest rect bottom?
                   1789:         jz      short top_1RW_done      ;yes, we're done
                   1790:         mov     ulCurrentDestScan,esi
                   1791: 
                   1792: ; Now make sure the dest bank is mapped in.
                         ; Both bounds are tested because the window may have been remapped for
                         ; the source at top_1RW_map_src_Bank.
                   1793: 
                   1794:         mov     ebx,pdsurf
                   1795:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
                   1796:                                                     ; current bank?
                   1797:         jl      short top_1RW_map_dest_bank         ;yes, map in dest bank
                   1798:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
                   1799:                                                         ; current bank?
                   1800:         jl      short top_1RW_dest_bank_mapped   ;no, proper bank mapped
                   1801: 
                   1802: top_1RW_map_dest_bank:
                   1803: 
                   1804: ; Map bank containing the current dest scan line into the window.
                   1805: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1806: 
                   1807:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
                   1808: 
                   1809: top_1RW_dest_bank_mapped:
                   1810: 
                   1811:         add     ulCurrentSrcScan,edi    ;we've copied from source up to here
                   1812: 
                   1813:         jmp     top_1RW_bank_loop       ;ESI = ulCurrentDestScan, as the loop
                         ; top expects
                   1814: 
                   1815: top_1RW_done:
                   1816:         PLAIN_RET
                   1817: 
                   1818: 
                   1819: ;-----------------------------------------------------------------------;
                   1820: ; Banking for 1 R/W adapters, bottom to top.
                   1821: ;-----------------------------------------------------------------------;
                   1822:         align   4
                   1823: bottom_to_top_1RW:
                   1824: 
                   1825: ; We're going bottom to top. Map in the dest, bottom-justified.
                   1826: 
                   1827:         mov     ebx,pdsurf
                   1828:         mov     esi,ulCurrentDestScan
                   1829:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest bottom less than
                   1830:                                                      ; current dest bank?
                   1831:         jl      short bot_1RW_map_init_dest_bank     ;yes, map in proper bank
                   1832:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest bottom greater
                   1833:                                                        ; than current dst bank?
                   1834:         jl      short bot_1RW_init_dest_bank_mapped
                   1835:                                                 ;no, proper bank already mapped
                   1836: bot_1RW_map_init_dest_bank:
                   1837: 
                   1838: ; Map bank containing the bottom dest scan line into source window.
                   1839: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1840: 
                   1841:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
                   1842: 
                   1843: bot_1RW_init_dest_bank_mapped:
                   1844: 
                   1845: ; Bank-by-bank bottom-to-top copy loop.
                   1846: 
                   1847: bot_1RW_bank_loop:
                   1848: 
                   1849: ; Decide how far we can go before we run out of bank or rectangle to copy.
                   1850: 
                   1851:         mov     edi,ulLastDestScan
                   1852:         cmp     edi,[ebx].dsurf_rcl1WindowClip.yTop
                   1853:         jg      short @F        ;copy rectangle top is in this bank
                   1854:         mov     edi,[ebx].dsurf_rcl1WindowClip.yTop ;dest extends to end
                   1855:                                                     ; of bank, at least
                   1856: @@:
                   1857:         neg     edi
                   1858:         add     edi,esi                 ;# of scans we can and want to do in
                   1859:         inc     edi                     ; the dest bank
                   1860: 
                   1861: ; Now make sure source is mapped in. This is the condition the copying routines
                   1862: ; expect, and we need to figure out how far we can go in the source.
                   1863: 
                   1864:         sub     edx,edx                 ;assume source and dest are in the same
                   1865:                                         ; bank
                   1866:         mov     esi,ulCurrentSrcScan
                   1867:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
                   1868:                                                     ; current bank?
                   1869:         jl      short bot_1RW_map_src_Bank          ;yes, must map in
                   1870:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
                   1871:                                                        ; current bank?
                   1872:         jl      short bot_1RW_src_bank_mapped     ;no, proper bank still mapped
                   1873: 
                   1874: bot_1RW_map_src_Bank:
                   1875: 
                   1876: ; Map bank containing the current source scan line into source window.
                   1877: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1878: 
                   1879:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
                   1880: 
                   1881:         mov     edx,1                   ;mark that source and dest are not in
                   1882:                                         ; the same bank
                   1883: bot_1RW_src_bank_mapped:
                   1884: 
                   1885:         sub     esi,[ebx].dsurf_rcl1WindowClip.yTop
                   1886:         inc     esi                     ;# of scans we can do in the src bank
                   1887: 
                   1888:         cmp     edi,esi
                   1889:         jb      short @F        ;source bank isn't limiting
                   1890:         mov     edi,esi         ;source bank is limiting
                   1891: @@:
                   1892:         mov     ulBlockHeight,edi ;# of scans we'll do in this bank
                   1893: 
                   1894: ; We're ready to copy this block.
                   1895: ; Select different threading, depending on whether the source and destination
                   1896: ; are currently in the same bank; we can copy much faster if they are.
                   1897: 
                   1898:         and     edx,edx
                   1899:         jz      short bot_1RW_copy_same_bank
                   1900: 
                   1901: ; Source and dest are currently in different banks, must go through temp buffer.
                   1902: 
                   1903:         THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans
                   1904: 
                   1905: ; Source and dest are currently in the same bank.
                   1906: 
                   1907:         align   4
                   1908: bot_1RW_copy_same_bank:
                   1909:         THREAD_AND_START
                   1910: 
                   1911: ; Any more scans to copy?
                   1912: 
                   1913:         align   4
                   1914: bot_1RW_check_more_scans:
                   1915: 
                   1916:         mov     esi,ulCurrentDestScan
                   1917:         mov     edi,ulBlockHeight
                   1918:         sub     esi,edi                 ;we've copied to dest up to here
                   1919:         cmp     ulLastDestScan,esi      ;are we past the dest rect top?
                   1920:         jg      short bot_1RW_done      ;yes, we're done
                   1921:         mov     ulCurrentDestScan,esi
                   1922: 
                   1923: ; Now make sure the dest bank is mapped in.
                   1924: 
                   1925:         mov     ebx,pdsurf
                   1926:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
                   1927:                                                     ; current bank?
                   1928:         jl      short bot_1RW_map_dest_bank         ;yes, map in dest bank
                   1929:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
                   1930:                                                         ; current bank?
                   1931:         jl      short bot_1RW_dest_bank_mapped   ;no, proper bank mapped
                   1932: 
                   1933: bot_1RW_map_dest_bank:
                   1934: 
                   1935: ; Map bank containing the current dest scan line into source window.
                   1936: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
                   1937: 
                   1938:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
                   1939: 
                   1940: bot_1RW_dest_bank_mapped:
                   1941: 
                   1942:         sub     ulCurrentSrcScan,edi    ;we've copied from source up to here
                   1943: 
                   1944:         jmp     bot_1RW_bank_loop
                   1945: 
                   1946: bot_1RW_done:
                   1947:         PLAIN_RET
                   1948: 
                   1949: 
                   1950: ;***********************************************************************;
                   1951: ;
                   1952: ; The following routines are the low-level copying routines. They know
                   1953: ; almost nothing about banks (the routines that copy through a temp
                   1954: ; buffer know how to switch banks after filling the temp buffer, but
                   1955: ; that's it). Banking should be taken care of at a higher level.
                   1956: ;
                   1957: ;***********************************************************************;
                   1958: 
                   1959: ;-----------------------------------------------------------------------;
                   1960: ; Copies a block of solid bytes directly from the source to the
                   1961: ; destination, without using a temp buffer. We can't use the latches,
                   1962: ; though, because this is a rotated copy. Can only be used by 2 R/W or
                   1963: ; 1R/1W window banking, or by unbanked modes, or by 1 R/W adapters when
                   1964: ; the source and dest are in the same bank. 1 R/W adapters must go
                   1965: ; through an intermediate local buffer when the source and the destination
                   1966: ; aren't in the same bank.
                   1967: ;
                   1968: ; Input:
                   1969: ;       Direction Flag set for desired direction of copy
                   1970: ;       culWholeBytesWidth = # of bytes to copy across each scan line
                   1971: ;       ulWholeScanDelta = distance to start of next dest scan from end of
                   1972: ;               current
                   1973: ;       ulWholeScanSrcDelta = distance to start of next source scan from end of
                   1974: ;               current
                   1975: ;       ulBlockHeight = # of scans to copy
                   1976: ;       ulWholeBytesSrc = start source offset in bitmap
                   1977: ;       ulWholeBytesDest = start dest offset in bitmap
                   1978: ;       ulCombineMaskWhole = masking to be applied before ORing the two source
                   1979: ;               bytes together, to keep only the data needed in preparation
                   1980: ;               for the VGA rotator doing its stuff
                         ;       pdsurf = surface; supplies the dsurf_pvBitmapStart2WindowS and
                         ;               dsurf_pvBitmapStart2WindowD base addresses
                         ;       Only SEQ_DATA is written to set the Map Mask, so the Sequencer
                         ;               Index must already select the Map Mask register
                         ;               (NOTE(review): presumably set up by the caller - confirm)
                   1981: ;
                   1982: ; Output:
                   1983: ;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
                   1984: ;               scan processed
                         ;       Bit Mask is left at 0ffh and the GC Index is left pointing to the
                         ;               Read Map register
                         ;       EAX, EBX, ECX, EDX, ESI, and EDI are all modified
                   1985: ;-----------------------------------------------------------------------;
                   1986: 
                   1987:         align   4
                   1988: copy_whole_bytes:
                   1989: 
                   1990: ; Calculate start source and dest addresses from bitmap start addresses and
                   1991: ; offsets within bitmap.
                   1992: 
                   1993:         mov     ecx,pdsurf
                   1994:         mov     eax,ulWholeBytesSrc
                   1995:         add     eax,[ecx].dsurf_pvBitmapStart2WindowS
                   1996:         mov     pSrcAddr,eax
                   1997:         mov     eax,ulWholeBytesDest
                   1998:         add     eax,[ecx].dsurf_pvBitmapStart2WindowD
                   1999:         mov     pDestAddr,eax
                   2000: 
                   2001: ; Set the bit mask to enable all bits.
                   2002: 
                   2003:         mov     edx,VGA_BASE + GRAF_ADDR
                   2004:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
                   2005:         out     dx,ax
                   2006: 
                   2007: ; Leave GC Index pointing to the Read Map register.
                   2008: 
                   2009:         mov     al,GRAF_READ_MAP
                   2010:         out     dx,al
                   2011: 
                   2012: ; Set up to copy the whole bytes directly from the source.
                   2013: 
                   2014:         mov     eax,ulBlockHeight
                   2015:         mov     ulTempScanCount,eax
                   2016: 
                   2017:         mov     ebx,culWholeBytesWidth
                   2018:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeRWEntry, \
                   2019:                                 LOOP_UNROLL_SHIFT
                   2020:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
                   2021:         mov     pTempEntry,ecx   ;ditto for entry point
                   2022: 
                         ; Outer loop: one pass per scan line; ulTempScanCount counts the scans down.
                         
                   2023: copy_whole_scan_loop:
                   2024: 
                   2025:         mov     cl,MM_C3        ;start by copying plane 3 (for Map Mask)
                   2026: 
                         ; Middle loop: one pass per plane; CL holds the Map Mask bit for the current
                         ; plane and is shifted right after each plane until it becomes zero.
                         
                   2027: copy_whole_plane_loop:
                   2028: 
                   2029: ; Set Map Mask to enable writes to the plane we're copying.
                   2030: 
                   2031:         mov     edx,VGA_BASE + SEQ_DATA
                   2032:         mov     al,cl
                   2033:         out     dx,al
                   2034: 
                   2035: ; Set Read Map to enable reads from the plane we're copying.
                         ; Only DL is reloaded, so the upper bits of EDX left over from
                         ; VGA_BASE + SEQ_DATA are reused for the GC data port address.
                         ; The Map Mask bit in AL is converted to a Read Map value by shifting it
                         ; right once and then subtracting 1 only when the result is 100b (plane 3).
                   2036: 
                   2037:         mov     dl,GRAF_DATA
                   2038:         shr     al,1                    ;map plane into ReadMask
                   2039:         cmp     al,100b                 ;set Carry if not C3 (plane 3)
                   2040:         adc     al,-1                   ;sub 1 only if C3
                   2041:         out     dx,al
                   2042: 
                   2043: ; Reload the scan's source and dest start addresses for this plane.
                   2044: 
                   2045:         mov     esi,pSrcAddr       ;source offset in screen
                   2046:         mov     edi,pDestAddr      ;point to destination start
                   2047: 
                   2048:         lodsb                   ;prime the rotation pipeline
                   2049:         mov     ah,al           ;for combining with the next byte
                   2050: 
                   2051:         mov     ebx,culTempCount
                   2052:         mov     edx,ulCombineMaskWhole
                   2053:         jmp     pTempEntry
                   2054: 
                   2055: 
                   2056: ;-----------------------------------------------------------------------;
                   2057: ; Table of unrolled direct copy whole bytes loop entry points.
                   2058: ;-----------------------------------------------------------------------;
                   2059: 
                   2060:         UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeRWEntry, \
                   2061:                                 WHOLE_RW, LOOP_UNROLL_COUNT
                   2062: 
                   2063: ;-----------------------------------------------------------------------;
                   2064: ; Unrolled loop for copying whole bytes directly from source to dest.
                         ; Each expansion reads one new source byte, combines it under the EDX mask
                         ; with the previous source byte held in AH, and writes one dest byte. CH is
                         ; used as scratch to carry the unmasked new byte over to the next step
                         ; (CL is not available; it holds the current plane's Map Mask bit).
                   2065: ;-----------------------------------------------------------------------;
                   2066: 
                   2067: COPY_WHOLE_RW macro ENTRY_LABEL,ENTRY_INDEX
                   2068: &ENTRY_LABEL&ENTRY_INDEX&:
                   2069:         lodsb                   ;get byte to copy
                   2070:         mov     ch,al           ;set aside for next time
                   2071:         and     eax,edx         ;mask the bytes in preparation for combining
                   2072:                                 ; and rotating them
                   2073:         or      al,ah           ;combine them
                   2074:         stosb                   ;write the composite byte
                   2075:                                 ; VGA rotates during write
                   2076:         mov     ah,ch           ;prepare byte for combining next time
                   2077:         endm    ;-----------------------------------;
                   2078: 
                   2079: ;  AH = rotation pipeline-priming byte
                         ;  EBX = # of unrolled loop iterations (loaded from culTempCount)
                   2080: ;  EDX = mask to preserve desired portions of AH and AL before combining
                   2081: ;  ESI = source address to copy from
                   2082: ;  EDI = target address to copy to
                   2083: ;  Map Mask set to enable the desired plane for write
                   2084: ;  Bit Mask set to enable all bits
                   2085: 
                   2086:         align   4
                   2087: copy_whole_loop:
                   2088:         UNROLL_LOOP COPY_WHOLE_RW,WHOLE_RW,LOOP_UNROLL_COUNT
                   2089: 
                   2090:         dec     ebx
                   2091:         jnz     copy_whole_loop
                   2092: 
                   2093: ; Do next plane, if any.
                   2094: 
                   2095:         shr     cl,1                    ;advance to next plane
                   2096:         jnz     copy_whole_plane_loop
                   2097: 
                   2098: ; Remember where we left off, for next scan.
                         ; ESI and EDI here are the values left by the last plane copied for this
                         ; scan; the deltas step them to the start of the next scan.
                   2099: 
                   2100:         add     edi,ulWholeScanDelta    ;point to next dest scan
                   2101:         mov     pDestAddr,edi
                   2102:         add     esi,ulWholeScanSrcDelta ;point to next source scan
                   2103:         mov     pSrcAddr,esi
                   2104: 
                   2105: ; Count down scan lines.
                   2106: 
                   2107:         dec     ulTempScanCount
                   2108:         jnz     copy_whole_scan_loop
                   2109: 
                   2110: ; Remember where we left off, for next time.
                         ; The addresses are converted back to offsets within the bitmap by
                         ; subtracting the same window bases that were added on entry.
                   2111: 
                   2112:         mov     ecx,pdsurf
                   2113:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2114:         mov     ulWholeBytesSrc,esi
                   2115:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2116:         mov     ulWholeBytesDest,edi
                   2117: 
                   2118:         PLAIN_RET
                   2119: 
                   2120: 
                   2121: ;-----------------------------------------------------------------------;
                   2122: ; Copies a block of solid bytes from the source to the destination via
                   2123: ; the temp buffer. This should only be used by 1 R/W adapters, and then
                   2124: ; only when the source and dest are in different banks.
                   2125: ;
                   2126: ; All relevant bytes are first copied from the source to a temp buffer that's
                   2127: ; an image of the source. Then, we copy each of the four planes for one scan
                   2128: ; line from the temp buffer to the screen before going on to the next scan
                   2129: ; line. See ALIGNBLT.ASM for comments about why this is done.
                   2130: ;
                   2131: ; Input:
                   2132: ;       Direction Flag set for desired direction of copy
                   2133: ;       culWholeBytesWidth = # of bytes to copy across each scan line
                   2134: ;       ulWholeScanDelta = distance to start of next scan from end of current
                         ;       ulWholeScanSrcDelta = distance to start of next source scan from
                         ;               end of current; also used to step through the temp buffer
                         ;               while it is being filled
                   2135: ;       ulNextScan = width of a scan line
                   2136: ;       ulBlockHeight = # of scans to copy
                   2137: ;       ulWholeBytesSrc = start source offset in bitmap
                   2138: ;       ulWholeBytesDest = start dest offset in bitmap
                   2139: ;       ppTempPlane0 = pointer to pointer to plane 0 storage in temp buffer
                   2140: ;       ppTempPlane3 = pointer to pointer to plane 3 storage in temp buffer
                   2141: ;       ulCombineMaskWhole = masking to be applied before ORing the two source
                   2142: ;               bytes together, to keep only the data needed in preparation
                   2143: ;               for the VGA rotator doing its stuff
                         ;       ulCurrentSrcScan, ulCurrentDestScan, ulCurrentJustification =
                         ;               passed to the bank control function to map in the dest
                         ;               bank and, on the way out, to map the source bank back in
                         ;       pdsurf = surface; supplies dsurf_pvBitmapStart, dsurf_pvStart, and
                         ;               dsurf_pfnBankControl
                   2144: ;       Expects the source bank to be mapped in; source bank is mapped in on
                   2145: ;               exit
                   2146: ;
                   2147: ; Output:
                   2148: ;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
                   2149: ;               scan processed
                   2150: ;-----------------------------------------------------------------------;
                   2151: 
                   2152:         align   4
                   2153: copy_whole_bytes_via_buffer:
                   2154: 
                   2155: ; Calculate start source address from bitmap start address and offset within
                   2156: ; bitmap.
                   2157: 
                   2158:         mov     ecx,pdsurf
                   2159:         mov     eax,ulWholeBytesSrc
                   2160:         add     eax,[ecx].dsurf_pvBitmapStart
                   2161:         mov     pSrcAddr,eax
                   2162:         sub     eax,[ecx].dsurf_pvStart
                   2163:         mov     ulOffsetInBank,eax ;will come in handy because we treat the
                   2164:                                    ; temp buffer as an image of the current
                   2165:                                    ; bank
                   2166: 
                   2167: ; First, copy all the bytes into the temporary buffer.
                   2168: 
                   2169: ; Leave the GC Index pointing to the Read Map.
                   2170: 
                   2171:         mov     edx,VGA_BASE + GRAF_ADDR
                   2172:         mov     al,GRAF_READ_MAP
                   2173:         out     dx,al
                   2174: 
                         ; EAX is the plane index; it serves both as the Read Map value and as the
                         ; index into the table of temp buffer plane pointers at ppTempPlane0.
                         
                   2175:         mov     eax,3           ;start by copying plane 3
                   2176: copy_whole_to_buffer_plane_loop:
                   2177:         mov     ebx,ulBlockHeight  ;# of scans to copy
                   2178:         mov     esi,pSrcAddr       ;source offset in screen
                   2179:         mov     edi,ppTempPlane0
                   2180:         mov     edi,[edi+eax*4]    ;pointer to current plane in temp buffer
                   2181:         add     edi,ulOffsetInBank ;dest for plane in temp buffer
                   2182: 
                   2183:         mov     edx,VGA_BASE + GRAF_DATA
                   2184:         out     dx,al            ;set Read Map to plane we're copying from.
                   2185: 
                         ; EAX and EDX are reused inside the scan loop, so the plane index is saved
                         ; on the stack. The same scan delta is applied to both ESI and EDI, which
                         ; keeps the temp buffer laid out like the bank it mirrors.
                         
                   2186:         push    eax             ;remember plane index
                   2187:         mov     eax,ulWholeScanSrcDelta ;offset to next scan
                   2188:         mov     edx,culWholeBytesWidth ;# of bytes per scan
                   2189:         inc     edx             ;always one more source byte than dest byte
                   2190: copy_whole_to_buffer_scan_loop:
                   2191:         mov     ecx,edx         ;# of bytes per scan
                   2192:         rep     movsb           ;copy the scan line to the temp buffer
                   2193:         add     esi,eax         ;point to next source scan
                   2194:         add     edi,eax         ;point to next dest scan
                   2195: 
                   2196:         dec     ebx              ;count down scan lines
                   2197:         jnz     copy_whole_to_buffer_scan_loop
                   2198: 
                   2199:         pop     eax             ;get back plane index
                   2200:         dec     eax             ;count down planes
                   2201:         jns     copy_whole_to_buffer_plane_loop
                         ;loops for planes 3 through 0; falls through once EAX goes negative
                   2202: 
                   2203: ; Remember where we left off, for next time.
                   2204: 
                   2205:         mov     ebx,pdsurf
                   2206:         sub     esi,[ebx].dsurf_pvBitmapStart
                   2207:         mov     ulWholeBytesSrc,esi
                   2208: 
                   2209: 
                   2210: ; Now copy the temp buffer to the screen.
                   2211: 
                   2212: ; Map in the destination bank, so we can read/write to it and let the Bit Mask
                   2213: ; work.
                   2214: 
                   2215:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>, \
                   2216:                 <ebx,ulCurrentDestScan,ulCurrentJustification>
                   2217: 
                   2218: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
                   2219: ; until now to calculate this, because the dest bank wasn't mapped earlier).
                         ; EBX is used here without being reloaded, so it is relied upon to still
                         ; hold pdsurf after the bank control call.
                   2220: 
                   2221:         mov     eax,ulWholeBytesDest
                   2222:         add     eax,[ebx].dsurf_pvBitmapStart
                   2223:         mov     pDestAddr,eax
                   2224: 
                   2225: ; Set the bit mask to enable all bits.
                   2226: 
                   2227:         mov     edx,VGA_BASE + GRAF_ADDR
                   2228:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
                   2229:         out     dx,ax
                   2230: 
                   2231: ; Set up to copy the whole bytes from the buffer.
                   2232: 
                   2233:         mov     eax,ulBlockHeight
                   2234:         mov     ulTempScanCount,eax
                   2235: 
                   2236:         mov     ebx,culWholeBytesWidth
                   2237:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeFromBufferEntry, \
                   2238:                                 LOOP_UNROLL_SHIFT
                   2239:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
                   2240:         mov     pTempEntry,ecx   ;ditto for entry point
                   2241: 
                         ; Outer loop: one pass per scan line; ulTempScanCount counts the scans down.
                         
                   2242: copy_whole_from_buffer_scan_loop:
                   2243: 
                   2244:         mov     ebx,ppTempPlane3  ;point to plane 3's temp buffer offset
                   2245:         mov     cl,MM_C3        ;start by copying plane 3
                   2246: 
                         ; Middle loop: one pass per plane. CL holds the Map Mask bit for the plane
                         ; and EBX walks downward through the temp buffer plane pointers in step.
                         
                   2247: copy_whole_from_buffer_plane_loop:
                   2248: 
                   2249: ; Set Map Mask to enable writes to the plane we're copying.
                   2250: 
                   2251:         mov     edx,VGA_BASE + SEQ_DATA
                   2252:         mov     al,cl
                   2253:         out     dx,al
                   2254: 
                   2255: ; Select the corresponding plane from the temp buffer.
                   2256: 
                   2257:         mov     esi,[ebx]       ;point to plane start in temp buffer
                   2258:         sub     ebx,4           ;point to next temp buffer plane ptr
                   2259:         push    ebx             ;preserve pointer to plane pointer
                   2260: 
                   2261:         add     esi,ulOffsetInBank ;point to current scan start in temp buffer
                   2262:         mov     edi,pDestAddr      ;point to destination start
                   2263: 
                   2264:         lodsb                   ;prime the rotation pipeline
                   2265:         mov     ah,al           ;for combining with the next byte
                   2266: 
                   2267:         mov     ebx,culTempCount
                   2268:         mov     edx,ulCombineMaskWhole
                   2269:         jmp     pTempEntry
                   2270: 
                   2271: 
                   2272: ;-----------------------------------------------------------------------;
                   2273: ; Table of unrolled copy whole bytes from buffer loop entry points.
                   2274: ;-----------------------------------------------------------------------;
                   2275: 
                   2276:         UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeFromBufferEntry, \
                   2277:                                 WHOLE_FROM_BUFFER, LOOP_UNROLL_COUNT
                   2278: 
                   2279: ;-----------------------------------------------------------------------;
                   2280: ; Unrolled loop for copying whole bytes from the buffer.
                         ; Each expansion reads one new byte from the temp buffer, combines it under
                         ; the EDX mask with the previous byte held in AH, and writes one dest byte.
                         ; CH is used as scratch to carry the unmasked new byte to the next step.
                   2281: ;-----------------------------------------------------------------------;
                   2282: 
                   2283: COPY_WHOLE_FROM_BUFFER macro ENTRY_LABEL,ENTRY_INDEX
                   2284: &ENTRY_LABEL&ENTRY_INDEX&:
                   2285:         lodsb                   ;get byte to copy
                   2286:         mov     ch,al           ;set aside for next time
                   2287:         and     eax,edx         ;mask the bytes in preparation for combining
                   2288:                                 ; and rotating them
                   2289:         or      al,ah           ;combine them
                   2290:         stosb                   ;write the composite byte
                   2291:                                 ; VGA rotates during write
                   2292:         mov     ah,ch           ;prepare byte for combining next time
                   2293:         endm    ;-----------------------------------;
                   2294: 
                   2295: ;  AH = rotation pipeline-priming byte
                         ;  EBX = # of unrolled loop iterations (loaded from culTempCount)
                   2296: ;  EDX = mask to preserve desired portions of AH and AL before combining
                   2297: ;  ESI = source address to copy from
                   2298: ;  EDI = target address to copy to
                   2299: ;  Map Mask set to enable the desired plane for write
                   2300: ;  Bit Mask set to enable all bits
                         ;  Top of stack = pointer to the next plane's temp buffer pointer
                   2301: 
                   2302:         align   4
                   2303: copy_whole_from_buffer_loop:
                   2304:         UNROLL_LOOP COPY_WHOLE_FROM_BUFFER,WHOLE_FROM_BUFFER,LOOP_UNROLL_COUNT
                   2305: 
                   2306:         dec     ebx
                   2307:         jnz     copy_whole_from_buffer_loop
                   2308: 
                   2309: ; Do next plane, if any.
                   2310: 
                   2311:         pop     ebx             ;retrieve pointer to plane pointer
                   2312:         shr     cl,1            ;advance to next plane
                   2313:         jnz     copy_whole_from_buffer_plane_loop
                   2314: 
                   2315: ; Remember where we left off, for next scan.
                   2316: 
                   2317:         add     edi,ulWholeScanDelta    ;point to next dest scan
                   2318:         mov     pDestAddr,edi
                   2319:         mov     eax,ulNextScan
                   2320:         add     ulOffsetInBank,eax      ;next scan's start in temp buffer,
                   2321:                                         ; relative to start of plane's storage
                   2322: 
                   2323: ; Count down scan lines.
                   2324: 
                   2325:         dec     ulTempScanCount
                   2326:         jnz     copy_whole_from_buffer_scan_loop
                   2327: 
                   2328: ; Remember where we left off, for next time.
                   2329: 
                   2330:         mov     ebx,pdsurf
                   2331:         sub     edi,[ebx].dsurf_pvBitmapStart
                   2332:         mov     ulWholeBytesDest,edi
                   2333: 
                   2334: ; Put back the original source bank.
                   2335: 
                   2336:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>, \
                   2337:                 <ebx,ulCurrentSrcScan,ulCurrentJustification>
                   2338: 
                   2339:         PLAIN_RET
                   2340: 
                   2341: 
                   2342: ;-----------------------------------------------------------------------;
                   2343: ; Copies a strip of left edge bytes from the source to the destination,
                   2344: ; assuming the source and the destination are both readable and
                   2345: ; writable. Can only be used by 2 R/W window banking, or by unbanked
                   2346: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
                   2347: ; buffer when the source and dest are in different banks. Processes up to
                   2348: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
                   2349: ; flicker.
                         ;
                         ; This routine only sets up registers and dispatches through
                         ; copy_edge_table, indexed by ulLeftSrcWidthMinus1; the copying itself is
                         ; done by the routine selected from that table.
                   2350: ;
                   2351: ; Input:
                   2352: ;       ulNextScan = width of scan, in bytes
                   2353: ;       ulBlockHeight = # of scans to copy
                         ;               (ulNextScan and ulBlockHeight are not read here;
                         ;               NOTE(review): presumably used by the copy_edge_table
                         ;               routines - confirm)
                   2354: ;       ulLeftEdgeSrc = start source offset in bitmap
                   2355: ;       ulLeftEdgeDest = start dest offset in bitmap
                   2356: ;       ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
                   2357: ;       jLeftMask = left edge clip mask
                         ;       pdsurf = surface; supplies the dsurf_pvBitmapStart2WindowS and
                         ;               dsurf_pvBitmapStart2WindowD base addresses
                   2358: ;
                   2359: ; Output:
                   2360: ;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
                   2361: ;               scan processed
                   2362: ;-----------------------------------------------------------------------;
                   2363: 
                   2364:         align   4
                   2365: copy_left_edge:
                   2366: 
                   2367: ; Calculate start source and dest addresses from bitmap start addresses and
                   2368: ; offsets within bitmap.
                   2369: 
                   2370:         mov     ecx,pdsurf
                   2371:         mov     esi,ulLeftEdgeSrc
                   2372:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2373:         mov     edi,ulLeftEdgeDest
                   2374:         add     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2375: 
                   2376: ; Copy the edge.
                         ; The edge routine is entered with AH = clip mask, ESI = source address,
                         ; and EDI = dest address. The code after the call stores ESI and EDI back
                         ; as the new offsets, so the edge routine is relied upon to return with
                         ; both advanced past the last scan processed.
                         ; NOTE(review): the copy_edge_table routines are not shown here - confirm
                         ; their register contract.
                   2377: 
                   2378:         mov     ah,byte ptr jLeftMask   ;clip mask for this edge
                   2379:         mov     ebx,ulLeftSrcWidthMinus1
                   2380:         call    copy_edge_table[ebx*4]
                   2381: 
                   2382: ; Remember where we left off, for next time.
                         ; The addresses are converted back to offsets within the bitmap by
                         ; subtracting the same window bases that were added on entry.
                   2383: 
                   2384:         mov     ecx,pdsurf
                   2385:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2386:         mov     ulLeftEdgeSrc,esi
                   2387:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2388:         mov     ulLeftEdgeDest,edi
                   2389: 
                   2390:         PLAIN_RET
                   2391: 
                   2392: 
                   2393: ;-----------------------------------------------------------------------;
                   2394: ; Copies a strip of right edge bytes from the source to the destination,
                   2395: ; assuming both the source and the destination are both readable and
                   2396: ; writable. Can only be used by 2 R/W window banking, or by unbanked
                   2397: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
                   2398: ; buffer when the source and dest are in different banks. Processes up to
                   2399: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
                   2400: ; flicker.
                   2401: ;
                   2402: ; Input:
                   2403: ;       ulNextScan = width of scan, in bytes
                   2404: ;       ulBlockHeight = # of scans to copy
                   2405: ;       ulRightEdgeSrc = start source offset in bitmap
                   2406: ;       ulRightEdgeDest = start dest offset in bitmap
                   2407: ;       ulRightSrcWidthMinus1 = width of right source edge minus 1 (0 or 1)
                   2408: ;       jRightMask = right edge clip mask
                   2409: ;
                   2410: ; Output:
                   2411: ;       Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
                   2412: ;               scan processed
                   2413: ;-----------------------------------------------------------------------;
                   2414: 
                   2415:         align   4
                   2416: copy_right_edge:
                   2417: 
                   2418: ; Turn the saved bitmap-relative right-edge offsets into addresses inside
                   2419: ; the source and destination windows.
                   2420: 
                   2421:         mov     ecx,pdsurf
                   2422:         mov     edi,ulRightEdgeDest
                   2423:         mov     esi,ulRightEdgeSrc
                   2424:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2425:         add     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2426: 
                   2427: ; Dispatch on the source edge width (index 0 or 1) to do the actual copy.
                   2428: 
                   2429:         mov     ebx,ulRightSrcWidthMinus1
                   2430:         mov     ah,byte ptr jRightMask  ;AH = clip mask for the right edge
                   2431:         call    copy_edge_table[ebx*4]
                   2432: 
                   2433: ; Convert the advanced pointers back to offsets and save them, so the
                   2434: ; next call resumes on the scan after the last one copied.
                   2435:         mov     ecx,pdsurf
                   2436:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2437:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
                   2438:         mov     ulRightEdgeSrc,esi
                   2439:         mov     ulRightEdgeDest,edi
                   2440: 
                   2441:         PLAIN_RET
                   2442: 
                   2443: 
                   2444: ;-----------------------------------------------------------------------;
                   2445: ; Copies an edge from a 1-wide source to the destination on the screen.
                   2446: ; Entry:
                   2447: ;       AH = bit mask setting for edge
                   2448: ;       ESI = source address
                   2449: ;       EDI = destination address
                   2450: ;       ulBlockHeight = # of bytes to copy per plane
                   2451: ;       ulNextScan = scan width
                   2452: ;       Source readable, and destination readable and writable
                   2453: ; Exit:
                   2454: ;       ESI = next source address
                   2455: ;       EDI = next destination address
                   2456: ;
                   2457: ; Preserved: EBP
                   2458: ;-----------------------------------------------------------------------;
                   2459: 
                   2460:         align   4
                   2461: copy_edge_1ws:
                   2462:         mov     pSrcAddr,esi    ;save chunk start; every plane pass reloads
                   2463:         mov     pDestAddr,edi   ; ESI/EDI from these two variables
                   2464: 
                   2465: ; Set the clip mask for this edge.
                   2466: 
                   2467:         mov     edx,VGA_BASE + GRAF_ADDR
                   2468:         mov     al,GRAF_BIT_MASK
                   2469:         out     dx,ax           ;AL = register index, AH = mask from caller
                   2470: 
                   2471: ; Leave the GC Index pointing to the Read Map.
                   2472: 
                   2473:         mov     al,GRAF_READ_MAP
                   2474:         out     dx,al
                   2475: 
                   2476:         mov     ecx,offset copy_edge_rw_1ws_full_chunk
                   2477:                                 ;entry point into unrolled loop to copy first
                   2478:                                 ; chunk, assuming it's a full chunk
                   2479:         mov     ebx,ulBlockHeight       ;EBX = scans still to be copied
                   2480: 
                   2481: ; Copy the edge in a series of chunks.
                   2482: 
                   2483: copy_edge_chunk_loop_1ws:
                   2484: 
                   2485:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                   2486:                                     ; a full chunk
                   2487:         jge     short @F            ;do a full chunk
                   2488:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                   2489:                                     ; scans
                   2490:         mov     ecx,pfnCopyEdgeRWEntry_1ws[-4][ebx*4]
                   2491:                                 ;entry point into unrolled loop to copy desired
                   2492:                                 ; chunk size (table's first entry is for a
                   2493:         sub     ebx,ebx         ; count of 1, hence the -4); no scans after this
                   2494: @@:
                   2495:         push    ebx             ;remember remaining scan count
                   2496: 
                   2497:         mov     ah,MM_C3        ;start by copying plane 3
                   2498:         mov     ebx,ulNextScan  ;EBX = scan stride used by the unrolled loop
                   2499: 
                   2500: copy_edge_plane_loop_1ws:
                   2501: 
                   2502: ; Set Map Mask to enable writes to plane we're copying.
                   2503: ; NOTE(review): only SEQ_DATA is written, so the Sequencer Index is
                   2504:         mov     al,ah
                   2505:         mov     dl,SEQ_DATA     ; presumably already selecting Map Mask - confirm
                   2506:         out     dx,al
                   2507: 
                   2508: ; Set Read Map to same plane.
                   2509: 
                   2510:         shr     al,1                    ;map plane into ReadMask
                   2511:         cmp     al,100b                 ;set Carry if not C3 (plane 3)
                   2512:         adc     al,-1                   ;sub 1 only if C3
                   2513:         mov     dl,GRAF_DATA
                   2514:         out     dx,al
                   2515: 
                   2516:         mov     esi,pSrcAddr            ;restart at the top of this chunk
                   2517:         mov     edi,pDestAddr           ; for the plane just selected
                   2518: 
                   2519:         jmp     ecx                     ;copy this plane's edge bytes
                   2520: 
                   2521: 
                   2522: ;-----------------------------------------------------------------------;
                   2523: ; Table of unrolled edge loop entry points. First entry point is to copy
                   2524: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
                   2525: ;-----------------------------------------------------------------------;
                   2526: 
                   2527: pfnCopyEdgeRWEntry_1ws label dword
                   2528: INDEX = 1
                   2529:         rept    EDGE_CHUNK_SIZE
                   2530:         DEFINE_DD       EDGE_RW_1WS,%INDEX
                   2531: INDEX = INDEX+1
                   2532:         endm
                   2533: 
                   2534: 
                   2535: ;-----------------------------------------------------------------------;
                   2536: ; Unrolled loop for copying a strip of edge bytes, with 1-wide source and
                   2537: ; destination both readable and writable.
                   2538: ;-----------------------------------------------------------------------;
                   2539: 
                   2540: COPY_EDGE_RW_1WS macro ENTRY_LABEL,ENTRY_INDEX
                   2541: &ENTRY_LABEL&ENTRY_INDEX&:
                   2542:         mov     al,[esi]        ;get byte to copy
                   2543:         add     esi,ebx         ;point to next source scan
                   2544:         xchg    [edi],al        ;read before write so Bit Mask can operate
                   2545:                                 ; VGA rotates during write
                   2546:         add     edi,ebx         ;point to next dest scan
                   2547:         endm    ;-----------------------------------;
                   2548: 
                   2549: ;  EBX = scan line width
                   2550: ;  ESI = source address to copy from
                   2551: ;  EDI = target address to copy to
                   2552: ;  Bit Mask set to desired clipping
                   2553: ;  Read Map and Map Mask set to enable the desired plane for read and write
                   2554: ;  AH = Map Mask bit of the current plane (only AL is used as scratch)
                   2555:         align   4
                   2556: copy_edge_rw_1ws_full_chunk:
                   2557:         UNROLL_LOOP COPY_EDGE_RW_1WS,EDGE_RW_1WS,EDGE_CHUNK_SIZE
                   2558: 
                   2559: ; Do next plane within this chunk, if any.
                   2560: 
                   2561:         shr     ah,1                    ;advance to next plane
                   2562:         jnz     copy_edge_plane_loop_1ws ;done once the plane bit shifts out
                   2563: 
                   2564: ; Remember where we left off, for the next chunk.
                   2565: 
                   2566:         mov     pSrcAddr,esi
                   2567:         mov     pDestAddr,edi
                   2568: 
                   2569: ; Do next chunk within this bank block, if any.
                   2570: 
                   2571:         pop     ebx                     ;retrieve remaining scan count
                   2572:         and     ebx,ebx                 ;any scans left?
                   2573:         jnz     copy_edge_chunk_loop_1ws ;more scans to do
                   2574: 
                   2575:         PLAIN_RET
                   2576: 
                   2577: 
                   2578: ;-----------------------------------------------------------------------;
                   2579: ; Copies a strip of left edge bytes from the source to the destination
                   2580: ; through an intermediate RAM buffer. This is the approach required by
                   2581: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
                   2582: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
                   2583: ; cause flicker.
                   2584: ;
                   2585: ; Input:
                   2586: ;       ulNextScan = width of scan, in bytes
                   2587: ;       ulBlockHeight = # of scans to copy
                   2588: ;       ulLeftEdgeSrc = start source offset in bitmap
                   2589: ;       ulLeftEdgeDest = start dest offset in bitmap
                   2590: ;       jLeftMask = left edge clip mask
                   2591: ;       pTempPlane = pointer to temp storage buffer
                   2592: ;       ulCurrentSrcScan = scan used to map in source bank
                   2593: ;       ulCurrentDestScan = scan used to map in dest bank
                   2594: ;       ulCurrentJustification = justification used to map in current bank
                   2595: ;       ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
                   2596: ;       For 1 R/W adapters, expects the source bank to be mapped in; banking
                   2597: ;               is the same at exit as it was at entry
                   2598: ;
                   2599: ; Output:
                   2600: ;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
                   2601: ;               scan processed
                   2602: ;
                   2603: ; Note that this should never be called for an unbanked or 2 R/W adapter,
                   2604: ; because the source and dest are always both addressable simultaneously then.
                   2605: ;-----------------------------------------------------------------------;
                   2606: 
                   2607:         align   4
                   2608: copy_left_edge_via_buffer:
                   2609: 
                   2610: ; First, copy all the bytes into the temporary buffer.
                   2611: 
                   2612: ; Calculate the start source address from the bitmap start address and the
                   2613: ; offset within the bitmap (the dest address is calculated further down).
                   2614: 
                   2615:         mov     ecx,pdsurf
                   2616:         mov     esi,ulLeftEdgeSrc
                   2617:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2618: 
                   2619: ; Copy the edge from the source to the temp buffer.
                   2620: 
                   2621:         mov     eax,ulLeftSrcWidthMinus1 ;table index: source edge width - 1
                   2622:         call    copy_edge_from_screen_to_buffer[eax*4]
                   2623: 
                   2624: ; Remember where we left off, for next time
                   2625: 
                   2626:         mov     ebx,pdsurf
                   2627:         sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
                   2628:         mov     ulLeftEdgeSrc,esi
                   2629: 
                   2630: ; Now copy the temp buffer to the screen.
                   2631: 
                   2632: ; Map in the source bank to match the destination, so we can read/write to it
                   2633: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
                   2634: ; mapped by this call, which is fine.
                   2635: ; EBX is used as pdsurf again after the call, so it must survive it.
                   2636:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2637:                 <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
                   2638: 
                   2639: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
                   2640: ; until now to calculate this, because the dest bank wasn't mapped earlier).
                   2641: 
                   2642:         mov     edi,ulLeftEdgeDest
                   2643:         add     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2644: 
                   2645: ; Do the copy.
                   2646: 
                   2647:         mov     ah,byte ptr jLeftMask           ;clip mask for this edge
                   2648:         mov     ebx,ulLeftSrcWidthMinus1        ;table index: edge width - 1
                   2649:         call    copy_edge_from_buffer_to_screen[ebx*4]
                   2650: 
                   2651: ; Remember where we left off, for next time.
                   2652: 
                   2653:         mov     ebx,pdsurf
                   2654:         sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2655:         mov     ulLeftEdgeDest,edi
                   2656: 
                   2657: ; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
                   2658: ; will be mapped by this call, which is fine.
                   2659: 
                   2660:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2661:                 <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
                   2662: 
                   2663:         PLAIN_RET
                   2664: 
                   2665: 
                   2666: ;-----------------------------------------------------------------------;
                   2667: ; Copies a strip of right edge bytes from the source to the destination
                   2668: ; through an intermediate RAM buffer. This is the approach required by
                   2669: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
                   2670: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
                   2671: ; cause flicker.
                   2672: ;
                   2673: ; Input:
                   2674: ;       ulNextScan = width of scan, in bytes
                   2675: ;       ulBlockHeight = # of scans to copy
                   2676: ;       ulRightEdgeSrc = start source offset in bitmap
                   2677: ;       ulRightEdgeDest = start dest offset in bitmap
                   2678: ;       jRightMask = right edge clip mask
                   2679: ;       pTempPlane = pointer to temp storage buffer
                   2680: ;       ulCurrentSrcScan = scan used to map in source bank
                   2681: ;       ulCurrentDestScan = scan used to map in dest bank
                   2682: ;       ulCurrentJustification = justification used to map in current bank
                   2683: ;       ulRightSrcWidthMinus1 = width of right source edge minus 1 (0 or 1)
                   2684: ;       For 1 R/W adapters, expects the source bank to be mapped in; banking
                   2685: ;               is the same at exit as it was at entry
                   2686: ;
                   2687: ; Output:
                   2688: ;       Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
                   2689: ;               scan processed
                   2690: ;
                   2691: ; Note that this should never be called for an unbanked or 2 R/W adapter,
                   2692: ; because the source and dest are always both addressable simultaneously then.
                   2693: ;-----------------------------------------------------------------------;
                   2694: 
                   2695:         align   4
                   2696: copy_right_edge_via_buffer:
                   2697: 
                   2698: ; First, copy all the bytes into the temporary buffer.
                   2699: 
                   2700: ; Calculate the start source address from the bitmap start address and the
                   2701: ; offset within the bitmap (the dest address is calculated further down).
                   2702: 
                   2703:         mov     ecx,pdsurf
                   2704:         mov     esi,ulRightEdgeSrc
                   2705:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
                   2706: 
                   2707: ; Copy the edge from the source to the temp buffer.
                   2708: 
                   2709:         mov     eax,ulRightSrcWidthMinus1 ;table index: source edge width - 1
                   2710:         call    copy_edge_from_screen_to_buffer[eax*4]
                   2711: 
                   2712: ; Remember where we left off, for next time
                   2713: 
                   2714:         mov     ebx,pdsurf
                   2715:         sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
                   2716:         mov     ulRightEdgeSrc,esi
                   2717: 
                   2718: ; Now copy the temp buffer to the screen.
                   2719: 
                   2720: ; Map in the source bank to match the destination, so we can read/write to it
                   2721: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
                   2722: ; mapped by this call, which is fine.
                   2723: ; EBX is used as pdsurf again after the call, so it must survive it.
                   2724:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2725:                 <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
                   2726: 
                   2727: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
                   2728: ; until now to calculate this, because the dest bank wasn't mapped earlier).
                   2729: 
                   2730:         mov     edi,ulRightEdgeDest
                   2731:         add     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2732: 
                   2733: ; Do the copy.
                   2734: 
                   2735:         mov     ah,byte ptr jRightMask          ;clip mask for this edge
                   2736:         mov     ebx,ulRightSrcWidthMinus1       ;table index: edge width - 1
                   2737:         call    copy_edge_from_buffer_to_screen[ebx*4]
                   2738: 
                   2739: ; Remember where we left off, for next time.
                   2740: 
                   2741:         mov     ebx,pdsurf
                   2742:         sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
                   2743:         mov     ulRightEdgeDest,edi
                   2744: 
                   2745: ; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
                   2746: ; will be mapped by this call, which is fine.
                   2747: 
                   2748:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                   2749:                 <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
                   2750: 
                   2751:         PLAIN_RET
                   2752: 
                   2753: 
                   2754: ;-----------------------------------------------------------------------;
                   2755: ; Copies an edge from the temp buffer (1 wide) to the screen.
                   2756: ; Entry:
                   2757: ;       AH = bit mask setting for edge
                   2758: ;       (DX need not be set up; the full GC port address is loaded here)
                   2759: ;       EDI = destination address
                   2760: ;       pTempPlane = temp buffer from which to copy
                   2761: ;       ulBlockHeight = # of bytes to copy per plane
                   2762: ;       ulNextScan = scan width
                   2763: ;       Source and dest banks both pointing to destination
                   2764: ; Exit:
                   2765: ;       EDI = next destination address
                   2766: ;
                   2767: ; Preserved: EBP
                   2768: ;-----------------------------------------------------------------------;
                   2769: 
                   2770:         align   4
                   2771: copy_buffered_edge_to_screen_1ws:
                   2772: 
                   2773:         mov     pDestAddr,edi   ;each plane pass reloads EDI from here
                   2774: 
                   2775: ; Set the clip mask for this edge. Load all of EDX rather than only DL:
                   2776: ; the callers make an indirect bank-control call between the
                   2777: ; screen-to-buffer step and this routine, and nothing guarantees that
                   2778: ; call leaves DH = VGA_BASE SHR 8.
                   2779: 
                   2780:         mov     edx,VGA_BASE + GRAF_ADDR
                   2781:         mov     al,GRAF_BIT_MASK
                   2782:         out     dx,ax           ;AL = register index, AH = mask from caller
                   2783: 
                   2784:         mov     pTempEntry,offset copy_edge_from_buf_full_chunk_1ws
                   2785:                                 ;entry point into unrolled loop to copy first
                   2786:                                 ; chunk, assuming it's a full chunk
                   2787:         mov     ecx,pTempPlane  ;temp buffer start (copy from here)
                   2788:         mov     ebx,ulBlockHeight ;total # of scans to copy
                   2789: 
                   2790: ; Copy the edge in a series of chunks, to avoid flicker.
                   2791: 
                   2792: copy_from_buffer_chunk_loop_1ws:
                   2793: 
                   2794:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                   2795:                                     ; a full chunk
                   2796:         jge     short @F            ;do a full chunk
                   2797:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                   2798:                                     ; scans
                   2799:         mov     ebx,pfnCopyEdgesFromBufferEntry_1ws[-4][ebx*4]
                   2800:         mov     pTempEntry,ebx  ;entry point into unrolled loop to copy desired
                   2801:                                 ; chunk size (table's first entry is for a
                   2802:                                 ; count of 1, hence the -4)
                   2803:         sub     ebx,ebx         ;no scans after this
                   2804: @@:
                   2805:         push    ebx             ;remember remaining scan count
                   2806: 
                   2807:         mov     al,MM_C3        ;start by copying plane 3
                   2808:         mov     ebx,ulNextScan  ;EBX = scan stride used by the unrolled loop
                   2809: 
                   2810:         push    ecx             ;remember current temp buffer start
                   2811: 
                   2812:         mov     dl,SEQ_DATA     ;leave DX pointing to the Sequencer Data reg
                   2813: 
                   2814: copy_from_buffer_plane_loop_1ws:
                   2815: 
                   2816: ; Set Map Mask to enable writes to plane we're copying.
                   2817: ; NOTE(review): only SEQ_DATA is written, so the Sequencer Index is
                   2818: ; presumably already selecting Map Mask - confirm.
                   2819: 
                   2820:         out     dx,al
                   2821: 
                   2822:         mov     esi,ecx                 ;point to current plane's source byte
                   2823:         add     ecx,ulBlockHeight       ;point to next plane's source byte
                   2824: 
                   2825:         mov     edi,pDestAddr
                   2826: 
                   2827:         jmp     pTempEntry              ;copy this plane's edge bytes
                   2828: 
                   2829: 
                   2830: ;-----------------------------------------------------------------------;
                   2831: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
                   2832: ; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
                   2833: ; bytes.
                   2834: ;-----------------------------------------------------------------------;
                   2835: 
                   2836: pfnCopyEdgesFromBufferEntry_1ws label dword
                   2837: INDEX = 1
                   2838:         rept    EDGE_CHUNK_SIZE
                   2839:         DEFINE_DD       EDGE_FROM_BUFFER_1WS,%INDEX
                   2840: INDEX = INDEX+1
                   2841:         endm
                   2842: 
                   2843: 
                   2844: ;-----------------------------------------------------------------------;
                   2845: ; Unrolled loop for copying a strip of edge bytes (1 wide) from the temp
                   2846: ; buffer.
                   2847: ;-----------------------------------------------------------------------;
                   2848: 
                   2849: COPY_EDGE_FROM_BUFFER_1WS macro ENTRY_LABEL,ENTRY_INDEX
                   2850: &ENTRY_LABEL&ENTRY_INDEX&:
                   2851:         mov     ah,[esi]        ;get byte to copy
                   2852:         inc     esi             ;point to next source (temp buffer) byte
                   2853:         xchg    [edi],ah        ;read before write so Bit Mask can operate
                   2854:                                 ; VGA rotates during write
                   2855:         add     edi,ebx         ;point to next dest (screen) scan
                   2856:         endm    ;-----------------------------------;
                   2857: 
                   2858: ;  EBX = scan line width
                   2859: ;  ESI = source address to copy from (temp buffer)
                   2860: ;  EDI = target address to copy to (screen)
                   2861: ;  Bit Mask set to desired clipping
                   2862: ;  Map Mask set to enable the desired plane for write
                   2863: ;  AL = Map Mask bit of the current plane (only AH is used as scratch)
                   2864: 
                   2865:         align   4
                   2866: copy_edge_from_buf_full_chunk_1ws:
                   2867:         UNROLL_LOOP     COPY_EDGE_FROM_BUFFER_1WS, \
                   2868:                         EDGE_FROM_BUFFER_1WS,EDGE_CHUNK_SIZE
                   2869: 
                   2870: ; Do next plane within this chunk, if any.
                   2871: 
                   2872:         shr     al,1                    ;advance to next plane
                   2873:         jnz     copy_from_buffer_plane_loop_1ws
                   2874: 
                   2875: ; Remember where we left off, for next chunk.
                   2876: 
                   2877:         mov     pDestAddr,edi
                   2878:         pop     ecx                     ;get back current temp buffer start
                   2879:         add     ecx,EDGE_CHUNK_SIZE     ;point to next chunk's start
                   2880: 
                   2881: ; Do next chunk within this bank block, if any.
                   2882: 
                   2883:         pop     ebx                     ;retrieve remaining scan count
                   2884:         and     ebx,ebx                 ;any scans left?
                   2885:         jnz     copy_from_buffer_chunk_loop_1ws ;more scans to do
                   2886: 
                   2887:         PLAIN_RET
                   2879: 
                   2880: 
                   2881: ;-----------------------------------------------------------------------;
                   2882: ; Copies an edge from the screen (1 wide) to the temp buffer.
                   2883: ; Entry:
                   2884: ;       ESI = source address
                   2885: ;       pTempPlane = temp buffer into which to copy
                   2886: ;       ulBlockHeight = # of bytes to copy per plane
                   2887: ;       ulNextScan = scan width
                   2888: ;       Source bank pointing to source
                   2889: ; Exit:
                   2890: ;       DH = VGA_BASE SHR 8
                   2891: ;       ESI = next source address
                   2892: ;
                   2893: ; Preserved: EBP
                   2894: ;-----------------------------------------------------------------------;
                   2895: 
                   2896:         align   4
                   2897: copy_screen_to_buffered_edge_1ws:
                   2898: 
                   2899:         mov     pSrcAddr,esi
                   2900: 
                   2901: ; Leave the GC Index pointing to the Read Map.
                   2902: 
                   2903:         mov     edx,VGA_BASE + GRAF_ADDR
                   2904:         mov     al,GRAF_READ_MAP
                   2905:         out     dx,al
                   2906: 
                   2907:         mov     ebx,ulBlockHeight
                   2908:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry_1ws, \
                   2909:                                 LOOP_UNROLL_SHIFT
                   2910:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
                   2911:         mov     pTempEntry,ecx   ;ditto for entry point
                   2912: 
                   2913:         mov     ecx,ulNextScan
                   2914:         mov     edi,pTempPlane  ;dest offset in temp buffer for plane 3 bytes.
                   2915:                                 ;The rest of the planes are stored
                   2916:                                 ; consecutively
                   2917:         mov     al,3            ;start by copying plane 3
                   2918:         mov     dl,GRAF_DATA    ;leave DX pointing to the GC Data reg
                   2919: copy_edge_to_buffer_plane_loop_1ws:
                   2920:         mov     esi,pSrcAddr ;source pointer
                   2921: 
                   2922:         out     dx,al            ;set Read Map to plane we're copying from.
                   2923: 
                   2924:         mov     ebx,culTempCount ;# of unrolled loop iterations
                   2925:         jmp     pTempEntry       ;copy the edge bytes for this plane to the
                   2926:                                  ; temp buffer
                   2927: 
                   2928: ;-----------------------------------------------------------------------;
                   2929: ; Table of unrolled edge copy to temp buffer loop entry points.
                   2930: ;-----------------------------------------------------------------------;
                   2931: 
                   2932:         UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry_1WS, \
                   2933:                                 EDGE_TO_TEMP_1WS, LOOP_UNROLL_COUNT
                   2934: 
                   2935: ;-----------------------------------------------------------------------;
                   2936: ; Unrolled loop for copying edge bytes to the temp buffer.
                   2937: ;-----------------------------------------------------------------------;
                   2938: 
                   2939: COPY_EDGE_TO_TEMP_1WS macro ENTRY_LABEL,ENTRY_INDEX
                   2940: &ENTRY_LABEL&ENTRY_INDEX&:
                   2941:         mov     ah,[esi]        ;get byte to copy
                   2942:         add     esi,ecx         ;point to next source scan
                   2943:         mov     [edi],ah        ;copy byte to temp buffer
                   2944:         inc     edi             ;point to next temp buffer byte
                   2945:         endm    ;-----------------------------------;
                   2946: 
                   2947: ;  EBX = count of unrolled loop iterations
                   2948: ;  ECX = offset from end of one scan's fill to start of next
                   2949: ;  ESI = source address to copy from (screen)
                   2950: ;  EDI = target address to copy to (temp buffer)
                   2951: ;  Read Map set to enable the desired plane for read
                   2952: 
                   2953:         align   4
                   2954: edge_to_buffer_loop_1ws:
                   2955:         UNROLL_LOOP     COPY_EDGE_TO_TEMP_1WS,EDGE_TO_TEMP_1WS, \
                   2956:                         LOOP_UNROLL_COUNT
                   2957:         dec     ebx
                   2958:         jnz     edge_to_buffer_loop_1ws
                   2959: 
                   2960:         dec     al              ;count down planes
                   2961:         jns     copy_edge_to_buffer_plane_loop_1ws
                   2962: 
                   2963:         PLAIN_RET
                   2964: 
                   2965: 
                   2966: ;-----------------------------------------------------------------------;
                   2967: ; Copies an edge from a 2-wide source to the destination on the screen.
                   2968: ; Entry:
                   2969: ;       AH = bit mask setting for edge
                   2970: ;       ESI = source address
                   2971: ;       EDI = destination address
                   2972: ;       ulBlockHeight = # of bytes to copy per plane
                   2973: ;       ulNextScan = scan width
                   2974: ;       ulCombineMask = masking to be applied before ORing the two source
                   2975: ;               bytes together, to keep only the data needed in preparation
                   2976: ;               for the VGA rotator doing its stuff
                   2977: ;       Source readable, and destination readable and writable
                   2978: ; Exit:
                   2979: ;       ESI = next source address
                   2980: ;       EDI = next destination address
                   2981: ; Modified (visible here): EAX, EBX, ECX, EDX, pSrcAddr, pDestAddr
                   2982: ; Preserved: EBP
                   2983: ;-----------------------------------------------------------------------;
                   2984: 
                   2985:         align   4
                   2986: copy_edge_2ws:
                   2987:         mov     pSrcAddr,esi    ;chunk start addresses; reloaded for every plane
                   2988:         mov     pDestAddr,edi
                   2989: 
                   2990: ; Set the clip mask for this edge.
                   2991: 
                   2992:         mov     edx,VGA_BASE + GRAF_ADDR
                   2993:         mov     al,GRAF_BIT_MASK
                   2994:         out     dx,ax           ;word out: AL = GC index (Bit Mask), AH = mask value
                   2995: 
                   2996: ; Leave the GC Index pointing to the Read Map.
                   2997: 
                   2998:         mov     al,GRAF_READ_MAP
                   2999:         out     dx,al
                   3000: 
                   3001:         mov     ebx,ulBlockHeight       ;total # of scans still to copy
                   3002: 
                   3003:         mov     ecx,offset copy_edge_rw_2ws_full_chunk
                   3004:                                 ;entry point into unrolled loop assuming we do
                   3005:                                 ; a full chunk the first time
                   3006: 
                   3007: ; Copy the edge in a series of chunks.
                   3008: 
                   3009: copy_edge_chunk_loop_2ws:
                   3010: 
                   3011:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                   3012:                                     ; a full chunk
                   3013:         jge     short @F            ;do a full chunk
                   3014:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                   3015:                                     ; scans
                   3016:         mov     ecx,pfnCopyEdgeRWEntry_2ws[-4][ebx*4] ;table is 1-based, hence -4
                   3017:                                 ;entry point into unrolled loop to copy desired
                   3018:                                 ; chunk size
                   3019:         sub     ebx,ebx         ;no scans after this
                   3020: @@:
                   3021:         push    ebx             ;remember remaining scan count
                   3022: 
                   3023:         mov     eax,(MM_C3 SHL 8) + 3 ;AH = Map Mask value, AL = Read Map plane #; start with plane 3
                   3024:         mov     ebx,ulNextScan  ;scan width, used as the per-scan stride below
                   3025: 
                   3026: copy_edge_plane_loop_2ws:
                   3027: 
                   3028:         push    eax                     ;preserve plane info (EAX is scratch in the copy loop)
                   3029: 
                   3030: ; Set Read Map to enable reads from plane we're copying from.
                   3031: 
                   3032:         mov     edx,VGA_BASE + GRAF_DATA
                   3033:         out     dx,al
                   3034: 
                   3035: ; Set Map Mask to enable writes to plane we're copying. (Assumes the SEQ Index already selects Map Mask -- TODO confirm)
                   3036: 
                   3037:         mov     dl,SEQ_DATA     ;DH still holds the high byte loaded with VGA_BASE above
                   3038:         mov     al,ah
                   3039:         out     dx,al
                   3040: 
                   3041:         mov     esi,pSrcAddr            ;restart at this chunk's first scan
                   3042:         mov     edi,pDestAddr
                   3043:         mov     edx,ulCombineMask       ;EDX = combine mask for the unrolled loop
                   3044: 
                   3045:         jmp     ecx                     ;enter unrolled loop to copy this plane's chunk
                   3046: 
                   3047: 
                   3048: ;-----------------------------------------------------------------------;
                   3049: ; Table of unrolled edge loop entry points. First entry point is to copy
                   3050: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
                   3051: ;-----------------------------------------------------------------------;
                   3052: 
                   3053: pfnCopyEdgeRWEntry_2ws label dword
                   3054: INDEX = 1
                   3055:         rept    EDGE_CHUNK_SIZE
                   3056:         DEFINE_DD       EDGE_RW_2WS,%INDEX
                   3057: INDEX = INDEX+1
                   3058:         endm
                   3059: 
                   3060: 
                   3061: ;-----------------------------------------------------------------------;
                   3062: ; Unrolled loop for copying a strip of edge bytes, with 2-wide source and
                   3063: ; destination both readable and writable.
                   3064: ;-----------------------------------------------------------------------;
                   3065: 
                   3066: COPY_EDGE_RW_2WS macro ENTRY_LABEL,ENTRY_INDEX
                   3067: &ENTRY_LABEL&ENTRY_INDEX&:
                   3068:         mov     ax,[esi]        ;get word to copy
                   3069:         add     esi,ebx         ;point to next source scan
                   3070:         and     eax,edx         ;mask in preparation for combining bytes
                   3071:         or      al,ah           ;combine the desired parts of the bytes
                   3072:         xchg    [edi],al        ;read before write so Bit Mask can operate
                   3073:                                 ; VGA rotates during write
                   3074:         add     edi,ebx         ;point to next dest scan
                   3075:         endm    ;-----------------------------------;
                   3076: 
                   3077: ;  EBX = scan line width
                   3078: ;  EDX = mask to preserve desired portions of AH and AL before combining
                   3079: ;  ESI = source address to copy from
                   3080: ;  EDI = target address to copy to
                   3081: ;  Bit Mask set to desired clipping
                   3082: ;  Read Map and Map Mask set to enable the desired plane for read and write
                   3083: 
                   3084:         align   4
                   3085: copy_edge_rw_2ws_full_chunk:
                   3086:         UNROLL_LOOP COPY_EDGE_RW_2WS,EDGE_RW_2WS,EDGE_CHUNK_SIZE
                   3087: 
                   3088: ; Do next plane within this chunk, if any.
                   3089: 
                   3090:         pop     eax                     ;retrieve plane info
                   3091: 
                   3092:         shr     ah,1                    ;advance to next plane
                   3093:         dec     eax                     ;count down planes; EAX goes negative after plane 0
                   3094:         jns     copy_edge_plane_loop_2ws
                   3095: 
                   3096: ; Remember where we left off, for the next chunk.
                   3097: 
                   3098:         mov     pSrcAddr,esi
                   3099:         mov     pDestAddr,edi
                   3100: 
                   3101: ; Do next chunk within this bank block, if any.
                   3102: 
                   3103:         pop     ebx                     ;retrieve remaining scan count
                   3104:         and     ebx,ebx                 ;any scans left?
                   3105:         jnz     copy_edge_chunk_loop_2ws ;more scans to do
                   3106: 
                   3107:         PLAIN_RET
                   3108: 
                   3109: 
                   3110: ;-----------------------------------------------------------------------;
                   3111: ; Copies an edge from the temp buffer (2 wide) to the screen.
                   3112: ; Entry:
                   3113: ;       AH = bit mask setting for edge
                   3114: ;       EDI = destination address
                   3115: ;       pTempPlane = temp buffer from which to copy
                   3116: ;       ulBlockHeight = # of bytes to copy per plane
                   3117: ;       ulNextScan = scan width
                   3118: ;       Source and dest banks both pointing to destination
                   3119: ;       ulCombineMask = masking to be applied before ORing the two source
                   3120: ;               bytes together, to keep only the data needed in preparation
                   3121: ;               for the VGA rotator doing its stuff
                   3122: ; Exit:
                   3123: ;       EDI = next destination address
                   3124: ; Modified (visible here): EAX, EBX, ECX, EDX, ESI, pDestAddr, pTempEntry
                   3125: ; Preserved: EBP
                   3126: ;-----------------------------------------------------------------------;
                   3127: 
                   3128:         align   4
                   3129: copy_buffered_edge_to_screen_2ws:
                   3130: 
                   3131:         mov     pDestAddr,edi   ;chunk start on screen; reloaded for every plane
                   3132: 
                   3133:         mov     edx,VGA_BASE + GRAF_ADDR
                   3134:         mov     al,GRAF_BIT_MASK
                   3135:         out     dx,ax           ;word out: AL = GC index (Bit Mask), AH = mask value
                   3136: 
                   3137:         mov     pTempEntry,offset copy_edge_from_buf_full_chunk_2ws
                   3138:                                 ;entry point into unrolled loop, assuming the
                   3139:                                 ; first chunk is full size
                   3140:         mov     ecx,pTempPlane  ;temp buffer start (copy from here)
                   3141:         mov     ebx,ulBlockHeight       ;total # of scans still to copy
                   3142: 
                   3143: ; Copy the edge in a series of chunks, to avoid flicker.
                   3144: 
                   3145: copy_from_buffer_chunk_loop_2ws:
                   3146: 
                   3147:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                   3148:                                     ; a full chunk
                   3149:         jge     short @F            ;do a full chunk
                   3150:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                   3151:                                     ; scans
                   3152:         mov     ebx,pfnCopyEdgesFromBufferEntry_2ws[-4][ebx*4] ;table is 1-based, hence -4
                   3153:         mov     pTempEntry,ebx  ;entry point into unrolled loop to copy final
                   3154:                                 ; chunk size
                   3155:         sub     ebx,ebx         ;no scans after this
                   3156: @@:
                   3157:         push    ebx             ;remember remaining scan count
                   3158: 
                   3159:         mov     al,MM_C3        ;start by copying plane 3
                   3160:         mov     ebx,ulNextScan  ;scan width, used as the dest stride below
                   3161: 
                   3162:         push    ecx             ;remember current temp buffer start
                   3163: 
                   3164: copy_from_buffer_plane_loop_2ws:
                   3165: 
                   3166: ; Set Map Mask to enable writes to plane we're copying. (Assumes the SEQ Index already selects Map Mask -- TODO confirm)
                   3167: 
                   3168:         mov     edx,VGA_BASE + SEQ_DATA
                   3169:         out     dx,al
                   3170: 
                   3171:         push    eax                     ;preserve plane info (EAX is scratch below)
                   3172: 
                   3173:         mov     esi,ecx                 ;point to current plane's source word
                   3174:         mov     eax,ulBlockHeight
                   3175:         lea     ecx,[ecx+eax*2]         ;point to next plane's source word (ulBlockHeight words per plane)
                   3176: 
                   3177:         mov     edi,pDestAddr           ;restart at this chunk's first dest scan
                   3178:         mov     edx,ulCombineMask       ;EDX = combine mask for the unrolled loop
                   3179: 
                   3180:         jmp     pTempEntry              ;enter unrolled loop to copy this plane's chunk
                   3181: 
                   3182: 
                   3183: ;-----------------------------------------------------------------------;
                   3184: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
                   3185: ; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
                   3186: ; bytes.
                   3187: ;-----------------------------------------------------------------------;
                   3188: ; NOTE(review): referenced above as ..._2ws; names differ only in case -- confirm case-insensitive assembly.
                   3189: pfnCopyEdgesFromBufferEntry_2WS label dword
                   3190: INDEX = 1
                   3191:         rept    EDGE_CHUNK_SIZE
                   3192:         DEFINE_DD       EDGE_FROM_BUFFER_2WS,%INDEX
                   3193: INDEX = INDEX+1
                   3194:         endm
                   3195: 
                   3196: 
                   3197: ;-----------------------------------------------------------------------;
                   3198: ; Unrolled loop for copying a strip of edge bytes (2 wide) from the temp
                   3199: ; buffer.
                   3200: ;-----------------------------------------------------------------------;
                   3201: 
                   3202: COPY_EDGE_FROM_BUFFER_2WS macro ENTRY_LABEL,ENTRY_INDEX
                   3203: &ENTRY_LABEL&ENTRY_INDEX&:
                   3204:         mov     ax,[esi]        ;get word to copy
                   3205:         add     esi,2           ;point to next source (temp buffer) word
                   3206:         and     eax,edx         ;mask in preparation for combining bytes
                   3207:         or      al,ah           ;combine the desired parts of the bytes
                   3208:         xchg    [edi],al        ;read before write so Bit Mask can operate
                   3209:                                 ; VGA rotates during write
                   3210:         add     edi,ebx         ;point to next dest (screen) scan
                   3211:         endm    ;-----------------------------------;
                   3212: 
                   3213: ;  EBX = scan line width
                   3214: ;  EDX = mask to preserve desired portions of AH and AL before combining
                   3215: ;  ESI = source address to copy from (temp buffer)
                   3216: ;  EDI = target address to copy to (screen)
                   3217: ;  Bit Mask set to desired clipping
                   3218: ;  Map Mask set to enable the desired plane for write
                   3219: 
                   3220:         align   4
                   3221: copy_edge_from_buf_full_chunk_2ws:
                   3222:         UNROLL_LOOP     COPY_EDGE_FROM_BUFFER_2WS, \
                   3223:                         EDGE_FROM_BUFFER_2WS,EDGE_CHUNK_SIZE
                   3224: 
                   3225: ; Do next plane within this chunk, if any.
                   3226: 
                   3227:         pop     eax                     ;retrieve plane info
                   3228:         shr     al,1                    ;advance to next plane; zero once the mask bit shifts out
                   3229:         jnz     copy_from_buffer_plane_loop_2ws
                   3230: 
                   3231: ; Remember where we left off, for next chunk.
                   3232: 
                   3233:         mov     pDestAddr,edi
                   3234:         pop     ecx                     ;get back current temp buffer start
                   3235:         add     ecx,EDGE_CHUNK_SIZE*2   ;point to next chunk's start word
                   3236: 
                   3237: ; Do next chunk within this bank block, if any.
                   3238: 
                   3239:         pop     ebx                     ;retrieve remaining scan count
                   3240:         and     ebx,ebx                 ;any scans left?
                   3241:         jnz     copy_from_buffer_chunk_loop_2ws ;more scans to do
                   3242: 
                   3243:         PLAIN_RET
                   3244: 
                   3245: 
                   3246: ;-----------------------------------------------------------------------;
                   3247: ; Copies an edge from the screen (2 wide) to the temp buffer.
                   3248: ; Entry:
                   3249: ;       ESI = source address
                   3250: ;       pTempPlane = temp buffer to which to copy
                   3251: ;       ulBlockHeight = # of bytes to copy per plane
                   3252: ;       ulNextScan = scan width
                   3253: ;       Source bank pointing to source
                   3254: ; Exit:
                   3255: ;       ESI = next source address
                   3256: ; Modified (visible here): EAX, EBX, ECX, EDX, EDI, pSrcAddr, culTempCount, pTempEntry
                   3257: ; Preserved: EBP
                   3258: ;-----------------------------------------------------------------------;
                   3259: 
                   3260:         align   4
                   3261: copy_screen_to_buffered_edge_2ws:
                   3262: 
                   3263:         mov     pSrcAddr,esi    ;remember source start; reloaded for every plane
                   3264: 
                   3265: ; Leave the GC Index pointing to the Read Map.
                   3266: 
                   3267:         mov     edx,VGA_BASE + GRAF_ADDR
                   3268:         mov     al,GRAF_READ_MAP
                   3269:         out     dx,al
                   3270: 
                   3271:         mov     ebx,ulBlockHeight       ;# of scans to copy per plane
                   3272:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry_2ws, \
                   3273:                                 LOOP_UNROLL_SHIFT
                   3274:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
                   3275:         mov     pTempEntry,ecx   ;ditto for entry point
                   3276: 
                   3277:         mov     ecx,ulNextScan  ;scan width; added to ESI after each word read
                   3278:         mov     edi,pTempPlane  ;dest offset in temp buffer for plane 3 words.
                   3279:                                 ;The rest of the planes are stored
                   3280:                                 ; consecutively
                   3281:         mov     eax,3           ;start by copying plane 3
                   3282: copy_edge_to_buf_pl_loop_2ws:
                   3283:         mov     esi,pSrcAddr    ;source pointer (same start for every plane)
                   3284: 
                   3285:         mov     edx,VGA_BASE + GRAF_DATA ;reloaded per plane: DX is scratch in the copy loop
                   3286:         out     dx,al           ;set Read Map to plane from which we're copying
                   3287: 
                   3288:         mov     ebx,culTempCount ;# of unrolled loop iterations
                   3289:         jmp     pTempEntry       ;copy the edge words for this plane to the
                   3290:                                  ; temp buffer
                   3291: 
                   3292: ;-----------------------------------------------------------------------;
                   3293: ; Table of unrolled edge copy to temp buffer loop entry points.
                   3294: ;-----------------------------------------------------------------------;
                   3295: ; NOTE(review): referenced above as ..._2ws; names differ only in case -- confirm case-insensitive assembly.
                   3296:         UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry_2WS, \
                   3297:                                 EDGE_TO_TEMP_2WS, LOOP_UNROLL_COUNT
                   3298: 
                   3299: ;-----------------------------------------------------------------------;
                   3300: ; Unrolled loop for copying edge words to the temp buffer.
                   3301: ;-----------------------------------------------------------------------;
                   3302: 
                   3303: COPY_EDGE_TO_TEMP_2WS macro ENTRY_LABEL,ENTRY_INDEX
                   3304: &ENTRY_LABEL&ENTRY_INDEX&:
                   3305:         mov     dx,[esi]        ;get word to copy
                   3306:         add     esi,ecx         ;point to next source scan
                   3307:         mov     [edi],dx        ;copy word to temp buffer
                   3308:         add     edi,2           ;point to next temp buffer word
                   3309:         endm    ;-----------------------------------;
                   3310: 
                   3311: ;  EBX = count of unrolled loop iterations
                   3312: ;  ECX = scan width (distance from one scan's edge word to the next scan's)
                   3313: ;  ESI = source address to copy from (screen)
                   3314: ;  EDI = target address to copy to (temp buffer)
                   3315: ;  Read Map set to enable the desired plane for read
                   3316: 
                   3317:         align   4
                   3318: edge_to_buffer_loop_2ws:
                   3319:         UNROLL_LOOP     COPY_EDGE_TO_TEMP_2WS,EDGE_TO_TEMP_2WS, \
                   3320:                         LOOP_UNROLL_COUNT
                   3321:         dec     ebx
                   3322:         jnz     edge_to_buffer_loop_2ws
                   3323: 
                   3324:         dec     eax              ;count down planes; goes negative after plane 0
                   3325:         jns     copy_edge_to_buf_pl_loop_2ws
                   3326: 
                   3327:         PLAIN_RET
                   3328: 
                   3329: 
                   3330: ;-----------------------------------------------------------------------;
                   3331: 
                   3332: endProc vNonAlignedSrcCopy
                   3333: 
                   3334: _TEXT$04   ends
                   3335: 
                   3336:         end
                   3337: 
                   3338: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.