Annotation of ntddk/src/video/displays/vga/i386/nalgnblt.asm, revision 1.1

1.1     ! root        1: ;******************************Module*Header*******************************\
        !             2: ; Module Name: nalgnblt.asm
        !             3: ;
        !             4: ; driver prototypes
        !             5: ;
        !             6: ; Copyright (c) 1992 Microsoft Corporation
        !             7: ;**************************************************************************/
        !             8: 
        !             9: ;-----------------------------------------------------------------------;
        !            10: ; VOID vNonAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
        !            11: ;                      INT icopydir);
        !            12: ; Input:
        !            13: ;  pdsurf - surface on which to copy
        !            14: ;  prcldest - pointer to destination rectangle
        !            15: ;  pptlsrc - pointer to source upper left corner
        !            16: ;  icopydir - direction in which copy must proceed to avoid overlap problems
        !            17: ;             and synchronize with the clip enumeration visually, according to
        !            18: ;             constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and CD_LEFTUP in
        !            19: ;             WINDDI.H
        !            20: ;
        !            21: ; Performs accelerated non-aligned SRCCOPY VGA-to-VGA blts.
        !            22: ;
        !            23: ;-----------------------------------------------------------------------;
        !            24: ;
        !            25: ; Note: The source and dest *must* be non-aligned (not have the same
        !            26: ; left-edge intrabyte pixel alignment). Will not work properly if they are
        !            27: ; in fact aligned.
        !            28: ;
        !            29: ; Note: Assumes all rectangles have positive heights and widths. Will not
        !            30: ; work properly if this is not the case.
        !            31: ;
        !            32: ;-----------------------------------------------------------------------;
        !            33: 
        !            34:         comment $
        !            35: 
        !            36: The overall approach of this module for each rectangle to copy is:
        !            37: 
        !            38: 1) Precalculate the masks and whole byte widths, and determine which of
        !            39: partial left edge, partial right edge, and whole middle bytes are required
        !            40: for this copy.
        !            41: 
        !            42: 2) Set up the starting pointers for each of the areas (left, whole middle,
        !            43: right), the start and stop scan lines, the copying direction (left-to-right
        !            44: or right-to-left, and top-to-bottom or bottom-to-top), the threading
        !            45: (sequence of calls required to do the left/whole/right components in the
        !            46: proper sequence), based on the passed-in copy direction, which in turn is
        !            47: dictated by the nature of the overlap between the source and destination.
        !            48: 
        !            49: 3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
        !            50: 1 R/W window, unbanked), that sequences through the intersection of each
        !            51: bank with the source and destination rectangles in the proper direction
        !            52: (top-to-bottom or bottom-to-top, based on the passed-in copy direction),
        !            53: and performs the copy in each such rectangle. The threading vector is used
        !            54: to call the required routines (copy left/whole/right bytes). For 1 R/W and
        !            55: 1R/1W adapters, there is a second threading vector that is called when the
        !            56: source and the destination are both adequately (for the copy purposes)
        !            57: addressable simultaneously (because they're in the same bank), so there's
        !            58: no need to copy through a temp buffer. We want to avoid the temp
        !            59: buffer whenever we can, because it's slower.
        !            60: 
        !            61: Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
        !            62: each plane's bytes are not stored in the corresponding plane's temp buffer, but
        !            63: rather consecutively in the plane 0 temp buffer. This is to reduce page
        !            64: faulting, and also so that 1R/1W adapters only need a temp buffer large enough
        !            65: to hold 4*tallest bank words (4K will do). 1 R/W adapters still copy whole
        !            66: bytes through the full temp buffer, using all four planes' temp buffers, so
        !            67: they require a temp buffer big enough to hold a full bank (256K will do).
        !            68: 
        !            69: Note: The VGA's rotator is used to perform all rotation in this module. The
        !            70: two source bytes relevant to this operation are masked to preserve the desired
        !            71: bits, then combined and fed to the VGA's rotator, which performs the rotation.
        !            72: This is better than letting the 386/486 do the rotation because even with the
        !            73: barrel shifter, those processors take 3 cycles per rotate, where the masking
        !            74: and combining take only 2 cycles (or no cycles, for edges with 1-wide
        !            75: sources). We also get to avoid 16-bit instructions like ROL AX,CL; the 16-bit
        !            76: size prefix costs a cycle on a 486.
        !            77: 
        !            78:         commend $
        !            79: 
        !            80: ;-----------------------------------------------------------------------;
        !            81: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
        !            82: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
        !            83: ; times unrolling. This is the only thing you need to change to control
        !            84: ; unrolling. Note: does not affect loops that process in chunks, like edge
        !            85: ; loops.
        !            86: 
        !            87: LOOP_UNROLL_SHIFT equ 2         ;2**2 = 4 times unrolling
        !            88: 
        !            89: ;-----------------------------------------------------------------------;
        !            90: ; Maximum # of edge bytes to process before switching to next plane. Larger
        !            91: ; means faster, but there's more potential for flicker, since the raster scan
        !            92: ; has a better chance of catching bytes that have changed in some planes but
        !            93: ; not all planes.
        !            94: 
        !            95: EDGE_CHUNK_SIZE equ     16      ;max edge bytes done before next plane
        !            96: 
        !            97: ;-----------------------------------------------------------------------;
        !            98: ; Macro to push the current threading sequence (string of routine calls) on the
        !            99: ; stack, then jump to the first threading entry. The threading pointer can be
        !           100: ; specified, or defaults to pCurrentThread. The return address can be
        !           101: ; immediately after the JMP, or can be specified.
        !           102: 
        !           103: THREAD_AND_START macro THREADING,RETURN_ADDR
        !           104:         local   push_base, return_address
        !           105: ; The dispatch code below overwrites EAX, ECX, and EDX.
        !           106: ifb <&RETURN_ADDR&>
        !           107:         push    offset return_address   ;after all the threaded routines, we
        !           108:                                         ; return here
        !           109: else
        !           110:         push    offset &RETURN_ADDR&    ;return here
        !           111: endif
        !           112: 
        !           113: ifb <&THREADING&>
        !           114:         mov     eax,pCurrentThread      ;EAX -> count dword, then routine addrs
        !           115: else
        !           116:         mov     eax,&THREADING&         ;EAX -> count dword, then routine addrs
        !           117: endif
        !           118: 
        !           119:         mov     ecx,[eax]               ;# of routines to thread (at least 1)
        !           120:         lea     ecx,[ecx*2+ecx]         ;pushes below are 3 bytes each
        !           121:         mov     edx,offset push_base+3  ;EDX = push_base+3 - 3*count, so count
        !           122:         sub     edx,ecx                 ; 1 -> jmp, 2 -> 2nd push, 3 -> 1st push
        !           123:         jmp     edx                     ;branch to push or jmp below
        !           124: 
        !           125: ; Push the threading addresses on to the stack, so routines perform the
        !           126: ; threading as they return.
        !           127: 
        !           128:         push    dword ptr [eax+12]       ;3 byte instruction
        !           129:         push    dword ptr [eax+8]        ;3 byte instruction; pushed last, so
        !           130: push_base:                               ; it is the first one returned to
        !           131:         jmp     dword ptr [eax+4]        ;jump to the first threaded routine
        !           132: 
        !           133:         align   4
        !           134: return_address:
        !           135:         endm
        !           136: 
        !           137: ;-----------------------------------------------------------------------;
        !           138: 
        !           139:                 .386
        !           140: 
        !           141: ifndef  DOS_PLATFORM
        !           142:         .model  small,c
        !           143: else
        !           144: ifdef   STD_CALL
        !           145:         .model  small,c
        !           146: else
        !           147:         .model  small,pascal
        !           148: endif;  STD_CALL
        !           149: endif;  DOS_PLATFORM
        !           150: 
        !           151:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
        !           152:         assume fs:nothing,gs:nothing
        !           153: 
        !           154:         .xlist
        !           155:         include stdcall.inc             ;calling convention cmacros
        !           156:         include i386\egavga.inc
        !           157:         include i386\strucs.inc
        !           158:         include i386\unroll.inc
        !           159:         include i386\ropdefs.inc
        !           160: 
        !           161:         .list
        !           162: 
        !           163: ;-----------------------------------------------------------------------;
        !           164: 
        !           165:         .data
        !           166: 
        !           167: ; Threads for stringing together left, whole byte, and right operations
        !           168: ; in various orders, both using a temp buffer and not. Data format is:
        !           169: ;
        !           170: ; DWORD +0 = # of calls in thread (1, 2, or 3)
        !           171: ;       +4 = first call (required)
        !           172: ;       +8 = second call (optional)
        !           173: ;      +12 = third call (optional)
        !           174: 
        !           175:         align   4
        !           176: 
        !           177: ; Copies not involving the temp buffer. L, W, and R stand for the left edge,
        !           178: ; whole bytes, and right edge routines, listed in the order they are called.
        !           179: Thread_L        dd      1
        !           180:                 dd      copy_left_edge
        !           181: 
        !           182: Thread_W        dd      1
        !           183:                 dd      copy_whole_bytes
        !           184: 
        !           185: Thread_R        dd      1
        !           186:                 dd      copy_right_edge
        !           187: 
        !           188: Thread_LR       dd      2
        !           189:                 dd      copy_left_edge
        !           190:                 dd      copy_right_edge
        !           191: 
        !           192: Thread_RL       dd      2
        !           193:                 dd      copy_right_edge
        !           194:                 dd      copy_left_edge
        !           195: 
        !           196: Thread_LW       dd      2
        !           197:                 dd      copy_left_edge
        !           198:                 dd      copy_whole_bytes
        !           199: 
        !           200: Thread_WL       dd      2
        !           201:                 dd      copy_whole_bytes
        !           202:                 dd      copy_left_edge
        !           203: 
        !           204: Thread_WR       dd      2
        !           205:                 dd      copy_whole_bytes
        !           206:                 dd      copy_right_edge
        !           207: 
        !           208: Thread_RW       dd      2
        !           209:                 dd      copy_right_edge
        !           210:                 dd      copy_whole_bytes
        !           211: 
        !           212: Thread_LWR      dd      3
        !           213:                 dd      copy_left_edge
        !           214:                 dd      copy_whole_bytes
        !           215:                 dd      copy_right_edge
        !           216: 
        !           217: Thread_RWL      dd      3
        !           218:                 dd      copy_right_edge
        !           219:                 dd      copy_whole_bytes
        !           220:                 dd      copy_left_edge
        !           221: 
        !           222: ; Copies involving the temp buffer. A "b" after L, W, or R means that step
        !           223: ; uses the corresponding *_via_buffer routine instead of the direct one.
        !           224: Thread_Lb       dd      1
        !           225:                 dd      copy_left_edge_via_buffer
        !           226: 
        !           227: Thread_Wb       dd      1
        !           228:                 dd      copy_whole_bytes_via_buffer
        !           229: 
        !           230: Thread_Rb       dd      1
        !           231:                 dd      copy_right_edge_via_buffer
        !           232: 
        !           233: Thread_LbRb     dd      2
        !           234:                 dd      copy_left_edge_via_buffer
        !           235:                 dd      copy_right_edge_via_buffer
        !           236: 
        !           237: Thread_RbLb     dd      2
        !           238:                 dd      copy_right_edge_via_buffer
        !           239:                 dd      copy_left_edge_via_buffer
        !           240: 
        !           241: Thread_LbW      dd      2
        !           242:                 dd      copy_left_edge_via_buffer
        !           243:                 dd      copy_whole_bytes
        !           244: 
        !           245: Thread_LbWb     dd      2
        !           246:                 dd      copy_left_edge_via_buffer
        !           247:                 dd      copy_whole_bytes_via_buffer
        !           248: 
        !           249: Thread_WLb      dd      2
        !           250:                 dd      copy_whole_bytes
        !           251:                 dd      copy_left_edge_via_buffer
        !           252: 
        !           253: Thread_WbLb     dd      2
        !           254:                 dd      copy_whole_bytes_via_buffer
        !           255:                 dd      copy_left_edge_via_buffer
        !           256: 
        !           257: Thread_WRb      dd      2
        !           258:                 dd      copy_whole_bytes
        !           259:                 dd      copy_right_edge_via_buffer
        !           260: 
        !           261: Thread_WbRb     dd      2
        !           262:                 dd      copy_whole_bytes_via_buffer
        !           263:                 dd      copy_right_edge_via_buffer
        !           264: 
        !           265: Thread_RbW      dd      2
        !           266:                 dd      copy_right_edge_via_buffer
        !           267:                 dd      copy_whole_bytes
        !           268: 
        !           269: Thread_RbWb     dd      2
        !           270:                 dd      copy_right_edge_via_buffer
        !           271:                 dd      copy_whole_bytes_via_buffer
        !           272: 
        !           273: Thread_LbWRb    dd      3
        !           274:                 dd      copy_left_edge_via_buffer
        !           275:                 dd      copy_whole_bytes
        !           276:                 dd      copy_right_edge_via_buffer
        !           277: 
        !           278: Thread_LbWbRb   dd      3
        !           279:                 dd      copy_left_edge_via_buffer
        !           280:                 dd      copy_whole_bytes_via_buffer
        !           281:                 dd      copy_right_edge_via_buffer
        !           282: 
        !           283: Thread_RbWLb    dd      3
        !           284:                 dd      copy_right_edge_via_buffer
        !           285:                 dd      copy_whole_bytes
        !           286:                 dd      copy_left_edge_via_buffer
        !           287: 
        !           288: Thread_RbWbLb   dd      3
        !           289:                 dd      copy_right_edge_via_buffer
        !           290:                 dd      copy_whole_bytes_via_buffer
        !           291:                 dd      copy_left_edge_via_buffer
        !           292: 
        !           293: ;-----------------------------------------------------------------------;
        !           294: ; Table of thread selection for various horizontal copy directions, with
        !           295: ; the look-up index a 4-bit field as follows:
        !           296: ;
        !           297: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
        !           298: ; Bit 2 = 1 if left edge must be copied
        !           299: ; Bit 1 = 1 if whole bytes must be copied
        !           300: ; Bit 0 = 1 if right edge must be copied
        !           301: ;
        !           302: ; This is used for all cases where both the source and destination are
        !           303: ; simultaneously addressable for our purposes, so there's no need to go
        !           304: ; through the temp buffer (unbanked, 2 R/W, and sometimes for 1 R/W and 1R/1W).
        !           305: 
        !           306: MasterThreadTable label dword
        !           307:                                 ;right-to-left
        !           308:         dd      0               ;<not used> (index 0000b: nothing to copy)
        !           309:         dd      Thread_R        ;R->L, R
        !           310:         dd      Thread_W        ;R->L, W
        !           311:         dd      Thread_RW       ;R->L, RW
        !           312:         dd      Thread_L        ;R->L, L
        !           313:         dd      Thread_RL       ;R->L, RL
        !           314:         dd      Thread_WL       ;R->L, WL
        !           315:         dd      Thread_RWL      ;R->L, RWL
        !           316:                                 ;left-to-right
        !           317:         dd      0               ;<not used> (index 1000b: nothing to copy)
        !           318:         dd      Thread_R        ;L->R, R
        !           319:         dd      Thread_W        ;L->R, W
        !           320:         dd      Thread_WR       ;L->R, WR
        !           321:         dd      Thread_L        ;L->R, L
        !           322:         dd      Thread_LR       ;L->R, LR
        !           323:         dd      Thread_LW       ;L->R, LW
        !           324:         dd      Thread_LWR      ;L->R, LWR
        !           325: 
        !           326: 
        !           327: ; Table of thread selection for various adapter types and horizontal
        !           328: ; copy directions, with the look-up index a 6-bit field as follows:
        !           329: ;
        !           330: ; Bit 5 = adapter type high bit
        !           331: ; Bit 4 = adapter type low bit
        !           332: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
        !           333: ; Bit 2 = 1 if left edge must be copied
        !           334: ; Bit 1 = 1 if whole bytes must be copied
        !           335: ; Bit 0 = 1 if right edge must be copied
        !           336: ;
        !           337: ; This is used for all cases where the source and destination are not both
        !           338: ; simultaneously addressable for our purposes, so we need to go through the
        !           339: ; temp buffer (only for 1 R/W and 1R/1W, and only sometimes).
        !           340: 
        !           341: MasterThreadTableViaBuffer label dword
        !           342:                                 ;adapter type 00b: unbanked (no need for buffer)
        !           343:                                 ;right-to-left
        !           344:         dd      0               ;<not used>
        !           345:         dd      Thread_R        ;R->L, R
        !           346:         dd      Thread_W        ;R->L, W
        !           347:         dd      Thread_RW       ;R->L, RW
        !           348:         dd      Thread_L        ;R->L, L
        !           349:         dd      Thread_RL       ;R->L, RL
        !           350:         dd      Thread_WL       ;R->L, WL
        !           351:         dd      Thread_RWL      ;R->L, RWL
        !           352:                                 ;left-to-right
        !           353:         dd      0               ;<not used>
        !           354:         dd      Thread_R        ;L->R, R
        !           355:         dd      Thread_W        ;L->R, W
        !           356:         dd      Thread_WR       ;L->R, WR
        !           357:         dd      Thread_L        ;L->R, L
        !           358:         dd      Thread_LR       ;L->R, LR
        !           359:         dd      Thread_LW       ;L->R, LW
        !           360:         dd      Thread_LWR      ;L->R, LWR
        !           361: 
        !           362:                                 ;adapter type 01b: 1 R/W banking window
        !           363:                                 ; (everything goes through buffer)
        !           364:                                 ;right-to-left
        !           365:         dd      0               ;<not used>
        !           366:         dd      Thread_Rb       ;R->L, R
        !           367:         dd      Thread_Wb       ;R->L, W
        !           368:         dd      Thread_RbWb     ;R->L, RW
        !           369:         dd      Thread_Lb       ;R->L, L
        !           370:         dd      Thread_RbLb     ;R->L, RL
        !           371:         dd      Thread_WbLb     ;R->L, WL
        !           372:         dd      Thread_RbWbLb   ;R->L, RWL
        !           373:                                 ;left-to-right
        !           374:         dd      0               ;<not used>
        !           375:         dd      Thread_Rb       ;L->R, R
        !           376:         dd      Thread_Wb       ;L->R, W
        !           377:         dd      Thread_WbRb     ;L->R, WR
        !           378:         dd      Thread_Lb       ;L->R, L
        !           379:         dd      Thread_LbRb     ;L->R, LR
        !           380:         dd      Thread_LbWb     ;L->R, LW
        !           381:         dd      Thread_LbWbRb   ;L->R, LWR
        !           382: 
        !           383:                                 ;adapter type 10b: 1R/1W banking window (edges go through buffer)
        !           384:                                 ;right-to-left
        !           385:         dd      0               ;<not used>
        !           386:         dd      Thread_Rb       ;R->L, R
        !           387:         dd      Thread_W        ;R->L, W
        !           388:         dd      Thread_RbW      ;R->L, RW
        !           389:         dd      Thread_Lb       ;R->L, L
        !           390:         dd      Thread_RbLb     ;R->L, RL
        !           391:         dd      Thread_WLb      ;R->L, WL
        !           392:         dd      Thread_RbWLb    ;R->L, RWL
        !           393:                                 ;left-to-right
        !           394:         dd      0               ;<not used>
        !           395:         dd      Thread_Rb       ;L->R, R
        !           396:         dd      Thread_W        ;L->R, W
        !           397:         dd      Thread_WRb      ;L->R, WR
        !           398:         dd      Thread_Lb       ;L->R, L
        !           399:         dd      Thread_LbRb     ;L->R, LR
        !           400:         dd      Thread_LbW      ;L->R, LW
        !           401:         dd      Thread_LbWRb    ;L->R, LWR
        !           402: 
        !           403:                                 ;adapter type 11b: 2 R/W banking window (no need for buffer)
        !           404:                                 ;right-to-left
        !           405:         dd      0               ;<not used>
        !           406:         dd      Thread_R        ;R->L, R
        !           407:         dd      Thread_W        ;R->L, W
        !           408:         dd      Thread_RW       ;R->L, RW
        !           409:         dd      Thread_L        ;R->L, L
        !           410:         dd      Thread_RL       ;R->L, RL
        !           411:         dd      Thread_WL       ;R->L, WL
        !           412:         dd      Thread_RWL      ;R->L, RWL
        !           413:                                 ;left-to-right
        !           414:         dd      0               ;<not used>
        !           415:         dd      Thread_R        ;L->R, R
        !           416:         dd      Thread_W        ;L->R, W
        !           417:         dd      Thread_WR       ;L->R, WR
        !           418:         dd      Thread_L        ;L->R, L
        !           419:         dd      Thread_LR       ;L->R, LR
        !           420:         dd      Thread_LW       ;L->R, LW
        !           421:         dd      Thread_LWR      ;L->R, LWR
        !           422: 
        !           423: 
        !           424: ; Amount to shift adapter type field left for use in MasterThreadTableViaBuffer.
        !           425: 
        !           426: ADAPTER_FIELD_SHIFT     equ     4       ;adapter type sits in index bits 5-4
        !           427: 
        !           428: ; Mask for setting left-to-right bit to "left-to-right true" for use in both
        !           429: ; MasterThread tables.
        !           430: 
        !           431: LEFT_TO_RIGHT_FIELD_SET equ     1000b   ;bit 3 of the look-up index
        !           432: 
        !           433: 
        !           434: ; Table of top-to-bottom loops for adapter types.
        !           435: 
        !           436:         align   4
        !           437: TopToBottomLoopTable label dword
        !           438:         dd      top_to_bottom_2RW       ;unbanked is same as 2RW
        !           439:         dd      top_to_bottom_1RW       ;1 R/W banking window
        !           440:         dd      top_to_bottom_1R1W      ;1R/1W banking window
        !           441:         dd      top_to_bottom_2RW       ;2 R/W banking window
        !           442: 
        !           443: 
        !           444: ; Table of bottom-to-top loops for adapter types.
        !           445: 
        !           446:         align   4
        !           447: BottomToTopLoopTable label dword
        !           448:         dd      bottom_to_top_2RW       ;unbanked is same as 2RW
        !           449:         dd      bottom_to_top_1RW       ;1 R/W banking window
        !           450:         dd      bottom_to_top_1R1W      ;1R/1W banking window
        !           451:         dd      bottom_to_top_2RW       ;2 R/W banking window
        !           452: 
        !           453: 
        !           454: ; Table of routines for setting up to copy in various directions.
        !           455: 
        !           456:         align   4
        !           457: SetUpForCopyDirection   label   dword   ;presumably indexed by icopydir (confirm)
        !           458:         dd      left_to_right_top_to_bottom     ;CD_RIGHTDOWN
        !           459:         dd      right_to_left_top_to_bottom     ;CD_LEFTDOWN
        !           460:         dd      left_to_right_bottom_to_top     ;CD_RIGHTUP
        !           461:         dd      right_to_left_bottom_to_top     ;CD_LEFTUP
        !           462: 
        !           463: ;-----------------------------------------------------------------------;
        !           464: ; Left edge clip masks for intrabyte start addresses 0 through 7.
        !           465: ; Whole byte cases are flagged as 0ffh.
        !           466: 
        !           467: jLeftMaskTable  label   byte    ;entry n has the low-order 8-n bits set
        !           468:         db      0ffh,07fh,03fh,01fh,00fh,007h,003h,001h
        !           469: 
        !           470: ;-----------------------------------------------------------------------;
        !           471: ; Right edge clip masks for intrabyte end addresses (non-inclusive)
        !           472: ; 0 through 7. Whole byte cases are flagged as 0ffh.
        !           473: 
        !           474: jRightMaskTable label   byte    ;entry n (n>0) has the high-order n bits set
        !           475:         db      0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh
        !           476: 
        !           477: ;-----------------------------------------------------------------------;
        !           478: ; Table of width-based source-edge-to-buffer copy routines.
        !           479: 
        !           480:         align   4
        !           481: copy_edge_from_screen_to_buffer label   dword
        !           482:         dd      copy_screen_to_buffered_edge_1ws ;1ws: presumably 1-wide source (confirm)
        !           483:         dd      copy_screen_to_buffered_edge_2ws ;2ws: presumably 2-wide source (confirm)
        !           484: 
        !           485: ;-----------------------------------------------------------------------;
        !           486: ; Table of width-based buffer-to-dest-edge copy routines.
        !           487: 
        !           488:         align   4
        !           489: copy_edge_from_buffer_to_screen label   dword
        !           490:         dd      copy_buffered_edge_to_screen_1ws ;1ws: presumably 1-wide source (confirm)
        !           491:         dd      copy_buffered_edge_to_screen_2ws ;2ws: presumably 2-wide source (confirm)
        !           492: 
        !           493: ;-----------------------------------------------------------------------;
        !           494: ; Table of width-based edge copy routines (no intermediate buffer).
        !           495: 
        !           496:         align   4
        !           497: copy_edge_table label   dword
        !           498:         dd      copy_edge_1ws           ;1ws: presumably 1-wide source (confirm)
        !           499:         dd      copy_edge_2ws           ;2ws: presumably 2-wide source (confirm)
        !           500: 
        !           501: ;-----------------------------------------------------------------------;
        !           502: 
        !           503:         .code
        !           504: 
        !           505: _TEXT$04   SEGMENT DWORD USE32 PUBLIC 'CODE'
        !           506:            ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
        !           507: 
        !           508: ;-----------------------------------------------------------------------;
        !           509: 
        !           510: cProc   vNonAlignedSrcCopy,16,<        \
        !           511:         uses    esi edi ebx,    \
        !           512:         pdsurf: ptr DEVSURF,    \
        !           513:         prcldest : ptr RECTL,   \
        !           514:         pptlsrc : ptr POINTL,   \
        !           515:         icopydir : dword        >
        !           516: 
        !           517:         local   culWholeBytesWidth : dword ;# of bytes to copy across each scan
        !           518:         local   ulBlockHeight : dword   ;# of scans to copy per bank block
        !           519:         local   ulWholeScanDelta : dword;offset from end of one whole bytes
        !           520:                                         ; scan to start of next
        !           521:         local   ulWholeBytesSrc : dword ;offset in bitmap of first source whole
        !           522:                                         ; byte to copy from
        !           523:         local   ulWholeBytesDest : dword;offset in bitmap of first dest whole
        !           524:                                         ; byte to copy to
        !           525:         local   ulLeftEdgeSrc : dword   ;offset in bitmap of first source left
        !           526:                                         ; edge byte to copy from
        !           527:         local   ulLeftEdgeDest : dword  ;offset in bitmap of first dest left
        !           528:                                         ; edge byte to copy to
        !           529:         local   ulRightEdgeSrc : dword  ;offset in bitmap of first source right
        !           530:                                         ; edge byte to copy from
        !           531:         local   ulRightEdgeDest : dword ;offset in bitmap of first dest right
        !           532:                                         ; edge byte to copy to
        !           533:         local   ulNextScan : dword      ;width of scan, in bytes
        !           534:         local   jLeftMask : dword       ;left edge clip mask
        !           535:         local   jRightMask : dword      ;right edge clip mask
        !           536:         local   culTempCount : dword    ;handy temporary counter
        !           537:         local   pTempEntry : dword      ;temporary storage for vector into
        !           538:                                         ; unrolled loop
        !           539:         local   pTempPlane : dword      ;pointer to storage in temp buffer for
        !           540:                                         ; edge bytes (which are stored
        !           541:                                         ; consecutively, not in each plane's
        !           542:                                         ; temp buffer, to reduce possible page
        !           543:                                         ; faulting)
        !           544:         local   ppTempPlane0 : dword    ;pointer to pointer to storage in temp
        !           545:                                         ; buffer for plane 0, immediately
        !           546:                                         ; preceded by storage for planes 1, 2,
        !           547:                                         ; and 3
        !           548:         local   ppTempPlane3 : dword    ;like above, but for plane 3
        !           549:         local   ulOffsetInBank : dword  ;offset relative to bank start
        !           550:         local   pSrcAddr : dword        ;working pointer to first source
        !           551:                                         ; byte to copy from
        !           552:         local   pDestAddr : dword       ;working pointer to first dest
        !           553:                                         ; byte to copy to
        !           554:         local   ulCurrentJustification:dword ;justification used to map in
        !           555:                                              ; banks; top for top to bottom
        !           556:                                              ; copies, bottom for bottom to top
        !           557:         local   ulCurrentSrcScan :dword ;scan line used to map in current
        !           558:                                         ; source bank
        !           559:         local   ulCurrentDestScan:dword ;scan line used to map in current dest
        !           560:                                         ; bank
        !           561:         local   ulLastDestScan :dword   ;scan in target rect at which we stop
        !           562:                                         ; advancing through banks
        !           563:         local   pCurrentThread : dword  ;pointer to data describing the
        !           564:                                         ; threaded calls to be performed to
        !           565:                                         ; perform the current copy
        !           566:         local   pCurrentThreadViaBuffer:dword
        !           567:                                         ;pointer to data describing the
        !           568:                                         ; threaded calls to be performed to
        !           569:                                         ; perform the current copy in the case
        !           570:                                         ; where the source and destination are
        !           571:                                         ; not simultaneously adequately
        !           572:                                         ; accessible, so the copy has to go
        !           573:                                         ; through a temp buffer (used only for
        !           574:                                         ; 1 R/W and 1R/1W banking)
        !           575:         local   ulAdapterType : dword   ;adapter type code, per VIDEO_BANK_TYPE
        !           576:         local   ulLWRType : dword       ;whether left edge, whole bytes, and
        !           577:                                         ; right edge are involved in the
        !           578:                                         ; current operation;
        !           579:                                         ; bit 2 = 1 if left edge involved
        !           580:                                         ; bit 1 = 1 if whole bytes involved
        !           581:                                         ; bit 0 = 1 if right edge involved
        !           582:         local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
        !           583:                                         ; address past the left edge when the
        !           584:                                         ; left edge is partial
        !           585:         local   ulCombineMask : dword   ;mask for combining desired portions
        !           586:                                         ; of AL and AH before ORing to make a
        !           587:                                         ; single byte; used to combine before
        !           588:                                         ; letting VGA rotate byte as it's
        !           589:                                         ; written. Used for all cases except
        !           590:                                         ; whole bytes copied left-to-right
        !           591:         local   ulCombineMaskWhole : dword
        !           592:                                         ;mask for combining desired portions of
        !           593:                                         ; AL and AH when copying whole bytes
        !           594:                                         ; (different from ulCombineMask in the
        !           595:                                         ; case of whole bytes left-to-right
        !           596:                                         ; copies, because then AH is the lsb
        !           597:                                         ; and AL is the MSB; then, this is
        !           598:                                         ; ulCombineMask with the bytes swapped.
        !           599:                                         ; For right-to-left whole byte copies,
        !           600:                                         ; this is the same as ulCombineMask)
        !           601:         local   ulTempScanCount : dword ;temp scan line countdown variable
        !           602:         local   ulWholeScanSrcDelta : dword
        !           603:                                         ;offset from end of one source whole
        !           604:                                         ; bytes scan line to start of next.
        !           605:                                         ; Differs from ulWholeScanDelta because
        !           606:                                         ; of source rotation pipeline priming
        !           607:         local   ulLeftSrcWidthMinus1 : dword ;# of bytes in left src edge minus
        !           608:                                              ; one (0 or 1)
        !           609:         local   ulRightSrcWidthMinus1 : dword ;# of bytes in right src edge
        !           610:                                              ; minus one (0 or 1)
        !           611: 
        !           612: ;-----------------------------------------------------------------------;
        !           613: 
        !           614: ; Entry body: cache the temp buffer plane pointers (used only by 1 R/W and 1R/1W
        !           615: ; adapters) and the banking type, call copy_rect, then restore the VGA state.
        !           616: 
        !           617:         mov     esi,pdsurf              ;ESI -> surface descriptor
        !           618:         mov     eax,[esi].dsurf_pvBankBufferPlane0 ;value of the plane 0 buffer pointer
        !           619:         mov     pTempPlane,eax
        !           620:         lea     eax,[esi].dsurf_pvBankBufferPlane0 ;address of the plane 0 pointer field
        !           621:         mov     ppTempPlane0,eax
        !           622:         lea     eax,[esi].dsurf_pvBankBufferPlane3 ;address of the plane 3 pointer field
        !           623:         mov     ppTempPlane3,eax
        !           624: 
        !           625:         mov     eax,[esi].dsurf_vbtBankingType
        !           626:         mov     ulAdapterType,eax       ;later used to index the per-adapter jump tables
        !           627: 
        !           628: ; Copy the rectangle (copy_rect lives inside this procedure and shares its frame).
        !           629: 
        !           630:         call    copy_rect
        !           631: 
        !           632: ;-----------------------------------------------------------------------;
        !           633: ; Set the VGA registers back to their default state.
        !           634: ;-----------------------------------------------------------------------;
        !           635: 
        !           636:         mov     edx,VGA_BASE + GRAF_ADDR
        !           637:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK ;AL = register index, AH = value
        !           638:         out     dx,ax           ;enable bit mask for all bits
        !           639: 
        !           640:         mov     eax,(DR_SET shl 8) + GRAF_DATA_ROT
        !           641:         out     dx,ax           ;restore default of no rotation
        !           642: 
        !           643:         mov     dl,SEQ_DATA     ;retarget DX by changing only its low byte
        !           644:         mov     al,MM_ALL       ;NOTE(review): assumes the sequencer index already selects the map mask -- confirm
        !           645:         out     dx,al           ;enable writes to all planes
        !           646: 
        !           647:         cld                     ;restore default direction flag
        !           648: 
        !           649:         cRet    vNonAlignedSrcCopy ;done
        !           650: 
        !           651: 
        !           652: ;***********************************************************************;
        !           653: ;
        !           654: ; Copies the specified rectangle.
        !           655: ;
        !           656: ;***********************************************************************;
        !           657: 
        !           658:         align   4
        !           659: copy_rect:
        !           660: 
        !           661: ; Calculate the rotation, set up the VGA's rotator, and set the byte-combining
        !           662: ; masks.
        !           663: 
        !           664:         mov     edi,prcldest            ;EDI -> destination rectangle
        !           665:         mov     esi,pptlsrc             ;ESI -> source upper left point
        !           666:         mov     ah,byte ptr [edi].xLeft ;low byte of dest left edge
        !           667:         sub     ah,byte ptr [esi].ptl_x ;minus low byte of source left edge
        !           668:         and     ah,07h                  ;rotation = (dest - source) % 8
        !           669:         mov     edx,VGA_BASE + GRAF_ADDR
        !           670:         mov     al,GRAF_DATA_ROT        ;AL = register index, AH = rotation (bits 3-7 of AH are 0)
        !           671:         out     dx,ax                   ;set the VGA's rotator for the rotation
        !           672: 
        !           673: ; Set up byte-combining mask, in preparation for ORing and letting the VGA's
        !           674: ; rotator rotate, assuming the left-hand source byte is in AL and the
        !           675: ; right-hand source byte is in AH (true for all cases except left-to-right
        !           676: ; whole bytes).
        !           677: 
        !           678:         mov     cl,ah                   ;CL = rotation count (0-7)
        !           679:         mov     eax,0000ff00h
        !           680:         rol     ax,cl                   ;rotates only AX; upper half of EAX stays 0
        !           681:         mov     ulCombineMask,eax
        !           682: 
        !           683: ; Calculate source edge widths (1 or 2 bytes).
        !           684: 
        !           685:         sub     edx,edx         ;assume left source width is 1
        !           686:         mov     ebx,[edi].xLeft
        !           687:         mov     ecx,[edi].xRight ;dest right edge (non-inclusive)
        !           688:         dec     ecx             ;make it inclusive
        !           689:         sub     ecx,ebx         ;dest width - 1 = inclusive dest right - dest left
        !           690:         mov     eax,[esi].ptl_x
        !           691:         add     ecx,eax         ;ECX = right edge of source (inclusive)
        !           692:         xor     eax,ecx
        !           693:         and     eax,not 07h     ;do the src start and end differ in byte
        !           694:                                 ; address bits? (as opposed to intrabyte)
        !           695:         jz      short @F        ;no, force 1-wide source
        !           696: 
        !           697:         mov     al,byte ptr [edi].xLeft
        !           698:         mov     ah,byte ptr [esi].ptl_x
        !           699:         and     eax,00000707h   ;AL = dest intrabyte address, AH = source intrabyte address
        !           700:         cmp     ah,al
        !           701:         jb      short @F        ;source starts earlier in its byte than dest: 1 byte suffices
        !           702:         inc     edx             ;left source width is 2
        !           703: @@:
        !           704:         mov     ulLeftSrcWidthMinus1,edx
        !           705: 
        !           706:         sub     edx,edx         ;assume right source width is 1
        !           707:         mov     eax,[edi].xRight ;dest right edge (non-inclusive)
        !           708:         dec     eax             ;make it inclusive
        !           709:         and     cl,07h          ;intrabyte source address (ECX still holds source right edge)
        !           710:         and     al,07h          ;intrabyte dest address
        !           711:         cmp     cl,al
        !           712:         ja      short @F        ;source ends later in its byte than dest: 1 byte suffices
        !           713:         inc     edx             ;right source width is 2
        !           714: @@:
        !           715:         mov     ulRightSrcWidthMinus1,edx
        !           716: 
        !           717: ; Set up masks and whole bytes count, and build left/whole/right index
        !           718: ; indicating which of those parts are involved in the copy.
        !           719: 
        !           720:         mov     ebx,[edi].xRight        ;right edge of fill (non-inclusive)
        !           721:         mov     ecx,ebx
        !           722:         and     ecx,0111b               ;intrabyte address of right edge
        !           723:         mov     ah,jRightMaskTable[ecx] ;right edge mask
        !           724: 
        !           725:         mov     esi,[edi].xLeft         ;left edge of fill (inclusive)
        !           726:         mov     ecx,esi
        !           727:         shr     ecx,3                   ;/8 for start offset from left edge
        !           728:                                         ; of scan line (NOTE: ECX is zeroed below before this value is read)
        !           729:         sub     ebx,esi                 ;width in pixels of fill
        !           730: 
        !           731:         and     esi,0111b               ;intrabyte address of left edge
        !           732:         mov     al,jLeftMaskTable[esi]  ;left edge mask
        !           733: 
        !           734:         dec     ebx                     ;make inclusive on right
        !           735:         add     ebx,esi                 ;inclusive width, starting counting at
        !           736:                                         ; the beginning of the left edge byte
        !           737:         shr     ebx,3                   ;width of fill in bytes touched - 1
        !           738:         jnz     short more_than_1_byte  ;more than 1 byte is involved
        !           739: 
        !           740: ; Only one byte will be affected. Combine first/last masks.
        !           741: 
        !           742:         and     al,ah                   ;we'll use first byte mask only
        !           743:         xor     ah,ah                   ;want last byte mask to be 0 to
        !           744:                                         ; indicate right edge not involved
        !           745:         inc     ebx                     ;so there's one count to subtract below
        !           746:                                         ; if this isn't a whole edge byte
        !           747: more_than_1_byte:
        !           748: 
        !           749: ; If all pixels in the left edge are altered, combine the first byte into the
        !           750: ; whole byte count, because we can handle solid edge bytes faster as part of
        !           751: ; the whole bytes. Ditto for the right edge.
        !           752: 
        !           753:         sub     ecx,ecx                 ;edge whole-status accumulator
        !           754:         cmp     al,-1                   ;is left edge a whole byte or partial? (CF=1 if AL < 0ffh)
        !           755:         adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
        !           756:         sub     ebx,ecx                 ;if left edge partial, deduct it from
        !           757:                                         ; the whole bytes count
        !           758:         mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
        !           759:                                         ; it's partial when pointing to the
        !           760:                                         ; whole bytes
        !           761:         and     ah,ah                   ;is right edge mask 0, meaning this
        !           762:                                         ; fill is only 1 byte wide?
        !           763:         jz      short save_masks        ;yes, no need to do anything
        !           764:         or      ecx,40h                 ;assume there's a partial right edge (bit 6 ends up in bit 0 after the ADC and ROL below)
        !           765:         cmp     ah,-1                   ;is right edge a whole byte or partial?
        !           766:         jnz     short save_masks        ;partial
        !           767:                                         ;fall through: right edge is a whole byte
        !           768:         inc     ebx                     ;if right edge whole, include it in the
        !           769:                                         ; whole bytes count
        !           770:         and     ecx,not 40h             ;there's no partial right edge
        !           771: save_masks:
        !           772:         cmp     ebx,1                   ;do we have any whole bytes? (CF=1 if count is 0)
        !           773:         cmc                             ;CF set if whole byte count > 0
        !           774:         adc     ecx,ecx                 ;if any whole bytes, set whole bytes
        !           775:                                         ; bit in left/whole/right accumulator (ECX = ECX*2 + CF: left->bit 1, right->bit 7, whole->bit 0)
        !           776:         rol     cl,1                    ;align the bits: left->bit 2, whole->bit 1, right wraps from bit 7 to bit 0
        !           777:         mov     ulLWRType,ecx           ;save left/whole/right status
        !           778: 
        !           779:         mov     byte ptr jLeftMask,al   ;save left and right clip masks (only the low byte of each dword local is written)
        !           780:         mov     byte ptr jRightMask,ah
        !           781:         mov     culWholeBytesWidth,ebx  ;save # of whole bytes
        !           782: 
        !           783: ; Copy the rectangle in the specified direction.
        !           784: 
        !           785:         mov     eax,icopydir            ;used directly as the table index; not range-checked here
        !           786:         jmp     SetUpForCopyDirection[eax*4]
        !           787: 
        !           788: 
        !           789: ;***********************************************************************;
        !           790: ;
        !           791: ; The following routines set up to handle the four possible copy
        !           792: ; directions.
        !           793: ;
        !           794: ;***********************************************************************;
        !           795: 
        !           796: 
        !           797: ;-----------------------------------------------------------------------;
        !           798: ; Set-up code for left-to-right, top-to-bottom copies.
        !           799: ;-----------------------------------------------------------------------;
        !           800: 
        !           801:         align   4
        !           802: left_to_right_top_to_bottom:
        !           803: 
        !           804:         cld                             ;we'll copy left to right
        !           805: 
        !           806: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
        !           807: ; rotate, assuming the left-hand source byte is in AH and the right-hand source
        !           808: ; byte is in AL (true only for left-to-right whole bytes).
        !           809: 
        !           810:         mov     eax,ulCombineMask       ;AX = 0ff00h rotated left by the rotation count, upper 16 bits 0
        !           811:         not     eax                     ;for a rotated 0ff00h, inverting AX equals swapping AH and AL; upper 16 bits become 0ffffh
        !           812:         mov     ulCombineMaskWhole,eax
        !           813: 
        !           814:         mov     esi,pdsurf
        !           815:         mov     eax,[esi].dsurf_lNextScan
        !           816:         mov     ulNextScan,eax          ;copy top to bottom
        !           817:         sub     eax,culWholeBytesWidth  ;offset from end of one dest whole byte
        !           818:         mov     ulWholeScanDelta,eax    ; scan to start of next
        !           819:         dec     eax                     ;offset from end of one src whole byte
        !           820:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
        !           821:                                         ; leading byte used to prime the
        !           822:                                         ; rotation pipeline
        !           823: 
        !           824:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
        !           825:                                         ; right involvement in operation
        !           826:         or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
        !           827:         mov     eax,MasterThreadTable[esi*4]
        !           828:         mov     pCurrentThread,eax      ;threading when no buffering is needed
        !           829:         mov     edx,ulAdapterType
        !           830:         shl     edx,ADAPTER_FIELD_SHIFT
        !           831:         or      esi,edx                 ;factor adapter type into the index
        !           832:         mov     eax,MasterThreadTableViaBuffer[esi*4]
        !           833:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
        !           834: 
        !           835:         mov     ulCurrentJustification,JustifyTop ;copy top to bottom
        !           836: 
        !           837:         mov     esi,prcldest
        !           838:         mov     eax,[esi].yBottom
        !           839:         mov     ulLastDestScan,eax      ;end at bottom of dest copy rect
        !           840:         mov     eax,[esi].yTop
        !           841:         mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
        !           842:         mul     ulNextScan              ;offset in bitmap of top dest rect scan (MUL also overwrites EDX)
        !           843:         mov     edx,[esi].xLeft
        !           844:         shr     edx,3                   ;byte X address
        !           845:         add     eax,edx                 ;offset in bitmap of first dest byte
        !           846:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
        !           847:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
        !           848:                                         ; byte, unless the left edge is a whole
        !           849:                                         ; byte and is thus part of the whole
        !           850:                                         ; bytes already
        !           851:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
        !           852:         add     eax,culWholeBytesWidth  ;point to the right edge
        !           853:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
        !           854: 
        !           855:         mov     esi,pptlsrc
        !           856:         mov     eax,[esi].ptl_y
        !           857:         mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
        !           858:         mul     ulNextScan              ;offset in bitmap of top source rect scan (MUL also overwrites EDX)
        !           859:         mov     edx,[esi].ptl_x
        !           860:         shr     edx,3                   ;byte X address
        !           861:         add     eax,edx                 ;offset in bitmap of first source byte
        !           862:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
        !           863:         add     eax,ulLeftSrcWidthMinus1 ;whole-bytes source start =
        !           864:         dec     eax                      ; left edge src + (left src width - 1)
        !           865:         add     eax,ulLeftEdgeAdjust     ; - 1 + ulLeftEdgeAdjust: the whole
        !           866:                                          ; bytes begin at the last left-edge source byte when the left dest
        !           867:                                          ; edge is partial, and one byte before that when the left dest byte
        !           868:                                          ; is solid and so is copied as part of the whole bytes
        !           869:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
        !           870:         add     eax,culWholeBytesWidth  ;point to the right edge
        !           871:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts,
        !           872:                                         ; because the whole bytes and the right
        !           873:                                         ; source edge share a byte, and we
        !           874:                                         ; always point to the leftmost byte in
        !           875:                                         ; the right source edge
        !           876: 
        !           877: ; Branch to the appropriate top-to-bottom bank enumeration loop.
        !           878: 
        !           879:         mov     eax,ulAdapterType       ;banking type selects the loop
        !           880:         jmp     TopToBottomLoopTable[eax*4]
        !           881: 
        !           882: 
        !           883: ;-----------------------------------------------------------------------;
        !           884: ; Set-up code for right-to-left, top-to-bottom copies.
        !           885: ;-----------------------------------------------------------------------;
        !           886: 
        !           887:         align   4
        !           888: right_to_left_top_to_bottom:
        !           889: 
        !           890:         std                             ;we'll copy right to left
        !           891: 
        !           892: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
        !           893: ; rotate, assuming the left-hand source byte is in AL and the right-hand source
        !           894: ; byte is in AH (always true except for left-to-right whole bytes).
        !           895: 
        !           896:         mov     eax,ulCombineMask
        !           897:         mov     ulCombineMaskWhole,eax
        !           898: 
        !           899:         mov     esi,pdsurf
        !           900:         mov     eax,[esi].dsurf_lNextScan
        !           901:         mov     ulNextScan,eax          ;copy top to bottom
        !           902:         add     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
        !           903:         mov     ulWholeScanDelta,eax    ; to start of next, given that we're
        !           904:                                         ; copying one way and going scan-to-
        !           905:                                         ; scan the other way
        !           906:         inc     eax                     ;offset from end of one src whole byte
        !           907:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
        !           908:                                         ; leading byte used to prime the
        !           909:                                         ; rotation pipeline
        !           910: 
        !           911:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
        !           912:                                         ; right involvement in operation
        !           913:                                         ;leave left-to-right field cleared, so
        !           914:                                         ; we look up right-to-left entries
        !           915:         mov     eax,MasterThreadTable[esi*4]
        !           916:         mov     pCurrentThread,eax      ;threading when no buffering is needed
        !           917:         mov     edx,ulAdapterType
        !           918:         shl     edx,ADAPTER_FIELD_SHIFT
        !           919:         or      esi,edx                 ;factor adapter type into the index
        !           920:         mov     eax,MasterThreadTableViaBuffer[esi*4]
        !           921:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
        !           922: 
        !           923:         mov     ulCurrentJustification,JustifyTop ;copy top to bottom
        !           924: 
        !           925:         mov     esi,prcldest
        !           926:         mov     eax,[esi].yBottom
        !           927:         mov     ulLastDestScan,eax      ;end at bottom of dest copy rect
        !           928:         mov     eax,[esi].yTop
        !           929:         mov     ulCurrentDestScan,eax   ;start at top of dest copy rect
        !           930:         mul     ulNextScan              ;offset in bitmap of top dest rect scan
        !           931:         mov     edx,[esi].xLeft
        !           932:         shr     edx,3                   ;byte X address
        !           933:         add     eax,edx                 ;offset in bitmap of first dest byte
        !           934:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
        !           935:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
        !           936:                                         ; byte, unless the left edge is a whole
        !           937:                                         ; byte and is thus part of the whole
        !           938:                                         ; bytes already
        !           939:         add     eax,culWholeBytesWidth  ;point to the right edge
        !           940:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
        !           941:         dec     eax                     ;back up to the last whole byte
        !           942:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
        !           943: 
        !           944:         mov     esi,pptlsrc
        !           945:         mov     eax,[esi].ptl_y
        !           946:         mov     ulCurrentSrcScan,eax    ;start at top of source copy rect
        !           947:         mul     ulNextScan              ;offset in bitmap of top dest rect scan
        !           948:         mov     edx,[esi].ptl_x
        !           949:         shr     edx,3                   ;byte X address
        !           950:         add     eax,edx                 ;offset in bitmap of first source byte
        !           951:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
        !           952:         add     eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
        !           953:         dec     eax                      ; last (leftmost) left edge byte, so
        !           954:         add     eax,ulLeftEdgeAdjust     ; add a byte if the left edge is 2
        !           955:                                          ; wide, except when the left dest byte
        !           956:                                          ; is solid so the left edge is part of
        !           957:                                          ; the whole bytes
        !           958:         add     eax,culWholeBytesWidth  ;point to the right edge of the whole
        !           959:                                         ; src bytes, accounting for the extra
        !           960:                                         ; source byte needed to prime the
        !           961:                                         ; rotation pipeline
        !           962:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
        !           963:         mov     ulRightEdgeSrc,eax      ;that's also where the right src edge
        !           964:                                         ; starts, because the whole bytes and
        !           965:                                         ; the right source edge share a byte,
        !           966:                                         ; and we always point to the leftmost
        !           967:                                         ; byte in the right source edge
        !           968: 
        !           969: ; Branch to the appropriate top-to-bottom bank enumeration loop.
        !           970: 
        !           971:         mov     eax,ulAdapterType
        !           972:         jmp     TopToBottomLoopTable[eax*4]
        !           973: 
        !           974: 
        !           975: ;-----------------------------------------------------------------------;
        !           976: ; Set-up code for left-to-right, bottom-to-top copies.
        !           977: ;-----------------------------------------------------------------------;
        !           978: 
        !           979:         align   4
        !           980: left_to_right_bottom_to_top:
        !           981: ; Sets masks, scan deltas, threads, and edge offsets, then enters bank loop.
        !           982:         cld                             ;we'll copy left to right
        !           983: 
        !           984: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
        !           985: ; rotate, assuming the left-hand source byte is in AH and the right-hand source
        !           986: ; byte is in AL (true only for left-to-right whole bytes).
        !           987: 
        !           988:         mov     eax,ulCombineMask
        !           989:         not     eax                     ;whole-byte mask is the complement of
        !           990:         mov     ulCombineMaskWhole,eax  ; ulCombineMask in this direction
        !           991: 
        !           992:         mov     edi,pdsurf              ;EDI stays = pdsurf for the MULs below
        !           993:         mov     eax,[edi].dsurf_lNextScan
        !           994:         neg     eax                     ;negated scan width steps backward
        !           995:         mov     ulNextScan,eax          ;copy bottom to top
        !           996:         sub     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
        !           997:         mov     ulWholeScanDelta,eax    ; to start of next, given that we're
        !           998:                                         ; copying one way and going scan-to-
        !           999:                                         ; scan the other way
        !          1000:         dec     eax                     ;offset from end of one src whole byte
        !          1001:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
        !          1002:                                         ; leading byte used to prime the
        !          1003:                                         ; rotation pipeline
        !          1004: 
        !          1005:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
        !          1006:                                         ; right involvement in operation
        !          1007:         or      esi,LEFT_TO_RIGHT_FIELD_SET   ;add left-to-right into the index
        !          1008:         mov     eax,MasterThreadTable[esi*4]
        !          1009:         mov     pCurrentThread,eax      ;threading when no buffering is needed
        !          1010:         mov     edx,ulAdapterType
        !          1011:         shl     edx,ADAPTER_FIELD_SHIFT
        !          1012:         or      esi,edx                 ;factor adapter type into the index
        !          1013:         mov     eax,MasterThreadTableViaBuffer[esi*4]
        !          1014:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
        !          1015: 
        !          1016:         mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
        !          1017: 
        !          1018:         mov     esi,prcldest
        !          1019:         mov     edx,[esi].yTop
        !          1020:         mov     ulLastDestScan,edx      ;end at top of dest copy rect
        !          1021:         mov     eax,[esi].yBottom
        !          1022:         dec     eax                     ;rectangle bottom is exclusive, so
        !          1023:                                         ; back up to first scan we'll copy
        !          1024:         sub     edx,eax                 ;yTop-(yBottom-1) = -(rect height-1)
        !          1025:         push    edx                     ;save for source; MUL overwrites EDX
        !          1026:         mov     ulCurrentDestScan,eax   ;start at bottom of dest copy rect
        !          1027:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
        !          1028:                                         ; scan (first scan to which to copy)
        !          1029:         mov     edx,[esi].xLeft
        !          1030:         shr     edx,3                   ;byte X address
        !          1031:         add     eax,edx                 ;offset in bitmap of first dest byte
        !          1032:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
        !          1033:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
        !          1034:                                         ; byte, unless the left edge is a whole
        !          1035:                                         ; byte and is thus part of the whole
        !          1036:                                         ; bytes already
        !          1037:         mov     ulWholeBytesDest,eax    ;where the whole dest bytes start
        !          1038:         add     eax,culWholeBytesWidth  ;point to the right edge
        !          1039:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
        !          1040: 
        !          1041:         mov     esi,pptlsrc
        !          1042:         mov     eax,[esi].ptl_y
        !          1043:         pop     edx                     ;retrieve -(offset from top to bottom)
        !          1044:         sub     eax,edx                 ;advance to bottom of source rect
        !          1045:                                         ; (inclusive; this is first scan from
        !          1046:                                         ; which to copy)
        !          1047:         mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
        !          1048:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source rect
        !          1049:                                         ; scan (first scan from which to copy)
        !          1050:         mov     edx,[esi].ptl_x
        !          1051:         shr     edx,3                   ;byte X address
        !          1052:         add     eax,edx                 ;offset in bitmap of first source byte
        !          1053:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
        !          1054:         add     eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
        !          1055:         dec     eax                      ; last (leftmost) left edge byte, so
        !          1056:         add     eax,ulLeftEdgeAdjust     ; add a byte if the left edge is 2
        !          1057:                                          ; wide, except when the left dest byte
        !          1058:                                          ; is solid so the left edge is part of
        !          1059:                                          ; the whole bytes
        !          1060:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
        !          1061:         add     eax,culWholeBytesWidth  ;point to the right edge
        !          1062:         mov     ulRightEdgeSrc,eax      ;where the right src edge starts,
        !          1063:                                         ; because the whole bytes and the right
        !          1064:                                         ; source edge share a byte, and we
        !          1065:                                         ; always point to the leftmost byte in
        !          1066:                                         ; the right source edge
        !          1067: 
        !          1068: ; Branch to the appropriate bottom-to-top bank enumeration loop.
        !          1069: 
        !          1070:         mov     eax,ulAdapterType       ;adapter type indexes the dword table
        !          1071:         jmp     BottomToTopLoopTable[eax*4]
        !          1072: 
        !          1073: 
        !          1074: ;-----------------------------------------------------------------------;
        !          1075: ; Set-up code for right-to-left, bottom-to-top copies.
        !          1076: ;-----------------------------------------------------------------------;
        !          1077: 
        !          1078:         align   4
        !          1079: right_to_left_bottom_to_top:
        !          1080: ; Sets masks, scan deltas, threads, and edge offsets, then enters bank loop.
        !          1081:         std                             ;we'll copy right to left
        !          1082: 
        !          1083: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
        !          1084: ; rotate, assuming the left-hand source byte is in AL and the right-hand source
        !          1085: ; byte is in AH (always true except for left-to-right whole bytes).
        !          1086: 
        !          1087:         mov     eax,ulCombineMask       ;used as is (not complemented) for
        !          1088:         mov     ulCombineMaskWhole,eax  ; right-to-left whole bytes
        !          1089: 
        !          1090:         mov     edi,pdsurf              ;EDI stays = pdsurf for the MULs below
        !          1091:         mov     eax,[edi].dsurf_lNextScan
        !          1092:         neg     eax                     ;negated scan width steps backward
        !          1093:         mov     ulNextScan,eax          ;copy bottom to top
        !          1094:         add     eax,culWholeBytesWidth  ;offset from end of one whole byte scan
        !          1095:         mov     ulWholeScanDelta,eax    ; to start of next
        !          1096:         inc     eax                     ;offset from end of one src whole byte
        !          1097:         mov     ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
        !          1098:                                         ; leading byte used to prime the
        !          1099:                                         ; rotation pipeline
        !          1100: 
        !          1101:         mov     esi,ulLWRType           ;3-bit flag field for left, whole, and
        !          1102:                                         ; right involvement in operation
        !          1103:                                         ;leave left-to-right field cleared, so
        !          1104:                                         ; we look up right-to-left entries
        !          1105:         mov     eax,MasterThreadTable[esi*4]
        !          1106:         mov     pCurrentThread,eax      ;threading when no buffering is needed
        !          1107:         mov     edx,ulAdapterType
        !          1108:         shl     edx,ADAPTER_FIELD_SHIFT
        !          1109:         or      esi,edx                 ;factor adapter type into the index
        !          1110:         mov     eax,MasterThreadTableViaBuffer[esi*4]
        !          1111:         mov     pCurrentThreadViaBuffer,eax ;threading when buffering is needed
        !          1112: 
        !          1113:         mov     ulCurrentJustification,JustifyBottom ;copy bottom to top
        !          1114: 
        !          1115:         mov     esi,prcldest
        !          1116:         mov     edx,[esi].yTop
        !          1117:         mov     ulLastDestScan,edx      ;end at top of dest copy rect
        !          1118:         mov     eax,[esi].yBottom
        !          1119:         dec     eax                     ;rectangle bottom is exclusive, so
        !          1120:                                         ; back up to first scan we'll copy
        !          1121:         sub     edx,eax                 ;yTop-(yBottom-1) = -(rect height-1)
        !          1122:         push    edx                     ;save for source; MUL overwrites EDX
        !          1123:         mov     ulCurrentDestScan,eax   ;start at bottom of dest copy rect
        !          1124:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom dest rect
        !          1125:                                         ; scan (first scan to which to copy)
        !          1126:         mov     edx,[esi].xLeft
        !          1127:         shr     edx,3                   ;byte X address
        !          1128:         add     eax,edx                 ;offset in bitmap of first dest byte
        !          1129:         mov     ulLeftEdgeDest,eax      ;that's where the left dest edge is
        !          1130:         add     eax,ulLeftEdgeAdjust    ;the whole bytes start at the next
        !          1131:                                         ; byte, unless the left edge is a whole
        !          1132:                                         ; byte and is thus part of the whole
        !          1133:                                         ; bytes already
        !          1134:         add     eax,culWholeBytesWidth  ;point to the right edge
        !          1135:         mov     ulRightEdgeDest,eax     ;where the right dest edge starts
        !          1136:         dec     eax                     ;back up to the last whole byte
        !          1137:         mov     ulWholeBytesDest,eax    ;rightmost whole dest byte, where the
        !          1138:                                         ; right-to-left whole byte copy starts
        !          1139:         mov     esi,pptlsrc
        !          1140:         mov     eax,[esi].ptl_y
        !          1141:         pop     edx                     ;retrieve -(offset from top to bottom)
        !          1142:         sub     eax,edx                 ;advance to bottom of source rect
        !          1143:                                         ; (inclusive; this is first scan from
        !          1144:                                         ; which to copy)
        !          1145:         mov     ulCurrentSrcScan,eax    ;start at bottom of source copy rect
        !          1146:         mul     [edi].dsurf_lNextScan   ;offset in bitmap of bottom source rect
        !          1147:                                         ; scan (first scan from which to copy)
        !          1148:         mov     edx,[esi].ptl_x
        !          1149:         shr     edx,3                   ;byte X address
        !          1150:         add     eax,edx                 ;offset in bitmap of first source byte
        !          1151:         mov     ulLeftEdgeSrc,eax       ;that's where the left src edge is
        !          1152:         add     eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
        !          1153:         dec     eax                      ; last (leftmost) left edge byte, so
        !          1154:         add     eax,ulLeftEdgeAdjust     ; add a byte if the left edge is 2
        !          1155:                                          ; wide, except when the left dest byte
        !          1156:                                          ; is solid so the left edge is part of
        !          1157:                                          ; the whole bytes
        !          1158:         add     eax,culWholeBytesWidth  ;point to the right edge of the whole
        !          1159:                                         ; src bytes, accounting for the extra
        !          1160:                                         ; source byte needed to prime the
        !          1161:                                         ; rotation pipeline
        !          1162:         mov     ulWholeBytesSrc,eax     ;where the src whole bytes start
        !          1163:         mov     ulRightEdgeSrc,eax      ;that's also where the right src edge
        !          1164:                                         ; starts, because the whole bytes and
        !          1165:                                         ; the right source edge share a byte,
        !          1166:                                         ; and we always point to the leftmost
        !          1167:                                         ; byte in the right source edge
        !          1168: 
        !          1169: ; Branch to the appropriate bottom-to-top bank enumeration loop.
        !          1170: 
        !          1171:         mov     eax,ulAdapterType       ;adapter type indexes the dword table
        !          1172:         jmp     BottomToTopLoopTable[eax*4]
        !          1173: 
        !          1174: 
        !          1175: ;***********************************************************************;
        !          1176: ;
        !          1177: ; The following routines are the banking loops.
        !          1178: ;
        !          1179: ;***********************************************************************;
        !          1180: 
        !          1181: 
        !          1182: ;-----------------------------------------------------------------------;
        !          1183: ; Banking for 2 R/W and unbanked adapters, top to bottom.
        !          1184: ;-----------------------------------------------------------------------;
        !          1185:         align   4
        !          1186: top_to_bottom_2RW:
        !          1187: ; Copies one bank-limited block of scans per pass, remapping banks between.
        !          1188: ; We're going top to bottom. Map in the source and dest, top-justified.
        !          1189: 
        !          1190:         mov     ebx,pdsurf
        !          1191:         mov     edx,ulCurrentSrcScan
        !          1192:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
        !          1193:                                                      ; current source bank?
        !          1194:         jl      short top_2RW_map_init_src_bank      ;yes, map in proper bank
        !          1195:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
        !          1196:                                                         ; current source bank?
        !          1197:         jl      short top_2RW_init_src_bank_mapped
        !          1198:                                                 ;no, proper bank already mapped
        !          1199: top_2RW_map_init_src_bank:
        !          1200: 
        !          1201: ; Map bank containing the top source scan line into source window.
        !          1202: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1203: 
        !          1204:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1205:                 <ebx,edx,JustifyTop,MapSourceBank>
        !          1206: 
        !          1207: top_2RW_init_src_bank_mapped:
        !          1208: 
        !          1209:         mov     edx,ulCurrentDestScan
        !          1210:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
        !          1211:                                                      ; current dest bank?
        !          1212:         jl      short top_2RW_map_init_dest_bank     ;yes, map in proper bank
        !          1213:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
        !          1214:                                                         ; current dest bank?
        !          1215:         jl      short top_2RW_init_dest_bank_mapped
        !          1216:                                                 ;no, proper bank already mapped
        !          1217: top_2RW_map_init_dest_bank:
        !          1218: 
        !          1219: ; Map bank containing the top dest scan line into dest window.
        !          1220: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1221: 
        !          1222:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1223:                 <ebx,edx,JustifyTop,MapDestBank>
        !          1224: 
        !          1225: top_2RW_init_dest_bank_mapped:
        !          1226: 
        !          1227: ; Bank-by-bank top-to-bottom copy loop. EBX = pdsurf on every entry.
        !          1228: 
        !          1229: top_2RW_bank_loop:
        !          1230: 
        !          1231: ; Decide how far we can go before we run out of bank or rectangle to copy.
        !          1232: 
        !          1233:         mov     edx,ulLastDestScan
        !          1234:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
        !          1235:         jl      short @F        ;copy rectangle bottom is in this bank
        !          1236:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
        !          1237:                                                         ; of bank, at least
        !          1238: @@:
        !          1239:         sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
        !          1240:                                         ; the dest bank
        !          1241:         mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
        !          1242:         sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank
        !          1243: 
        !          1244:         cmp     edx,eax         ;take the smaller count (unsigned compare)
        !          1245:         jb      short @F        ;source bank isn't limiting
        !          1246:         mov     edx,eax         ;source bank is limiting
        !          1247: @@:
        !          1248:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
        !          1249: 
        !          1250: ; We're ready to copy this block (THREAD_AND_START is defined elsewhere).
        !          1251: 
        !          1252:         THREAD_AND_START
        !          1253: 
        !          1254: ; Any more scans to copy?
        !          1255: 
        !          1256:         mov     eax,ulCurrentDestScan
        !          1257:         mov     esi,ulBlockHeight       ;ESI = block height, reused for source
        !          1258:         add     eax,esi                 ;we've copied to dest up to here
        !          1259:         cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
        !          1260:         jz      short top_2RW_done      ;yes, we're done
        !          1261:         mov     ulCurrentDestScan,eax
        !          1262: 
        !          1263: ; Now advance either or both banks, as needed.
        !          1264: 
        !          1265:         mov     ebx,pdsurf
        !          1266:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
        !          1267:                                                         ; current dest bank?
        !          1268:         jl      short top_2RW_dest_bank_mapped    ;no, proper bank still mapped
        !          1269: 
        !          1270: ; Map bank containing the current dest scan line into dest window.
        !          1271: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1272: 
        !          1273:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1274:                 <ebx,eax,JustifyTop,MapDestBank>
        !          1275: 
        !          1276: top_2RW_dest_bank_mapped:
        !          1277: 
        !          1278:         add     esi,ulCurrentSrcScan    ;we've copied from source up to here
        !          1279:         mov     ulCurrentSrcScan,esi
        !          1280: 
        !          1281:         cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
        !          1282:                                                         ; current src bank?
        !          1283:         jl      short top_2RW_src_bank_mapped     ;no, proper bank still mapped
        !          1284: 
        !          1285: ; Map bank containing the current source scan line into source window.
        !          1286: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1287: 
        !          1288:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1289:                 <ebx,esi,JustifyTop,MapSourceBank>
        !          1290: 
        !          1291: top_2RW_src_bank_mapped:
        !          1292: 
        !          1293:         jmp     top_2RW_bank_loop       ;EBX still = pdsurf for the next pass
        !          1294: 
        !          1295: top_2RW_done:
        !          1296:         PLAIN_RET
        !          1297: 
        !          1298: 
        !          1299: ;-----------------------------------------------------------------------;
        !          1300: ; Banking for 2 R/W and unbanked adapters, bottom to top.
        !          1301: ;-----------------------------------------------------------------------;
        !          1302:         align   4
        !          1303: bottom_to_top_2RW:
        !          1304: 
        !          1305: ; We're going bottom to top. Map in the source and dest, bottom-justified.
        !          1306: 
        !          1307:         mov     ebx,pdsurf
        !          1308:         mov     edx,ulCurrentSrcScan
        !          1309:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
        !          1310:                                                      ; current source bank?
        !          1311:         jl      short bot_2RW_map_init_src_bank      ;yes, map in proper bank
        !          1312:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
        !          1313:                                                         ; than current src bank?
        !          1314:         jl      short bot_2RW_init_src_bank_mapped
        !          1315:                                                 ;no, proper bank already mapped
        !          1316: bot_2RW_map_init_src_bank:
        !          1317: 
        !          1318: ; Map bank containing the bottom source scan line into source window.
        !          1319: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1320: 
        !          1321:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1322:                 <ebx,edx,JustifyBottom,MapSourceBank>
        !          1323: 
        !          1324: bot_2RW_init_src_bank_mapped:
        !          1325: 
        !          1326:         mov     edx,ulCurrentDestScan
        !          1327:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
        !          1328:                                                      ; current dest bank?
        !          1329:         jl      short bot_2RW_map_init_dest_bank     ;yes, map in proper bank
        !          1330:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
        !          1331:                                                         ; than current dst bank?
        !          1332:         jl      short bot_2RW_init_dest_bank_mapped
        !          1333:                                                 ;no, proper bank already mapped
        !          1334: bot_2RW_map_init_dest_bank:
        !          1335: 
        !          1336: ; Map bank containing the bottom dest scan line into source window.
        !          1337: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1338: 
        !          1339:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1340:                 <ebx,edx,JustifyBottom,MapDestBank>
        !          1341: 
        !          1342: bot_2RW_init_dest_bank_mapped:
        !          1343: 
        !          1344: ; Bank-by-bank bottom-to-top copy loop.
        !          1345: 
        !          1346: bot_2RW_bank_loop:
        !          1347: 
        !          1348: ; Decide how far we can go before we run out of bank or rectangle to copy.
        !          1349: 
        !          1350:         mov     edx,ulLastDestScan
        !          1351:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
        !          1352:         jg      short @F        ;copy rectangle top is in this bank
        !          1353:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
        !          1354:                                                      ; of bank, at least
        !          1355: @@:
        !          1356:         neg     edx
        !          1357:         add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
        !          1358:         inc     edx                     ; the dest bank
        !          1359: 
        !          1360:         mov     eax,ulCurrentSrcScan
        !          1361:         sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
        !          1362:         inc     eax                     ;# of scans we can do in the src bank
        !          1363: 
        !          1364:         cmp     edx,eax
        !          1365:         jb      short @F        ;source bank isn't limiting
        !          1366:         mov     edx,eax         ;source bank is limiting
        !          1367: @@:
        !          1368:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
        !          1369: 
        !          1370: ; We're ready to copy this block.
        !          1371: 
        !          1372:         THREAD_AND_START
        !          1373: 
        !          1374: ; Any more scans to copy?
        !          1375: 
        !          1376:         mov     eax,ulCurrentDestScan
        !          1377:         mov     esi,ulBlockHeight
        !          1378:         sub     eax,esi                 ;we've copied to dest up to here
        !          1379:         cmp     ulLastDestScan,eax      ;are we past the dest rect top?
        !          1380:         jg      short bot_2RW_done      ;yes, we're done
        !          1381:         mov     ulCurrentDestScan,eax
        !          1382: 
        !          1383: ; Now advance either or both banks, as needed.
        !          1384: 
        !          1385:         mov     ebx,pdsurf
        !          1386:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
        !          1387:                                                      ; current dest bank?
        !          1388:         jge     short bot_2RW_dest_bank_mapped    ;no, proper bank still mapped
        !          1389: 
        !          1390: ; Map bank containing the current dest scan line into source window.
        !          1391: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1392: 
        !          1393:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1394:                 <ebx,eax,JustifyBottom,MapDestBank>
        !          1395: 
        !          1396: bot_2RW_dest_bank_mapped:
        !          1397: 
        !          1398:         mov     eax,ulCurrentSrcScan
        !          1399:         sub     eax,esi         ;we've copied from source up to here
        !          1400:         mov     ulCurrentSrcScan,eax
        !          1401: 
        !          1402:         cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
        !          1403:                                                      ; current src bank?
        !          1404:         jge     short bot_2RW_src_bank_mapped     ;no, proper bank still mapped
        !          1405: 
        !          1406: ; Map bank containing the current source scan line into source window.
        !          1407: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1408: 
        !          1409:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1410:                 <ebx,eax,JustifyBottom,MapSourceBank>
        !          1411: 
        !          1412: bot_2RW_src_bank_mapped:
        !          1413: 
        !          1414:         jmp     bot_2RW_bank_loop
        !          1415: 
        !          1416: bot_2RW_done:
        !          1417:         PLAIN_RET
        !          1418: 
        !          1419: 
        !          1420: ;-----------------------------------------------------------------------;
        !          1421: ; Banking for 1R/1W adapters, top to bottom.
        !          1422: ;-----------------------------------------------------------------------;
        !          1423:         align   4
        !          1424: top_to_bottom_1R1W:
        !          1425: ; Walk the rectangle top to bottom, one bank-limited block (ulBlockHeight
        !          1426: ; scans) per pass. First map in the source and dest banks, top-justified.
        !          1427: 
        !          1428:         mov     ebx,pdsurf
        !          1429:         mov     edx,ulCurrentSrcScan
        !          1430:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
        !          1431:                                                      ; current source bank?
        !          1432:         jl      short top_1R1W_map_init_src_bank      ;yes, map in proper bank
        !          1433:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top at/past end
        !          1434:                                                         ; of current source bank?
        !          1435:         jl      short top_1R1W_init_src_bank_mapped
        !          1436:                                                 ;no, proper bank already mapped
        !          1437: top_1R1W_map_init_src_bank:
        !          1438: 
        !          1439: ; Map bank containing the top source scan line into source window.
        !          1440: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1441: 
        !          1442:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1443:                 <ebx,edx,JustifyTop,MapSourceBank>
        !          1444: 
        !          1445: top_1R1W_init_src_bank_mapped:
        !          1446: 
        !          1447:         mov     edx,ulCurrentDestScan
        !          1448:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
        !          1449:                                                      ; current dest bank?
        !          1450:         jl      short top_1R1W_map_init_dest_bank     ;yes, map in proper bank
        !          1451:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top at/past end
        !          1452:                                                         ; of current dest bank?
        !          1453:         jl      short top_1R1W_init_dest_bank_mapped
        !          1454:                                                 ;no, proper bank already mapped
        !          1455: top_1R1W_map_init_dest_bank:
        !          1456: 
        !          1457: ; Map bank containing the top dest scan line into the dest window.
        !          1458: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1459: 
        !          1460:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1461:                 <ebx,edx,JustifyTop,MapDestBank>
        !          1462: 
        !          1463: top_1R1W_init_dest_bank_mapped:
        !          1464: 
        !          1465: ; Bank-by-bank top-to-bottom copy loop.
        !          1466: 
        !          1467: top_1R1W_bank_loop:
        !          1468: ; EBX = pdsurf on every pass (loaded above and again before looping back).
        !          1469: ; Decide how far we can go before we run out of bank or rectangle to copy.
        !          1470: 
        !          1471:         mov     edx,ulLastDestScan
        !          1472:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
        !          1473:         jl      short @F        ;copy rectangle bottom is in this bank
        !          1474:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
        !          1475:                                                         ; of bank, at least
        !          1476: @@:
        !          1477:         sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
        !          1478:                                         ; the dest bank
        !          1479:         mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
        !          1480:         sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank
        !          1481: ; Block height is the smaller of the two counts (compared as unsigned).
        !          1482:         cmp     edx,eax
        !          1483:         jb      short @F        ;source bank isn't limiting
        !          1484:         mov     edx,eax         ;source bank is limiting
        !          1485: @@:
        !          1486:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
        !          1487: 
        !          1488: ; We're ready to copy this block.
        !          1489: ; Select different threading, depending on whether the source and destination
        !          1490: ; are currently in the same bank; we can do edges faster if they are.
        !          1491: 
        !          1492:         mov     eax,[ebx].dsurf_ulWindowBank    ;first ulWindowBank entry
        !          1493:         cmp     eax,[ebx].dsurf_ulWindowBank[4] ;same as the second entry?
        !          1494:         jz      short top_1R1W_copy_same_bank   ;yes, both on one bank
        !          1495: 
        !          1496: ; Source and dest are currently in different banks, must go through temp buffer.
        !          1497: ; NOTE(review): macro presumably resumes at top_1R1W_check_more_scans - confirm.
        !          1498:         THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans
        !          1499: 
        !          1500: ; Source and dest are currently in the same bank.
        !          1501: 
        !          1502:         align   4
        !          1503: top_1R1W_copy_same_bank:
        !          1504:         THREAD_AND_START
        !          1505: 
        !          1506: ; Any more scans to copy?
        !          1507: 
        !          1508: top_1R1W_check_more_scans:
        !          1509: 
        !          1510:         mov     eax,ulCurrentDestScan
        !          1511:         mov     esi,ulBlockHeight       ;ESI = block height, used again below
        !          1512:         add     eax,esi                 ;we've copied to dest up to here
        !          1513:         cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
        !          1514:         jz      short top_1R1W_done     ;yes, we're done
        !          1515:         mov     ulCurrentDestScan,eax   ;no, this is the next dest scan to do
        !          1516: 
        !          1517: ; Now advance either or both banks, as needed.
        !          1518: 
        !          1519:         mov     ebx,pdsurf
        !          1520:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan at/past end
        !          1521:                                                         ; of current dest bank?
        !          1522:         jl      short top_1R1W_dest_bank_mapped   ;no, proper bank still mapped
        !          1523: 
        !          1524: ; Map bank containing the current dest scan line into the dest window.
        !          1525: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1526: 
        !          1527:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1528:                 <ebx,eax,JustifyTop,MapDestBank>
        !          1529: 
        !          1530: top_1R1W_dest_bank_mapped:
        !          1531: ; ESI still holds the block height here (it is preserved by the call above).
        !          1532:         add     esi,ulCurrentSrcScan    ;we've copied from source up to here
        !          1533:         mov     ulCurrentSrcScan,esi
        !          1534: 
        !          1535:         cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan at/past end
        !          1536:                                                         ; of current src bank?
        !          1537:         jl      short top_1R1W_src_bank_mapped     ;no, proper bank still mapped
        !          1538: 
        !          1539: ; Map bank containing the current source scan line into source window.
        !          1540: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1541: 
        !          1542:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1543:                 <ebx,esi,JustifyTop,MapSourceBank>
        !          1544: 
        !          1545: top_1R1W_src_bank_mapped:
        !          1546: 
        !          1547:         jmp     top_1R1W_bank_loop      ;go size and copy the next block
        !          1548: 
        !          1549: top_1R1W_done:
        !          1550:         PLAIN_RET
        !          1551: 
        !          1552: 
        !          1553: ;-----------------------------------------------------------------------;
        !          1554: ; Banking for 1R/1W adapters, bottom to top.
        !          1555: ;-----------------------------------------------------------------------;
        !          1556:         align   4
        !          1557: bottom_to_top_1R1W:
        !          1558: ; Walk the rectangle bottom to top, one bank-limited block (ulBlockHeight
        !          1559: ; scans) per pass. First map in the source and dest banks, bottom-justified.
        !          1560: 
        !          1561:         mov     ebx,pdsurf
        !          1562:         mov     edx,ulCurrentSrcScan
        !          1563:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
        !          1564:                                                      ; current source bank?
        !          1565:         jl      short bot_1R1W_map_init_src_bank      ;yes, map in proper bank
        !          1566:         cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom at/past
        !          1567:                                                         ; end of current src bank?
        !          1568:         jl      short bot_1R1W_init_src_bank_mapped
        !          1569:                                                 ;no, proper bank already mapped
        !          1570: bot_1R1W_map_init_src_bank:
        !          1571: 
        !          1572: ; Map bank containing the bottom source scan line into source window.
        !          1573: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1574: 
        !          1575:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1576:                 <ebx,edx,JustifyBottom,MapSourceBank>
        !          1577: 
        !          1578: bot_1R1W_init_src_bank_mapped:
        !          1579: 
        !          1580:         mov     edx,ulCurrentDestScan
        !          1581:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
        !          1582:                                                      ; current dest bank?
        !          1583:         jl      short bot_1R1W_map_init_dest_bank     ;yes, map in proper bank
        !          1584:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom at/past
        !          1585:                                                         ; end of current dst bank?
        !          1586:         jl      short bot_1R1W_init_dest_bank_mapped
        !          1587:                                                 ;no, proper bank already mapped
        !          1588: bot_1R1W_map_init_dest_bank:
        !          1589: 
        !          1590: ; Map bank containing the bottom dest scan line into the dest window.
        !          1591: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1592: 
        !          1593:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1594:                 <ebx,edx,JustifyBottom,MapDestBank>
        !          1595: 
        !          1596: bot_1R1W_init_dest_bank_mapped:
        !          1597: 
        !          1598: ; Bank-by-bank bottom-to-top copy loop.
        !          1599: 
        !          1600: bot_1R1W_bank_loop:
        !          1601: ; EBX = pdsurf on every pass (loaded above and again before looping back).
        !          1602: ; Decide how far we can go before we run out of bank or rectangle to copy.
        !          1603: 
        !          1604:         mov     edx,ulLastDestScan
        !          1605:         cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop
        !          1606:         jg      short @F        ;copy rectangle top is in this bank
        !          1607:         mov     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
        !          1608:                                                      ; of bank, at least
        !          1609: @@:
        !          1610:         neg     edx                     ;(current scan - top limit) + 1 =
        !          1611:         add     edx,ulCurrentDestScan   ;# of scans we can and want to do in
        !          1612:         inc     edx                     ; the dest bank
        !          1613: 
        !          1614:         mov     eax,ulCurrentSrcScan
        !          1615:         sub     eax,[ebx].dsurf_rcl2WindowClipS.yTop
        !          1616:         inc     eax                     ;# of scans we can do in the src bank
        !          1617: ; Block height is the smaller of the two counts (compared as unsigned).
        !          1618:         cmp     edx,eax
        !          1619:         jb      short @F        ;source bank isn't limiting
        !          1620:         mov     edx,eax         ;source bank is limiting
        !          1621: @@:
        !          1622:         mov     ulBlockHeight,edx ;# of scans we'll do in this bank
        !          1623: 
        !          1624: ; We're ready to copy this block.
        !          1625: ; Select different threading, depending on whether the source and destination
        !          1626: ; are currently in the same bank; we can do edges faster if they are.
        !          1627: ; Compare the whole dword bank numbers, as the top-to-bottom path does.
        !          1628:         mov     eax,[ebx].dsurf_ulWindowBank    ;first ulWindowBank entry
        !          1629:         cmp     eax,[ebx].dsurf_ulWindowBank[4] ;same as the second entry?
        !          1630:         jz      short bot_1R1W_copy_same_bank   ;yes, both on one bank
        !          1631: 
        !          1632: ; Source and dest are currently in different banks, must go through temp buffer.
        !          1633: ; NOTE(review): macro presumably resumes at bot_1R1W_check_more_scans - confirm.
        !          1634:         THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans
        !          1635: 
        !          1636: ; Source and dest are currently in the same bank.
        !          1637: 
        !          1638:         align   4
        !          1639: bot_1R1W_copy_same_bank:
        !          1640:         THREAD_AND_START
        !          1641: 
        !          1642: ; Any more scans to copy?
        !          1643: 
        !          1644:         align   4
        !          1645: bot_1R1W_check_more_scans:
        !          1646: 
        !          1647:         mov     eax,ulCurrentDestScan
        !          1648:         mov     esi,ulBlockHeight       ;ESI = block height, used again below
        !          1649:         sub     eax,esi                 ;we've copied to dest up to here
        !          1650:         cmp     ulLastDestScan,eax      ;are we past the dest rect top?
        !          1651:         jg      short bot_1R1W_done     ;yes, we're done
        !          1652:         mov     ulCurrentDestScan,eax   ;no, this is the next dest scan to do
        !          1653: 
        !          1654: ; Now advance either or both banks, as needed.
        !          1655: 
        !          1656:         mov     ebx,pdsurf
        !          1657:         cmp     eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
        !          1658:                                                      ; current dest bank?
        !          1659:         jge     short bot_1R1W_dest_bank_mapped   ;no, proper bank still mapped
        !          1660: 
        !          1661: ; Map bank containing the current dest scan line into the dest window.
        !          1662: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1663: 
        !          1664:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1665:                 <ebx,eax,JustifyBottom,MapDestBank>
        !          1666: 
        !          1667: bot_1R1W_dest_bank_mapped:
        !          1668: ; ESI still holds the block height here (it is preserved by the call above).
        !          1669:         mov     eax,ulCurrentSrcScan
        !          1670:         sub     eax,esi         ;we've copied from source up to here
        !          1671:         mov     ulCurrentSrcScan,eax
        !          1672: 
        !          1673:         cmp     eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
        !          1674:                                                      ; current src bank?
        !          1675:         jge     short bot_1R1W_src_bank_mapped    ;no, proper bank still mapped
        !          1676: 
        !          1677: ; Map bank containing the current source scan line into source window.
        !          1678: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1679: 
        !          1680:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          1681:                 <ebx,eax,JustifyBottom,MapSourceBank>
        !          1682: 
        !          1683: bot_1R1W_src_bank_mapped:
        !          1684: 
        !          1685:         jmp     bot_1R1W_bank_loop      ;go size and copy the next block
        !          1686: 
        !          1687: bot_1R1W_done:
        !          1688:         PLAIN_RET
        !          1689: 
        !          1690: 
        !          1691: ;-----------------------------------------------------------------------;
        !          1692: ; Banking for 1 R/W adapters, top to bottom.
        !          1693: ;-----------------------------------------------------------------------;
        !          1694:         align   4
        !          1695: top_to_bottom_1RW:
        !          1696: ; Here one window (rcl1WindowClip / pfnBankControl) serves source and dest.
        !          1697: ; We're going top to bottom. Map in the dest, top-justified.
        !          1698: 
        !          1699:         mov     ebx,pdsurf
        !          1700:         mov     esi,ulCurrentDestScan   ;ESI = dest scan; the loop below uses it
        !          1701:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest top less than
        !          1702:                                                      ; current bank?
        !          1703:         jl      short top_1RW_map_init_dest_bank     ;yes, map in proper bank
        !          1704:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest top at/past end
        !          1705:                                                         ; of current bank?
        !          1706:         jl      short top_1RW_init_dest_bank_mapped
        !          1707:                                                 ;no, proper bank already mapped
        !          1708: top_1RW_map_init_dest_bank:
        !          1709: 
        !          1710: ; Map bank containing the top dest scan line into the window.
        !          1711: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1712: 
        !          1713:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
        !          1714: 
        !          1715: top_1RW_init_dest_bank_mapped:
        !          1716: 
        !          1717: ; Bank-by-bank top-to-bottom copy loop.
        !          1718: 
        !          1719: top_1RW_bank_loop:
        !          1720: ; On entry to each pass: EBX = pdsurf, ESI = ulCurrentDestScan.
        !          1721: ; Decide how far we can go before we run out of bank or rectangle to copy.
        !          1722: 
        !          1723:         mov     edi,ulLastDestScan
        !          1724:         cmp     edi,[ebx].dsurf_rcl1WindowClip.yBottom
        !          1725:         jl      short @F        ;copy rectangle bottom is in this bank
        !          1726:         mov     edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end
        !          1727:                                                        ; of bank, at least
        !          1728: @@:
        !          1729:         sub     edi,esi   ;# of scans we can and want to do in the dest bank
        !          1730: 
        !          1731: ; Now make sure source is mapped in. This is the condition the copying routines
        !          1732: ; expect, and we need to figure out how far we can go in the source.
        !          1733: 
        !          1734:         sub     edx,edx                 ;assume source and dest are in the same
        !          1735:                                         ; bank
        !          1736:         mov     esi,ulCurrentSrcScan
        !          1737:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
        !          1738:                                                     ; current bank?
        !          1739:         jl      short top_1RW_map_src_Bank          ;yes, must map in
        !          1740:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan at/past end
        !          1741:                                                        ; of current bank?
        !          1742:         jl      short top_1RW_src_bank_mapped     ;no, proper bank still mapped
        !          1743: 
        !          1744: top_1RW_map_src_Bank:
        !          1745: 
        !          1746: ; Map bank containing the current source scan line into the window.
        !          1747: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1748: 
        !          1749:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
        !          1750: ; EDX is set after the call; it is not among the preserved registers above.
        !          1751:         mov     edx,1                   ;mark that source and dest are not in
        !          1752:                                         ; the same bank
        !          1753: top_1RW_src_bank_mapped:
        !          1754: 
        !          1755:         mov     eax,[ebx].dsurf_rcl1WindowClip.yBottom
        !          1756:         sub     eax,esi         ;# of scans we can do in the src bank
        !          1757: ; Block height is the smaller of the two counts (compared as unsigned).
        !          1758:         cmp     edi,eax
        !          1759:         jb      short @F        ;source bank isn't limiting
        !          1760:         mov     edi,eax         ;source bank is limiting
        !          1761: @@:
        !          1762:         mov     ulBlockHeight,edi ;# of scans we'll do in this bank
        !          1763: 
        !          1764: ; We're ready to copy this block.
        !          1765: ; Select different threading, depending on whether the source and destination
        !          1766: ; are currently in the same bank; we can do edges faster if they are.
        !          1767: 
        !          1768:         and     edx,edx                 ;did we have to remap for the source?
        !          1769:         jz      short top_1RW_copy_same_bank    ;no, same bank
        !          1770: 
        !          1771: ; Source and dest are currently in different banks, must go through temp buffer.
        !          1772: ; NOTE(review): macro presumably resumes at top_1RW_check_more_scans - confirm.
        !          1773:         THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans
        !          1774: 
        !          1775: ; Source and dest are currently in the same bank.
        !          1776: 
        !          1777:         align   4
        !          1778: top_1RW_copy_same_bank:
        !          1779:         THREAD_AND_START
        !          1780: 
        !          1781: ; Any more scans to copy?
        !          1782: 
        !          1783: top_1RW_check_more_scans:
        !          1784: 
        !          1785:         mov     esi,ulCurrentDestScan
        !          1786:         mov     edi,ulBlockHeight       ;EDI = block height, used again below
        !          1787:         add     esi,edi                 ;we've copied to dest up to here
        !          1788:         cmp     ulLastDestScan,esi      ;are we at the dest rect bottom?
        !          1789:         jz      short top_1RW_done      ;yes, we're done
        !          1790:         mov     ulCurrentDestScan,esi   ;no, this is the next dest scan to do
        !          1791: 
        !          1792: ; Now make sure the dest bank is mapped in.
        !          1793: 
        !          1794:         mov     ebx,pdsurf
        !          1795:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
        !          1796:                                                     ; current bank?
        !          1797:         jl      short top_1RW_map_dest_bank         ;yes, map in dest bank
        !          1798:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan at/past end
        !          1799:                                                         ; of current bank?
        !          1800:         jl      short top_1RW_dest_bank_mapped   ;no, proper bank mapped
        !          1801: 
        !          1802: top_1RW_map_dest_bank:
        !          1803: 
        !          1804: ; Map bank containing the current dest scan line into the window.
        !          1805: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1806: 
        !          1807:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
        !          1808: 
        !          1809: top_1RW_dest_bank_mapped:
        !          1810: ; EDI still holds the block height here (it is preserved by the call above).
        !          1811:         add     ulCurrentSrcScan,edi    ;we've copied from source up to here
        !          1812: 
        !          1813:         jmp     top_1RW_bank_loop       ;go size and copy the next block
        !          1814: 
        !          1815: top_1RW_done:
        !          1816:         PLAIN_RET
        !          1817: 
        !          1818: 
        !          1819: ;-----------------------------------------------------------------------;
        !          1820: ; Banking for 1 R/W adapters, bottom to top.
        !          1821: ;-----------------------------------------------------------------------;
        !          1822:         align   4
        !          1823: bottom_to_top_1RW:
        !          1824: 
        !          1825: ; We're going bottom to top. Map in the dest, bottom-justified.
        !          1826: 
        !          1827:         mov     ebx,pdsurf
        !          1828:         mov     esi,ulCurrentDestScan
        !          1829:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop  ;is dest bottom less than
        !          1830:                                                      ; current dest bank?
        !          1831:         jl      short bot_1RW_map_init_dest_bank     ;yes, map in proper bank
        !          1832:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest bottom greater
        !          1833:                                                        ; than current dst bank?
        !          1834:         jl      short bot_1RW_init_dest_bank_mapped
        !          1835:                                                 ;no, proper bank already mapped
        !          1836: bot_1RW_map_init_dest_bank:
        !          1837: 
        !          1838: ; Map bank containing the bottom dest scan line into source window.
        !          1839: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1840: 
        !          1841:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
        !          1842: 
        !          1843: bot_1RW_init_dest_bank_mapped:
        !          1844: 
        !          1845: ; Bank-by-bank bottom-to-top copy loop.
        !          1846: 
        !          1847: bot_1RW_bank_loop:
        !          1848: 
        !          1849: ; Decide how far we can go before we run out of bank or rectangle to copy.
        !          1850: 
        !          1851:         mov     edi,ulLastDestScan
        !          1852:         cmp     edi,[ebx].dsurf_rcl1WindowClip.yTop
        !          1853:         jg      short @F        ;copy rectangle top is in this bank
        !          1854:         mov     edi,[ebx].dsurf_rcl1WindowClip.yTop ;dest extends to end
        !          1855:                                                     ; of bank, at least
        !          1856: @@:
        !          1857:         neg     edi
        !          1858:         add     edi,esi                 ;# of scans we can and want to do in
        !          1859:         inc     edi                     ; the dest bank
        !          1860: 
        !          1861: ; Now make sure source is mapped in. This is the condition the copying routines
        !          1862: ; expect, and we need to figure out how far we can go in the source.
        !          1863: 
        !          1864:         sub     edx,edx                 ;assume source and dest are in the same
        !          1865:                                         ; bank
        !          1866:         mov     esi,ulCurrentSrcScan
        !          1867:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
        !          1868:                                                     ; current bank?
        !          1869:         jl      short bot_1RW_map_src_Bank          ;yes, must map in
        !          1870:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
        !          1871:                                                        ; current bank?
        !          1872:         jl      short bot_1RW_src_bank_mapped     ;no, proper bank still mapped
        !          1873: 
        !          1874: bot_1RW_map_src_Bank:
        !          1875: 
        !          1876: ; Map bank containing the current source scan line into source window.
        !          1877: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1878: 
        !          1879:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
        !          1880: 
        !          1881:         mov     edx,1                   ;mark that source and dest are not in
        !          1882:                                         ; the same bank
        !          1883: bot_1RW_src_bank_mapped:
        !          1884: 
        !          1885:         sub     esi,[ebx].dsurf_rcl1WindowClip.yTop
        !          1886:         inc     esi                     ;# of scans we can do in the src bank
        !          1887: 
        !          1888:         cmp     edi,esi
        !          1889:         jb      short @F        ;source bank isn't limiting
        !          1890:         mov     edi,esi         ;source bank is limiting
        !          1891: @@:
        !          1892:         mov     ulBlockHeight,edi ;# of scans we'll do in this bank
        !          1893: 
        !          1894: ; We're ready to copy this block.
        !          1895: ; Select different threading, depending on whether the source and destination
        !          1896: ; are currently in the same bank; we can copy much faster if they are.
        !          1897: 
        !          1898:         and     edx,edx
        !          1899:         jz      short bot_1RW_copy_same_bank
        !          1900: 
        !          1901: ; Source and dest are currently in different banks, must go through temp buffer.
        !          1902: 
        !          1903:         THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans
        !          1904: 
        !          1905: ; Source and dest are currently in the same bank.
        !          1906: 
        !          1907:         align   4
        !          1908: bot_1RW_copy_same_bank:
        !          1909:         THREAD_AND_START
        !          1910: 
        !          1911: ; Any more scans to copy?
        !          1912: 
        !          1913:         align   4
        !          1914: bot_1RW_check_more_scans:
        !          1915: 
        !          1916:         mov     esi,ulCurrentDestScan
        !          1917:         mov     edi,ulBlockHeight
        !          1918:         sub     esi,edi                 ;we've copied to dest up to here
        !          1919:         cmp     ulLastDestScan,esi      ;are we past the dest rect top?
        !          1920:         jg      short bot_1RW_done      ;yes, we're done
        !          1921:         mov     ulCurrentDestScan,esi
        !          1922: 
        !          1923: ; Now make sure the dest bank is mapped in.
        !          1924: 
        !          1925:         mov     ebx,pdsurf
        !          1926:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
        !          1927:                                                     ; current bank?
        !          1928:         jl      short bot_1RW_map_dest_bank         ;yes, map in dest bank
        !          1929:         cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
        !          1930:                                                         ; current bank?
        !          1931:         jl      short bot_1RW_dest_bank_mapped   ;no, proper bank mapped
        !          1932: 
        !          1933: bot_1RW_map_dest_bank:
        !          1934: 
        !          1935: ; Map bank containing the current dest scan line into source window.
        !          1936: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
        !          1937: 
        !          1938:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
        !          1939: 
        !          1940: bot_1RW_dest_bank_mapped:
        !          1941: 
        !          1942:         sub     ulCurrentSrcScan,edi    ;we've copied from source up to here
        !          1943: 
        !          1944:         jmp     bot_1RW_bank_loop
        !          1945: 
        !          1946: bot_1RW_done:
        !          1947:         PLAIN_RET
        !          1948: 
        !          1949: 
        !          1950: ;***********************************************************************;
        !          1951: ;
        !          1952: ; The following routines are the low-level copying routines. They know
        !          1953: ; almost nothing about banks (the routines that copy through a temp
        !          1954: ; buffer know how to switch banks after filling the temp buffer, but
        !          1955: ; that's it). Banking should be taken care of at a higher level.
        !          1956: ;
        !          1957: ;***********************************************************************;
        !          1958: 
        !          1959: ;-----------------------------------------------------------------------;
        !          1960: ; Copies a block of solid bytes directly from the source to the
        !          1961: ; destination, without using a temp buffer. We can't use the latches,
        !          1962: ; though, because this is a rotated copy. Can only be used by 2 R/W or
        !          1963: ; 1R/1W window banking, or by unbanked modes, or by 1 R/W adapters when
        !          1964: ; the source and dest are in the same bank. 1 R/W adapters must go
        !          1965: ; through an intermediate local buffer when the source and the destination
        !          1966: ; aren't in the same bank.
        !          1967: ;
        !          1968: ; Input:
        !          1969: ;       Direction Flag set for desired direction of copy
        !          1970: ;       culWholeBytesWidth = # of bytes to copy across each scan line
        !          1971: ;       ulWholeScanDelta = distance to start of next dest scan from end of
        !          1972: ;               current
        !          1973: ;       ulWholeScanSrcDelta = distance to start of next source scan from end of
        !          1974: ;               current
        !          1975: ;       ulBlockHeight = # of scans to copy
        !          1976: ;       ulWholeBytesSrc = start source offset in bitmap
        !          1977: ;       ulWholeBytesDest = start dest offset in bitmap
        !          1978: ;       ulCombineMaskWhole = masking to be applied before ORing the two source
        !          1979: ;               bytes together, to keep only the data needed in preparation
        !          1980: ;               for the VGA rotator doing its stuff
        !          1981: ;
        !          1982: ; Output:
        !          1983: ;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
        !          1984: ;               scan processed; GC Index is left pointing to the Read Map
        !          1985: ;-----------------------------------------------------------------------;
        !          1986: 
        !          1987:         align   4
        !          1988: copy_whole_bytes:
        !          1989: 
        !          1990: ; Calculate start source and dest addresses from bitmap start addresses and
        !          1991: ; offsets within bitmap.
        !          1992: 
        !          1993:         mov     ecx,pdsurf
        !          1994:         mov     eax,ulWholeBytesSrc
        !          1995:         add     eax,[ecx].dsurf_pvBitmapStart2WindowS
        !          1996:         mov     pSrcAddr,eax    ;source address of the current scan
        !          1997:         mov     eax,ulWholeBytesDest
        !          1998:         add     eax,[ecx].dsurf_pvBitmapStart2WindowD
        !          1999:         mov     pDestAddr,eax   ;dest address of the current scan
        !          2000: 
        !          2001: ; Set the bit mask to enable all bits.
        !          2002: 
        !          2003:         mov     edx,VGA_BASE + GRAF_ADDR
        !          2004:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        !          2005:         out     dx,ax           ;AL = register index, AH = 0ffh mask value
        !          2006: 
        !          2007: ; Leave GC Index pointing to the Read Map register.
        !          2008: 
        !          2009:         mov     al,GRAF_READ_MAP
        !          2010:         out     dx,al
        !          2011: 
        !          2012: ; Set up the scan count and the unrolled-loop variables for the copy.
        !          2013: 
        !          2014:         mov     eax,ulBlockHeight
        !          2015:         mov     ulTempScanCount,eax
        !          2016: 
        !          2017:         mov     ebx,culWholeBytesWidth
        !          2018:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeRWEntry, \
        !          2019:                                 LOOP_UNROLL_SHIFT
        !          2020:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
        !          2021:         mov     pTempEntry,ecx   ;ditto for entry point
        !          2022: 
        !          2023: copy_whole_scan_loop:
        !          2024: 
        !          2025:         mov     cl,MM_C3        ;start by copying plane 3 (for Map Mask)
        !          2026: 
        !          2027: copy_whole_plane_loop:
        !          2028: 
        !          2029: ; Set Map Mask to enable writes to the plane we're copying. Only the data port
        !          2030: ; is written; NOTE(review): presumably SEQ Index already selects Map Mask.
        !          2031:         mov     edx,VGA_BASE + SEQ_DATA
        !          2032:         mov     al,cl
        !          2033:         out     dx,al
        !          2034: 
        !          2035: ; Set Read Map to enable reads from the plane we're copying.
        !          2036: 
        !          2037:         mov     dl,GRAF_DATA            ;retarget DX by changing its low byte
        !          2038:         shr     al,1                    ;turn Map Mask bit into Read Map plane #
        !          2039:         cmp     al,100b                 ;set Carry if not C3 (plane 3)
        !          2040:         adc     al,-1                   ;sub 1 only if C3
        !          2041:         out     dx,al
        !          2042: 
        !          2043: ; Reload the scan's source and dest start addresses for this plane.
        !          2044: 
        !          2045:         mov     esi,pSrcAddr       ;source start address for this scan
        !          2046:         mov     edi,pDestAddr      ;point to destination start
        !          2047: 
        !          2048:         lodsb                   ;prime the rotation pipeline
        !          2049:         mov     ah,al           ;for combining with the next byte
        !          2050: 
        !          2051:         mov     ebx,culTempCount   ;# of unrolled loop iterations
        !          2052:         mov     edx,ulCombineMaskWhole
        !          2053:         jmp     pTempEntry         ;enter the unrolled loop part-way in
        !          2054: 
        !          2055: 
        !          2056: ;-----------------------------------------------------------------------;
        !          2057: ; Table of entry points into the unrolled direct whole-byte copy loop.
        !          2058: ;-----------------------------------------------------------------------;
        !          2059: 
        !          2060:         UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeRWEntry, \
        !          2061:                                 WHOLE_RW, LOOP_UNROLL_COUNT
        !          2062: 
        !          2063: ;-----------------------------------------------------------------------;
        !          2064: ; Unrolled loop body for copying whole bytes directly from the source.
        !          2065: ;-----------------------------------------------------------------------;
        !          2066: 
        !          2067: COPY_WHOLE_RW macro ENTRY_LABEL,ENTRY_INDEX
        !          2068: &ENTRY_LABEL&ENTRY_INDEX&:
        !          2069:         lodsb                   ;get byte to copy
        !          2070:         mov     ch,al           ;set aside for next time
        !          2071:         and     eax,edx         ;mask the bytes in preparation for combining
        !          2072:                                 ; and rotating them
        !          2073:         or      al,ah           ;combine them
        !          2074:         stosb                   ;write the composite byte
        !          2075:                                 ; VGA rotates during write
        !          2076:         mov     ah,ch           ;prepare byte for combining next time
        !          2077:         endm    ;-----------------------------------;
        !          2078: ;  On entry: EBX = # of unrolled loop iterations, CL = current plane's Map Mask
        !          2079: ;  AH = rotation pipeline-priming byte
        !          2080: ;  EDX = mask to preserve desired portions of AH and AL before combining
        !          2081: ;  ESI = source address to copy from
        !          2082: ;  EDI = target address to copy to
        !          2083: ;  Map Mask set to enable the desired plane for write
        !          2084: ;  Bit Mask set to enable all bits
        !          2085: 
        !          2086:         align   4
        !          2087: copy_whole_loop:
        !          2088:         UNROLL_LOOP COPY_WHOLE_RW,WHOLE_RW,LOOP_UNROLL_COUNT
        !          2089: 
        !          2090:         dec     ebx
        !          2091:         jnz     copy_whole_loop
        !          2092: 
        !          2093: ; Do next plane, if any.
        !          2094: 
        !          2095:         shr     cl,1                    ;advance to next plane; 0 when done
        !          2096:         jnz     copy_whole_plane_loop
        !          2097: 
        !          2098: ; Remember where we left off, for next scan.
        !          2099: 
        !          2100:         add     edi,ulWholeScanDelta    ;point to next dest scan
        !          2101:         mov     pDestAddr,edi
        !          2102:         add     esi,ulWholeScanSrcDelta ;point to next source scan
        !          2103:         mov     pSrcAddr,esi
        !          2104: 
        !          2105: ; Count down scan lines.
        !          2106: 
        !          2107:         dec     ulTempScanCount
        !          2108:         jnz     copy_whole_scan_loop
        !          2109: 
        !          2110: ; Remember where we left off, for next time (as offsets within the bitmap).
        !          2111: 
        !          2112:         mov     ecx,pdsurf
        !          2113:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        !          2114:         mov     ulWholeBytesSrc,esi
        !          2115:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        !          2116:         mov     ulWholeBytesDest,edi
        !          2117: 
        !          2118:         PLAIN_RET
        !          2119: 
        !          2120: 
        !          2121: ;-----------------------------------------------------------------------;
        !          2122: ; Copies a block of solid bytes from the source to the destination via
        !          2123: ; the temp buffer. This should only be used by 1 R/W adapters, and then
        !          2124: ; only when the source and dest are in different banks.
        !          2125: ;
        !          2126: ; All relevant bytes are first copied from the source to a temp buffer that's
        !          2127: ; an image of the source. Then, we copy each of the four planes for one scan
        !          2128: ; line from the temp buffer to the screen before going on to the next scan
        !          2129: ; line. See ALIGNBLT.ASM for comments about why this is done.
        !          2130: ;
        !          2131: ; Input:
        !          2132: ;       Direction Flag set for desired direction of copy
        !          2133: ;       culWholeBytesWidth = # of bytes to copy across each scan line
        !          2134: ;       ulWholeScanDelta, ulWholeScanSrcDelta = distance to start of next
        !          2135: ;               dest scan / source scan, respectively, from end of current
        !          2136: ;       ulNextScan = width of a scan line
        !          2137: ;       ulBlockHeight = # of scans to copy
        !          2138: ;       ulWholeBytesSrc, ulWholeBytesDest = start source and dest offsets in
        !          2139: ;               bitmap
        !          2140: ;       ppTempPlane0, ppTempPlane3 = pointers to the plane 0 and plane 3
        !          2141: ;               entries in the table of temp buffer plane storage pointers
        !          2142: ;       ulCombineMaskWhole = masking applied before ORing the two source
        !          2143: ;               bytes together, to keep only the data the VGA rotator needs
        !          2144: ;       ulCurrentSrcScan, ulCurrentDestScan, ulCurrentJustification = passed
        !          2145: ;               to pfnBankControl; source bank is mapped in on entry and exit
        !          2146: ;
        !          2147: ; Output:
        !          2148: ;       Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
        !          2149: ;               scan processed
        !          2150: ;-----------------------------------------------------------------------;
        !          2151: 
        !          2152:         align   4
        !          2153: copy_whole_bytes_via_buffer:
        !          2154: 
        !          2155: ; Calculate start source address from bitmap start address and offset within
        !          2156: ; bitmap.
        !          2157: 
        !          2158:         mov     ecx,pdsurf
        !          2159:         mov     eax,ulWholeBytesSrc
        !          2160:         add     eax,[ecx].dsurf_pvBitmapStart
        !          2161:         mov     pSrcAddr,eax
        !          2162:         sub     eax,[ecx].dsurf_pvStart
        !          2163:         mov     ulOffsetInBank,eax ;will come in handy because we treat the
        !          2164:                                    ; temp buffer as an image of the current
        !          2165:                                    ; bank
        !          2166: 
        !          2167: ; First, copy all the bytes into the temporary buffer.
        !          2168: 
        !          2169: ; Leave the GC Index pointing to the Read Map.
        !          2170: 
        !          2171:         mov     edx,VGA_BASE + GRAF_ADDR
        !          2172:         mov     al,GRAF_READ_MAP
        !          2173:         out     dx,al
        !          2174: 
        !          2175:         mov     eax,3           ;start by copying plane 3
        !          2176: copy_whole_to_buffer_plane_loop:
        !          2177:         mov     ebx,ulBlockHeight  ;# of scans to copy
        !          2178:         mov     esi,pSrcAddr       ;source start address for this block
        !          2179:         mov     edi,ppTempPlane0
        !          2180:         mov     edi,[edi+eax*4]    ;pointer to current plane in temp buffer
        !          2181:         add     edi,ulOffsetInBank ;dest for plane in temp buffer
        !          2182: 
        !          2183:         mov     edx,VGA_BASE + GRAF_DATA
        !          2184:         out     dx,al            ;set Read Map to plane we're copying from.
        !          2185: 
        !          2186:         push    eax             ;remember plane index (EAX is reused below)
        !          2187:         mov     eax,ulWholeScanSrcDelta ;offset to next scan
        !          2188:         mov     edx,culWholeBytesWidth ;# of bytes per scan
        !          2189:         inc     edx             ;always one more source byte than dest byte
        !          2190: copy_whole_to_buffer_scan_loop:
        !          2191:         mov     ecx,edx         ;# of bytes per scan
        !          2192:         rep     movsb           ;copy the scan line to the temp buffer
        !          2193:         add     esi,eax         ;point to next source scan
        !          2194:         add     edi,eax         ;same step to the next scan in the temp buffer
        !          2195: 
        !          2196:         dec     ebx              ;count down scan lines
        !          2197:         jnz     copy_whole_to_buffer_scan_loop
        !          2198: 
        !          2199:         pop     eax             ;get back plane index
        !          2200:         dec     eax             ;count down planes
        !          2201:         jns     copy_whole_to_buffer_plane_loop ;loop through plane 0
        !          2202: 
        !          2203: ; Remember where we left off, for next time.
        !          2204: 
        !          2205:         mov     ebx,pdsurf
        !          2206:         sub     esi,[ebx].dsurf_pvBitmapStart
        !          2207:         mov     ulWholeBytesSrc,esi
        !          2208: 
        !          2209: 
        !          2210: ; Now copy the temp buffer to the screen.
        !          2211: 
        !          2212: ; Map in the destination bank, so we can read/write to it and let the Bit Mask
        !          2213: ; work.
        !          2214: 
        !          2215:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>, \
        !          2216:                 <ebx,ulCurrentDestScan,ulCurrentJustification>
        !          2217: ; EBX (pdsurf) is used again below, so the call is relied on to preserve it.
        !          2218: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
        !          2219: ; until now to calculate this, because the dest bank wasn't mapped earlier).
        !          2220: 
        !          2221:         mov     eax,ulWholeBytesDest
        !          2222:         add     eax,[ebx].dsurf_pvBitmapStart
        !          2223:         mov     pDestAddr,eax
        !          2224: 
        !          2225: ; Set the bit mask to enable all bits.
        !          2226: 
        !          2227:         mov     edx,VGA_BASE + GRAF_ADDR
        !          2228:         mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        !          2229:         out     dx,ax           ;AL = register index, AH = 0ffh mask value
        !          2230: 
        !          2231: ; Set up to copy the whole bytes from the buffer.
        !          2232: 
        !          2233:         mov     eax,ulBlockHeight
        !          2234:         mov     ulTempScanCount,eax
        !          2235: 
        !          2236:         mov     ebx,culWholeBytesWidth
        !          2237:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeFromBufferEntry, \
        !          2238:                                 LOOP_UNROLL_SHIFT
        !          2239:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
        !          2240:         mov     pTempEntry,ecx   ;ditto for entry point
        !          2241: 
        !          2242: copy_whole_from_buffer_scan_loop:
        !          2243: 
        !          2244:         mov     ebx,ppTempPlane3  ;point to plane 3's temp buffer plane pointer
        !          2245:         mov     cl,MM_C3        ;start by copying plane 3
        !          2246: 
        !          2247: copy_whole_from_buffer_plane_loop:
        !          2248: 
        !          2249: ; Set Map Mask to enable writes to the plane we're copying. Only the data port
        !          2250: ; is written; NOTE(review): presumably SEQ Index already selects Map Mask.
        !          2251:         mov     edx,VGA_BASE + SEQ_DATA
        !          2252:         mov     al,cl
        !          2253:         out     dx,al
        !          2254: 
        !          2255: ; Select the corresponding plane from the temp buffer.
        !          2256: 
        !          2257:         mov     esi,[ebx]       ;point to plane start in temp buffer
        !          2258:         sub     ebx,4           ;point to next temp buffer plane ptr
        !          2259:         push    ebx             ;preserve pointer to plane pointer, since
        !          2260:                                 ; EBX becomes the loop count below
        !          2261:         add     esi,ulOffsetInBank ;point to current scan start in temp buffer
        !          2262:         mov     edi,pDestAddr      ;point to destination start
        !          2263: 
        !          2264:         lodsb                   ;prime the rotation pipeline
        !          2265:         mov     ah,al           ;for combining with the next byte
        !          2266: 
        !          2267:         mov     ebx,culTempCount   ;# of unrolled loop iterations
        !          2268:         mov     edx,ulCombineMaskWhole
        !          2269:         jmp     pTempEntry         ;enter the unrolled loop part-way in
        !          2270: 
        !          2271: 
        !          2272: ;-----------------------------------------------------------------------;
        !          2273: ; Table of unrolled copy whole bytes from buffer loop entry points.
        !          2274: ;-----------------------------------------------------------------------;
        !          2275: 
        !          2276:         UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeFromBufferEntry, \
        !          2277:                                 WHOLE_FROM_BUFFER, LOOP_UNROLL_COUNT
        !          2278: 
        !          2279: ;-----------------------------------------------------------------------;
        !          2280: ; Unrolled loop for copying whole bytes from the buffer.
        !          2281: ;-----------------------------------------------------------------------;
        !          2282: 
        !          2283: COPY_WHOLE_FROM_BUFFER macro ENTRY_LABEL,ENTRY_INDEX
        !          2284: &ENTRY_LABEL&ENTRY_INDEX&:
        !          2285:         lodsb                   ;get byte to copy
        !          2286:         mov     ch,al           ;set aside for next time
        !          2287:         and     eax,edx         ;mask the bytes in preparation for combining
        !          2288:                                 ; and rotating them
        !          2289:         or      al,ah           ;combine them
        !          2290:         stosb                   ;write the composite byte
        !          2291:                                 ; VGA rotates during write
        !          2292:         mov     ah,ch           ;prepare byte for combining next time
        !          2293:         endm    ;-----------------------------------;
        !          2294: ;  On entry: EBX = # of unrolled loop iterations, CL = current plane's Map Mask
        !          2295: ;  AH = rotation pipeline-priming byte
        !          2296: ;  EDX = mask to preserve desired portions of AH and AL before combining
        !          2297: ;  ESI = source address to copy from
        !          2298: ;  EDI = target address to copy to
        !          2299: ;  Map Mask set to enable the desired plane for write
        !          2300: ;  Bit Mask set to enable all bits
        !          2301: 
        !          2302:         align   4
        !          2303: copy_whole_from_buffer_loop:
        !          2304:         UNROLL_LOOP COPY_WHOLE_FROM_BUFFER,WHOLE_FROM_BUFFER,LOOP_UNROLL_COUNT
        !          2305: 
        !          2306:         dec     ebx
        !          2307:         jnz     copy_whole_from_buffer_loop
        !          2308: 
        !          2309: ; Do next plane, if any.
        !          2310: 
        !          2311:         pop     ebx             ;retrieve pointer to plane pointer
        !          2312:         shr     cl,1            ;advance to next plane; 0 when done
        !          2313:         jnz     copy_whole_from_buffer_plane_loop
        !          2314: 
        !          2315: ; Remember where we left off, for next scan.
        !          2316: 
        !          2317:         add     edi,ulWholeScanDelta    ;point to next dest scan
        !          2318:         mov     pDestAddr,edi
        !          2319:         mov     eax,ulNextScan
        !          2320:         add     ulOffsetInBank,eax      ;next scan's start in temp buffer,
        !          2321:                                         ; relative to start of plane's storage
        !          2322: 
        !          2323: ; Count down scan lines.
        !          2324: 
        !          2325:         dec     ulTempScanCount
        !          2326:         jnz     copy_whole_from_buffer_scan_loop
        !          2327: 
        !          2328: ; Remember where we left off, for next time (as an offset within the bitmap).
        !          2329: 
        !          2330:         mov     ebx,pdsurf
        !          2331:         sub     edi,[ebx].dsurf_pvBitmapStart
        !          2332:         mov     ulWholeBytesDest,edi
        !          2333: 
        !          2334: ; Put back the original source bank.
        !          2335: 
        !          2336:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl>, \
        !          2337:                 <ebx,ulCurrentSrcScan,ulCurrentJustification>
        !          2338: 
        !          2339:         PLAIN_RET
        !          2340: 
        !          2341: 
        !          2342: ;-----------------------------------------------------------------------;
        !          2343: ; Copies a strip of left edge bytes from the source to the destination,
        !          2344: ; assuming the source and the destination are both readable and
        !          2345: ; writable. Can only be used by 2 R/W window banking, or by unbanked
        !          2346: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
        !          2347: ; buffer when the source and dest are in different banks. Processes up to
        !          2348: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
        !          2349: ; flicker.
        !          2350: ;
        !          2351: ; Input:
        !          2352: ;       ulNextScan = width of scan, in bytes
        !          2353: ;       ulBlockHeight = # of scans to copy
        !          2354: ;       ulLeftEdgeSrc = start source offset in bitmap
        !          2355: ;       ulLeftEdgeDest = start dest offset in bitmap
        !          2356: ;       ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
        !          2357: ;       jLeftMask = left edge clip mask
        !          2358: ; (ulNextScan, ulBlockHeight aren't read here; presumably the edge routine does)
        !          2359: ; Output:
        !          2360: ;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
        !          2361: ;               scan processed
        !          2362: ;-----------------------------------------------------------------------;
        !          2363: 
        !          2364:         align   4
        !          2365: copy_left_edge:
        !          2366: 
        !          2367: ; Calculate start source and dest addresses from bitmap start addresses and
        !          2368: ; offsets within bitmap.
        !          2369: 
        !          2370:         mov     ecx,pdsurf
        !          2371:         mov     esi,ulLeftEdgeSrc
        !          2372:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        !          2373:         mov     edi,ulLeftEdgeDest
        !          2374:         add     edi,[ecx].dsurf_pvBitmapStart2WindowD
        !          2375: 
        !          2376: ; Copy the edge, using the table entry for this source edge width (0 or 1).
        !          2377: 
        !          2378:         mov     ah,byte ptr jLeftMask   ;clip mask for this edge
        !          2379:         mov     ebx,ulLeftSrcWidthMinus1
        !          2380:         call    copy_edge_table[ebx*4]  ;table of dword routine pointers
        !          2381: 
        !          2382: ; Remember where we left off, for next time. ESI and EDI as left by the edge
        !          2383: ; routine are converted back to offsets within the bitmap.
        !          2384:         mov     ecx,pdsurf
        !          2385:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        !          2386:         mov     ulLeftEdgeSrc,esi
        !          2387:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        !          2388:         mov     ulLeftEdgeDest,edi
        !          2389: 
        !          2390:         PLAIN_RET
        !          2391: 
        !          2392: 
        !          2393: ;-----------------------------------------------------------------------;
        !          2394: ; Copies a strip of right edge bytes from the source to the destination,
        !          2395: ; assuming the source and the destination are both readable and
        !          2396: ; writable. Can only be used by 2 R/W window banking, or by unbanked
        !          2397: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
        !          2398: ; buffer when the source and dest are in different banks. Processes up to
        !          2399: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
        !          2400: ; flicker.
        !          2401: ;
        !          2402: ; Input:
        !          2403: ;       ulNextScan = width of scan, in bytes
        !          2404: ;       ulBlockHeight = # of scans to copy
        !          2405: ;       ulRightEdgeSrc = start source offset in bitmap
        !          2406: ;       ulRightEdgeDest = start dest offset in bitmap
        !          2407: ;       ulRightSrcWidthMinus1 = width of right source edge minus 1 (0 or 1)
        !          2408: ;       jRightMask = right edge clip mask
        !          2409: ;
        !          2410: ; Output:
        !          2411: ;       Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
        !          2412: ;               scan processed
        !          2413: ;-----------------------------------------------------------------------;
        !          2414: 
        !          2415:         align   4
        !          2416: copy_right_edge:
        !          2417: 
        !          2418: ; Turn the saved right-edge bitmap offsets into window addresses:
        !          2419: ; ESI -> source byte, EDI -> destination byte.
        !          2420: 
        !          2421:         mov     ecx,pdsurf
        !          2422:         mov     esi,ulRightEdgeSrc
        !          2423:         mov     edi,ulRightEdgeDest
        !          2424:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        !          2425:         add     edi,[ecx].dsurf_pvBitmapStart2WindowD
        !          2426: 
        !          2427: ; Dispatch through copy_edge_table, indexed by (source edge width - 1).
        !          2428: 
        !          2429:         mov     ebx,ulRightSrcWidthMinus1 ;table index
        !          2430:         mov     ah,byte ptr jRightMask  ;right edge clip mask
        !          2431:         call    copy_edge_table[ebx*4]
        !          2432: 
        !          2433: ; Convert the returned addresses back to bitmap offsets and save them.
        !          2434: 
        !          2435:         mov     ecx,pdsurf
        !          2436:         sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        !          2437:         sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        !          2438:         mov     ulRightEdgeSrc,esi
        !          2439:         mov     ulRightEdgeDest,edi
        !          2440: 
        !          2441:         PLAIN_RET
        !          2442: 
        !          2443: 
        !          2444: ;-----------------------------------------------------------------------;
        !          2445: ; Copies an edge from a 1-wide source to the destination on the screen.
        !          2446: ; Entry:
        !          2447: ;       AH = bit mask setting for edge
        !          2448: ;       ESI = source address
        !          2449: ;       EDI = destination address
        !          2450: ;       ulBlockHeight = # of bytes to copy per plane
        !          2451: ;       ulNextScan = scan width
        !          2452: ;       Source readable, and destination readable and writable
        !          2453: ; Exit:
        !          2454: ;       ESI = next source address
        !          2455: ;       EDI = next destination address
        !          2456: ;
        !          2457: ; Preserved: EBP
        !          2458: ;-----------------------------------------------------------------------;
        !          2459: 
        !          2460:         align   4
        !          2461: copy_edge_1ws:
        !          2462:         mov     pSrcAddr,esi    ;each plane pass restarts from these saved
        !          2463:         mov     pDestAddr,edi   ; chunk start addresses
        !          2464: 
        !          2465: ; Set the clip mask for this edge (word OUT: AL = GC index, AH = mask).
        !          2466: 
        !          2467:         mov     edx,VGA_BASE + GRAF_ADDR
        !          2468:         mov     al,GRAF_BIT_MASK
        !          2469:         out     dx,ax
        !          2470: 
        !          2471: ; Leave the GC Index pointing to the Read Map, so the plane loop only has
        !          2472: ; to write the plane number to GRAF_DATA.
        !          2473:         mov     al,GRAF_READ_MAP
        !          2474:         out     dx,al
        !          2475: 
        !          2476:         mov     ecx,offset copy_edge_rw_1ws_full_chunk
        !          2477:                                 ;entry point into unrolled loop to copy first
        !          2478:                                 ; chunk, assuming it's a full chunk
        !          2479:         mov     ebx,ulBlockHeight ;total # of scans to copy
        !          2480: 
        !          2481: ; Copy the edge in a series of chunks.
        !          2482: 
        !          2483: copy_edge_chunk_loop_1ws:
        !          2484: 
        !          2485:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
        !          2486:                                     ; a full chunk
        !          2487:         jge     short @F            ;do a full chunk
        !          2488:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
        !          2489:                                     ; scans
        !          2490:         mov     ecx,pfnCopyEdgeRWEntry_1ws[-4][ebx*4]
        !          2491:                                 ;entry point into unrolled loop to copy desired
        !          2492:                                 ; chunk size (table entry 0 copies 1 byte, so -4)
        !          2493:         sub     ebx,ebx         ;no scans after this
        !          2494: @@:
        !          2495:         push    ebx             ;remember remaining scan count
        !          2496: 
        !          2497:         mov     ah,MM_C3        ;start by copying plane 3; AH = plane mask now
        !          2498:         mov     ebx,ulNextScan  ;EBX = scan-to-scan stride for unrolled loop
        !          2499: 
        !          2500: copy_edge_plane_loop_1ws:
        !          2501: 
        !          2502: ; Set Map Mask to enable writes to plane we're copying.
        !          2503: ; NOTE(review): presumes the Sequencer Index already selects Map Mask.
        !          2504:         mov     al,ah
        !          2505:         mov     dl,SEQ_DATA
        !          2506:         out     dx,al
        !          2507: 
        !          2508: ; Set Read Map to same plane (mask 8,4,2,1 -> plane number 3,2,1,0).
        !          2509: 
        !          2510:         shr     al,1                    ;map plane into ReadMask
        !          2511:         cmp     al,100b                 ;set Carry if not C3 (plane 3)
        !          2512:         adc     al,-1                   ;sub 1 only if C3
        !          2513:         mov     dl,GRAF_DATA
        !          2514:         out     dx,al
        !          2515: 
        !          2516:         mov     esi,pSrcAddr            ;restart at chunk's first source byte
        !          2517:         mov     edi,pDestAddr           ;restart at chunk's first dest byte
        !          2518: 
        !          2519:         jmp     ecx                     ;enter unrolled copy loop for this plane
        !          2520: 
        !          2521: 
        !          2522: ;-----------------------------------------------------------------------;
        !          2523: ; Table of unrolled edge loop entry points. First entry point is to copy
        !          2524: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
        !          2525: ;-----------------------------------------------------------------------;
        !          2526: 
        !          2527: pfnCopyEdgeRWEntry_1ws label dword
        !          2528: INDEX = 1
        !          2529:         rept    EDGE_CHUNK_SIZE
        !          2530:         DEFINE_DD       EDGE_RW_1WS,%INDEX
        !          2531: INDEX = INDEX+1
        !          2532:         endm
        !          2533: 
        !          2534: 
        !          2535: ;-----------------------------------------------------------------------;
        !          2536: ; Unrolled loop for copying a strip of edge bytes, with 1-wide source and
        !          2537: ; destination both readable and writable.
        !          2538: ;-----------------------------------------------------------------------;
        !          2539: 
        !          2540: COPY_EDGE_RW_1WS macro ENTRY_LABEL,ENTRY_INDEX
        !          2541: &ENTRY_LABEL&ENTRY_INDEX&:
        !          2542:         mov     al,[esi]        ;get byte to copy
        !          2543:         add     esi,ebx         ;point to next source scan
        !          2544:         xchg    [edi],al        ;read before write so Bit Mask can operate
        !          2545:                                 ; VGA rotates during write
        !          2546:         add     edi,ebx         ;point to next dest scan
        !          2547:         endm    ;-----------------------------------;
        !          2548: 
        !          2549: ;  EBX = scan line width
        !          2550: ;  ESI = source address to copy from
        !          2551: ;  EDI = target address to copy to
        !          2552: ;  Bit Mask set to desired clipping
        !          2553: ;  Read Map and Map Mask set to enable the desired plane for read and write
        !          2554: 
        !          2555:         align   4
        !          2556: copy_edge_rw_1ws_full_chunk:
        !          2557:         UNROLL_LOOP COPY_EDGE_RW_1WS,EDGE_RW_1WS,EDGE_CHUNK_SIZE
        !          2558: 
        !          2559: ; Do next plane within this chunk, if any.
        !          2560: 
        !          2561:         shr     ah,1                    ;advance to next plane (0 after plane 0)
        !          2562:         jnz     copy_edge_plane_loop_1ws
        !          2563: 
        !          2564: ; Remember where we left off, for the next chunk.
        !          2565: 
        !          2566:         mov     pSrcAddr,esi
        !          2567:         mov     pDestAddr,edi
        !          2568: 
        !          2569: ; Do next chunk within this bank block, if any.
        !          2570: 
        !          2571:         pop     ebx                     ;retrieve remaining scan count
        !          2572:         and     ebx,ebx                 ;any scans left?
        !          2573:         jnz     copy_edge_chunk_loop_1ws ;more scans to do
        !          2574: 
        !          2575:         PLAIN_RET
        !          2576: 
        !          2577: 
        !          2578: ;-----------------------------------------------------------------------;
        !          2579: ; Copies a strip of left edge bytes from the source to the destination
        !          2580: ; through an intermediate RAM buffer. This is the approach required by
        !          2581: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
        !          2582: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
        !          2583: ; cause flicker.
        !          2584: ;
        !          2585: ; Input:
        !          2586: ;       ulNextScan = width of scan, in bytes
        !          2587: ;       ulBlockHeight = # of scans to copy
        !          2588: ;       ulLeftEdgeSrc = start source offset in bitmap
        !          2589: ;       ulLeftEdgeDest = start dest offset in bitmap
        !          2590: ;       jLeftMask = left edge clip mask
        !          2591: ;       pTempPlane = pointer to temp storage buffer
        !          2592: ;       ulCurrentSrcScan = scan used to map in source bank
        !          2593: ;       ulCurrentDestScan = scan used to map in dest bank
        !          2594: ;       ulCurrentJustification = justification used to map in current bank
        !          2595: ;       ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
        !          2596: ;       For 1 R/W adapters, expects the source bank to be mapped in; banking
        !          2597: ;               is the same at exit as it was at entry
        !          2598: ;
        !          2599: ; Output:
        !          2600: ;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
        !          2601: ;               scan processed
        !          2602: ;
        !          2603: ; Note that this should never be called for an unbanked or 2 R/W adapter,
        !          2604: ; because the source and dest are always both addressable simultaneously then.
        !          2605: ;-----------------------------------------------------------------------;
        !          2606: 
        !          2607:         align   4
        !          2608: copy_left_edge_via_buffer:
        !          2609: 
        !          2610: ; First, copy all the bytes into the temporary buffer.
        !          2611: 
        !          2612: ; Calculate start source address from bitmap start address and
        !          2613: ; offset within bitmap.
        !          2614: 
        !          2615:         mov     ecx,pdsurf
        !          2616:         mov     esi,ulLeftEdgeSrc
        !          2617:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        !          2618: 
        !          2619: ; Copy the edge from the source to the temp buffer.
        !          2620: 
        !          2621:         mov     eax,ulLeftSrcWidthMinus1 ;table index = source edge width - 1
        !          2622:         call    copy_edge_from_screen_to_buffer[eax*4]
        !          2623: 
        !          2624: ; Remember where we left off, for next time
        !          2625: 
        !          2626:         mov     ebx,pdsurf
        !          2627:         sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        !          2628:         mov     ulLeftEdgeSrc,esi
        !          2629: 
        !          2630: ; Now copy the temp buffer to the screen.
        !          2631: 
        !          2632: ; Map in the source bank to match the destination, so we can read/write to it
        !          2633: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
        !          2634: ; mapped by this call, which is fine.
        !          2635: 
        !          2636:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          2637:                 <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
        !          2638: 
        !          2639: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
        !          2640: ; until now to calculate this, because the dest bank wasn't mapped earlier).
        !          2641: ; EBX (pdsurf) is still used here, so the call above must preserve it.
        !          2642:         mov     edi,ulLeftEdgeDest
        !          2643:         add     edi,[ebx].dsurf_pvBitmapStart2WindowD
        !          2644: 
        !          2645: ; Do the copy. NOTE(review): EDX is not reloaded after the bank-control call;
        !          2646: ; confirm the buffer-to-screen routines do not depend on DH surviving it.
        !          2647:         mov     ah,byte ptr jLeftMask           ;clip mask for this edge
        !          2648:         mov     ebx,ulLeftSrcWidthMinus1
        !          2649:         call    copy_edge_from_buffer_to_screen[ebx*4]
        !          2650: 
        !          2651: ; Remember where we left off, for next time.
        !          2652: 
        !          2653:         mov     ebx,pdsurf
        !          2654:         sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        !          2655:         mov     ulLeftEdgeDest,edi
        !          2656: 
        !          2657: ; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
        !          2658: ; will be mapped by this call, which is fine.
        !          2659: 
        !          2660:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          2661:                 <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
        !          2662: 
        !          2663:         PLAIN_RET
        !          2664: 
        !          2665: 
        !          2666: ;-----------------------------------------------------------------------;
        !          2667: ; Copies a strip of right edge bytes from the source to the destination
        !          2668: ; through an intermediate RAM buffer. This is the approach required by
        !          2669: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
        !          2670: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
        !          2671: ; cause flicker.
        !          2672: ;
        !          2673: ; Input:
        !          2674: ;       ulNextScan = width of scan, in bytes
        !          2675: ;       ulBlockHeight = # of scans to copy
        !          2676: ;       ulRightEdgeSrc = start source offset in bitmap
        !          2677: ;       ulRightEdgeDest = start dest offset in bitmap
        !          2678: ;       jRightMask = right edge clip mask
        !          2679: ;       pTempPlane = pointer to temp storage buffer
        !          2680: ;       ulCurrentSrcScan = scan used to map in source bank
        !          2681: ;       ulCurrentDestScan = scan used to map in dest bank
        !          2682: ;       ulCurrentJustification = justification used to map in current bank
        !          2683: ;       ulRightSrcWidthMinus1 = width of right source edge minus 1 (0 or 1)
        !          2684: ;       For 1 R/W adapters, expects the source bank to be mapped in; banking
        !          2685: ;               is the same at exit as it was at entry
        !          2686: ;
        !          2687: ; Output:
        !          2688: ;       Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
        !          2689: ;               scan processed
        !          2690: ;
        !          2691: ; Note that this should never be called for an unbanked or 2 R/W adapter,
        !          2692: ; because the source and dest are always both addressable simultaneously then.
        !          2693: ;-----------------------------------------------------------------------;
        !          2694: 
        !          2695:         align   4
        !          2696: copy_right_edge_via_buffer:
        !          2697: 
        !          2698: ; First, copy all the bytes into the temporary buffer.
        !          2699: 
        !          2700: ; Calculate start source address from bitmap start address and
        !          2701: ; offset within bitmap.
        !          2702: 
        !          2703:         mov     ecx,pdsurf
        !          2704:         mov     esi,ulRightEdgeSrc
        !          2705:         add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        !          2706: 
        !          2707: ; Copy the edge from the source to the temp buffer.
        !          2708: 
        !          2709:         mov     eax,ulRightSrcWidthMinus1 ;table index = source edge width - 1
        !          2710:         call    copy_edge_from_screen_to_buffer[eax*4]
        !          2711: 
        !          2712: ; Remember where we left off, for next time
        !          2713: 
        !          2714:         mov     ebx,pdsurf
        !          2715:         sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        !          2716:         mov     ulRightEdgeSrc,esi
        !          2717: 
        !          2718: ; Now copy the temp buffer to the screen.
        !          2719: 
        !          2720: ; Map in the source bank to match the destination, so we can read/write to it
        !          2721: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
        !          2722: ; mapped by this call, which is fine.
        !          2723: 
        !          2724:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          2725:                 <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
        !          2726: 
        !          2727: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
        !          2728: ; until now to calculate this, because the dest bank wasn't mapped earlier).
        !          2729: ; EBX (pdsurf) is still used here, so the call above must preserve it.
        !          2730:         mov     edi,ulRightEdgeDest
        !          2731:         add     edi,[ebx].dsurf_pvBitmapStart2WindowD
        !          2732: 
        !          2733: ; Do the copy. NOTE(review): EDX is not reloaded after the bank-control call;
        !          2734: ; confirm the buffer-to-screen routines do not depend on DH surviving it.
        !          2735:         mov     ah,byte ptr jRightMask          ;clip mask for this edge
        !          2736:         mov     ebx,ulRightSrcWidthMinus1
        !          2737:         call    copy_edge_from_buffer_to_screen[ebx*4]
        !          2738: 
        !          2739: ; Remember where we left off, for next time.
        !          2740: 
        !          2741:         mov     ebx,pdsurf
        !          2742:         sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        !          2743:         mov     ulRightEdgeDest,edi
        !          2744: 
        !          2745: ; Put back the original source bank.  Note that on a 1 R/W adapter, both banks
        !          2746: ; will be mapped by this call, which is fine.
        !          2747: 
        !          2748:         ptrCall   <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
        !          2749:                 <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
        !          2750: 
        !          2751:         PLAIN_RET
        !          2752: 
        !          2753: 
        !          2754: ;-----------------------------------------------------------------------;
        !          2755: ; Copies an edge from the temp buffer (1 wide) to the screen.
        !          2756: ; Entry:
        !          2757: ;       AH = bit mask setting for edge
        !          2758: ;       EDI = destination address
        !          2759: ;       pTempPlane = temp buffer from which to copy
        !          2760: ;       ulBlockHeight = # of bytes to copy per plane
        !          2761: ;       ulNextScan = scan width
        !          2762: ;       Source and dest banks both pointing to destination
        !          2763: ; Exit:
        !          2764: ;       EDI = next destination address
        !          2765: ;
        !          2766: ; DX is loaded here, so DH need not survive the callers' bank-control call.
        !          2767: ; Preserved: EBP
        !          2768: ;-----------------------------------------------------------------------;
        !          2769: 
        !          2770:         align   4
        !          2771: copy_buffered_edge_to_screen_1ws:
        !          2772: 
        !          2773:         mov     pDestAddr,edi   ;each plane pass restarts from this address
        !          2774: 
        !          2775:         mov     edx,VGA_BASE + GRAF_ADDR ;full port address; don't trust DH
        !          2776:         mov     al,GRAF_BIT_MASK
        !          2777:         out     dx,ax           ;set clip mask (AL = GC index, AH = mask)
        !          2778: 
        !          2779:         mov     pTempEntry,offset copy_edge_from_buf_full_chunk_1ws
        !          2780:                                 ;entry point into unrolled loop to copy first
        !          2781:                                 ; chunk, assuming it's a full chunk
        !          2782:         mov     ecx,pTempPlane  ;temp buffer start (copy from here)
        !          2783:         mov     ebx,ulBlockHeight ;total # of scans to copy
        !          2784: 
        !          2785: ; Copy the edge in a series of chunks, to avoid flicker.
        !          2786: 
        !          2787: copy_from_buffer_chunk_loop_1ws:
        !          2788: 
        !          2789:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
        !          2790:                                     ; a full chunk
        !          2791:         jge     short @F            ;do a full chunk
        !          2792:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
        !          2793:                                     ; scans
        !          2794:         mov     ebx,pfnCopyEdgesFromBufferEntry_1ws[-4][ebx*4]
        !          2795:         mov     pTempEntry,ebx  ;entry point into unrolled loop to copy desired
        !          2796:                                 ; chunk size (table entry 0 copies 1 byte, so -4)
        !          2797:         sub     ebx,ebx         ;no scans after this
        !          2798: @@:
        !          2799:         push    ebx             ;remember remaining scan count
        !          2800: 
        !          2801:         mov     al,MM_C3        ;start by copying plane 3
        !          2802:         mov     ebx,ulNextScan  ;EBX = scan-to-scan stride for unrolled loop
        !          2803: 
        !          2804:         push    ecx             ;remember current temp buffer start
        !          2805: 
        !          2806:         mov     dl,SEQ_DATA     ;leave DX pointing to the Sequencer Data reg
        !          2807: 
        !          2808: copy_from_buffer_plane_loop_1ws:
        !          2809: 
        !          2810: ; Set Map Mask to enable writes to plane we're copying.
        !          2811: 
        !          2812:         out     dx,al
        !          2813: 
        !          2814:         mov     esi,ecx                 ;point to current plane's source byte
        !          2815:         add     ecx,ulBlockHeight       ;point to next plane's source byte
        !          2816: 
        !          2817:         mov     edi,pDestAddr           ;restart at chunk's first dest byte
        !          2818: 
        !          2819:         jmp     pTempEntry              ;enter unrolled copy loop for this plane
        !          2820: 
        !          2821: 
        !          2822: ;-----------------------------------------------------------------------;
        !          2823: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
        !          2824: ; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
        !          2825: ; bytes.
        !          2826: ;-----------------------------------------------------------------------;
        !          2827: 
        !          2828: pfnCopyEdgesFromBufferEntry_1ws label dword
        !          2829: INDEX = 1
        !          2830:         rept    EDGE_CHUNK_SIZE
        !          2831:         DEFINE_DD       EDGE_FROM_BUFFER_1WS,%INDEX
        !          2832: INDEX = INDEX+1
        !          2833:         endm
        !          2834: 
        !          2835: 
        !          2836: ;-----------------------------------------------------------------------;
        !          2837: ; Unrolled loop for copying a strip of edge bytes (1 wide) from the temp
        !          2838: ; buffer.
        !          2839: ;-----------------------------------------------------------------------;
        !          2840: 
        !          2841: COPY_EDGE_FROM_BUFFER_1WS macro ENTRY_LABEL,ENTRY_INDEX
        !          2842: &ENTRY_LABEL&ENTRY_INDEX&:
        !          2843:         mov     ah,[esi]        ;get byte to copy
        !          2844:         inc     esi             ;point to next source (temp buffer) byte
        !          2845:         xchg    [edi],ah        ;read before write so Bit Mask can operate
        !          2846:                                 ; VGA rotates during write
        !          2847:         add     edi,ebx         ;point to next dest (screen) scan
        !          2848:         endm    ;-----------------------------------;
        !          2849: 
        !          2850: ;  EBX = scan line width
        !          2851: ;  ESI = source address to copy from (temp buffer)
        !          2852: ;  EDI = target address to copy to (screen)
        !          2853: ;  Bit Mask set to desired clipping
        !          2854: ;  Map Mask set to enable the desired plane for write
        !          2855: 
        !          2856:         align   4
        !          2857: copy_edge_from_buf_full_chunk_1ws:
        !          2858:         UNROLL_LOOP     COPY_EDGE_FROM_BUFFER_1WS, \
        !          2859:                         EDGE_FROM_BUFFER_1WS,EDGE_CHUNK_SIZE
        !          2860: 
        !          2861: ; Do next plane within this chunk, if any.
        !          2862: 
        !          2863:         shr     al,1                    ;advance to next plane (0 after plane 0)
        !          2864:         jnz     copy_from_buffer_plane_loop_1ws
        !          2865: 
        !          2866: ; Remember where we left off, for next chunk.
        !          2867: 
        !          2868:         mov     pDestAddr,edi
        !          2869:         pop     ecx                     ;get back current temp buffer start
        !          2870:         add     ecx,EDGE_CHUNK_SIZE     ;point to next chunk's start
        !          2871: 
        !          2872: ; Do next chunk within this bank block, if any.
        !          2873: 
        !          2874:         pop     ebx                     ;retrieve remaining scan count
        !          2875:         and     ebx,ebx                 ;any scans left?
        !          2876:         jnz     copy_from_buffer_chunk_loop_1ws ;more scans to do
        !          2877: 
        !          2878:         PLAIN_RET
        !          2879: 
        !          2880: 
        !          2881: ;-----------------------------------------------------------------------;
        !          2882: ; Copies an edge from the screen (1 wide) to the temp buffer.
        !          2883: ; Entry:
        !          2884: ;       ESI = source address
        !          2885: ;       pTempPlane = temp buffer to which to copy
        !          2886: ;       ulBlockHeight = # of bytes to copy per plane
        !          2887: ;       ulNextScan = scan width
        !          2888: ;       Source bank pointing to source
        !          2889: ; Exit:
        !          2890: ;       DH = VGA_BASE SHR 8
        !          2891: ;       ESI = next source address
        !          2892: ;
        !          2893: ; Preserved: EBP
        !          2894: ;-----------------------------------------------------------------------;
        !          2895: 
        !          2896:         align   4
        !          2897: copy_screen_to_buffered_edge_1ws:
        !          2898: 
        !          2899:         mov     pSrcAddr,esi
        !          2900: 
        !          2901: ; Leave the GC Index pointing to the Read Map.
        !          2902: 
        !          2903:         mov     edx,VGA_BASE + GRAF_ADDR
        !          2904:         mov     al,GRAF_READ_MAP
        !          2905:         out     dx,al
        !          2906: 
        !          2907:         mov     ebx,ulBlockHeight
        !          2908:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry_1ws, \
        !          2909:                                 LOOP_UNROLL_SHIFT
        !          2910:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
        !          2911:         mov     pTempEntry,ecx   ;ditto for entry point
        !          2912: 
        !          2913:         mov     ecx,ulNextScan
        !          2914:         mov     edi,pTempPlane  ;dest offset in temp buffer for plane 3 bytes.
        !          2915:                                 ;The rest of the planes are stored
        !          2916:                                 ; consecutively
        !          2917:         mov     al,3            ;start by copying plane 3
        !          2918:         mov     dl,GRAF_DATA    ;leave DX pointing to the GC Data reg
        !          2919: copy_edge_to_buffer_plane_loop_1ws:
        !          2920:         mov     esi,pSrcAddr ;source pointer
        !          2921: 
        !          2922:         out     dx,al            ;set Read Map to plane we're copying from.
        !          2923: 
        !          2924:         mov     ebx,culTempCount ;# of unrolled loop iterations
        !          2925:         jmp     pTempEntry       ;copy the edge bytes for this plane to the
        !          2926:                                  ; temp buffer
        !          2927: 
        !          2928: ;-----------------------------------------------------------------------;
        !          2929: ; Table of unrolled edge copy to temp buffer loop entry points.
        !          2930: ;-----------------------------------------------------------------------;
        !          2931: 
        !          2932:         UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry_1WS, \
        !          2933:                                 EDGE_TO_TEMP_1WS, LOOP_UNROLL_COUNT
        !          2934: 
        !          2935: ;-----------------------------------------------------------------------;
        !          2936: ; Unrolled loop for copying edge bytes to the temp buffer.
        !          2937: ;-----------------------------------------------------------------------;
        !          2938: 
        !          2939: COPY_EDGE_TO_TEMP_1WS macro ENTRY_LABEL,ENTRY_INDEX
        !          2940: &ENTRY_LABEL&ENTRY_INDEX&:
        !          2941:         mov     ah,[esi]        ;get byte to copy
        !          2942:         add     esi,ecx         ;point to next source scan
        !          2943:         mov     [edi],ah        ;copy byte to temp buffer
        !          2944:         inc     edi             ;point to next temp buffer byte
        !          2945:         endm    ;-----------------------------------;
        !          2946: 
        !          2947: ;  EBX = count of unrolled loop iterations
        !          2948: ;  ECX = offset from end of one scan's fill to start of next
        !          2949: ;  ESI = source address to copy from (screen)
        !          2950: ;  EDI = target address to copy to (temp buffer)
        !          2951: ;  Read Map set to enable the desired plane for read
        !          2952: 
        !          2953:         align   4
        !          2954: edge_to_buffer_loop_1ws:
        !          2955:         UNROLL_LOOP     COPY_EDGE_TO_TEMP_1WS,EDGE_TO_TEMP_1WS, \
        !          2956:                         LOOP_UNROLL_COUNT
        !          2957:         dec     ebx
        !          2958:         jnz     edge_to_buffer_loop_1ws
        !          2959: 
        !          2960:         dec     al              ;count down planes
        !          2961:         jns     copy_edge_to_buffer_plane_loop_1ws
        !          2962: 
        !          2963:         PLAIN_RET
        !          2964: 
        !          2965: 
        !          2966: ;-----------------------------------------------------------------------;
        !          2967: ; Copies an edge from a 2-wide source to the destination on the screen.
        !          2968: ; Entry:
        !          2969: ;       AH = bit mask setting for edge
        !          2970: ;       ESI = source address
        !          2971: ;       EDI = destination address
        !          2972: ;       ulBlockHeight = # of scans to copy per plane
        !          2973: ;       ulNextScan = scan width
        !          2974: ;       ulCombineMask = masking to be applied before ORing the two source
        !          2975: ;               bytes together, to keep only the data needed in preparation
        !          2976: ;               for the VGA rotator doing its stuff
        !          2977: ;       Source readable, and destination readable and writable
        !          2978: ; Exit:
        !          2979: ;       ESI = next source address
        !          2980: ;       EDI = next destination address
        !          2981: ; NOTE(review): SC Index is not set here; presumably already at Map Mask.
        !          2982: ; Preserved: EBP
        !          2983: ;-----------------------------------------------------------------------;
        !          2984: 
        !          2985:         align   4
        !          2986: copy_edge_2ws:
        !          2987:         mov     pSrcAddr,esi    ;remember chunk start addresses; each plane
        !          2988:         mov     pDestAddr,edi   ; pass reloads ESI/EDI from these
        !          2989: 
        !          2990: ; Set the clip mask for this edge.
        !          2991: 
        !          2992:         mov     edx,VGA_BASE + GRAF_ADDR
        !          2993:         mov     al,GRAF_BIT_MASK
        !          2994:         out     dx,ax           ;AL goes to port DX, AH to port DX+1
        !          2995: 
        !          2996: ; Leave the GC Index pointing to the Read Map.
        !          2997: 
        !          2998:         mov     al,GRAF_READ_MAP
        !          2999:         out     dx,al
        !          3000: 
        !          3001:         mov     ebx,ulBlockHeight ;total # of scans still to copy
        !          3002: 
        !          3003:         mov     ecx,offset copy_edge_rw_2ws_full_chunk
        !          3004:                                 ;entry point into unrolled loop assuming we do
        !          3005:                                 ; a full chunk the first time
        !          3006: 
        !          3007: ; Copy the edge in a series of chunks.
        !          3008: 
        !          3009: copy_edge_chunk_loop_2ws:
        !          3010: 
        !          3011:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
        !          3012:                                     ; a full chunk
        !          3013:         jge     short @F            ;do a full chunk
        !          3014:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
        !          3015:                                     ; scans
        !          3016:         mov     ecx,pfnCopyEdgeRWEntry_2ws[-4][ebx*4]
        !          3017:                                 ;entry point into unrolled loop to copy desired
        !          3018:                                 ; chunk size ([-4]: table entry 0 is for 1 scan)
        !          3019:         sub     ebx,ebx         ;no scans after this
        !          3020: @@:
        !          3021:         push    ebx             ;remember remaining scan count
        !          3022: 
        !          3023:         mov     eax,(MM_C3 SHL 8) + 3 ;AH = Map Mask, AL = Read Map: plane 3
        !          3024:         mov     ebx,ulNextScan  ;scan width, used to step ESI and EDI
        !          3025: 
        !          3026: copy_edge_plane_loop_2ws:
        !          3027: 
        !          3028:         push    eax                     ;preserve plane info
        !          3029: 
        !          3030: ; Set Read Map to enable reads from plane we're copying from.
        !          3031: 
        !          3032:         mov     edx,VGA_BASE + GRAF_DATA
        !          3033:         out     dx,al
        !          3034: 
        !          3035: ; Set Map Mask to enable writes to plane we're copying.
        !          3036: 
        !          3037:         mov     dl,SEQ_DATA
        !          3038:         mov     al,ah
        !          3039:         out     dx,al
        !          3040: 
        !          3041:         mov     esi,pSrcAddr            ;restart at this chunk's first scan
        !          3042:         mov     edi,pDestAddr
        !          3043:         mov     edx,ulCombineMask
        !          3044: 
        !          3045:         jmp     ecx                     ;copy the edge for this plane
        !          3046: 
        !          3047: 
        !          3048: ;-----------------------------------------------------------------------;
        !          3049: ; Table of unrolled edge loop entry points. First entry point is to copy
        !          3050: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
        !          3051: ;-----------------------------------------------------------------------;
        !          3052: 
        !          3053: pfnCopyEdgeRWEntry_2ws label dword
        !          3054: INDEX = 1
        !          3055:         rept    EDGE_CHUNK_SIZE
        !          3056:         DEFINE_DD       EDGE_RW_2WS,%INDEX
        !          3057: INDEX = INDEX+1
        !          3058:         endm
        !          3059: 
        !          3060: 
        !          3061: ;-----------------------------------------------------------------------;
        !          3062: ; Unrolled loop for copying a strip of edge bytes, with 2-wide source and
        !          3063: ; destination both readable and writable.
        !          3064: ;-----------------------------------------------------------------------;
        !          3065: 
        !          3066: COPY_EDGE_RW_2WS macro ENTRY_LABEL,ENTRY_INDEX
        !          3067: &ENTRY_LABEL&ENTRY_INDEX&:
        !          3068:         mov     ax,[esi]        ;get word to copy
        !          3069:         add     esi,ebx         ;point to next source scan
        !          3070:         and     eax,edx         ;mask in preparation for combining bytes
        !          3071:         or      al,ah           ;combine the desired parts of the bytes
        !          3072:         xchg    [edi],al        ;read before write so Bit Mask can operate
        !          3073:                                 ; VGA rotates during write
        !          3074:         add     edi,ebx         ;point to next dest scan
        !          3075:         endm    ;-----------------------------------;
        !          3076: 
        !          3077: ;  EBX = scan line width
        !          3078: ;  EDX = mask to preserve desired portions of AH and AL before combining
        !          3079: ;  ESI = source address to copy from
        !          3080: ;  EDI = target address to copy to
        !          3081: ;  Bit Mask set to desired clipping
        !          3082: ;  Read Map and Map Mask set to enable the desired plane for read and write
        !          3083: 
        !          3084:         align   4
        !          3085: copy_edge_rw_2ws_full_chunk:
        !          3086:         UNROLL_LOOP COPY_EDGE_RW_2WS,EDGE_RW_2WS,EDGE_CHUNK_SIZE
        !          3087: 
        !          3088: ; Do next plane within this chunk, if any.
        !          3089: 
        !          3090:         pop     eax                     ;retrieve plane info
        !          3091: 
        !          3092:         shr     ah,1                    ;advance to next plane
        !          3093:         dec     eax                     ;count down planes
        !          3094:         jns     copy_edge_plane_loop_2ws ;loop until plane # goes negative
        !          3095: 
        !          3096: ; Remember where we left off, for the next chunk.
        !          3097: 
        !          3098:         mov     pSrcAddr,esi
        !          3099:         mov     pDestAddr,edi
        !          3100: 
        !          3101: ; Do next chunk within this bank block, if any.
        !          3102: 
        !          3103:         pop     ebx                     ;retrieve remaining scan count
        !          3104:         and     ebx,ebx                 ;any scans left?
        !          3105:         jnz     copy_edge_chunk_loop_2ws ;more scans to do
        !          3106: 
        !          3107:         PLAIN_RET
        !          3108: 
        !          3109: 
        !          3110: ;-----------------------------------------------------------------------;
        !          3111: ; Copies an edge from the temp buffer (2 wide) to the screen.
        !          3112: ; Entry:
        !          3113: ;       AH = bit mask setting for edge
        !          3114: ;       EDI = destination address
        !          3115: ;       pTempPlane = temp buffer from which to copy
        !          3116: ;       ulBlockHeight = # of scans (buffer words) to copy per plane
        !          3117: ;       ulNextScan = scan width
        !          3118: ;       Source and dest banks both pointing to destination
        !          3119: ;       ulCombineMask = masking to be applied before ORing the two source
        !          3120: ;               bytes together, to keep only the data needed in preparation
        !          3121: ;               for the VGA rotator doing its stuff
        !          3122: ; Exit:
        !          3123: ;       EDI = next destination address
        !          3124: ; NOTE(review): SC Index is not set here; presumably already at Map Mask.
        !          3125: ; Preserved: EBP
        !          3126: ;-----------------------------------------------------------------------;
        !          3127: 
        !          3128:         align   4
        !          3129: copy_buffered_edge_to_screen_2ws:
        !          3130: 
        !          3131:         mov     pDestAddr,edi   ;remember chunk start; reloaded for each plane
        !          3132: 
        !          3133:         mov     edx,VGA_BASE + GRAF_ADDR
        !          3134:         mov     al,GRAF_BIT_MASK
        !          3135:         out     dx,ax           ;set the Bit Mask for this edge from AH
        !          3136: 
        !          3137:         mov     pTempEntry,offset copy_edge_from_buf_full_chunk_2ws
        !          3138:                                 ;entry point into unrolled loop, assuming the
        !          3139:                                 ; first chunk is full size
        !          3140:         mov     ecx,pTempPlane  ;temp buffer start (copy from here)
        !          3141:         mov     ebx,ulBlockHeight ;total # of scans still to copy
        !          3142: 
        !          3143: ; Copy the edge in a series of chunks, to avoid flicker.
        !          3144: 
        !          3145: copy_from_buffer_chunk_loop_2ws:
        !          3146: 
        !          3147:         sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
        !          3148:                                     ; a full chunk
        !          3149:         jge     short @F            ;do a full chunk
        !          3150:         add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
        !          3151:                                     ; scans
        !          3152:         mov     ebx,pfnCopyEdgesFromBufferEntry_2ws[-4][ebx*4]
        !          3153:         mov     pTempEntry,ebx  ;entry point into unrolled loop to copy final
        !          3154:                                 ; chunk size
        !          3155:         sub     ebx,ebx         ;no scans after this
        !          3156: @@:
        !          3157:         push    ebx             ;remember remaining scan count
        !          3158: 
        !          3159:         mov     al,MM_C3        ;start by copying plane 3
        !          3160:         mov     ebx,ulNextScan  ;scan width, used to step EDI scan to scan
        !          3161: 
        !          3162:         push    ecx             ;remember current temp buffer start
        !          3163: 
        !          3164: copy_from_buffer_plane_loop_2ws:
        !          3165: 
        !          3166: ; Set Map Mask to enable writes to plane we're copying.
        !          3167: 
        !          3168:         mov     edx,VGA_BASE + SEQ_DATA
        !          3169:         out     dx,al
        !          3170: 
        !          3171:         push    eax                     ;preserve plane info
        !          3172: 
        !          3173:         mov     esi,ecx                 ;point to current plane's source word
        !          3174:         mov     eax,ulBlockHeight       ;each plane holds ulBlockHeight words
        !          3175:         lea     ecx,[ecx+eax*2]         ;point to next plane's source word
        !          3176: 
        !          3177:         mov     edi,pDestAddr           ;restart at this chunk's first scan
        !          3178:         mov     edx,ulCombineMask
        !          3179: 
        !          3180:         jmp     pTempEntry              ;copy the edge for this plane
        !          3181: 
        !          3182: 
        !          3183: ;-----------------------------------------------------------------------;
        !          3184: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
        !          3185: ; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
        !          3186: ; bytes.
        !          3187: ;-----------------------------------------------------------------------;
        !          3188: 
        !          3189: pfnCopyEdgesFromBufferEntry_2WS label dword
        !          3190: INDEX = 1
        !          3191:         rept    EDGE_CHUNK_SIZE
        !          3192:         DEFINE_DD       EDGE_FROM_BUFFER_2WS,%INDEX
        !          3193: INDEX = INDEX+1
        !          3194:         endm
        !          3195: 
        !          3196: 
        !          3197: ;-----------------------------------------------------------------------;
        !          3198: ; Unrolled loop for copying a strip of edge bytes (2 wide) from the temp
        !          3199: ; buffer.
        !          3200: ;-----------------------------------------------------------------------;
        !          3201: 
        !          3202: COPY_EDGE_FROM_BUFFER_2WS macro ENTRY_LABEL,ENTRY_INDEX
        !          3203: &ENTRY_LABEL&ENTRY_INDEX&:
        !          3204:         mov     ax,[esi]        ;get word to copy
        !          3205:         add     esi,2           ;point to next source (temp buffer) word
        !          3206:         and     eax,edx         ;mask in preparation for combining bytes
        !          3207:         or      al,ah           ;combine the desired parts of the bytes
        !          3208:         xchg    [edi],al        ;read before write so Bit Mask can operate
        !          3209:                                 ; VGA rotates during write
        !          3210:         add     edi,ebx         ;point to next dest (screen) scan
        !          3211:         endm    ;-----------------------------------;
        !          3212: 
        !          3213: ;  EBX = scan line width
        !          3214: ;  EDX = mask to preserve desired portions of AH and AL before combining
        !          3215: ;  ESI = source address to copy from (temp buffer)
        !          3216: ;  EDI = target address to copy to (screen)
        !          3217: ;  Bit Mask set to desired clipping
        !          3218: ;  Map Mask set to enable the desired plane for write
        !          3219: 
        !          3220:         align   4
        !          3221: copy_edge_from_buf_full_chunk_2ws:
        !          3222:         UNROLL_LOOP     COPY_EDGE_FROM_BUFFER_2WS, \
        !          3223:                         EDGE_FROM_BUFFER_2WS,EDGE_CHUNK_SIZE
        !          3224: 
        !          3225: ; Do next plane within this chunk, if any.
        !          3226: 
        !          3227:         pop     eax                     ;retrieve plane info
        !          3228:         shr     al,1                    ;advance to next plane
        !          3229:         jnz     copy_from_buffer_plane_loop_2ws ;until mask bit shifts out
        !          3230: 
        !          3231: ; Remember where we left off, for next chunk.
        !          3232: 
        !          3233:         mov     pDestAddr,edi
        !          3234:         pop     ecx                     ;get back current temp buffer start
        !          3235:         add     ecx,EDGE_CHUNK_SIZE*2   ;point to next chunk's start word
        !          3236: 
        !          3237: ; Do next chunk within this bank block, if any.
        !          3238: 
        !          3239:         pop     ebx                     ;retrieve remaining scan count
        !          3240:         and     ebx,ebx                 ;any scans left?
        !          3241:         jnz     copy_from_buffer_chunk_loop_2ws ;more scans to do
        !          3242: 
        !          3243:         PLAIN_RET
        !          3244: 
        !          3245: 
        !          3246: ;-----------------------------------------------------------------------;
        !          3247: ; Copies an edge from the screen (2 wide) to the temp buffer.
        !          3248: ; Entry:
        !          3249: ;       ESI = source address
        !          3250: ;       pTempPlane = temp buffer to which to copy
        !          3251: ;       ulBlockHeight = # of scans (words) to copy per plane
        !          3252: ;       ulNextScan = scan width
        !          3253: ;       Source bank pointing to source
        !          3254: ; Exit:
        !          3255: ;       ESI = next source address
        !          3256: ;
        !          3257: ; Preserved: EBP
        !          3258: ;-----------------------------------------------------------------------;
        !          3259: 
        !          3260:         align   4
        !          3261: copy_screen_to_buffered_edge_2ws:
        !          3262: 
        !          3263:         mov     pSrcAddr,esi    ;remember edge start; reloaded for each plane
        !          3264: 
        !          3265: ; Leave the GC Index pointing to the Read Map.
        !          3266: 
        !          3267:         mov     edx,VGA_BASE + GRAF_ADDR
        !          3268:         mov     al,GRAF_READ_MAP
        !          3269:         out     dx,al
        !          3270: 
        !          3271:         mov     ebx,ulBlockHeight ;# of edge words to copy per plane
        !          3272:         SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry_2ws, \
        !          3273:                                 LOOP_UNROLL_SHIFT
        !          3274:         mov     culTempCount,ebx ;remember # of unrolled loop iterations
        !          3275:         mov     pTempEntry,ecx   ;ditto for entry point
        !          3276: 
        !          3277:         mov     ecx,ulNextScan  ;scan width, used to step ESI scan to scan
        !          3278:         mov     edi,pTempPlane  ;dest offset in temp buffer for plane 3 words.
        !          3279:                                 ;The rest of the planes are stored
        !          3280:                                 ; consecutively
        !          3281:         mov     eax,3           ;start by copying plane 3
        !          3282: copy_edge_to_buf_pl_loop_2ws:
        !          3283:         mov     esi,pSrcAddr    ;source pointer (same start for every plane)
        !          3284: 
        !          3285:         mov     edx,VGA_BASE + GRAF_DATA ;reloaded: copy loop uses DX as scratch
        !          3286:         out     dx,al           ;set Read Map to plane from which we're copying
        !          3287: 
        !          3288:         mov     ebx,culTempCount ;# of unrolled loop iterations
        !          3289:         jmp     pTempEntry       ;copy the edge words for this plane to the
        !          3290:                                  ; temp buffer
        !          3291: 
        !          3292: ;-----------------------------------------------------------------------;
        !          3293: ; Table of unrolled edge copy to temp buffer loop entry points.
        !          3294: ;-----------------------------------------------------------------------;
        !          3295: 
        !          3296:         UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry_2WS, \
        !          3297:                                 EDGE_TO_TEMP_2WS, LOOP_UNROLL_COUNT
        !          3298: 
        !          3299: ;-----------------------------------------------------------------------;
        !          3300: ; Unrolled loop for copying edge words to the temp buffer.
        !          3301: ;-----------------------------------------------------------------------;
        !          3302: 
        !          3303: COPY_EDGE_TO_TEMP_2WS macro ENTRY_LABEL,ENTRY_INDEX
        !          3304: &ENTRY_LABEL&ENTRY_INDEX&:
        !          3305:         mov     dx,[esi]        ;get word to copy
        !          3306:         add     esi,ecx         ;point to next source scan
        !          3307:         mov     [edi],dx        ;copy word to temp buffer
        !          3308:         add     edi,2           ;point to next temp buffer word
        !          3309:         endm    ;-----------------------------------;
        !          3310: 
        !          3311: ;  EBX = count of unrolled loop iterations
        !          3312: ;  ECX = scan width (ulNextScan), added to ESI after each word
        !          3313: ;  ESI = source address to copy from (screen)
        !          3314: ;  EDI = target address to copy to (temp buffer)
        !          3315: ;  Read Map set to enable the desired plane for read
        !          3316: 
        !          3317:         align   4
        !          3318: edge_to_buffer_loop_2ws:
        !          3319:         UNROLL_LOOP     COPY_EDGE_TO_TEMP_2WS,EDGE_TO_TEMP_2WS, \
        !          3320:                         LOOP_UNROLL_COUNT
        !          3321:         dec     ebx              ;count down unrolled loop iterations
        !          3322:         jnz     edge_to_buffer_loop_2ws
        !          3323: 
        !          3324:         dec     eax              ;count down planes
        !          3325:         jns     copy_edge_to_buf_pl_loop_2ws ;do planes 3 through 0
        !          3326: 
        !          3327:         PLAIN_RET
        !          3328: 
        !          3329: 
        !          3330: ;-----------------------------------------------------------------------;
        !          3331: 
        !          3332: endProc vNonAlignedSrcCopy
        !          3333: 
        !          3334: _TEXT$04   ends
        !          3335: 
        !          3336:         end
        !          3337: 
        !          3338: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.