|
|
;---------------------------Module-Header------------------------------;
; Module Name: alignblt.asm
;
; Copyright (c) 1992 Microsoft Corporation
;-----------------------------------------------------------------------;

;-----------------------------------------------------------------------;
; VOID vAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
;                      INT icopydir);
; Input:
;  pdsurf   - surface on which to copy
;  prcldest - pointer to destination rectangle
;  pptlsrc  - pointer to source upper left corner
;  icopydir - direction in which copy must proceed to avoid overlap problems
;             and synchronize with the clip enumeration visually, according to
;             constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and CD_LEFTUP in
;             WINDDI.H
;
; Performs accelerated aligned SRCCOPY VGA-to-VGA blts.
;
;-----------------------------------------------------------------------;
;
; Note: Assumes all rectangles have positive heights and widths. Will not
; work properly if this is not the case.
;
;-----------------------------------------------------------------------;
27:
comment $

The overall approach of this module for each rectangle to copy is:

1) Precalculate the masks and whole byte widths, and determine which of
   partial left edge, partial right edge, and whole middle bytes are required
   for this copy.

2) Set up the starting pointers for each of the areas (left, whole middle,
   right), the start and stop scan lines, the copying direction (left-to-right
   or right-to-left, and top-to-bottom or bottom-to-top), and the threading
   (sequence of calls required to do the left/whole/right components in the
   proper sequence), based on the passed-in copy direction, which in turn is
   dictated by the nature of the overlap between the source and destination.

3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
   1 R/W window, unbanked), that sequences through the intersection of each
   bank with the source and destination rectangles in the proper direction
   (top-to-bottom or bottom-to-top, based on the passed-in copy direction),
   and performs the copy in each such rectangle. The threading vector is used
   to call the required routines (copy left/whole/right bytes). For 1 R/W and
   1R/1W adapters, there is a second threading vector that is called when the
   source and the destination are both adequately (for the copy purposes)
   addressable simultaneously (because they're in the same bank), so there's
   no need to copy through a temp buffer. Obviously, we want to avoid the temp
   buffer whenever we can, because it's much slower and doesn't let us take
   advantage of the VGA's hardware.

Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
each plane's bytes are not stored in the corresponding plane's temp buffer, but
rather consecutively in the plane 0 temp buffer. This is to reduce page
faulting, and also so that 1R/1W adapters only need a temp buffer large enough
to hold 4*tallest bank bytes (2K will do here, but nalgnblt.asm needs 4K).
1 R/W adapters still copy whole bytes through the full temp buffer, using all
four planes' temp buffers, so they require a temp buffer big enough to hold a
full bank (256K will do).

commend $
66:
;-----------------------------------------------------------------------;
; Loop unrolling control. LOOP_UNROLL_SHIFT is the log2 of the unroll
; factor applied to the loops in this module; a value of 3, for instance,
; gives 2**3 = 8 copies of the loop body. This is the only setting that has
; to change to adjust unrolling. Loops that work in chunks, such as the edge
; loops, are not affected by it.

LOOP_UNROLL_SHIFT       equ     2

;-----------------------------------------------------------------------;
; Upper limit on the number of edge bytes handled in one plane before
; moving on to the next plane. A larger value is faster, but raises the
; chance of flicker: the raster scan is more likely to display bytes that
; have been updated in some planes but not yet in all of them.

EDGE_CHUNK_SIZE         equ     16
83:
;-----------------------------------------------------------------------;
; THREAD_AND_START [THREADING] [,RETURN_ADDR]
;
; Pushes the current threading sequence (a string of routine addresses) on
; the stack and jumps to the first routine in it, so that each routine's
; return transfers control to the next one.
;
;   THREADING   - operand yielding a pointer to the thread descriptor; when
;                 omitted, pCurrentThread is used
;   RETURN_ADDR - label reached after the last threaded routine returns;
;                 when omitted, execution resumes right after the macro
;
; Clobbers EAX, ECX, EDX.

THREAD_AND_START macro THREADING,RETURN_ADDR
        local   first_call, after_thread

ifnb <&RETURN_ADDR&>
        push    offset &RETURN_ADDR&    ;final return goes to caller's label
else
        push    offset after_thread     ;final return lands just past the
                                        ; macro expansion
endif

ifnb <&THREADING&>
        mov     eax,&THREADING&         ;EAX -> thread descriptor
else
        mov     eax,pCurrentThread      ;EAX -> default thread descriptor
endif

; Enter the push sequence below at a point chosen by the call count:
; target = first_call + 3 - 3*count, so a count of 1 goes straight to the
; JMP, 2 executes one PUSH first, and 3 executes both PUSHes.

        mov     ecx,[eax]               ;# of routines in thread (at least 1)
        lea     ecx,[ecx*2+ecx]         ;*3, as each push below is 3 bytes
        mov     edx,offset first_call+3
        sub     edx,ecx
        jmp     edx                     ;enter at the right push, or the jmp

; Later routines are pushed first so that they are returned to last.

        push    dword ptr [eax+12]      ;third routine (3 byte instruction)
        push    dword ptr [eax+8]       ;second routine (3 byte instruction)
first_call:
        jmp     dword ptr [eax+4]       ;start the first threaded routine

        align   4
after_thread:
        endm
123:
124: ;-----------------------------------------------------------------------;
125:
126: .386
127:
128: ifndef DOS_PLATFORM
129: .model small,c
130: else
131: ifdef STD_CALL
132: .model small,c
133: else
134: .model small,pascal
135: endif; STD_CALL
136: endif; DOS_PLATFORM
137:
138: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
139: assume fs:nothing,gs:nothing
140:
141: .xlist
142: include stdcall.inc ;calling convention cmacros
143: include i386\egavga.inc
144: include i386\strucs.inc
145: include i386\unroll.inc
146: include i386\ropdefs.inc
147:
148: .list
149:
150: ;-----------------------------------------------------------------------;
151:
152: .data
153:
154: ; Threads for stringing together left, whole byte, and right operations
155: ; in various orders, both using a temp buffer and not. Data format is:
156: ;
157: ; DWORD +0 = # of calls in thread (1, 2, or 3)
158: ; +4 = first call (required)
159: ; +8 = second call (optional)
160: ; +12 = third call (optional)
161:
162: align 4
163:
164: ; Copies not involving the temp buffer.
165:
166: Thread_L dd 1
167: dd copy_left_edge
168:
169: Thread_W dd 1
170: dd copy_whole_bytes
171:
172: Thread_R dd 1
173: dd copy_right_edge
174:
175: Thread_LR dd 2
176: dd copy_left_edge
177: dd copy_right_edge
178:
179: Thread_RL dd 2
180: dd copy_right_edge
181: dd copy_left_edge
182:
183: Thread_LW dd 2
184: dd copy_left_edge
185: dd copy_whole_bytes
186:
187: Thread_WL dd 2
188: dd copy_whole_bytes
189: dd copy_left_edge
190:
191: Thread_WR dd 2
192: dd copy_whole_bytes
193: dd copy_right_edge
194:
195: Thread_RW dd 2
196: dd copy_right_edge
197: dd copy_whole_bytes
198:
199: Thread_LWR dd 3
200: dd copy_left_edge
201: dd copy_whole_bytes
202: dd copy_right_edge
203:
204: Thread_RWL dd 3
205: dd copy_right_edge
206: dd copy_whole_bytes
207: dd copy_left_edge
208:
209: ; Copies involving the temp buffer.
210:
211: Thread_Lb dd 1
212: dd copy_left_edge_via_buffer
213:
214: Thread_Wb dd 1
215: dd copy_whole_bytes_via_buffer
216:
217: Thread_Rb dd 1
218: dd copy_right_edge_via_buffer
219:
220: Thread_LbRb dd 2
221: dd copy_left_edge_via_buffer
222: dd copy_right_edge_via_buffer
223:
224: Thread_RbLb dd 2
225: dd copy_right_edge_via_buffer
226: dd copy_left_edge_via_buffer
227:
228: Thread_LbW dd 2
229: dd copy_left_edge_via_buffer
230: dd copy_whole_bytes
231:
232: Thread_LbWb dd 2
233: dd copy_left_edge_via_buffer
234: dd copy_whole_bytes_via_buffer
235:
236: Thread_WLb dd 2
237: dd copy_whole_bytes
238: dd copy_left_edge_via_buffer
239:
240: Thread_WbLb dd 2
241: dd copy_whole_bytes_via_buffer
242: dd copy_left_edge_via_buffer
243:
244: Thread_WRb dd 2
245: dd copy_whole_bytes
246: dd copy_right_edge_via_buffer
247:
248: Thread_WbRb dd 2
249: dd copy_whole_bytes_via_buffer
250: dd copy_right_edge_via_buffer
251:
252: Thread_RbW dd 2
253: dd copy_right_edge_via_buffer
254: dd copy_whole_bytes
255:
256: Thread_RbWb dd 2
257: dd copy_right_edge_via_buffer
258: dd copy_whole_bytes_via_buffer
259:
260: Thread_LbWRb dd 3
261: dd copy_left_edge_via_buffer
262: dd copy_whole_bytes
263: dd copy_right_edge_via_buffer
264:
265: Thread_LbWbRb dd 3
266: dd copy_left_edge_via_buffer
267: dd copy_whole_bytes_via_buffer
268: dd copy_right_edge_via_buffer
269:
270: Thread_RbWLb dd 3
271: dd copy_right_edge_via_buffer
272: dd copy_whole_bytes
273: dd copy_left_edge_via_buffer
274:
275: Thread_RbWbLb dd 3
276: dd copy_right_edge_via_buffer
277: dd copy_whole_bytes_via_buffer
278: dd copy_left_edge_via_buffer
279:
;-----------------------------------------------------------------------;
; Thread selection table for copies in which the source and destination are
; both addressable at the same time for our purposes, so the temp buffer is
; not needed (unbanked, 2 R/W, and sometimes 1 R/W and 1R/1W). The index is
; a 4-bit field:
;
;   Bit 3 = 1 for a left-to-right copy, 0 for right-to-left
;   Bit 2 = 1 if the left edge must be copied
;   Bit 1 = 1 if whole bytes must be copied
;   Bit 0 = 1 if the right edge must be copied
;
; Entries are listed four per row in index order; the comment on each row
; names the parts copied, in the order they are copied. "-" marks the unused
; entry for "nothing to copy".

MasterThreadTable label dword

; Right-to-left copies (indices 0-7).
        dd      0,        Thread_R,  Thread_W,  Thread_RW   ;-,  R,  W,  RW
        dd      Thread_L, Thread_RL, Thread_WL, Thread_RWL  ;L,  RL, WL, RWL

; Left-to-right copies (indices 8-15).
        dd      0,        Thread_R,  Thread_W,  Thread_WR   ;-,  R,  W,  WR
        dd      Thread_L, Thread_LR, Thread_LW, Thread_LWR  ;L,  LR, LW, LWR
312:
313:
; Thread selection table for copies in which the source and destination are
; not both addressable at the same time for our purposes, so the copy has to
; go through the temp buffer (this happens only for 1 R/W and 1R/1W, and
; only some of the time). The index is a 6-bit field:
;
;   Bit 5 = adapter type high bit
;   Bit 4 = adapter type low bit
;   Bit 3 = 1 for a left-to-right copy, 0 for right-to-left
;   Bit 2 = 1 if the left edge must be copied
;   Bit 1 = 1 if whole bytes must be copied
;   Bit 0 = 1 if the right edge must be copied
;
; Each adapter type owns 16 consecutive entries, listed four per row in
; index order. Within each group the first two rows are right-to-left and
; the last two are left-to-right; the first entry of each direction is
; unused.

MasterThreadTableViaBuffer label dword

; Unbanked (no need for buffer).
        dd      0,         Thread_R,    Thread_W,    Thread_RW      ;R->L
        dd      Thread_L,  Thread_RL,   Thread_WL,   Thread_RWL     ;R->L
        dd      0,         Thread_R,    Thread_W,    Thread_WR      ;L->R
        dd      Thread_L,  Thread_LR,   Thread_LW,   Thread_LWR     ;L->R

; 1 R/W banking window (everything goes through the buffer).
        dd      0,         Thread_Rb,   Thread_Wb,   Thread_RbWb    ;R->L
        dd      Thread_Lb, Thread_RbLb, Thread_WbLb, Thread_RbWbLb  ;R->L
        dd      0,         Thread_Rb,   Thread_Wb,   Thread_WbRb    ;L->R
        dd      Thread_Lb, Thread_LbRb, Thread_LbWb, Thread_LbWbRb  ;L->R

; 1R/1W banking window (only the edges go through the buffer).
        dd      0,         Thread_Rb,   Thread_W,    Thread_RbW     ;R->L
        dd      Thread_Lb, Thread_RbLb, Thread_WLb,  Thread_RbWLb   ;R->L
        dd      0,         Thread_Rb,   Thread_W,    Thread_WRb     ;L->R
        dd      Thread_Lb, Thread_LbRb, Thread_LbW,  Thread_LbWRb   ;L->R

; 2 R/W banking window (no need for buffer).
        dd      0,         Thread_R,    Thread_W,    Thread_RW      ;R->L
        dd      Thread_L,  Thread_RL,   Thread_WL,   Thread_RWL     ;R->L
        dd      0,         Thread_R,    Thread_W,    Thread_WR      ;L->R
        dd      Thread_L,  Thread_LR,   Thread_LW,   Thread_LWR     ;L->R
409:
410:
; Left shift that moves the adapter type into its field (bits 5-4) of the
; MasterThreadTableViaBuffer index.

ADAPTER_FIELD_SHIFT     equ     4

; Index bit (bit 3) that selects the left-to-right entries in both
; MasterThread tables; OR it into the index for left-to-right copies.

LEFT_TO_RIGHT_FIELD_SET equ     08h
419:
420:
; Top-to-bottom bank enumeration loops, indexed by adapter type.

        align   4
TopToBottomLoopTable label dword
        dd      top_to_bottom_2RW       ;entry 0: unbanked, handled the same
                                        ; way as 2 R/W
        dd      top_to_bottom_1RW       ;entry 1
        dd      top_to_bottom_1R1W      ;entry 2
        dd      top_to_bottom_2RW       ;entry 3
429:
430:
; Bottom-to-top bank enumeration loops, indexed by adapter type.

        align   4
BottomToTopLoopTable label dword
        dd      bottom_to_top_2RW       ;entry 0: unbanked, handled the same
                                        ; way as 2 R/W
        dd      bottom_to_top_1RW       ;entry 1
        dd      bottom_to_top_1R1W      ;entry 2
        dd      bottom_to_top_2RW       ;entry 3
439:
440:
; Set-up routines for each copy direction, indexed by the icopydir value
; passed to vAlignedSrcCopy.

        align   4
SetUpForCopyDirection label dword
        dd      left_to_right_top_to_bottom     ;CD_RIGHTDOWN
        dd      right_to_left_top_to_bottom     ;CD_LEFTDOWN
        dd      left_to_right_bottom_to_top     ;CD_RIGHTUP
        dd      right_to_left_bottom_to_top     ;CD_LEFTUP
449:
;-----------------------------------------------------------------------;
; Left edge clip masks, indexed by the intrabyte start address (0-7).
; An entry of all ones (index 0) flags a whole byte.

jLeftMaskTable  label byte
        db      11111111b,01111111b,00111111b,00011111b
        db      00001111b,00000111b,00000011b,00000001b
456:
;-----------------------------------------------------------------------;
; Right edge clip masks, indexed by the intrabyte end address (0-7,
; non-inclusive). An entry of all ones (index 0) flags a whole byte.

jRightMaskTable label byte
        db      11111111b,10000000b,11000000b,11100000b
        db      11110000b,11111000b,11111100b,11111110b
463:
464: ;-----------------------------------------------------------------------;
465:
466: .code
467:
468: _TEXT$03 SEGMENT DWORD USE32 PUBLIC 'CODE'
469: ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
470:
471: ;-----------------------------------------------------------------------;
472:
; vAlignedSrcCopy entry point. Records the temp buffer pointers and the
; adapter type from the surface, copies the rectangle via copy_rect, then
; puts the VGA bit mask, the plane write enables, and the direction flag
; back in their default state before returning.

cProc   vAlignedSrcCopy,16,<    \
        uses esi edi ebx,       \
        pdsurf: ptr DEVSURF,    \
        prcldest : ptr RECTL,   \
        pptlsrc : ptr POINTL,   \
        icopydir : dword        >

        local   culWholeBytesWidth : dword ;# of bytes to copy across each scan
        local   ulBlockHeight : dword   ;# of scans to copy per bank block
        local   ulWholeScanDelta : dword ;offset from end of one whole bytes
                                        ; scan to start of next
        local   ulWholeBytesSrc : dword ;offset in bitmap of first source whole
                                        ; byte to copy from
        local   ulWholeBytesDest : dword ;offset in bitmap of first dest whole
                                        ; byte to copy to
        local   ulLeftEdgeSrc : dword   ;offset in bitmap of first source left
                                        ; edge byte to copy from
        local   ulLeftEdgeDest : dword  ;offset in bitmap of first dest left
                                        ; edge byte to copy to
        local   ulRightEdgeSrc : dword  ;offset in bitmap of first source right
                                        ; edge byte to copy from
        local   ulRightEdgeDest : dword ;offset in bitmap of first dest right
                                        ; edge byte to copy to
        local   ulNextScan : dword      ;width of scan, in bytes (negated by
                                        ; the bottom-to-top set-up code)
        local   jLeftMask : dword       ;left edge clip mask (low byte only)
        local   jRightMask : dword      ;right edge clip mask (low byte only)
        local   culTempCount : dword    ;handy temporary counter
        local   pTempEntry : dword      ;temporary storage for vector into
                                        ; unrolled loop
        local   pTempPlane : dword      ;pointer to storage in temp buffer for
                                        ; edge bytes (which are stored
                                        ; consecutively, not in each plane's
                                        ; temp buffer, to reduce possible page
                                        ; faulting)
        local   ppTempPlane0 : dword    ;pointer to pointer to storage in temp
                                        ; buffer for plane 0, immediately
                                        ; preceded by storage for planes 1, 2,
                                        ; and 3
        local   ppTempPlane3 : dword    ;like above, but for plane 3
        local   ulOffsetInBank : dword  ;offset relative to bank start
        local   pSrcAddr : dword        ;working pointer to first source
                                        ; byte to copy from
        local   pDestAddr : dword       ;working pointer to first dest
                                        ; byte to copy to
        local   ulCurrentJustification:dword ;justification used to map in
                                        ; banks; top for top to bottom
                                        ; copies, bottom for bottom to top
        local   ulCurrentSrcScan :dword ;scan line used to map in current
                                        ; source bank
        local   ulCurrentDestScan:dword ;scan line used to map in current dest
                                        ; bank
        local   ulLastDestScan :dword   ;scan in target rect at which we stop
                                        ; advancing through banks
        local   pCurrentThread : dword  ;pointer to data describing the
                                        ; threaded calls to be performed to
                                        ; perform the current copy
        local   pCurrentThreadViaBuffer:dword
                                        ;pointer to data describing the
                                        ; threaded calls to be performed to
                                        ; perform the current copy in the case
                                        ; where the source and destination are
                                        ; not simultaneously adequately
                                        ; accessible, so the copy has to go
                                        ; through a temp buffer (used only for
                                        ; 1 R/W and 1R/1W banking)
        local   ulAdapterType : dword   ;adapter type code, per VIDEO_BANK_TYPE
        local   ulLWRType : dword       ;whether left edge, whole bytes, and
                                        ; right edge are involved in the
                                        ; current operation;
                                        ; bit 2 = 1 if left edge involved
                                        ; bit 1 = 1 if whole bytes involved
                                        ; bit 0 = 1 if right edge involved
        local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
                                        ; address past the left edge when the
                                        ; left edge is partial

;-----------------------------------------------------------------------;

; Set pointers to temp buffer plane pointers (used only by 1 R/W and 1R/1W
; adapters), and other rectangle-independent variables.

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_pvBankBufferPlane0
        mov     pTempPlane,eax          ;edge bytes are staged in the plane 0
                                        ; temp buffer
        lea     eax,[esi].dsurf_pvBankBufferPlane0
        mov     ppTempPlane0,eax        ;address of the plane 0 buffer pointer
        lea     eax,[esi].dsurf_pvBankBufferPlane3
        mov     ppTempPlane3,eax        ;address of the plane 3 buffer pointer

        mov     eax,[esi].dsurf_vbtBankingType
        mov     ulAdapterType,eax       ;selects the bank loops and the
                                        ; via-buffer threads

; Copy the rectangle.

        call    copy_rect

;-----------------------------------------------------------------------;
; Set the VGA registers back to their default state.
;-----------------------------------------------------------------------;

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        out     dx,ax                   ;enable bit mask for all bits

        mov     dl,SEQ_DATA             ;NOTE(review): only the data port is
                                        ; written, so this presumably relies on
                                        ; the Sequencer index already selecting
                                        ; the Map Mask register -- confirm
        mov     al,MM_ALL
        out     dx,al                   ;enable writes to all planes

        cld                             ;restore default direction flag

        cRet    vAlignedSrcCopy         ;done
584:
585:
;***********************************************************************;
;
; copy_rect
;
; Copies the rectangle described by prcldest/pptlsrc/icopydir.
;
; Works out the left and right edge clip masks and the whole byte count,
; records which of left edge / whole bytes / right edge take part in the
; copy, then jumps to the set-up routine for the requested copy direction.
;
; Sets:  jLeftMask, jRightMask, culWholeBytesWidth, ulLeftEdgeAdjust,
;        ulLWRType
; Uses:  EAX, EBX, ECX, ESI, EDI, flags
;
;***********************************************************************;

        align   4
copy_rect:

; Set up masks and whole bytes count, and build left/whole/right index
; indicating which of those parts are involved in the copy.

        mov     edi,prcldest            ;point to rectangle to copy

        mov     ebx,[edi].xRight        ;right edge of copy (non-inclusive)
        mov     ecx,ebx
        and     ecx,0111b               ;intrabyte address of right edge
        mov     ah,jRightMaskTable[ecx] ;AH = right edge mask

        mov     esi,[edi].xLeft         ;left edge of copy (inclusive)
        sub     ebx,esi                 ;width in pixels of copy

        and     esi,0111b               ;intrabyte address of left edge
        mov     al,jLeftMaskTable[esi]  ;AL = left edge mask

        dec     ebx                     ;make inclusive on right
        add     ebx,esi                 ;inclusive width, starting counting at
                                        ; the beginning of the left edge byte
        shr     ebx,3                   ;width of copy in bytes touched - 1
        jnz     short more_than_1_byte  ;more than 1 byte is involved

; Only one byte will be affected. Combine first/last masks.

        and     al,ah                   ;we'll use first byte mask only
        xor     ah,ah                   ;want last byte mask to be 0 to
                                        ; indicate right edge not involved
        inc     ebx                     ;so there's one count to subtract below
                                        ; if this isn't a whole edge byte
more_than_1_byte:

; If all pixels in the left edge are altered, combine the first byte into the
; whole byte count, because we can handle solid edge bytes faster as part of
; the whole bytes. Ditto for the right edge.
;
; ECX accumulates the involvement flags: bit 0 = partial left edge and
; bit 6 = partial right edge. The ADC/ROL pair at save_masks then shifts
; these into the final ulLWRType layout (bit 2 = left, bit 1 = whole,
; bit 0 = right).

        sub     ecx,ecx                 ;edge whole-status accumulator
        cmp     al,-1                   ;is left edge a whole byte or partial?
                                        ; (CF set if mask is below 0ffh)
        adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
        sub     ebx,ecx                 ;if left edge partial, deduct it from
                                        ; the whole bytes count
        mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
                                        ; it's partial when pointing to the
                                        ; whole bytes
        and     ah,ah                   ;is right edge mask 0, meaning this
                                        ; copy is only 1 byte wide?
        jz      short save_masks        ;yes, no need to do anything
        or      ecx,40h                 ;assume there's a partial right edge
        cmp     ah,-1                   ;is right edge a whole byte or partial?
        jnz     short save_masks        ;partial
        inc     ebx                     ;if right edge whole, include it in the
                                        ; whole bytes count
        and     ecx,not 40h             ;there's no partial right edge
save_masks:
        cmp     ebx,1                   ;do we have any whole bytes?
        cmc                             ;CF set if whole byte count > 0
        adc     ecx,ecx                 ;shift flags left one place and bring
                                        ; the whole bytes flag in as bit 0
                                        ; (left -> bit 1, right -> bit 7)
        rol     cl,1                    ;align the left/whole/right bits: left
                                        ; -> bit 2, whole -> bit 1, and right
                                        ; wraps around from bit 7 to bit 0
        mov     ulLWRType,ecx           ;save left/whole/right status

        mov     byte ptr jLeftMask,al   ;save left and right clip masks
        mov     byte ptr jRightMask,ah
        mov     culWholeBytesWidth,ebx  ;save # of whole bytes

; Copy the rectangle in the specified direction.

        mov     eax,icopydir
        jmp     SetUpForCopyDirection[eax*4]
667:
668:
669: ;***********************************************************************;
670: ;
671: ; The following routines set up to handle the four possible copy
672: ; directions.
673: ;
674: ;***********************************************************************;
675:
676:
;-----------------------------------------------------------------------;
; left_to_right_top_to_bottom
;
; Set-up for copies that run left to right within a scan and from the top
; scan down. Fills in the scan stepping values, the two thread pointers,
; the bank justification, the first/last scan numbers, and the bitmap
; offsets of the left edge, whole bytes, and right edge for both dest and
; source, then jumps to the top-to-bottom bank loop for the adapter type.
;-----------------------------------------------------------------------;

        align   4
left_to_right_top_to_bottom:

        cld                             ;string instructions count upward

        mov     ulCurrentJustification,JustifyTop
                                        ;banks are mapped top-justified when
                                        ; copying top to bottom

; Scan-to-scan stepping.

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_lNextScan
        mov     ulNextScan,eax          ;positive: advance down the bitmap
        sub     eax,culWholeBytesWidth
        mov     ulWholeScanDelta,eax    ;from the end of one whole bytes run
                                        ; to the start of the next scan's run

; Pick the threads for this combination of parts, direction, and adapter.

        mov     esi,ulLWRType           ;left/whole/right involvement bits
        or      esi,LEFT_TO_RIGHT_FIELD_SET ;select the left-to-right entries
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;thread used when no buffer is needed
        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;add adapter type to the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;thread used when the temp buffer
                                        ; is needed

; Destination: scan range and offsets of the three parts.

        mov     esi,prcldest
        mov     eax,[esi].yBottom
        mov     ulLastDestScan,eax      ;stop at the bottom of the dest rect
        mov     eax,[esi].yTop
        mov     ulCurrentDestScan,eax   ;begin at the top of the dest rect
        mul     ulNextScan              ;bitmap offset of the top dest scan
        mov     edx,[esi].xLeft
        shr     edx,3                   ;pixel X -> byte X
        add     eax,edx                 ;bitmap offset of the first dest byte
        mov     ulLeftEdgeDest,eax      ;the left edge lives in that byte
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte when it is
                                        ; partial; a whole left byte is counted
                                        ; among the whole bytes instead
        mov     ulWholeBytesDest,eax    ;first (leftmost) whole dest byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeDest,eax     ;the right edge follows the whole bytes

; Source: starting scan and offsets of the three parts.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        mov     ulCurrentSrcScan,eax    ;begin at the top of the source rect
        mul     ulNextScan              ;bitmap offset of the top source scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;pixel X -> byte X
        add     eax,edx                 ;bitmap offset of the first source byte
        mov     ulLeftEdgeSrc,eax       ;the left edge lives in that byte
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte when it is
                                        ; partial
        mov     ulWholeBytesSrc,eax     ;first (leftmost) whole source byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeSrc,eax      ;the right edge follows the whole bytes

; Go to the top-to-bottom bank enumeration loop for this adapter type.

        mov     eax,ulAdapterType
        jmp     TopToBottomLoopTable[eax*4]
743:
744:
;-----------------------------------------------------------------------;
; right_to_left_top_to_bottom
;
; Set-up for copies that run right to left within a scan and from the top
; scan down. Same job as the left-to-right version, except that the
; direction flag is set, the right-to-left threads are selected, and the
; whole bytes offsets point at the last (rightmost) whole byte.
;-----------------------------------------------------------------------;

        align   4
right_to_left_top_to_bottom:

        std                             ;string instructions count downward

        mov     ulCurrentJustification,JustifyTop
                                        ;banks are mapped top-justified when
                                        ; copying top to bottom

; Scan-to-scan stepping.

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_lNextScan
        mov     ulNextScan,eax          ;positive: advance down the bitmap
        add     eax,culWholeBytesWidth
        mov     ulWholeScanDelta,eax    ;the run is walked leftward while the
                                        ; scans advance forward, so the width
                                        ; is added back rather than deducted

; Pick the threads for this combination of parts, direction, and adapter.

        mov     esi,ulLWRType           ;left/whole/right involvement bits;
                                        ; the left-to-right bit stays clear so
                                        ; the right-to-left entries are used
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;thread used when no buffer is needed
        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;add adapter type to the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;thread used when the temp buffer
                                        ; is needed

; Destination: scan range and offsets of the three parts.

        mov     esi,prcldest
        mov     eax,[esi].yBottom
        mov     ulLastDestScan,eax      ;stop at the bottom of the dest rect
        mov     eax,[esi].yTop
        mov     ulCurrentDestScan,eax   ;begin at the top of the dest rect
        mul     ulNextScan              ;bitmap offset of the top dest scan
        mov     edx,[esi].xLeft
        shr     edx,3                   ;pixel X -> byte X
        add     eax,edx                 ;bitmap offset of the first dest byte
        mov     ulLeftEdgeDest,eax      ;the left edge lives in that byte
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte when it is
                                        ; partial; a whole left byte is counted
                                        ; among the whole bytes instead
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeDest,eax     ;the right edge follows the whole bytes
        dec     eax
        mov     ulWholeBytesDest,eax    ;start at the last (rightmost) whole
                                        ; dest byte

; Source: starting scan and offsets of the three parts.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        mov     ulCurrentSrcScan,eax    ;begin at the top of the source rect
        mul     ulNextScan              ;bitmap offset of the top source scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;pixel X -> byte X
        add     eax,edx                 ;bitmap offset of the first source byte
        mov     ulLeftEdgeSrc,eax       ;the left edge lives in that byte
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte when it is
                                        ; partial
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeSrc,eax      ;the right edge follows the whole bytes
        dec     eax
        mov     ulWholeBytesSrc,eax     ;start at the last (rightmost) whole
                                        ; source byte

; Go to the top-to-bottom bank enumeration loop for this adapter type.

        mov     eax,ulAdapterType
        jmp     TopToBottomLoopTable[eax*4]
815:
816:
817:
;-----------------------------------------------------------------------;
; left_to_right_bottom_to_top
;
; Set-up for copies that run left to right within a scan and from the
; bottom scan up. ulNextScan is stored negated so that stepping by it moves
; up the bitmap; the starting offsets are therefore computed with the
; surface's own (un-negated) scan width.
;-----------------------------------------------------------------------;

        align   4
left_to_right_bottom_to_top:

        cld                             ;string instructions count upward

        mov     ulCurrentJustification,JustifyBottom
                                        ;banks are mapped bottom-justified
                                        ; when copying bottom to top

; Scan-to-scan stepping.

        mov     edi,pdsurf
        mov     eax,[edi].dsurf_lNextScan
        neg     eax
        mov     ulNextScan,eax          ;negative: advance up the bitmap
        sub     eax,culWholeBytesWidth
        mov     ulWholeScanDelta,eax    ;the run is walked rightward while the
                                        ; scans advance backward, so the width
                                        ; is deducted from the negative step

; Pick the threads for this combination of parts, direction, and adapter.

        mov     esi,ulLWRType           ;left/whole/right involvement bits
        or      esi,LEFT_TO_RIGHT_FIELD_SET ;select the left-to-right entries
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;thread used when no buffer is needed
        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;add adapter type to the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;thread used when the temp buffer
                                        ; is needed

; Destination: scan range and offsets of the three parts.

        mov     esi,prcldest
        mov     edx,[esi].yTop
        mov     ulLastDestScan,edx      ;stop at the top of the dest rect
        mov     eax,[esi].yBottom
        dec     eax                     ;yBottom is non-inclusive, so this is
                                        ; the first scan to copy
        sub     edx,eax                 ;EDX = -(rect height - 1)
        push    edx                     ;keep it for the source; MUL below
                                        ; overwrites EDX
        mov     ulCurrentDestScan,eax   ;begin at the bottom of the dest rect
        mul     [edi].dsurf_lNextScan   ;bitmap offset of the bottom dest scan
        mov     edx,[esi].xLeft
        shr     edx,3                   ;pixel X -> byte X
        add     eax,edx                 ;bitmap offset of the first dest byte
        mov     ulLeftEdgeDest,eax      ;the left edge lives in that byte
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte when it is
                                        ; partial; a whole left byte is counted
                                        ; among the whole bytes instead
        mov     ulWholeBytesDest,eax    ;first (leftmost) whole dest byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeDest,eax     ;the right edge follows the whole bytes

; Source: starting scan and offsets of the three parts.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        pop     edx                     ;EDX = -(rect height - 1)
        sub     eax,edx                 ;source top + (height - 1) = bottom
                                        ; source scan (inclusive), the first
                                        ; scan to copy from
        mov     ulCurrentSrcScan,eax    ;begin at the bottom of the source rect
        mul     [edi].dsurf_lNextScan   ;bitmap offset of the bottom source
                                        ; scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;pixel X -> byte X
        add     eax,edx                 ;bitmap offset of the first source byte
        mov     ulLeftEdgeSrc,eax       ;the left edge lives in that byte
        add     eax,ulLeftEdgeAdjust    ;skip the left edge byte when it is
                                        ; partial
        mov     ulWholeBytesSrc,eax     ;first (leftmost) whole source byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeSrc,eax      ;the right edge follows the whole bytes

; Go to the bottom-to-top bank enumeration loop for this adapter type.

        mov     eax,ulAdapterType
        jmp     BottomToTopLoopTable[eax*4]
896:
897:
898: ;-----------------------------------------------------------------------;
899: ; Set-up code for right-to-left, bottom-to-top copies.
900: ;-----------------------------------------------------------------------;
901:
902: align 4
903: right_to_left_bottom_to_top:
904: ; Sets up scan deltas, threading, and start offsets, then picks a banking loop.
905: std ;we'll copy right to left
906:
907: mov edi,pdsurf
908: mov eax,[edi].dsurf_lNextScan
909: neg eax ;negative scan width, because we step upward
910: mov ulNextScan,eax ;copy bottom to top
911: add eax,culWholeBytesWidth ;offset from end of one whole byte scan
912: mov ulWholeScanDelta,eax ; to start of next
913: mov esi,ulLWRType ;3-bit flag field for left, whole, and
914: ; right involvement in operation
915: ;leave left-to-right field cleared, so
916: ; we look up right-to-left entries
917: mov eax,MasterThreadTable[esi*4]
918: mov pCurrentThread,eax ;threading when no buffering is needed
919: mov edx,ulAdapterType
920: shl edx,ADAPTER_FIELD_SHIFT
921: or esi,edx ;factor adapter type into the index
922: mov eax,MasterThreadTableViaBuffer[esi*4]
923: mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
924:
925: mov ulCurrentJustification,JustifyBottom ;copy bottom to top
926: ; Destination: first scan copied is yBottom-1, last scan copied is yTop.
927: mov esi,prcldest
928: mov edx,[esi].yTop
929: mov ulLastDestScan,edx ;end at top of dest copy rect
930: mov eax,[esi].yBottom
931: dec eax ;rectangle definition is non-inclusive,
932: ; so advance to first scan we'll copy
933: sub edx,eax ;-(offset from rect top to bottom)
934: push edx ;remember for use with source (the MUL below overwrites EDX)
935: mov ulCurrentDestScan,eax ;start at bottom of dest copy rect
936: mul [edi].dsurf_lNextScan ;offset in bitmap of bottom dest rect
937: ; scan (first scan to which to copy)
938: mov edx,[esi].xLeft
939: shr edx,3 ;byte X address
940: add eax,edx ;offset in bitmap of first dest byte
941: mov ulLeftEdgeDest,eax ;that's where the left dest edge is
942: add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
943: ; byte, unless the left edge is a whole
944: ; byte and is thus part of the whole
945: ; bytes already
946: add eax,culWholeBytesWidth ;point to the right edge
947: mov ulRightEdgeDest,eax ;where the right dest edge starts
948: dec eax ;back up to the last whole byte
949: mov ulWholeBytesDest,eax ;whole dest bytes start at the rightmost whole byte
950: ; Source: same calculation, starting from the source upper-left corner.
951: mov esi,pptlsrc
952: mov eax,[esi].ptl_y
953: pop edx ;retrieve -(offset from top to bottom)
954: sub eax,edx ;advance to bottom of source rect
955: ; (inclusive; this is first scan from
956: ; which to copy)
957: mov ulCurrentSrcScan,eax ;start at bottom of source copy rect
958: mul [edi].dsurf_lNextScan ;offset in bitmap of bottom source rect
959: ; scan
960: mov edx,[esi].ptl_x
961: shr edx,3 ;byte X address
962: add eax,edx ;offset in bitmap of first source byte
963: mov ulLeftEdgeSrc,eax ;that's where the left src edge is
964: add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
965: ; byte, unless the left edge is a whole
966: ; byte and is thus part of the whole
967: ; bytes already
968: add eax,culWholeBytesWidth ;point to the right edge
969: mov ulRightEdgeSrc,eax ;where the right src edge starts
970: dec eax ;back up to the last whole byte
971: mov ulWholeBytesSrc,eax ;whole src bytes start at the rightmost whole byte
972:
973: ; Branch to the appropriate bottom-to-top bank enumeration loop.
974:
975: mov eax,ulAdapterType
976: jmp BottomToTopLoopTable[eax*4]
977:
978:
979: ;***********************************************************************;
980: ;
981: ; The following routines are the banking loops.
982: ;
983: ;***********************************************************************;
984:
985:
986: ;-----------------------------------------------------------------------;
987: ; Banking for 2 R/W and unbanked adapters, top to bottom.
988: ;-----------------------------------------------------------------------;
989: align 4
990: top_to_bottom_2RW:
991: ; Each pass copies the scans that fit in both the current source and dest banks.
992: ; We're going top to bottom. Map in the source and dest, top-justified.
993:
994: mov ebx,pdsurf
995: mov edx,ulCurrentSrcScan
996: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
997: ; current source bank?
998: jl short top_2RW_map_init_src_bank ;yes, map in proper bank
999: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
1000: ; current source bank?
1001: jl short top_2RW_init_src_bank_mapped
1002: ;no, proper bank already mapped
1003: top_2RW_map_init_src_bank:
1004:
1005: ; Map bank containing the top source scan line into source window.
1006: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1007:
1008: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1009: <ebx,edx,JustifyTop,MapSourceBank>
1010:
1011: top_2RW_init_src_bank_mapped:
1012:
1013: mov edx,ulCurrentDestScan
1014: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
1015: ; current dest bank?
1016: jl short top_2RW_map_init_dest_bank ;yes, map in proper bank
1017: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
1018: ; current dest bank?
1019: jl short top_2RW_init_dest_bank_mapped
1020: ;no, proper bank already mapped
1021: top_2RW_map_init_dest_bank:
1022:
1023: ; Map bank containing the top dest scan line into dest window.
1024: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1025:
1026: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1027: <ebx,edx,JustifyTop,MapDestBank>
1028:
1029: top_2RW_init_dest_bank_mapped:
1030:
1031: ; Bank-by-bank top-to-bottom copy loop.
1032:
1033: top_2RW_bank_loop:
1034:
1035: ; Decide how far we can go before we run out of bank or rectangle to copy.
1036:
1037: mov edx,ulLastDestScan
1038: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom
1039: jl short @F ;copy rectangle bottom is in this bank
1040: mov edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
1041: ; of bank, at least
1042: @@:
1043: sub edx,ulCurrentDestScan ;# of scans we can and want to do in
1044: ; the dest bank
1045: mov eax,[ebx].dsurf_rcl2WindowClipS.yBottom
1046: sub eax,ulCurrentSrcScan ;# of scans we can do in the src bank
1047: ; Block height is the smaller of the two counts (unsigned compare).
1048: cmp edx,eax
1049: jb short @F ;source bank isn't limiting
1050: mov edx,eax ;source bank is limiting
1051: @@:
1052: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1053:
1054: ; We're ready to copy this block.
1055:
1056: THREAD_AND_START
1057:
1058: ; Any more scans to copy?
1059:
1060: mov eax,ulCurrentDestScan
1061: mov esi,ulBlockHeight ;ESI keeps the block height for the source update below
1062: add eax,esi ;we've copied to dest up to here
1063: cmp ulLastDestScan,eax ;are we at the dest rect bottom?
1064: jz short top_2RW_done ;yes, we're done
1065: mov ulCurrentDestScan,eax
1066:
1067: ; Now advance either or both banks, as needed.
1068:
1069: mov ebx,pdsurf
1070: cmp eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
1071: ; current dest bank?
1072: jl short top_2RW_dest_bank_mapped ;no, proper bank still mapped
1073:
1074: ; Map bank containing the current dest scan line into dest window.
1075: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1076:
1077: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1078: <ebx,eax,JustifyTop,MapDestBank>
1079:
1080: top_2RW_dest_bank_mapped:
1081:
1082: add esi,ulCurrentSrcScan ;we've copied from source up to here
1083: mov ulCurrentSrcScan,esi
1084:
1085: cmp esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
1086: ; current src bank?
1087: jl short top_2RW_src_bank_mapped ;no, proper bank still mapped
1088:
1089: ; Map bank containing the current source scan line into source window.
1090: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1091:
1092: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1093: <ebx,esi,JustifyTop,MapSourceBank>
1094:
1095: top_2RW_src_bank_mapped:
1096:
1097: jmp top_2RW_bank_loop
1098:
1099: top_2RW_done:
1100: PLAIN_RET
1101:
1102:
1103: ;-----------------------------------------------------------------------;
1104: ; Banking for 2 R/W and unbanked adapters, bottom to top.
1105: ;-----------------------------------------------------------------------;
1106: align 4
1107: bottom_to_top_2RW:
1108: ; Each pass copies the scans that fit in both the current source and dest banks.
1109: ; We're going bottom to top. Map in the source and dest, bottom-justified.
1110:
1111: mov ebx,pdsurf
1112: mov edx,ulCurrentSrcScan
1113: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
1114: ; current source bank?
1115: jl short bot_2RW_map_init_src_bank ;yes, map in proper bank
1116: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
1117: ; than current src bank?
1118: jl short bot_2RW_init_src_bank_mapped
1119: ;no, proper bank already mapped
1120: bot_2RW_map_init_src_bank:
1121:
1122: ; Map bank containing the bottom source scan line into source window.
1123: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1124:
1125: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1126: <ebx,edx,JustifyBottom,MapSourceBank>
1127:
1128: bot_2RW_init_src_bank_mapped:
1129:
1130: mov edx,ulCurrentDestScan
1131: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
1132: ; current dest bank?
1133: jl short bot_2RW_map_init_dest_bank ;yes, map in proper bank
1134: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
1135: ; than current dst bank?
1136: jl short bot_2RW_init_dest_bank_mapped
1137: ;no, proper bank already mapped
1138: bot_2RW_map_init_dest_bank:
1139:
1140: ; Map bank containing the bottom dest scan line into dest window.
1141: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1142:
1143: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1144: <ebx,edx,JustifyBottom,MapDestBank>
1145:
1146: bot_2RW_init_dest_bank_mapped:
1147:
1148: ; Bank-by-bank bottom-to-top copy loop.
1149:
1150: bot_2RW_bank_loop:
1151:
1152: ; Decide how far we can go before we run out of bank or rectangle to copy.
1153:
1154: mov edx,ulLastDestScan
1155: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop
1156: jg short @F ;copy rectangle top is in this bank
1157: mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to top
1158: ; of bank, at least
1159: @@:
1160: neg edx
1161: add edx,ulCurrentDestScan ;# of scans we can and want to do in
1162: inc edx ; the dest bank (both ends inclusive)
1163:
1164: mov eax,ulCurrentSrcScan
1165: sub eax,[ebx].dsurf_rcl2WindowClipS.yTop
1166: inc eax ;# of scans we can do in the src bank
1167: ; Block height is the smaller of the two counts (unsigned compare).
1168: cmp edx,eax
1169: jb short @F ;source bank isn't limiting
1170: mov edx,eax ;source bank is limiting
1171: @@:
1172: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1173:
1174: ; We're ready to copy this block.
1175:
1176: THREAD_AND_START
1177:
1178: ; Any more scans to copy?
1179:
1180: mov eax,ulCurrentDestScan
1181: mov esi,ulBlockHeight ;ESI keeps the block height for the source update below
1182: sub eax,esi ;we've copied to dest up to here
1183: cmp ulLastDestScan,eax ;are we past the dest rect top?
1184: jg short bot_2RW_done ;yes, we're done
1185: mov ulCurrentDestScan,eax
1186:
1187: ; Now advance either or both banks, as needed.
1188:
1189: mov ebx,pdsurf
1190: cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
1191: ; current dest bank?
1192: jge short bot_2RW_dest_bank_mapped ;no, proper bank still mapped
1193:
1194: ; Map bank containing the current dest scan line into dest window.
1195: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1196:
1197: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1198: <ebx,eax,JustifyBottom,MapDestBank>
1199:
1200: bot_2RW_dest_bank_mapped:
1201:
1202: mov eax,ulCurrentSrcScan
1203: sub eax,esi ;we've copied from source up to here
1204: mov ulCurrentSrcScan,eax
1205:
1206: cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
1207: ; current src bank?
1208: jge short bot_2RW_src_bank_mapped ;no, proper bank still mapped
1209:
1210: ; Map bank containing the current source scan line into source window.
1211: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1212:
1213: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1214: <ebx,eax,JustifyBottom,MapSourceBank>
1215:
1216: bot_2RW_src_bank_mapped:
1217:
1218: jmp bot_2RW_bank_loop
1219:
1220: bot_2RW_done:
1221: PLAIN_RET
1222:
1223:
1224: ;-----------------------------------------------------------------------;
1225: ; Banking for 1R/1W adapters, top to bottom.
1226: ;-----------------------------------------------------------------------;
1227: align 4
1228: top_to_bottom_1R1W:
1229: ; Each pass copies the scans that fit in both the current source and dest banks.
1230: ; We're going top to bottom. Map in the source and dest, top-justified.
1231:
1232: mov ebx,pdsurf
1233: mov edx,ulCurrentSrcScan
1234: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
1235: ; current source bank?
1236: jl short top_1R1W_map_init_src_bank ;yes, map in proper bank
1237: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
1238: ; current source bank?
1239: jl short top_1R1W_init_src_bank_mapped
1240: ;no, proper bank already mapped
1241: top_1R1W_map_init_src_bank:
1242:
1243: ; Map bank containing the top source scan line into source window.
1244: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1245:
1246: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1247: <ebx,edx,JustifyTop,MapSourceBank>
1248:
1249: top_1R1W_init_src_bank_mapped:
1250:
1251: mov edx,ulCurrentDestScan
1252: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
1253: ; current dest bank?
1254: jl short top_1R1W_map_init_dest_bank ;yes, map in proper bank
1255: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
1256: ; current dest bank?
1257: jl short top_1R1W_init_dest_bank_mapped
1258: ;no, proper bank already mapped
1259: top_1R1W_map_init_dest_bank:
1260:
1261: ; Map bank containing the top dest scan line into dest window.
1262: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1263:
1264: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1265: <ebx,edx,JustifyTop,MapDestBank>
1266:
1267: top_1R1W_init_dest_bank_mapped:
1268:
1269: ; Bank-by-bank top-to-bottom copy loop.
1270:
1271: top_1R1W_bank_loop:
1272:
1273: ; Decide how far we can go before we run out of bank or rectangle to copy.
1274:
1275: mov edx,ulLastDestScan
1276: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom
1277: jl short @F ;copy rectangle bottom is in this bank
1278: mov edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
1279: ; of bank, at least
1280: @@:
1281: sub edx,ulCurrentDestScan ;# of scans we can and want to do in
1282: ; the dest bank
1283: mov eax,[ebx].dsurf_rcl2WindowClipS.yBottom
1284: sub eax,ulCurrentSrcScan ;# of scans we can do in the src bank
1285: ; Block height is the smaller of the two counts (unsigned compare).
1286: cmp edx,eax
1287: jb short @F ;source bank isn't limiting
1288: mov edx,eax ;source bank is limiting
1289: @@:
1290: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1291:
1292: ; We're ready to copy this block.
1293: ; Select different threading, depending on whether the source and destination
1294: ; are currently in the same bank; we can do edges faster if they are.
1295: ; The two dwords of dsurf_ulWindowBank are compared for equality.
1296: mov eax,[ebx].dsurf_ulWindowBank
1297: cmp eax,[ebx].dsurf_ulWindowBank[4]
1298: jz short top_1R1W_copy_same_bank
1299:
1300: ; Source and dest are currently in different banks, must go through temp buffer.
1301:
1302: THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans
1303:
1304: ; Source and dest are currently in the same bank.
1305:
1306: align 4
1307: top_1R1W_copy_same_bank:
1308: THREAD_AND_START
1309:
1310: ; Any more scans to copy?
1311:
1312: top_1R1W_check_more_scans:
1313:
1314: mov eax,ulCurrentDestScan
1315: mov esi,ulBlockHeight ;ESI keeps the block height for the source update below
1316: add eax,esi ;we've copied to dest up to here
1317: cmp ulLastDestScan,eax ;are we at the dest rect bottom?
1318: jz short top_1R1W_done ;yes, we're done
1319: mov ulCurrentDestScan,eax
1320:
1321: ; Now advance either or both banks, as needed.
1322:
1323: mov ebx,pdsurf
1324: cmp eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
1325: ; current dest bank?
1326: jl short top_1R1W_dest_bank_mapped ;no, proper bank still mapped
1327:
1328: ; Map bank containing the current dest scan line into dest window.
1329: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1330:
1331: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1332: <ebx,eax,JustifyTop,MapDestBank>
1333:
1334: top_1R1W_dest_bank_mapped:
1335:
1336: add esi,ulCurrentSrcScan ;we've copied from source up to here
1337: mov ulCurrentSrcScan,esi
1338:
1339: cmp esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
1340: ; current src bank?
1341: jl short top_1R1W_src_bank_mapped ;no, proper bank still mapped
1342:
1343: ; Map bank containing the current source scan line into source window.
1344: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1345:
1346: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1347: <ebx,esi,JustifyTop,MapSourceBank>
1348:
1349: top_1R1W_src_bank_mapped:
1350:
1351: jmp top_1R1W_bank_loop
1352:
1353: top_1R1W_done:
1354: PLAIN_RET
1355:
1356:
1357: ;-----------------------------------------------------------------------;
1358: ; Banking for 1R/1W adapters, bottom to top.
1359: ;-----------------------------------------------------------------------;
1360: align 4
1361: bottom_to_top_1R1W:
1362: ; Each pass copies the scans that fit in both the current source and dest banks.
1363: ; We're going bottom to top. Map in the source and dest, bottom-justified.
1364:
1365: mov ebx,pdsurf
1366: mov edx,ulCurrentSrcScan
1367: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
1368: ; current source bank?
1369: jl short bot_1R1W_map_init_src_bank ;yes, map in proper bank
1370: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
1371: ; than current src bank?
1372: jl short bot_1R1W_init_src_bank_mapped
1373: ;no, proper bank already mapped
1374: bot_1R1W_map_init_src_bank:
1375:
1376: ; Map bank containing the bottom source scan line into source window.
1377: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1378:
1379: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1380: <ebx,edx,JustifyBottom,MapSourceBank>
1381:
1382: bot_1R1W_init_src_bank_mapped:
1383:
1384: mov edx,ulCurrentDestScan
1385: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
1386: ; current dest bank?
1387: jl short bot_1R1W_map_init_dest_bank ;yes, map in proper bank
1388: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
1389: ; than current dst bank?
1390: jl short bot_1R1W_init_dest_bank_mapped
1391: ;no, proper bank already mapped
1392: bot_1R1W_map_init_dest_bank:
1393:
1394: ; Map bank containing the bottom dest scan line into dest window.
1395: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1396:
1397: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1398: <ebx,edx,JustifyBottom,MapDestBank>
1399:
1400: bot_1R1W_init_dest_bank_mapped:
1401:
1402: ; Bank-by-bank bottom-to-top copy loop.
1403:
1404: bot_1R1W_bank_loop:
1405:
1406: ; Decide how far we can go before we run out of bank or rectangle to copy.
1407:
1408: mov edx,ulLastDestScan
1409: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop
1410: jg short @F ;copy rectangle top is in this bank
1411: mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to top
1412: ; of bank, at least
1413: @@:
1414: neg edx
1415: add edx,ulCurrentDestScan ;# of scans we can and want to do in
1416: inc edx ; the dest bank (both ends inclusive)
1417:
1418: mov eax,ulCurrentSrcScan
1419: sub eax,[ebx].dsurf_rcl2WindowClipS.yTop
1420: inc eax ;# of scans we can do in the src bank
1421: ; Block height is the smaller of the two counts (unsigned compare).
1422: cmp edx,eax
1423: jb short @F ;source bank isn't limiting
1424: mov edx,eax ;source bank is limiting
1425: @@:
1426: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1427:
1428: ; We're ready to copy this block.
1429: ; Select different threading, depending on whether the source and destination
1430: ; are currently in the same bank; we can do edges faster if they are.
1431: ; NOTE(review): only the low bytes are compared here; top_to_bottom_1R1W compares full dwords - confirm banks fit in a byte.
1432: mov al,byte ptr [ebx].dsurf_ulWindowBank
1433: cmp al,byte ptr [ebx].dsurf_ulWindowBank[4]
1434: jz short bot_1R1W_copy_same_bank
1435:
1436: ; Source and dest are currently in different banks, must go through temp buffer.
1437:
1438: THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans
1439:
1440: ; Source and dest are currently in the same bank.
1441:
1442: align 4
1443: bot_1R1W_copy_same_bank:
1444: THREAD_AND_START
1445:
1446: ; Any more scans to copy?
1447:
1448: align 4
1449: bot_1R1W_check_more_scans:
1450:
1451: mov eax,ulCurrentDestScan
1452: mov esi,ulBlockHeight ;ESI keeps the block height for the source update below
1453: sub eax,esi ;we've copied to dest up to here
1454: cmp ulLastDestScan,eax ;are we past the dest rect top?
1455: jg short bot_1R1W_done ;yes, we're done
1456: mov ulCurrentDestScan,eax
1457:
1458: ; Now advance either or both banks, as needed.
1459:
1460: mov ebx,pdsurf
1461: cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
1462: ; current dest bank?
1463: jge short bot_1R1W_dest_bank_mapped ;no, proper bank still mapped
1464:
1465: ; Map bank containing the current dest scan line into dest window.
1466: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1467:
1468: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1469: <ebx,eax,JustifyBottom,MapDestBank>
1470:
1471: bot_1R1W_dest_bank_mapped:
1472:
1473: mov eax,ulCurrentSrcScan
1474: sub eax,esi ;we've copied from source up to here
1475: mov ulCurrentSrcScan,eax
1476:
1477: cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
1478: ; current src bank?
1479: jge short bot_1R1W_src_bank_mapped ;no, proper bank still mapped
1480:
1481: ; Map bank containing the current source scan line into source window.
1482: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1483:
1484: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1485: <ebx,eax,JustifyBottom,MapSourceBank>
1486:
1487: bot_1R1W_src_bank_mapped:
1488:
1489: jmp bot_1R1W_bank_loop
1490:
1491: bot_1R1W_done:
1492: PLAIN_RET
1493:
1494:
1495: ;-----------------------------------------------------------------------;
1496: ; Banking for 1 R/W adapters, top to bottom.
1497: ;-----------------------------------------------------------------------;
1498: align 4
1499: top_to_bottom_1RW:
1500: ; One window serves both source and dest, so the source bank is re-checked on every pass.
1501: ; We're going top to bottom. Map in the dest, top-justified.
1502:
1503: mov ebx,pdsurf
1504: mov esi,ulCurrentDestScan
1505: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest top less than
1506: ; current bank?
1507: jl short top_1RW_map_init_dest_bank ;yes, map in proper bank
1508: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest top greater than
1509: ; current bank?
1510: jl short top_1RW_init_dest_bank_mapped
1511: ;no, proper bank already mapped
1512: top_1RW_map_init_dest_bank:
1513:
1514: ; Map bank containing the top dest scan line into the window.
1515: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1516:
1517: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
1518:
1519: top_1RW_init_dest_bank_mapped:
1520:
1521: ; Bank-by-bank top-to-bottom copy loop.
1522:
1523: top_1RW_bank_loop:
1524: ; At this point ESI holds the current dest scan (same value as ulCurrentDestScan).
1525: ; Decide how far we can go before we run out of bank or rectangle to copy.
1526:
1527: mov edi,ulLastDestScan
1528: cmp edi,[ebx].dsurf_rcl1WindowClip.yBottom
1529: jl short @F ;copy rectangle bottom is in this bank
1530: mov edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end
1531: ; of bank, at least
1532: @@:
1533: sub edi,esi ;# of scans we can and want to do in the dest bank
1534:
1535: ; Now make sure source is mapped in. This is the condition the copying routines
1536: ; expect, and we need to figure out how far we can go in the source.
1537:
1538: sub edx,edx ;assume source and dest are in the same
1539: ; bank
1540: mov esi,ulCurrentSrcScan
1541: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
1542: ; current bank?
1543: jl short top_1RW_map_src_Bank ;yes, must map in
1544: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
1545: ; current bank?
1546: jl short top_1RW_src_bank_mapped ;no, proper bank still mapped
1547:
1548: top_1RW_map_src_Bank:
1549:
1550: ; Map bank containing the current source scan line into the window.
1551: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1552:
1553: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
1554:
1555: mov edx,1 ;mark that source and dest are not in
1556: ; the same bank
1557: top_1RW_src_bank_mapped:
1558:
1559: mov eax,[ebx].dsurf_rcl1WindowClip.yBottom
1560: sub eax,esi ;# of scans we can do in the src bank
1561: ; Block height is the smaller of the two counts (unsigned compare).
1562: cmp edi,eax
1563: jb short @F ;source bank isn't limiting
1564: mov edi,eax ;source bank is limiting
1565: @@:
1566: mov ulBlockHeight,edi ;# of scans we'll do in this bank
1567:
1568: ; We're ready to copy this block.
1569: ; Select different threading, depending on whether the source and destination
1570: ; are currently in the same bank; we can do edges faster if they are.
1571: ; EDX is 0 if the source was already mapped, 1 if we had to map it above.
1572: and edx,edx
1573: jz short top_1RW_copy_same_bank
1574:
1575: ; Source and dest are currently in different banks, must go through temp buffer.
1576:
1577: THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans
1578:
1579: ; Source and dest are currently in the same bank.
1580:
1581: align 4
1582: top_1RW_copy_same_bank:
1583: THREAD_AND_START
1584:
1585: ; Any more scans to copy?
1586:
1587: top_1RW_check_more_scans:
1588:
1589: mov esi,ulCurrentDestScan
1590: mov edi,ulBlockHeight ;EDI keeps the block height for the source update below
1591: add esi,edi ;we've copied to dest up to here
1592: cmp ulLastDestScan,esi ;are we at the dest rect bottom?
1593: jz short top_1RW_done ;yes, we're done
1594: mov ulCurrentDestScan,esi
1595:
1596: ; Now make sure the dest bank is mapped in.
1597:
1598: mov ebx,pdsurf
1599: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
1600: ; current bank?
1601: jl short top_1RW_map_dest_bank ;yes, map in dest bank
1602: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
1603: ; current bank?
1604: jl short top_1RW_dest_bank_mapped ;no, proper bank mapped
1605:
1606: top_1RW_map_dest_bank:
1607:
1608: ; Map bank containing the current dest scan line into the window.
1609: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1610:
1611: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
1612:
1613: top_1RW_dest_bank_mapped:
1614:
1615: add ulCurrentSrcScan,edi ;we've copied from source up to here
1616:
1617: jmp top_1RW_bank_loop
1618:
1619: top_1RW_done:
1620: PLAIN_RET
1621:
1622:
1623: ;-----------------------------------------------------------------------;
1624: ; Banking for 1 R/W adapters, bottom to top.
1625: ;-----------------------------------------------------------------------;
1626: align 4
1627: bottom_to_top_1RW:
1628: ; One window serves both source and dest, so the source bank is re-checked on every pass.
1629: ; We're going bottom to top. Map in the dest, bottom-justified.
1630:
1631: mov ebx,pdsurf
1632: mov esi,ulCurrentDestScan
1633: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest bottom less than
1634: ; current dest bank?
1635: jl short bot_1RW_map_init_dest_bank ;yes, map in proper bank
1636: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest bottom greater
1637: ; than current dst bank?
1638: jl short bot_1RW_init_dest_bank_mapped
1639: ;no, proper bank already mapped
1640: bot_1RW_map_init_dest_bank:
1641:
1642: ; Map bank containing the bottom dest scan line into the window.
1643: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1644:
1645: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
1646:
1647: bot_1RW_init_dest_bank_mapped:
1648:
1649: ; Bank-by-bank bottom-to-top copy loop.
1650:
1651: bot_1RW_bank_loop:
1652: ; At this point ESI holds the current dest scan (same value as ulCurrentDestScan).
1653: ; Decide how far we can go before we run out of bank or rectangle to copy.
1654:
1655: mov edi,ulLastDestScan
1656: cmp edi,[ebx].dsurf_rcl1WindowClip.yTop
1657: jg short @F ;copy rectangle top is in this bank
1658: mov edi,[ebx].dsurf_rcl1WindowClip.yTop ;dest extends to top
1659: ; of bank, at least
1660: @@:
1661: neg edi
1662: add edi,esi ;# of scans we can and want to do in
1663: inc edi ; the dest bank (both ends inclusive)
1664:
1665: ; Now make sure source is mapped in. This is the condition the copying routines
1666: ; expect, and we need to figure out how far we can go in the source.
1667:
1668: sub edx,edx ;assume source and dest are in the same
1669: ; bank
1670: mov esi,ulCurrentSrcScan
1671: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
1672: ; current bank?
1673: jl short bot_1RW_map_src_Bank ;yes, must map in
1674: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
1675: ; current bank?
1676: jl short bot_1RW_src_bank_mapped ;no, proper bank still mapped
1677:
1678: bot_1RW_map_src_Bank:
1679:
1680: ; Map bank containing the current source scan line into the window.
1681: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1682:
1683: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
1684:
1685: mov edx,1 ;mark that source and dest are not in
1686: ; the same bank
1687: bot_1RW_src_bank_mapped:
1688:
1689: sub esi,[ebx].dsurf_rcl1WindowClip.yTop
1690: inc esi ;# of scans we can do in the src bank
1691: ; Block height is the smaller of the two counts (unsigned compare).
1692: cmp edi,esi
1693: jb short @F ;source bank isn't limiting
1694: mov edi,esi ;source bank is limiting
1695: @@:
1696: mov ulBlockHeight,edi ;# of scans we'll do in this bank
1697:
1698: ; We're ready to copy this block.
1699: ; Select different threading, depending on whether the source and destination
1700: ; are currently in the same bank; we can copy much faster if they are.
1701: ; EDX is 0 if the source was already mapped, 1 if we had to map it above.
1702: and edx,edx
1703: jz short bot_1RW_copy_same_bank
1704:
1705: ; Source and dest are currently in different banks, must go through temp buffer.
1706:
1707: THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans
1708:
1709: ; Source and dest are currently in the same bank.
1710:
1711: align 4
1712: bot_1RW_copy_same_bank:
1713: THREAD_AND_START
1714:
1715: ; Any more scans to copy?
1716:
1717: align 4
1718: bot_1RW_check_more_scans:
1719:
1720: mov esi,ulCurrentDestScan
1721: mov edi,ulBlockHeight ;EDI keeps the block height for the source update below
1722: sub esi,edi ;we've copied to dest up to here
1723: cmp ulLastDestScan,esi ;are we past the dest rect top?
1724: jg short bot_1RW_done ;yes, we're done
1725: mov ulCurrentDestScan,esi
1726:
1727: ; Now make sure the dest bank is mapped in.
1728:
1729: mov ebx,pdsurf
1730: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
1731: ; current bank?
1732: jl short bot_1RW_map_dest_bank ;yes, map in dest bank
1733: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
1734: ; current bank?
1735: jl short bot_1RW_dest_bank_mapped ;no, proper bank mapped
1736:
1737: bot_1RW_map_dest_bank:
1738:
1739: ; Map bank containing the current dest scan line into the window.
1740: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1741:
1742: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
1743:
1744: bot_1RW_dest_bank_mapped:
1745:
1746: sub ulCurrentSrcScan,edi ;we've copied from source up to here
1747:
1748: jmp bot_1RW_bank_loop
1749:
1750: bot_1RW_done:
1751: PLAIN_RET
1752:
1753:
1754: ;***********************************************************************;
1755: ;
1756: ; The following routines are the low-level copying routines. They know
1757: ; almost nothing about banks (the routines that copy through a temp
1758: ; buffer know how to switch banks after filling the temp buffer, but
1759: ; that's it). Banking should be taken care of at a higher level.
1760: ;
1761: ;***********************************************************************;
1762:
1763: ;-----------------------------------------------------------------------;
1764: ; Copies a block of solid bytes from the source to the destination via the
1765: ; latches. Can only be used by 2 R/W or 1R/1W window banking, or by
1766: ; unbanked modes, or by 1 R/W adapters when the source and dest are in the
1767: ; same bank. 1 R/W adapters must go through an intermediate local buffer
1768: ; when the source and the destination aren't in the same bank.
1769: ;
1770: ; Input:
1771: ; Direction Flag set for desired direction of copy
1772: ; culWholeBytesWidth = # of bytes to copy across each scan line
1773: ; ulWholeScanDelta = distance to start of next scan from end of current
1774: ; ulBlockHeight = # of scans to copy
1775: ; ulWholeBytesSrc = start source offset in bitmap
1776: ; ulWholeBytesDest = start dest offset in bitmap
1777: ;
1778: ; Output:
1779: ; Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
1780: ; scan processed. EAX, EBX, ECX, EDX, ESI, and EDI are not preserved.
1781: ;-----------------------------------------------------------------------;
1782:
1783: align 4
1784: copy_whole_bytes:
1785:
1786: ; Set the bit mask to disable all bits, so we can copy through the latches.
1787:
1788: mov edx,VGA_BASE + GRAF_ADDR
1789: mov eax,(000h shl 8) + GRAF_BIT_MASK ;AL = GC index, AH = data (0)
1790: out dx,ax ;index and data written with a single word OUT
1791:
1792: ; Set Map Mask to enable writes to all planes.
1793:
1794: mov dl,SEQ_DATA ;only DL changes; DH is kept from the VGA_BASE load above
1795: mov al,MM_ALL
1796: out dx,al ;data port only: assumes the Sequencer Index already selects Map Mask - TODO confirm
1797:
1798: ; Set up to copy the whole bytes via the latches.
1799:
1800: mov eax,culWholeBytesWidth ;EAX = bytes to copy per scan
1801: mov ebx,ulBlockHeight ;EBX = scans to copy (turned into a loop count by the macro below)
1802: mov edx,ulWholeScanDelta ;EDX = offset from end of one scan to start of next
1803:
1804: ; Calculate full start addresses.
1805:
1806: mov ecx,pdsurf
1807: mov esi,ulWholeBytesSrc
1808: add esi,[ecx].dsurf_pvBitmapStart2WindowS ;source offset -> source address
1809: mov edi,ulWholeBytesDest
1810: add edi,[ecx].dsurf_pvBitmapStart2WindowD ;dest offset -> dest address
1811:
1812: SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeLatchesEntry, \
1813: LOOP_UNROLL_SHIFT
1814: jmp ecx ;copy the whole bytes, entering the unrolled loop at the address in ECX
1815:
1816: ;-----------------------------------------------------------------------;
1817: ; Table of unrolled whole latched loop entry points.
1818: ;-----------------------------------------------------------------------;
1819:
1820: UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeLatchesEntry,WHOLE_LATCHES, \
1821: LOOP_UNROLL_COUNT
1822:
1823: ;-----------------------------------------------------------------------;
1824: ; Unrolled loop for copying a block of whole bytes via the latches.
1825: ;-----------------------------------------------------------------------;
1826:
1827: COPY_WHOLE_LATCHES macro ENTRY_LABEL,ENTRY_INDEX
1828: &ENTRY_LABEL&ENTRY_INDEX&:
1829: mov ecx,eax ;# of whole bytes to copy
1830: rep movsb ;copy the bytes via the latches
1831: add esi,edx ;point to next source scan
1832: add edi,edx ;point to next dest scan
1833: endm ;-----------------------------------;
1834:
1835: ; EAX = # of bytes to copy
1836: ; EBX = count of unrolled loop iterations
1837: ; EDX = offset from end of one scan's fill to start of next
1838: ; ESI = source address to copy from
1839: ; EDI = target address to copy to
1840:
1841: align 4
1842: whole_latches_loop:
1843: UNROLL_LOOP COPY_WHOLE_LATCHES,WHOLE_LATCHES,LOOP_UNROLL_COUNT
1844: dec ebx ;count down unrolled loop iterations
1845: jnz whole_latches_loop
1846:
1847: ; Remember where we left off, for next time.
1848:
1849: mov ecx,pdsurf
1850: sub esi,[ecx].dsurf_pvBitmapStart2WindowS ;address -> offset in bitmap
1851: mov ulWholeBytesSrc,esi
1852: sub edi,[ecx].dsurf_pvBitmapStart2WindowD ;address -> offset in bitmap
1853: mov ulWholeBytesDest,edi
1854:
1855: PLAIN_RET
1856:
1857:
1858: ;-----------------------------------------------------------------------;
1859: ; Copies a block of solid bytes from the source to the destination via
1860: ; the temp buffer. This should only be used by 1 R/W adapters, and then
1861: ; only when the source and dest are in different banks.
1862: ;
1863: ; All relevant bytes are copied from the source to a temp buffer that's an
1864: ; image of the source first. Then, we copy each of the four planes for one scan
1865: ; line from the temp buffer to the screen before going on to the next scan line.
1866: ; It would be faster to do all scans in one plane, then all in the next, and so
1867: ; on, but that would give nasty color effects from pixels that were changed in
1868: ; some planes but not in others. A compromise would be to do several scans at a
1869: ; pop per plane, as is done with the edge bytes; however, given that there can
1870: ; be 128 (or more) bytes across a single whole-bytes scan, if we do 16 scan
1871: ; lines per chunk, we're going to be performing up to 128*4*16 accesses per
1872: ; chunk; at an assumed 1 microsecond per access, that's about 8 milliseconds per
1873: ; chunk, or about 1/2 of a frame time. We're definitely going to see flicker or
1874: ; sparkles from partially updated bytes at that point, in my opinion. Another
1875: ; alternative would be to dynamically adjust the number of scans processed at a
1876: ; pop per plane, depending on the copy width, with more scans copied for
1877: ; narrower widths. For all but very narrow copies, though, it seems to me that
1878: ; the actual copy time would far outweigh the time for the OUTs to switch
1879: ; planes, and the return for some rather complex code would be marginal.
1880: ;
1881: ; It would be nice if we copied bytes a word or dword at a time. However, it
1882: ; becomes rather complex handling fractional words or dwords, especially when
1883: ; copying right-to-left, so this is left for LATER. I haven't unrolled these
1884: ; loops because of the possibility of this further word/dword optimization;
1885: ; no point in fine-tuning sub-optimal code.
1886: ;
1887: ; Input:
1888: ; Direction Flag set for desired direction of copy
1889: ; culWholeBytesWidth = # of bytes to copy across each scan line
1890: ; ulWholeScanDelta = distance to start of next scan from end of current
1891: ; ulBlockHeight = # of scans to copy
1892: ; ulWholeBytesSrc = start source offset in bitmap
1893: ; ulWholeBytesDest = start dest offset in bitmap
1894: ; ppTempPlane0 = pointer to pointer to plane 0 storage in temp buffer
1895: ; ppTempPlane3 = pointer to pointer to plane 3 storage in temp buffer
1896: ; Expects the source bank to be mapped in; source bank is mapped in on
1897: ; exit. Also reads ulNextScan, ulCurrentSrcScan, ulCurrentDestScan, and
1898: ; ulCurrentJustification.
1899: ; Output:
1900: ; Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
1901: ; scan processed
1902: ;-----------------------------------------------------------------------;
1903:
1904: align 4
1905: copy_whole_bytes_via_buffer:
1906:
1907: ; Calculate start source address from bitmap start address and offset within
1908: ; bitmap.
1909:
1910: mov ecx,pdsurf
1911: mov eax,ulWholeBytesSrc
1912: add eax,[ecx].dsurf_pvBitmapStart
1913: mov pSrcAddr,eax ;full source address; reloaded at the start of each plane
1914: sub eax,[ecx].dsurf_pvStart ;source address relative to dsurf_pvStart
1915: mov ulOffsetInBank,eax ;will come in handy because we treat the
1916: ; temp buffer as an image of the current
1917: ; bank
1918:
1919: ; First, copy all the bytes into the temporary buffer.
1920:
1921: ; Leave the GC Index pointing to the Read Map.
1922:
1923: mov edx,VGA_BASE + GRAF_ADDR
1924: mov al,GRAF_READ_MAP
1925: out dx,al ;from here on only the GC Data port is written to pick the plane
1926:
1927: mov eax,3 ;start by copying plane 3
1928: copy_whole_to_buffer_plane_loop:
1929: mov ebx,ulBlockHeight ;# of scans to copy
1930: mov esi,pSrcAddr ;source offset in screen
1931: mov edi,ppTempPlane0 ;table of per-plane temp buffer pointers, indexed by plane #
1932: mov edi,[edi+eax*4] ;pointer to current plane in temp buffer
1933: add edi,ulOffsetInBank ;dest for plane in temp buffer
1934:
1935: mov edx,VGA_BASE + GRAF_DATA
1936: out dx,al ;set Read Map to plane we're copying from.
1937:
1938: push eax ;remember plane index
1939: mov eax,ulWholeScanDelta ;offset to next scan
1940: mov edx,culWholeBytesWidth ;# of bytes per scan
1941: copy_whole_to_buffer_scan_loop:
1942: mov ecx,edx ;# of bytes per scan
1943: rep movsb ;copy the scan line to the temp buffer
1944: add esi,eax ;point to next source scan
1945: add edi,eax ;point to next dest scan
1946:
1947: dec ebx ;count down scan lines
1948: jnz copy_whole_to_buffer_scan_loop
1949:
1950: pop eax ;get back plane index
1951: dec eax ;count down planes
1952: jns copy_whole_to_buffer_plane_loop ;loop until the index goes negative (planes 3 down to 0)
1953:
1954: ; Remember where we left off, for next time.
1955:
1956: mov ebx,pdsurf
1957: sub esi,[ebx].dsurf_pvBitmapStart ;address -> offset in bitmap
1958: mov ulWholeBytesSrc,esi
1959:
1960:
1961: ; Now copy the temp buffer to the screen.
1962:
1963: ; Map in the destination bank, so we can read/write to it and let the Bit Mask
1964: ; work.
1965:
1966: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
1967: <ebx,ulCurrentDestScan,ulCurrentJustification>
1968:
1969: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
1970: ; until now to calculate this, because the dest bank wasn't mapped earlier).
1971:
1972: mov eax,ulWholeBytesDest
1973: add eax,[ebx].dsurf_pvBitmapStart ;EBX (pdsurf) is relied on to survive the bank call
1974: mov pDestAddr,eax
1975:
1976: ; Set the bit mask to enable all bits.
1977:
1978: mov edx,VGA_BASE + GRAF_ADDR ;EDX is reloaded in full after the bank call
1979: mov eax,(0ffh shl 8) + GRAF_BIT_MASK ;AL = GC index, AH = data (0FFh)
1980: out dx,ax
1981:
1982: mov dl,SEQ_DATA ;leave DX pointing to the SC Data reg
1983:
1984: ; Set up to copy the whole bytes from the buffer.
1985:
1986: mov eax,ulBlockHeight ;# of scans to copy
1987: mov culTempCount,eax
1988:
1989: copy_whole_from_buffer_scan_loop:
1990:
1991: mov ebx,ppTempPlane3 ;point to plane 3's temp buffer offset
1992: mov al,MM_C3 ;start by copying plane 3
1993:
1994: copy_whole_from_buffer_plane_loop:
1995:
1996: ; Set Map Mask to enable writes to the plane we're copying.
1997:
1998: out dx,al ;data port only: assumes the Sequencer Index already selects Map Mask - TODO confirm
1999:
2000: ; Select the corresponding plane from the temp buffer.
2001:
2002: mov esi,[ebx] ;point to plane start in temp buffer
2003: add esi,ulOffsetInBank ;point to current scan start in temp buffer
2004: mov edi,pDestAddr ;point to destination start
2005:
2006: mov ecx,culWholeBytesWidth ;# of whole bytes to copy
2007: rep movsb ;copy the bytes from the buffer to the screen
2008:
2009: ; Do next plane, if any.
2010:
2011: sub ebx,4 ;point to next temp buffer plane ptr
2012: shr al,1 ;advance to next plane
2013: jnz copy_whole_from_buffer_plane_loop ;mask reaches 0 after plane 0 has been done
2014:
2015: ; Remember where we left off, for next scan.
2016:
2017: add edi,ulWholeScanDelta ;point to next dest scan
2018: mov pDestAddr,edi
2019: mov eax,ulNextScan ;clobbers AL; it is reloaded with MM_C3 at the top of the scan loop
2020: add ulOffsetInBank,eax ;next scan's start in temp buffer,
2021: ; relative to start of plane's storage
2022:
2023: ; Count down scan lines.
2024:
2025: dec culTempCount
2026: jnz copy_whole_from_buffer_scan_loop
2027:
2028: ; Remember where we left off, for next time.
2029:
2030: mov ebx,pdsurf
2031: sub edi,[ebx].dsurf_pvBitmapStart ;address -> offset in bitmap
2032: mov ulWholeBytesDest,edi
2033:
2034: ; Put back the original source bank.
2035:
2036: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
2037: <ebx,ulCurrentSrcScan,ulCurrentJustification>
2038:
2039: PLAIN_RET
2040:
2041:
2042: ;-----------------------------------------------------------------------;
2043: ; Copies a strip of left edge bytes from the source to the destination,
2044: ; assuming the source and the destination are both readable and
2045: ; writable. Can only be used by 2 R/W window banking, or by unbanked
2046: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
2047: ; buffer when the source and dest are in different banks. Processes up to
2048: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
2049: ; flicker.
2050: ;
2051: ; Input:
2052: ; ulNextScan = width of scan, in bytes
2053: ; ulBlockHeight = # of scans to copy
2054: ; ulLeftEdgeSrc = start source offset in bitmap
2055: ; ulLeftEdgeDest = start dest offset in bitmap
2056: ; jLeftMask = left edge clip mask
2057: ;
2058: ; Output:
2059: ; Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
2060: ; scan processed. EAX, EBX, ECX, EDX, ESI, and EDI are not preserved.
2061: ;-----------------------------------------------------------------------;
2062:
2063: align 4
2064: copy_left_edge:
2065:
2066: ; Calculate start source and dest addresses from bitmap start addresses and
2067: ; offsets within bitmap.
2068:
2069: mov ecx,pdsurf
2070: mov esi,ulLeftEdgeSrc
2071: add esi,[ecx].dsurf_pvBitmapStart2WindowS ;source offset -> source address
2072: mov edi,ulLeftEdgeDest
2073: add edi,[ecx].dsurf_pvBitmapStart2WindowD ;dest offset -> dest address
2074:
2075: ; Copy the edge.
2076:
2077: mov ah,byte ptr jLeftMask ;clip mask for this edge
2078: call copy_edge ;returns ESI/EDI pointing past the last scan copied
2079:
2080: ; Remember where we left off, for next time.
2081:
2082: mov ecx,pdsurf
2083: sub esi,[ecx].dsurf_pvBitmapStart2WindowS ;address -> offset in bitmap
2084: mov ulLeftEdgeSrc,esi
2085: sub edi,[ecx].dsurf_pvBitmapStart2WindowD ;address -> offset in bitmap
2086: mov ulLeftEdgeDest,edi
2087:
2088: PLAIN_RET
2089:
2090:
2091: ;-----------------------------------------------------------------------;
2092: ; Copies a strip of right edge bytes from the source to the destination,
2093: ; assuming the source and the destination are both readable and
2094: ; writable. Can only be used by 2 R/W window banking, or by unbanked
2095: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
2096: ; buffer when the source and dest are in different banks. Processes up to
2097: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
2098: ; flicker.
2099: ;
2100: ; Input:
2101: ; ulNextScan = width of scan, in bytes
2102: ; ulBlockHeight = # of scans to copy
2103: ; ulRightEdgeSrc = start source offset in bitmap
2104: ; ulRightEdgeDest = start dest offset in bitmap
2105: ; jRightMask = right edge clip mask
2106: ;
2107: ; Output:
2108: ; Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
2109: ; scan processed. EAX, EBX, ECX, EDX, ESI, and EDI are not preserved.
2110: ;-----------------------------------------------------------------------;
2111:
2112: align 4
2113: copy_right_edge:
2114:
2115: ; Calculate start source and dest addresses from bitmap start addresses and
2116: ; offsets within bitmap.
2117:
2118: mov ecx,pdsurf
2119: mov esi,ulRightEdgeSrc
2120: add esi,[ecx].dsurf_pvBitmapStart2WindowS ;source offset -> source address
2121: mov edi,ulRightEdgeDest
2122: add edi,[ecx].dsurf_pvBitmapStart2WindowD ;dest offset -> dest address
2123:
2124: ; Copy the edge.
2125:
2126: mov ah,byte ptr jRightMask ;clip mask for this edge
2127: call copy_edge ;returns ESI/EDI pointing past the last scan copied
2128:
2129: ; Remember where we left off, for next time.
2130:
2131: mov ecx,pdsurf
2132: sub esi,[ecx].dsurf_pvBitmapStart2WindowS ;address -> offset in bitmap
2133: mov ulRightEdgeSrc,esi
2134: sub edi,[ecx].dsurf_pvBitmapStart2WindowD ;address -> offset in bitmap
2135: mov ulRightEdgeDest,edi
2136:
2137: PLAIN_RET
2138:
2139:
2140: ;-----------------------------------------------------------------------;
2141: ; Copies an edge from the source to the destination on the screen.
2142: ; Entry:
2143: ; AH = bit mask setting for edge
2144: ; ESI = source address
2145: ; EDI = destination address
2146: ; ulBlockHeight = # of bytes to copy per plane
2147: ; ulNextScan = scan width
2148: ; Source readable, and destination readable and writable
2149: ; Exit:
2150: ; ESI = next source address
2151: ; EDI = next destination address
2152: ; (EAX, EBX, ECX, and EDX are overwritten; pSrcAddr and pDestAddr are used as scratch)
2153: ; Preserved: EBP
2154: ;-----------------------------------------------------------------------;
2155:
2156: align 4
2157: copy_edge:
2158: mov pSrcAddr,esi ;chunk start addresses; every plane of a chunk restarts from these
2159: mov pDestAddr,edi
2160:
2161: ; Set the clip mask for this edge.
2162:
2163: mov edx,VGA_BASE + GRAF_ADDR
2164: mov al,GRAF_BIT_MASK
2165: out dx,ax ;AL = GC index (Bit Mask), AH = mask value
2166:
2167: ; Leave the GC Index pointing to the Read Map.
2168:
2169: mov al,GRAF_READ_MAP
2170: out dx,al
2171:
2172: mov ecx,offset copy_edge_rw_full_chunk
2173: ;entry point into unrolled loop to copy first
2174: ; chunk, assuming it's a full chunk
2175: mov ebx,ulBlockHeight
2176:
2177: ; Copy the edge in a series of chunks.
2178:
2179: copy_edge_chunk_loop:
2180:
2181: sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
2182: ; a full chunk
2183: jge short @F ;do a full chunk
2184: add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
2185: ; scans
2186: mov ecx,pfnCopyEdgeRWEntry[-4][ebx*4]
2187: ;entry point into unrolled loop to copy desired
2188: ; chunk size (table starts at 1 byte, hence the -4)
2189: sub ebx,ebx ;no scans after this
2190: @@:
2191: push ebx ;remember remaining scan count
2192:
2193: mov ah,MM_C3 ;start by copying plane 3
2194: mov ebx,ulNextScan
2195:
2196: copy_edge_plane_loop:
2197:
2198: ; Set Map Mask to enable writes to plane we're copying.
2199:
2200: mov al,ah ;AH keeps the plane mask; the unrolled copy uses AL for data
2201: mov dl,SEQ_DATA
2202: out dx,al ;data port only: assumes the Sequencer Index already selects Map Mask - TODO confirm
2203:
2204: ; Set Read Map to same plane.
2205:
2206: shr al,1 ;map plane into ReadMask (mask 8,4,2,1 -> 4,2,1,0)
2207: cmp al,100b ;set Carry if not C3 (plane 3)
2208: adc al,-1 ;sub 1 only if C3 (4 -> 3; 2,1,0 unchanged)
2209: mov dl,GRAF_DATA
2210: out dx,al
2211:
2212: mov esi,pSrcAddr
2213: mov edi,pDestAddr
2214:
2215: jmp ecx ;copy this plane's edge bytes for the chunk
2216:
2217:
2218: ;-----------------------------------------------------------------------;
2219: ; Table of unrolled edge loop entry points. First entry point is to copy
2220: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
2221: ;-----------------------------------------------------------------------;
2222:
2223: pfnCopyEdgeRWEntry label dword
2224: INDEX = 1
2225: rept EDGE_CHUNK_SIZE
2226: DEFINE_DD EDGE_RW,%INDEX
2227: INDEX = INDEX+1
2228: endm
2229:
2230:
2231: ;-----------------------------------------------------------------------;
2232: ; Unrolled loop for copying a strip of edge bytes, with source and
2233: ; destination both readable and writable.
2234: ;-----------------------------------------------------------------------;
2235:
2236: COPY_EDGE_RW macro ENTRY_LABEL,ENTRY_INDEX
2237: &ENTRY_LABEL&ENTRY_INDEX&:
2238: mov al,[esi] ;get byte to copy
2239: add esi,ebx ;point to next source scan
2240: xchg [edi],al ;read before write so Bit Mask can operate
2241: add edi,ebx ;point to next dest scan
2242: endm ;-----------------------------------;
2243:
2244: ; EBX = scan line width
2245: ; ESI = source address to copy from
2246: ; EDI = target address to copy to
2247: ; Bit Mask set to desired clipping
2248: ; Read Map and Map Mask set to enable the desired plane for read and write
2249:
2250: align 4
2251: copy_edge_rw_full_chunk:
2252: UNROLL_LOOP COPY_EDGE_RW,EDGE_RW,EDGE_CHUNK_SIZE
2253:
2254: ; Do next plane within this chunk, if any.
2255:
2256: shr ah,1 ;advance to next plane
2257: jnz copy_edge_plane_loop ;mask reaches 0 after plane 0 has been done
2258:
2259: ; Remember where we left off, for the next chunk.
2260:
2261: mov pSrcAddr,esi
2262: mov pDestAddr,edi
2263:
2264: ; Do next chunk within this bank block, if any.
2265:
2266: pop ebx ;retrieve remaining scan count
2267: and ebx,ebx ;any scans left?
2268: jnz copy_edge_chunk_loop ;more scans to do
2269:
2270: PLAIN_RET
2271:
2272:
2273: ;-----------------------------------------------------------------------;
2274: ; Copies a strip of left edge bytes from the source to the destination
2275: ; through an intermediate RAM buffer. This is the approach required by
2276: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
2277: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
2278: ; cause flicker.
2279: ;
2280: ; Input:
2281: ; ulNextScan = width of scan, in bytes
2282: ; ulBlockHeight = # of scans to copy
2283: ; ulLeftEdgeSrc = start source offset in bitmap
2284: ; ulLeftEdgeDest = start dest offset in bitmap
2285: ; jLeftMask = left edge clip mask
2286: ; pTempPlane = pointer to temp storage buffer
2287: ; ulCurrentSrcScan = scan used to map in source bank
2288: ; ulCurrentDestScan = scan used to map in dest bank
2289: ; ulCurrentJustification = justification used to map in current bank
2290: ; For 1 R/W adapters, expects the source bank to be mapped in; banking
2291: ; is the same at exit as it was at entry
2292: ;
2293: ; Output:
2294: ; Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
2295: ; scan processed
2296: ;
2297: ; Note that this should never be called for an unbanked or 2 R/W adapter,
2298: ; because the source and dest are always both addressable simultaneously then.
2299: ;-----------------------------------------------------------------------;
2300:
2301: align 4
2302: copy_left_edge_via_buffer:
2303:
2304: ; First, copy all the bytes into the temporary buffer.
2305:
2306: ; Calculate start source address from bitmap start address and
2307: ; offset within bitmap (the dest address is calculated later).
2308:
2309: mov ecx,pdsurf
2310: mov esi,ulLeftEdgeSrc
2311: add esi,[ecx].dsurf_pvBitmapStart2WindowS ;source offset -> source address
2312:
2313: ; Copy the edge from the source to the temp buffer.
2314:
2315: call copy_screen_to_buffered_edge ;returns ESI = next source address
2316:
2317: ; Remember where we left off, for next time
2318:
2319: mov ebx,pdsurf
2320: sub esi,[ebx].dsurf_pvBitmapStart2WindowS ;address -> offset in bitmap
2321: mov ulLeftEdgeSrc,esi
2322:
2323: ; Now copy the temp buffer to the screen.
2324:
2325: ; Map in the source bank to match the destination, so we can read/write to it
2326: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
2327: ; mapped by this call, which is fine.
2328:
2329: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2330: <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
2331:
2332: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
2333: ; until now to calculate this, because the dest bank wasn't mapped earlier).
2334:
2335: mov edi,ulLeftEdgeDest
2336: add edi,[ebx].dsurf_pvBitmapStart2WindowD ;EBX (pdsurf) is relied on to survive the bank call
2337:
2338: mov ah,byte ptr jLeftMask ;clip mask for this edge
2339: call copy_buffered_edge_to_screen ;do the copy
2340:
2341: ; Remember where we left off, for next time.
2342:
2343: mov ebx,pdsurf
2344: sub edi,[ebx].dsurf_pvBitmapStart2WindowD ;address -> offset in bitmap
2345: mov ulLeftEdgeDest,edi
2346:
2347: ; Put back the original source bank. Note that on a 1 R/W adapter, both banks
2348: ; will be mapped by this call, which is fine.
2349:
2350: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2351: <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
2352:
2353: PLAIN_RET
2354:
2355:
2356: ;-----------------------------------------------------------------------;
2357: ; Copies a strip of right edge bytes from the source to the destination
2358: ; through an intermediate RAM buffer. This is the approach required by
2359: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
2360: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
2361: ; cause flicker.
2362: ;
2363: ; Input:
2364: ; ulNextScan = width of scan, in bytes
2365: ; ulBlockHeight = # of scans to copy
2366: ; ulRightEdgeSrc = start source offset in bitmap
2367: ; ulRightEdgeDest = start dest offset in bitmap
2368: ; jRightMask = right edge clip mask
2369: ; pTempPlane = pointer to temp storage buffer
2370: ; ulCurrentSrcScan = scan used to map in source bank
2371: ; ulCurrentDestScan = scan used to map in dest bank
2372: ; ulCurrentJustification = justification used to map in current bank
2373: ; For 1 R/W adapters, expects the source bank to be mapped in; banking
2374: ; is the same at exit as it was at entry
2375: ;
2376: ; Output:
2377: ; Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
2378: ; scan processed
2379: ;
2380: ; Note that this should never be called for an unbanked or 2 R/W adapter,
2381: ; because the source and dest are always both addressable simultaneously then.
2382: ;-----------------------------------------------------------------------;
2383:
2384: align 4
2385: copy_right_edge_via_buffer:
2386:
2387: ; First, copy all the bytes into the temporary buffer.
2388:
2389: ; Calculate start source address from bitmap start address and
2390: ; offset within bitmap (the dest address is calculated later).
2391:
2392: mov ecx,pdsurf
2393: mov esi,ulRightEdgeSrc
2394: add esi,[ecx].dsurf_pvBitmapStart2WindowS ;source offset -> source address
2395:
2396: ; Copy the edge from the source to the temp buffer.
2397:
2398: call copy_screen_to_buffered_edge ;returns ESI = next source address
2399:
2400: ; Remember where we left off, for next time
2401:
2402: mov ebx,pdsurf
2403: sub esi,[ebx].dsurf_pvBitmapStart2WindowS ;address -> offset in bitmap
2404: mov ulRightEdgeSrc,esi
2405:
2406: ; Now copy the temp buffer to the screen.
2407:
2408: ; Map in the source bank to match the destination, so we can read/write to it
2409: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
2410: ; mapped by this call, which is fine.
2411:
2412: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2413: <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
2414:
2415: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
2416: ; until now to calculate this, because the dest bank wasn't mapped earlier).
2417:
2418: mov edi,ulRightEdgeDest
2419: add edi,[ebx].dsurf_pvBitmapStart2WindowD ;EBX (pdsurf) is relied on to survive the bank call
2420:
2421: mov ah,byte ptr jRightMask ;clip mask for this edge
2422: call copy_buffered_edge_to_screen ;do the copy
2423:
2424: ; Remember where we left off, for next time.
2425:
2426: mov ebx,pdsurf
2427: sub edi,[ebx].dsurf_pvBitmapStart2WindowD ;address -> offset in bitmap
2428: mov ulRightEdgeDest,edi
2429:
2430: ; Put back the original source bank. Note that on a 1 R/W adapter, both banks
2431: ; will be mapped by this call, which is fine.
2432:
2433: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2434: <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
2435:
2436: PLAIN_RET
2437:
2438:
2439: ;-----------------------------------------------------------------------;
2440: ; Copies an edge from the temp buffer to the screen.
2441: ; Entry:
2442: ; AH = bit mask setting for edge
2443: ; (DX need not be set up by the caller; the port address is loaded here)
2444: ; EDI = destination address
2445: ; pTempPlane = temp buffer from which to copy
2446: ; ulBlockHeight = # of bytes to copy per plane
2447: ; ulNextScan = scan width
2448: ; Source and dest banks both pointing to destination
2449: ; Exit:
2450: ; EDI = next destination address
2451: ; (EAX, EBX, ECX, EDX, and ESI are overwritten; pDestAddr and pTempEntry are used as scratch)
2452: ; Preserved: EBP
2453: ;-----------------------------------------------------------------------;
2454:
2455: align 4
2456: copy_buffered_edge_to_screen:
2457:
2458: mov pDestAddr,edi
2459:
2460: mov edx,VGA_BASE + GRAF_ADDR ;load all of EDX: callers make a C-convention bank call first, which need not preserve EDX
2461: mov al,GRAF_BIT_MASK
2462: out dx,ax ;AL = GC index (Bit Mask), AH = mask value
2463:
2464: mov pTempEntry,offset copy_edge_from_buf_full_chunk
2465: ;entry point into unrolled loop to copy first
2466: ; chunk, assuming it's a full chunk
2467: mov ecx,pTempPlane ;temp buffer start (copy from here)
2468: mov ebx,ulBlockHeight ;total # of scans to copy
2469:
2470: ; Copy the edge in a series of chunks, to avoid flicker.
2471:
2472: copy_from_buffer_chunk_loop:
2473:
2474: sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
2475: ; a full chunk
2476: jge short @F ;do a full chunk
2477: add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
2478: ; scans
2479: mov ebx,pfnCopyEdgesFromBufferEntry[-4][ebx*4] ;table starts at 1 byte, hence the -4
2480: mov pTempEntry,ebx ;entry point into unrolled loop to copy desired
2481: ; chunk size
2482: sub ebx,ebx ;no scans after this
2483: @@:
2484: push ebx ;remember remaining scan count
2485:
2486: mov al,MM_C3 ;start by copying plane 3
2487: mov ebx,ulNextScan
2488:
2489: push ecx ;remember current temp buffer start
2490:
2491: mov dl,SEQ_DATA ;leave DX pointing to Sequencer Data reg
2492: copy_from_buffer_plane_loop:
2493:
2494: ; Set Map Mask to enable writes to plane we're copying.
2495:
2496: out dx,al ;data port only: assumes the Sequencer Index already selects Map Mask - TODO confirm
2497:
2498: ; Select this plane's bytes in the temp buffer; the planes are stored
2499: ; consecutively, ulBlockHeight bytes apart, starting with plane 3.
2500:
2501: mov esi,ecx ;point to current plane's source byte
2502: add ecx,ulBlockHeight ;point to next plane's source byte
2503:
2504: mov edi,pDestAddr
2505:
2506: jmp pTempEntry ;copy this plane's edge bytes for the chunk
2507:
2508:
2509: ;-----------------------------------------------------------------------;
2510: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
2511: ; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
2512: ; bytes.
2513: ;-----------------------------------------------------------------------;
2514:
2515: pfnCopyEdgesFromBufferEntry label dword
2516: INDEX = 1
2517: rept EDGE_CHUNK_SIZE
2518: DEFINE_DD EDGE_FROM_BUFFER,%INDEX
2519: INDEX = INDEX+1
2520: endm
2521:
2522:
2523: ;-----------------------------------------------------------------------;
2524: ; Unrolled loop for copying a strip of edge bytes from the temp buffer.
2525: ;-----------------------------------------------------------------------;
2526:
2527: COPY_EDGE_FROM_BUFFER macro ENTRY_LABEL,ENTRY_INDEX
2528: &ENTRY_LABEL&ENTRY_INDEX&:
2529: mov ah,[esi] ;get byte to copy (AL still holds the plane mask)
2530: inc esi ;point to next source (temp buffer) byte
2531: xchg [edi],ah ;read before write so Bit Mask can operate
2532: add edi,ebx ;point to next dest (screen) scan
2533: endm ;-----------------------------------;
2534:
2535: ; EBX = scan line width
2536: ; ESI = source address to copy from (temp buffer)
2537: ; EDI = target address to copy to (screen)
2538: ; Bit Mask set to desired clipping
2539: ; Map Mask set to enable the desired plane for write
2540:
2541: align 4
2542: copy_edge_from_buf_full_chunk:
2543: UNROLL_LOOP COPY_EDGE_FROM_BUFFER,EDGE_FROM_BUFFER,EDGE_CHUNK_SIZE
2544:
2545: ; Do next plane within this chunk, if any.
2546:
2547: shr al,1 ;advance to next plane
2548: jnz copy_from_buffer_plane_loop ;mask reaches 0 after plane 0 has been done
2549:
2550: ; Remember where we left off, for next chunk.
2551:
2552: mov pDestAddr,edi
2553: pop ecx ;get back current temp buffer start
2554: add ecx,EDGE_CHUNK_SIZE ;point to next chunk's start
2555:
2556: ; Do next chunk within this bank block, if any.
2557:
2558: pop ebx ;retrieve remaining scan count
2559: and ebx,ebx ;any scans left?
2560: jnz copy_from_buffer_chunk_loop ;more scans to do
2561:
2562: PLAIN_RET
2563:
2564:
2565: ;-----------------------------------------------------------------------;
2566: ; Copies an edge from the screen to the temp buffer.
2567: ; Entry:
2568: ; ESI = source address
2569: ; pTempPlane = temp buffer to which to copy
2570: ; ulBlockHeight = # of bytes to copy per plane
2571: ; ulNextScan = scan width
2572: ; Source bank pointing to source
2573: ; Exit:
2574: ; DH = VGA_BASE SHR 8
2575: ; ESI = next source address
2576: ; (EAX, EBX, ECX, DL, and EDI are overwritten; pSrcAddr, culTempCount, and pTempEntry are used as scratch)
2577: ; Preserved: EBP
2578: ;-----------------------------------------------------------------------;
2579:
2580: align 4
2581: copy_screen_to_buffered_edge:
2582:
2583: mov pSrcAddr,esi ;every plane restarts from this source address
2584:
2585: ; Leave the GC Index pointing to the Read Map.
2586:
2587: mov edx,VGA_BASE + GRAF_ADDR
2588: mov al,GRAF_READ_MAP
2589: out dx,al
2590:
2591: mov ebx,ulBlockHeight
2592: SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry, \
2593: LOOP_UNROLL_SHIFT
2594: mov culTempCount,ebx ;remember # of unrolled loop iterations
2595: mov pTempEntry,ecx ;ditto for entry point
2596:
2597: mov ecx,ulNextScan ;ECX = scan width, used to step the source pointer
2598: mov edi,pTempPlane ;dest offset in temp buffer for plane 3 bytes.
2599: ;The rest of the planes are stored
2600: ; consecutively
2601: mov al,3 ;start by copying plane 3
2602: mov dl,GRAF_DATA ;leave DX pointing to GC Data reg
2603: copy_edge_to_buffer_plane_loop:
2604: mov esi,pSrcAddr ;source pointer
2605:
2606: out dx,al ;set Read Map to plane we're copying from.
2607:
2608: mov ebx,culTempCount ;# of unrolled loop iterations
2609: jmp pTempEntry ;copy the edge bytes for this plane to the
2610: ; temp buffer
2611:
2612: ;-----------------------------------------------------------------------;
2613: ; Table of unrolled edge copy to temp buffer loop entry points.
2614: ;-----------------------------------------------------------------------;
2615:
2616: UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry,EDGE_TO_TEMP, \
2617: LOOP_UNROLL_COUNT
2618:
2619: ;-----------------------------------------------------------------------;
2620: ; Unrolled loop for copying edge bytes to the temp buffer.
2621: ;-----------------------------------------------------------------------;
2622:
2623: COPY_EDGE_TO_TEMP macro ENTRY_LABEL,ENTRY_INDEX
2624: &ENTRY_LABEL&ENTRY_INDEX&:
2625: mov ah,[esi] ;get byte to copy (AL holds the plane #, so data goes through AH)
2626: add esi,ecx ;point to next source scan
2627: mov [edi],ah ;copy byte to temp buffer
2628: inc edi ;point to next temp buffer byte
2629: endm ;-----------------------------------;
2630:
2631: ; EBX = count of unrolled loop iterations
2632: ; ECX = scan line width (offset from one scan's edge byte to the next scan's)
2633: ; ESI = source address to copy from (screen)
2634: ; EDI = target address to copy to (temp buffer)
2635: ; Read Map set to enable the desired plane for read
2636:
2637: align 4
2638: edge_to_buffer_loop:
2639: UNROLL_LOOP COPY_EDGE_TO_TEMP,EDGE_TO_TEMP,LOOP_UNROLL_COUNT
2640: dec ebx ;count down unrolled loop iterations
2641: jnz edge_to_buffer_loop
2642:
2643: dec al ;count down planes
2644: jns copy_edge_to_buffer_plane_loop ;loop until the plane # goes negative (planes 3 down to 0)
2645:
2646: PLAIN_RET
2647:
2648:
2649: ;-----------------------------------------------------------------------;
2650:
2651: endProc vAlignedSrcCopy
2652:
2653: _TEXT$03 ends
2654:
2655: end
2656:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.